From d8678e02ecec9375bec1dcf1388c6fc8b4ce3ad2 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Tue, 9 Jun 2020 10:10:56 +0200 Subject: BLI: generally improve C++ data structures The main focus here was to improve the docs significantly. Furthermore, I reimplemented `Set`, `Map` and `VectorSet`. They are now (usually) faster, simpler and more customizable. I also rewrote `Stack` to make it more efficient by avoiding unnecessary copies. Thanks to everyone who helped with constructive feedback. Approved by brecht and sybren. Differential Revision: https://developer.blender.org/D7931 --- source/blender/blenlib/BLI_allocator.hh | 45 +- source/blender/blenlib/BLI_array.hh | 158 ++- source/blender/blenlib/BLI_array_ref.hh | 239 ++-- source/blender/blenlib/BLI_dot_export.hh | 3 +- source/blender/blenlib/BLI_hash.hh | 107 +- source/blender/blenlib/BLI_hash_tables.hh | 350 ++++++ source/blender/blenlib/BLI_index_range.hh | 38 +- source/blender/blenlib/BLI_linear_allocator.hh | 6 +- source/blender/blenlib/BLI_listbase_wrapper.hh | 6 +- source/blender/blenlib/BLI_map.hh | 1289 +++++++++++++------- source/blender/blenlib/BLI_map_slots.hh | 361 ++++++ source/blender/blenlib/BLI_math_bits.h | 1 + source/blender/blenlib/BLI_memory_utils.hh | 189 ++- source/blender/blenlib/BLI_open_addressing.hh | 316 ----- source/blender/blenlib/BLI_optional.hh | 10 - source/blender/blenlib/BLI_probing_strategies.hh | 250 ++++ source/blender/blenlib/BLI_set.hh | 872 ++++++++----- source/blender/blenlib/BLI_set_slots.hh | 415 +++++++ source/blender/blenlib/BLI_stack.hh | 345 +++++- source/blender/blenlib/BLI_string_map.hh | 540 -------- source/blender/blenlib/BLI_string_ref.hh | 129 +- source/blender/blenlib/BLI_utility_mixins.hh | 6 + source/blender/blenlib/BLI_vector.hh | 413 +++++-- source/blender/blenlib/BLI_vector_set.hh | 814 +++++++----- source/blender/blenlib/BLI_vector_set_slots.hh | 171 +++ source/blender/blenlib/CMakeLists.txt | 7 +- source/blender/blenlib/intern/BLI_index_range.cc | 2 
+- source/blender/blenlib/intern/math_bits_inline.c | 11 +- .../blender/depsgraph/intern/node/deg_node_id.cc | 7 + source/blender/depsgraph/intern/node/deg_node_id.h | 14 +- source/blender/functions/FN_cpp_type.hh | 27 +- 31 files changed, 4852 insertions(+), 2289 deletions(-) create mode 100644 source/blender/blenlib/BLI_hash_tables.hh create mode 100644 source/blender/blenlib/BLI_map_slots.hh delete mode 100644 source/blender/blenlib/BLI_open_addressing.hh create mode 100644 source/blender/blenlib/BLI_probing_strategies.hh create mode 100644 source/blender/blenlib/BLI_set_slots.hh delete mode 100644 source/blender/blenlib/BLI_string_map.hh create mode 100644 source/blender/blenlib/BLI_vector_set_slots.hh (limited to 'source') diff --git a/source/blender/blenlib/BLI_allocator.hh b/source/blender/blenlib/BLI_allocator.hh index c52db4aab53..cf81b024277 100644 --- a/source/blender/blenlib/BLI_allocator.hh +++ b/source/blender/blenlib/BLI_allocator.hh @@ -19,14 +19,23 @@ /** \file * \ingroup bli * - * This file offers a couple of memory allocators that can be used with containers such as Vector - * and Map. Note that these allocators are not designed to work with standard containers like + * An `Allocator` can allocate and deallocate memory. It is used by containers such as BLI::Vector. + * The allocators defined in this file do not work with standard library containers such as * std::vector. * - * Also see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2271.html for why the standard - * allocators are not a good fit applications like Blender. The current implementations in this - * file are fairly simple still, more complexity can be added when necessary. For now they do their - * job good enough. 
+ * Every allocator has to implement two methods: + * void *allocate(size_t size, size_t alignment, const char *name); + * void deallocate(void *ptr); + * + * We don't use the std::allocator interface, because it does more than is really necessary for an + * allocator and has some other quirks. It mixes the concepts of allocation and construction. It is + * essentially forced to be a template, even though the allocator should not care about the type. + * Also see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2271.html#std_allocator. Some + * of these aspects have been improved in new versions of C++, so we might have to reevaluate the + * strategy later on. + * + * The allocator interface dictated by this file is very simplistic, but for now that is all we + * need. More complexity can be added when it seems necessary. */ #include @@ -40,18 +49,14 @@ namespace BLI { /** - * Use Blenders guarded allocator (aka MEM_malloc). This should always be used except there is a + * Use Blender's guarded allocator (aka MEM_*). This should always be used except there is a * good reason not to use it. */ class GuardedAllocator { public: - void *allocate(uint size, const char *name) - { - return MEM_mallocN(size, name); - } - - void *allocate_aligned(uint size, uint alignment, const char *name) + void *allocate(size_t size, size_t alignment, const char *name) { + /* Should we use MEM_mallocN, when alignment is small? If yes, how small must alignment be? */ return MEM_mallocN_aligned(size, alignment, name); } @@ -62,8 +67,9 @@ class GuardedAllocator { }; /** - * This is a simple wrapper around malloc/free. Only use this when the GuardedAllocator cannot be - * used. This can be the case when the allocated element might live longer than Blenders Allocator. + * This is a wrapper around malloc/free. Only use this when the GuardedAllocator cannot be + * used. This can be the case when the allocated memory might live longer than Blender's + * allocator. 
For example, when the memory is owned by a static variable. */ class RawAllocator { private: @@ -72,14 +78,7 @@ class RawAllocator { }; public: - void *allocate(uint size, const char *UNUSED(name)) - { - void *ptr = malloc(size + sizeof(MemHead)); - ((MemHead *)ptr)->offset = sizeof(MemHead); - return POINTER_OFFSET(ptr, sizeof(MemHead)); - } - - void *allocate_aligned(uint size, uint alignment, const char *UNUSED(name)) + void *allocate(size_t size, size_t alignment, const char *UNUSED(name)) { BLI_assert(is_power_of_2_i((int)alignment)); void *ptr = malloc(size + alignment + sizeof(MemHead)); diff --git a/source/blender/blenlib/BLI_array.hh b/source/blender/blenlib/BLI_array.hh index 9dd8341aa76..09eeb321abf 100644 --- a/source/blender/blenlib/BLI_array.hh +++ b/source/blender/blenlib/BLI_array.hh @@ -19,8 +19,23 @@ /** \file * \ingroup bli * - * This is a container that contains a fixed size array. Note however, the size of the array is not - * a template argument. Instead it can be specified at the construction time. + * A `BLI::Array` is a container for a fixed size array the size of which is NOT known at + * compile time. + * + * If the size is known at compile time, `std::array` should be used instead. + * + * BLI::Array should usually be used instead of BLI::Vector whenever the number of elements is + * known at construction time. Note however, that BLI::Array will default construct all elements + * when initialized with the size-constructor. For trivial types, this does nothing. In all other + * cases, this adds overhead. If this becomes a problem, a different constructor which does not do + * default construction can be added. + * + * A main benefit of using Array over Vector is that it expresses the intent of the developer + * better. It indicates that the size of the data structure is not expected to change. Furthermore, + * you can be more certain that an array does not overallocate. 
+ * + * BLI::Array supports small object optimization to improve performance when the size turns out to + * be small at run-time. */ #include "BLI_allocator.hh" @@ -31,42 +46,83 @@ namespace BLI { -template +template< + /** + * The type of the values stored in the array. + */ + typename T, + /** + * The number of values that can be stored in the array, without doing a heap allocation. + * + * When T is large, the small buffer optimization is disabled by default to avoid large + * unexpected allocations on the stack. It can still be enabled explicitely though. + */ + uint InlineBufferCapacity = (sizeof(T) < 100) ? 4 : 0, + /** + * The allocator used by this array. Should rarely be changed, except when you don't want that + * MEM_* functions are used internally. + */ + typename Allocator = GuardedAllocator> class Array { private: + /** The beginning of the array. It might point into the inline buffer. */ T *m_data; + + /** Number of elements in the array. */ uint m_size; + + /** Used for allocations when the inline buffer is too small. */ Allocator m_allocator; - AlignedBuffer m_inline_storage; + + /** A placeholder buffer that will remain uninitialized until it is used. */ + AlignedBuffer m_inline_buffer; public: + /** + * By default an empty array is created. + */ Array() { - m_data = this->inline_storage(); + m_data = this->inline_buffer(); m_size = 0; } + /** + * Create a new array that contains copies of all values. + */ Array(ArrayRef values) { m_size = values.size(); m_data = this->get_buffer_for_size(values.size()); - uninitialized_copy_n(values.begin(), m_size, m_data); + uninitialized_copy_n(values.data(), m_size, m_data); } + /** + * Create a new array that contains copies of all values. + */ Array(const std::initializer_list &values) : Array(ArrayRef(values)) { } + /** + * Create a new array with the given size. All values will be default constructed. For trivial + * types like int, default construction does nothing. 
+ * + * We might want another version of this in the future, that does not do default construction + * even for non-trivial types. This should not be the default though, because one can easily mess + * up when dealing with uninitialized memory. + */ explicit Array(uint size) { m_size = size; m_data = this->get_buffer_for_size(size); - - for (uint i = 0; i < m_size; i++) { - new (m_data + i) T(); - } + default_construct_n(m_data, size); } + /** + * Create a new array with the given size. All values will be initialized by copying the given + * default. + */ Array(uint size, const T &value) { m_size = size; @@ -74,21 +130,19 @@ class Array { uninitialized_fill_n(m_data, m_size, value); } - Array(const Array &other) + Array(const Array &other) : m_allocator(other.m_allocator) { m_size = other.size(); - m_allocator = other.m_allocator; m_data = this->get_buffer_for_size(other.size()); - uninitialized_copy_n(other.begin(), m_size, m_data); + uninitialized_copy_n(other.data(), m_size, m_data); } - Array(Array &&other) noexcept + Array(Array &&other) noexcept : m_allocator(other.m_allocator) { m_size = other.m_size; - m_allocator = other.m_allocator; - if (!other.uses_inline_storage()) { + if (!other.uses_inline_buffer()) { m_data = other.m_data; } else { @@ -96,14 +150,14 @@ class Array { uninitialized_relocate_n(other.m_data, m_size, m_data); } - other.m_data = other.inline_storage(); + other.m_data = other.inline_buffer(); other.m_size = 0; } ~Array() { destruct_n(m_data, m_size); - if (!this->uses_inline_storage()) { + if (!this->uses_inline_buffer()) { m_allocator.deallocate((void *)m_data); } } @@ -162,21 +216,50 @@ class Array { return m_data[index]; } + /** + * Returns the number of elements in the array. + */ uint size() const { return m_size; } + /** + * Returns true when the number of elements in the array is zero. + */ + bool is_empty() const + { + return m_size == 0; + } + + /** + * Copies the value to all indices in the array. 
+ */ void fill(const T &value) { - MutableArrayRef(*this).fill(value); + initialized_fill_n(m_data, m_size, value); } + /** + * Copies the value to the given indices in the array. + */ void fill_indices(ArrayRef indices, const T &value) { MutableArrayRef(*this).fill_indices(indices, value); } + /** + * Get a pointer to the beginning of the array. + */ + const T *data() const + { + return m_data; + } + T *data() + { + return m_data; + } + const T *begin() const { return m_data; @@ -197,41 +280,64 @@ class Array { return m_data + m_size; } + /** + * Get an index range containing all valid indices for this array. + */ IndexRange index_range() const { return IndexRange(m_size); } + /** + * Sets the size to zero. This should only be used when you have manually destructed all elements + * in the array beforehand. Use with care. + */ + void clear_without_destruct() + { + m_size = 0; + } + + /** + * Access the allocator used by this array. + */ Allocator &allocator() { return m_allocator; } + /** + * Get the value of the InlineBufferCapacity template argument. This is the number of elements + * that can be stored without doing an allocation. 
+ */ + static uint inline_buffer_capacity() + { + return InlineBufferCapacity; + } + private: T *get_buffer_for_size(uint size) { if (size <= InlineBufferCapacity) { - return this->inline_storage(); + return this->inline_buffer(); } else { return this->allocate(size); } } - T *inline_storage() const + T *inline_buffer() const { - return (T *)m_inline_storage.ptr(); + return (T *)m_inline_buffer.ptr(); } T *allocate(uint size) { - return (T *)m_allocator.allocate_aligned( - size * sizeof(T), std::alignment_of::value, __func__); + return (T *)m_allocator.allocate(size * sizeof(T), alignof(T), AT); } - bool uses_inline_storage() const + bool uses_inline_buffer() const { - return m_data == this->inline_storage(); + return m_data == this->inline_buffer(); } }; diff --git a/source/blender/blenlib/BLI_array_ref.hh b/source/blender/blenlib/BLI_array_ref.hh index c0484493bda..34745c0af83 100644 --- a/source/blender/blenlib/BLI_array_ref.hh +++ b/source/blender/blenlib/BLI_array_ref.hh @@ -20,19 +20,38 @@ /** \file * \ingroup bli * - * These classes offer a convenient way to work with continuous chunks of memory of a certain type. - * We differentiate #ArrayRef and #MutableArrayRef. The elements in the former are const while the - * elements in the other are not. + * A `BLI::ArrayRef` references an array that is owned by someone else. It is just a pointer and + * a size. Since the memory is not owned, ArrayRef should not be used to transfer ownership. The + * array cannot be modified through the ArrayRef. However, if T is a non-const pointer, the + * pointed-to elements can be modified. * - * Passing array references as parameters has multiple benefits: - * - Less templates are used because the function does not have to work with different - * container types. - * - It encourages an Struct-of-Arrays data layout which is often beneficial when - * writing high performance code. Also it makes it easier to reuse code. 
- * - Array references offer convenient ways of slicing and other operations. + * There is also `BLI::MutableArrayRef`. It is mostly the same as ArrayRef, but allows the array + * to be modified. * - * The instances of #ArrayRef and #MutableArrayRef are very small and should be passed by value. - * Since array references do not own any memory, it is generally not save to store them. + * An (Mutable)ArrayRef can refer to data owned by many different data structures including + * BLI::Vector, BLI::Array, BLI::VectorSet, std::vector, std::array, std::string, + * std::initializer_list and c-style array. + * + * `BLI::ArrayRef` should be your default choice when you have to pass a read-only array into a + * function. It is better than passing a `const Vector &`, because then the function only works for + * vectors and not for e.g. arrays. Using ArrayRef as function parameter makes it usable in more + * contexts, better expresses the intent and does not sacrifice performance. It is also better than + * passing a raw pointer and size separately, because it is more convenient and safe. + * + * `BLI::MutableArrayRef` can be used when a function is supposed to return an array, the size + * of which is known before the function is called. One advantage of this approach is that the + * caller is responsible for allocation and deallocation. Furthermore, the function can focus on + * its task, without having to worry about memory allocation. Alternatively, a function could + * return an Array or Vector. + * + * Note: When a function has a MutableArrayRef output parameter and T is not a trivial type, + * then the function has to specify whether the referenced array is expected to be initialized or + * not. + * + * Since the arrays are only referenced, it is generally unsafe to store an ArrayRef. When you + * store one, you should know who owns the memory. + * + * Instances of ArrayRef and MutableArrayRef are small and should be passed by value. 
*/ #include @@ -48,7 +67,8 @@ namespace BLI { /** - * References an array of data. The elements in the array should not be changed. + * References an array of type T that is owned by someone else. The data in the array cannot be + * modified. */ template class ArrayRef { private: @@ -58,7 +78,6 @@ template class ArrayRef { public: /** * Create a reference to an empty array. - * The pointer is allowed to be nullptr. */ ArrayRef() = default; @@ -66,11 +85,22 @@ template class ArrayRef { { } - ArrayRef(const std::initializer_list &list) : ArrayRef(list.begin(), list.size()) + /** + * Reference an initializer_list. Note that the data in the initializer_list is only valid until + * the expression containing it is fully computed. + * + * Do: + * call_function_with_array({1, 2, 3, 4}); + * + * Don't: + * ArrayRef ref = {1, 2, 3, 4}; + * call_function_with_array(ref); + */ + ArrayRef(const std::initializer_list &list) : ArrayRef(list.begin(), (uint)list.size()) { } - ArrayRef(const std::vector &vector) : ArrayRef(vector.data(), vector.size()) + ArrayRef(const std::vector &vector) : ArrayRef(vector.data(), (uint)vector.size()) { } @@ -79,18 +109,19 @@ template class ArrayRef { } /** - * ArrayRef -> ArrayRef - * ArrayRef -> ArrayRef + * Support implicit conversions like the ones below: + * ArrayRef -> ArrayRef + * ArrayRef -> ArrayRef */ template::value>::type * = nullptr> - ArrayRef(ArrayRef array) : ArrayRef((T *)array.begin(), array.size()) + ArrayRef(ArrayRef array) : ArrayRef((T *)array.data(), array.size()) { } /** - * Return a continuous part of the array. - * Asserts that the slice stays within the array. + * Returns a contiguous part of the array. This invokes undefined behavior when the slice does + * not stay within the bounds of the array. */ ArrayRef slice(uint start, uint size) const { @@ -104,28 +135,28 @@ template class ArrayRef { } /** - * Return a new ArrayRef with n elements removed from the beginning. - * Asserts that the array contains enough elements. 
+ * Returns a new ArrayRef with n elements removed from the beginning. This invokes undefined + * behavior when the array is too small. */ - ArrayRef drop_front(uint n = 1) const + ArrayRef drop_front(uint n) const { BLI_assert(n <= this->size()); return this->slice(n, this->size() - n); } /** - * Return a new ArrayRef with n elements removed from the beginning. - * Asserts that the array contains enough elements. + * Returns a new ArrayRef with n elements removed from the beginning. This invokes undefined + * behavior when the array is too small. */ - ArrayRef drop_back(uint n = 1) const + ArrayRef drop_back(uint n) const { BLI_assert(n <= this->size()); return this->slice(0, this->size() - n); } /** - * Return a new ArrayRef that only contains the first n elements. - * Asserts that the array contains enough elements. + * Returns a new ArrayRef that only contains the first n elements. This invokes undefined + * behavior when the array is too small. */ ArrayRef take_front(uint n) const { @@ -134,8 +165,8 @@ template class ArrayRef { } /** - * Return a new ArrayRef that only contains the last n elements. - * Asserts that the array contains enough elements. + * Returns a new ArrayRef that only contains the last n elements. This invokes undefined + * behavior when the array is too small. */ ArrayRef take_back(uint n) const { @@ -144,11 +175,12 @@ template class ArrayRef { } /** - * Copy the values in this array to another array. + * Returns the pointer to the beginning of the referenced array. This may be nullptr when the + * size is zero. */ - void copy_to(T *ptr) const + const T *data() const { - BLI::copy_n(m_start, m_size, ptr); + return m_start; } const T *begin() const @@ -162,8 +194,8 @@ template class ArrayRef { } /** - * Access an element in the array. - * Asserts that the index is in the bounds of the array. + * Access an element in the array. This invokes undefined behavior when the index is out of + * bounds. 
*/ const T &operator[](uint index) const { @@ -172,7 +204,7 @@ template class ArrayRef { } /** - * Return the number of elements in the referenced array. + * Returns the number of elements in the referenced array. */ uint size() const { @@ -180,16 +212,24 @@ template class ArrayRef { } /** - * Return the number of bytes referenced by this ArrayRef. + * Returns true if the size is zero. */ - uint byte_size() const + bool is_empty() const + { + return m_size == 0; + } + + /** + * Returns the number of bytes referenced by this ArrayRef. + */ + uint size_in_bytes() const { return sizeof(T) * m_size; } /** * Does a linear search to see of the value is in the array. - * Return true if it is, otherwise false. + * Returns true if it is, otherwise false. */ bool contains(const T &value) const { @@ -202,7 +242,7 @@ template class ArrayRef { } /** - * Does a constant time check to see if the pointer is within the referenced array. + * Does a constant time check to see if the pointer points to a value in the referenced array. * Return true if it is, otherwise false. */ bool contains_ptr(const T *ptr) const @@ -226,8 +266,8 @@ template class ArrayRef { } /** - * Return a reference to the first element in the array. - * Asserts that the array is not empty. + * Return a reference to the first element in the array. This invokes undefined behavior when the + * array is empty. */ const T &first() const { @@ -236,8 +276,8 @@ template class ArrayRef { } /** - * Return a reference to the last element in the array. - * Asserts that the array is not empty. + * Returns a reference to the last element in the array. This invokes undefined behavior when the + * array is empty. */ const T &last() const { @@ -246,7 +286,8 @@ template class ArrayRef { } /** - * Get element at the given index. If the index is out of range, return the fallback value. + * Returns the element at the given index. If the index is out of range, return the fallback + * value. 
*/ T get(uint index, const T &fallback) const { @@ -277,6 +318,11 @@ template class ArrayRef { return false; } + /** + * Returns true when this and the other array have an element in common. This should only be + * called on small arrays, because it has a running time of O(n*m) where n and m are the sizes of + * the arrays. + */ bool intersects__linear_search(ArrayRef other) const { /* The size should really be smaller than that. If it is not, the calling code should be @@ -292,6 +338,10 @@ template class ArrayRef { return false; } + /** + * Returns the index of the first occurrence of the given value. This invokes undefined behavior + * when the value is not in the array. + */ uint first_index(const T &search_value) const { int index = this->first_index_try(search_value); @@ -299,6 +349,9 @@ template class ArrayRef { return (uint)index; } + /** + * Returns the index of the first occurrence of the given value or -1 if it does not exist. + */ int first_index_try(const T &search_value) const { for (uint i = 0; i < m_size; i++) { @@ -309,16 +362,6 @@ template class ArrayRef { return -1; } - template bool any(const PredicateT predicate) - { - for (uint i = 0; i < m_size; i++) { - if (predicate(m_start[i])) { - return true; - } - } - return false; - } - /** * Utility to make it more convenient to iterate over all indices that can be used with this * array. @@ -329,7 +372,7 @@ template class ArrayRef { } /** - * Get a new array ref to the same underlying memory buffer. No conversions are done. + * Returns a new ArrayRef to the same underlying memory buffer. No conversions are done. */ template ArrayRef cast() const { @@ -339,7 +382,7 @@ template class ArrayRef { } /** - * A debug utility to print the content of the array ref. Every element will be printed on a + * A debug utility to print the content of the ArrayRef. Every element will be printed on a * separate line using the given callback. 
*/ template void print_as_lines(std::string name, PrintLineF print_line) const @@ -352,6 +395,10 @@ template class ArrayRef { } } + /** + * A debug utility to print the content of the array ref. Every element be printed on a separate + * line. + */ void print_as_lines(std::string name) const { this->print_as_lines(name, [](const T &value) { std::cout << value; }); @@ -359,7 +406,8 @@ template class ArrayRef { }; /** - * Mostly the same as ArrayRef, except that one can change the array elements via this reference. + * Mostly the same as ArrayRef, except that one can change the array elements through a + * MutableArrayRef. */ template class MutableArrayRef { private: @@ -373,6 +421,17 @@ template class MutableArrayRef { { } + /** + * Reference an initializer_list. Note that the data in the initializer_list is only valid until + * the expression containing it is fully computed. + * + * Do: + * call_function_with_array({1, 2, 3, 4}); + * + * Don't: + * MutableArrayRef ref = {1, 2, 3, 4}; + * call_function_with_array(ref); + */ MutableArrayRef(std::initializer_list &list) : MutableArrayRef(list.begin(), list.size()) { } @@ -392,7 +451,7 @@ template class MutableArrayRef { } /** - * Get the number of elements in the array. + * Returns the number of elements in the array. */ uint size() const { @@ -402,33 +461,30 @@ template class MutableArrayRef { /** * Replace all elements in the referenced array with the given value. */ - void fill(const T &element) + void fill(const T &value) { - std::fill_n(m_start, m_size, element); + initialized_fill_n(m_start, m_size, value); } /** - * Replace a subset of all elements with the given value. + * Replace a subset of all elements with the given value. This invokes undefined behavior when + * one of the indices is out of bounds. 
*/ - void fill_indices(ArrayRef indices, const T &element) + void fill_indices(ArrayRef indices, const T &value) { for (uint i : indices) { - m_start[i] = element; + BLI_assert(i < m_size); + m_start[i] = value; } } /** - * Copy the values from another array into the references array. + * Returns a pointer to the beginning of the referenced array. This may be nullptr, when the size + * is zero. */ - void copy_from(const T *ptr) - { - BLI::copy_n(ptr, m_size, m_start); - } - - void copy_from(ArrayRef other) + T *data() const { - BLI_assert(this->size() == other.size()); - this->copy_from(other.begin()); + return m_start; } T *begin() const @@ -448,8 +504,8 @@ template class MutableArrayRef { } /** - * Return a continuous part of the array. - * Asserts that the slice stays in the array bounds. + * Returns a contiguous part of the array. This invokes undefined behavior when the slice would + * go out of bounds. */ MutableArrayRef slice(uint start, uint length) const { @@ -458,25 +514,28 @@ template class MutableArrayRef { } /** - * Return a new MutableArrayRef with n elements removed from the beginning. + * Returns a new MutableArrayRef with n elements removed from the beginning. This invokes + * undefined behavior when the array is too small. */ - MutableArrayRef drop_front(uint n = 1) const + MutableArrayRef drop_front(uint n) const { BLI_assert(n <= this->size()); return this->slice(n, this->size() - n); } /** - * Return a new MutableArrayRef with n elements removed from the beginning. + * Returns a new MutableArrayRef with n elements removed from the end. This invokes undefined + * behavior when the array is too small. */ - MutableArrayRef drop_back(uint n = 1) const + MutableArrayRef drop_back(uint n) const { BLI_assert(n <= this->size()); return this->slice(0, this->size() - n); } /** - * Return a new MutableArrayRef that only contains the first n elements. + * Returns a new MutableArrayRef that only contains the first n elements. 
This invokes undefined + * behavior when the array is too small. */ MutableArrayRef take_front(uint n) const { @@ -485,7 +544,8 @@ template class MutableArrayRef { } /** - * Return a new MutableArrayRef that only contains the last n elements. + * Return a new MutableArrayRef that only contains the last n elements. This invokes undefined + * behavior when the array is too small. */ MutableArrayRef take_back(uint n) const { @@ -493,16 +553,28 @@ template class MutableArrayRef { return this->slice(this->size() - n, n); } + /** + * Returns an (immutable) ArrayRef that references the same array. This is usually not needed, + * due to implicit conversions. However, sometimes automatic type deduction needs some help. + */ ArrayRef as_ref() const { return ArrayRef(m_start, m_size); } + /** + * Utility to make it more convenient to iterate over all indices that can be used with this + * array. + */ IndexRange index_range() const { return IndexRange(m_size); } + /** + * Returns a reference to the last element. This invokes undefined behavior when the array is + * empty. + */ const T &last() const { BLI_assert(m_size > 0); @@ -510,7 +582,7 @@ template class MutableArrayRef { } /** - * Get a new array ref to the same underlying memory buffer. No conversions are done. + * Returns a new array ref to the same underlying memory buffer. No conversions are done. */ template MutableArrayRef cast() const { @@ -528,6 +600,9 @@ template ArrayRef ref_c_array(const T *array, uint size) return ArrayRef(array, size); } +/** + * Utilities to check that arrays have the same size in debug builds. 
+ */ template void assert_same_size(const T1 &v1, const T2 &v2) { UNUSED_VARS_NDEBUG(v1, v2); diff --git a/source/blender/blenlib/BLI_dot_export.hh b/source/blender/blenlib/BLI_dot_export.hh index 08c37fec01e..8da0ed6df14 100644 --- a/source/blender/blenlib/BLI_dot_export.hh +++ b/source/blender/blenlib/BLI_dot_export.hh @@ -27,7 +27,6 @@ #include "BLI_map.hh" #include "BLI_optional.hh" #include "BLI_set.hh" -#include "BLI_string_map.hh" #include "BLI_utility_mixins.hh" #include "BLI_vector.hh" @@ -57,7 +56,7 @@ class AttributeList { void set(StringRef key, StringRef value) { - m_attributes.add_override(key, value); + m_attributes.add_overwrite(key, value); } }; diff --git a/source/blender/blenlib/BLI_hash.hh b/source/blender/blenlib/BLI_hash.hh index 3b3448f66b1..029fa32553a 100644 --- a/source/blender/blenlib/BLI_hash.hh +++ b/source/blender/blenlib/BLI_hash.hh @@ -20,8 +20,58 @@ /** \file * \ingroup bli * - * This file provides default hash functions for some primitive types. The hash functions can be - * used by containers such as Map and Set. + * A specialization of `BLI::DefaultHash` provides a hash function for values of type T. This + * hash function is used by default in hash table implementations in blenlib. + * + * The actual hash function is in the `operator()` method of DefaultHash. The following code + * computes the hash of some value using DefaultHash. + * + * T value = ...; + * DefaultHash hash_function; + * uint32_t hash = hash_function(value); + * + * Hash table implementations like BLI::Set support heterogeneous key lookups. That means that one + * can do a lookup with a key of type A in a hash table that stores keys of type B. This is + * commonly done when B is std::string, because the conversion from e.g. a StringRef to std::string + * can be costly and is unnecessary. To make this work, values of type A and B that compare equal + * have to have the same hash value. 
This is achieved by defining potentially multiple `operator()` + * in a specialization of DefaultHash. All those methods have to compute the same hash for values + * that compare equal. + * + * The computed hash is an unsigned 32 bit integer. Ideally, the hash function would generate + * uniformly random hash values for a set of keys. However, in many cases trivial hash functions + * are faster and produce a good enough distribution. In general it is better when more information + * is in the lower bits of the hash. By choosing a good probing strategy, the effects of a bad hash + * function are less noticable though. In this context a good probing strategy is one that takes + * all bits of the hash into account eventually. One has to check on a case by case basis to see if + * a better but more expensive or trivial hash function works better. + * + * There are three main ways to provide a hash table implementation with a custom hash function. + * + * - When you want to provide a default hash function for your own custom type: Add a `hash` + * member function to it. The function should return `uint32_t` and take no arguments. This + * method will be called by the default implementation of DefaultHash. It will automatically be + * used by hash table implementations. + * + * - When you want to provide a default hash function for a type that you cannot modify: Add a new + * specialization to the DefaultHash struct. This can be done by writing code like below in + * either global or BLI namespace. + * + * template<> struct BLI::DefaultHash { + * uint32_t operator()(const TheType &value) const { + * return ...; + * } + * }; + * + * - When you want to provide a different hash function for a type that already has a default hash + * function: Implement a struct like the one below and pass it as template parameter to the hash + * table explicitely. 
+ * + * struct MyCustomHash { + * uint32_t operator()(const TheType &value) const { + * return ...; + * } + * }; */ #include @@ -35,7 +85,16 @@ namespace BLI { +/** + * If there is no other specialization of DefaultHash for a given type, try to call `hash()` on the + * value. If there is no such method, this will result in a compiler error. Usually that means that + * you have to implement a hash function using one of three strategies listed above. + */ template struct DefaultHash { + uint32_t operator()(const T &value) const + { + return value.hash(); + } }; #define TRIVIAL_DEFAULT_INT_HASH(TYPE) \ @@ -47,9 +106,9 @@ template struct DefaultHash { } /** - * Cannot make any assumptions about the distribution of keys, so use a trivial hash function by - * default. The hash table implementations are designed to take all bits of the hash into account - * to avoid really bad behavior when the lower bits are all zero. Special hash functions can be + * We cannot make any assumptions about the distribution of keys, so use a trivial hash function by + * default. The default probing strategy is designed to take all bits of the hash into account + * to avoid worst case behavior when the lower bits are all zero. Special hash functions can be * implemented when more knowledge about a specific key distribution is available. 
*/ TRIVIAL_DEFAULT_INT_HASH(int8_t); @@ -58,9 +117,26 @@ TRIVIAL_DEFAULT_INT_HASH(int16_t); TRIVIAL_DEFAULT_INT_HASH(uint16_t); TRIVIAL_DEFAULT_INT_HASH(int32_t); TRIVIAL_DEFAULT_INT_HASH(uint32_t); -TRIVIAL_DEFAULT_INT_HASH(int64_t); -TRIVIAL_DEFAULT_INT_HASH(uint64_t); +template<> struct DefaultHash { + uint32_t operator()(uint64_t value) const + { + uint32_t low = (uint32_t)value; + uint32_t high = (uint32_t)(value >> 32); + return low ^ (high * 0x45d9f3b); + } +}; + +template<> struct DefaultHash { + uint32_t operator()(uint64_t value) const + { + return DefaultHash{}((uint64_t)value); + } +}; + +/** + * One should try to avoid using floats as keys in hash tables, but sometimes it is convenient. + */ template<> struct DefaultHash { uint32_t operator()(float value) const { @@ -78,35 +154,38 @@ inline uint32_t hash_string(StringRef str) } template<> struct DefaultHash { - uint32_t operator()(const std::string &value) const + /** + * Take a StringRef as parameter to support heterogeneous lookups in hash table implementations + * when std::string is used as key. + */ + uint32_t operator()(StringRef value) const { return hash_string(value); } }; template<> struct DefaultHash { - uint32_t operator()(const StringRef &value) const + uint32_t operator()(StringRef value) const { return hash_string(value); } }; template<> struct DefaultHash { - uint32_t operator()(const StringRefNull &value) const + uint32_t operator()(StringRef value) const { return hash_string(value); } }; /** - * While we cannot guarantee that the lower 3 bits or a pointer are zero, it is safe to assume - * this in the general case. MEM_malloc only returns 8 byte aligned addresses on 64-bit systems. + * While we cannot guarantee that the lower 4 bits of a pointer are zero, it is often the case. 
*/ template struct DefaultHash { uint32_t operator()(const T *value) const { - uintptr_t ptr = POINTER_AS_UINT(value); - uint32_t hash = (uint32_t)(ptr >> 3); + uintptr_t ptr = (uintptr_t)value; + uint32_t hash = (uint32_t)(ptr >> 4); return hash; } }; diff --git a/source/blender/blenlib/BLI_hash_tables.hh b/source/blender/blenlib/BLI_hash_tables.hh new file mode 100644 index 00000000000..c171b26f770 --- /dev/null +++ b/source/blender/blenlib/BLI_hash_tables.hh @@ -0,0 +1,350 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __BLI_OPEN_ADDRESSING_HH__ +#define __BLI_OPEN_ADDRESSING_HH__ + +/** \file + * \ingroup bli + * + * This file contains code that can be shared between different hash table implementations. + */ + +#include + +#include "BLI_allocator.hh" +#include "BLI_array.hh" +#include "BLI_math_base.h" +#include "BLI_memory_utils.hh" +#include "BLI_string.h" +#include "BLI_utildefines.h" +#include "BLI_vector.hh" + +namespace BLI { + +/* -------------------------------------------------------------------- */ +/** \name Constexpr Utility Functions + * + * Those should eventually be deduplicated with functions in BLI_math_base.h. 
+ * \{ */ + +inline constexpr int is_power_of_2_i_constexpr(int n) +{ + return (n & (n - 1)) == 0; +} + +inline constexpr uint32_t log2_floor_u_constexpr(uint32_t x) +{ + return x <= 1 ? 0 : 1 + log2_floor_u_constexpr(x >> 1); +} + +inline constexpr uint32_t log2_ceil_u_constexpr(uint32_t x) +{ + return (is_power_of_2_i_constexpr((int)x)) ? log2_floor_u_constexpr(x) : + log2_floor_u_constexpr(x) + 1; +} + +inline constexpr uint32_t power_of_2_max_u_constexpr(uint32_t x) +{ + return 1 << log2_ceil_u_constexpr(x); +} + +template inline constexpr IntT ceil_division(IntT x, IntT y) +{ + BLI_STATIC_ASSERT(!std::is_signed::value, ""); + return x / y + ((x % y) != 0); +} + +template inline constexpr IntT floor_division(IntT x, IntT y) +{ + BLI_STATIC_ASSERT(!std::is_signed::value, ""); + return x / y; +} + +inline constexpr uint32_t ceil_division_by_fraction(uint32_t x, + uint32_t numerator, + uint32_t denominator) +{ + return (uint32_t)ceil_division((uint64_t)x * (uint64_t)denominator, (uint64_t)numerator); +} + +inline constexpr uint32_t floor_multiplication_with_fraction(uint32_t x, + uint32_t numerator, + uint32_t denominator) +{ + return (uint32_t)((uint64_t)x * (uint64_t)numerator / (uint64_t)denominator); +} + +inline constexpr uint32_t total_slot_amount_for_usable_slots(uint32_t min_usable_slots, + uint32_t max_load_factor_numerator, + uint32_t max_load_factor_denominator) +{ + return power_of_2_max_u_constexpr(ceil_division_by_fraction( + min_usable_slots, max_load_factor_numerator, max_load_factor_denominator)); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Load Factor + * + * This is an abstraction for a fractional load factor. The hash table using this class is assumed + * to use arrays with a size that is a power of two. 
+ * + * \{ */ + +class LoadFactor { + private: + uint8_t m_numerator; + uint8_t m_denominator; + + public: + LoadFactor(uint8_t numerator, uint8_t denominator) + : m_numerator(numerator), m_denominator(denominator) + { + BLI_assert(numerator > 0); + BLI_assert(numerator < denominator); + } + + void compute_total_and_usable_slots(uint32_t min_total_slots, + uint32_t min_usable_slots, + uint32_t *r_total_slots, + uint32_t *r_usable_slots) const + { + BLI_assert(is_power_of_2_i((int)min_total_slots)); + + uint32_t total_slots = this->compute_total_slots(min_usable_slots, m_numerator, m_denominator); + total_slots = std::max(total_slots, min_total_slots); + uint32_t usable_slots = floor_multiplication_with_fraction( + total_slots, m_numerator, m_denominator); + BLI_assert(min_usable_slots <= usable_slots); + + *r_total_slots = total_slots; + *r_usable_slots = usable_slots; + } + + static constexpr uint32_t compute_total_slots(uint32_t min_usable_slots, + uint8_t numerator, + uint8_t denominator) + { + return total_slot_amount_for_usable_slots(min_usable_slots, numerator, denominator); + } +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Intrusive Key Info + * + * A hash table slot has to maintain state about whether the slot is empty, occupied or removed. + * Usually, this state information is stored in its own variable. While it only needs two bits in + * theory, in practice often 4 or 8 bytes are used, due to alignment requirements. + * + * One solution to deal with this problem is to embed the state information in the key. That means, + * two values of the key type are selected to indicate whether the slot is empty or removed. + * + * The classes below tell a slot implementation which special key values it can use. They can be + * used as KeyInfo in slot types like IntrusiveSetSlot and IntrusiveMapSlot. 
+ * + * A KeyInfo type has to implement a couple of static methods that are descriped in + * TemplatedKeyInfo. + * + * \{ */ + +/** + * The template arguments EmptyValue and RemovedValue define which special are used. This can be + * used when a hash table has integer keys and there are two specific integers that will never be + * used as keys. + */ +template struct TemplatedKeyInfo { + /** + * Get the value that indicates that the slot is empty. This is used to indicate new slots. + */ + static Key get_empty() + { + return EmptyValue; + } + + /** + * Modify the given key so that it represents a removed slot. + */ + static void remove(Key &key) + { + key = RemovedValue; + } + + /** + * Return true, when the given key indicates that the slot is empty. + */ + static bool is_empty(const Key &key) + { + return key == EmptyValue; + } + + /** + * Return true, when the given key indicates that the slot is removed. + */ + static bool is_removed(const Key &key) + { + return key == RemovedValue; + } + + /** + * Return true, when the key is valid, i.e. it can be contained in an occupied slot. + */ + static bool is_not_empty_or_removed(const Key &key) + { + return key != EmptyValue && key != RemovedValue; + } +}; + +/** + * 0xffff...ffff indicates an empty slot. + * 0xffff...fffe indicates a removed slot. + * + * Those specific values are used, because with them a single comparison is enough to check whether + * a slot is occupied. The keys 0x0000...0000 and 0x0000...0001 also satisfy this constraint. + * However, nullptr is much more likely to be used as valid key. 
+ */ +template struct PointerKeyInfo { + static Pointer get_empty() + { + return (Pointer)UINTPTR_MAX; + } + + static void remove(Pointer &pointer) + { + pointer = (Pointer)(UINTPTR_MAX - 1); + } + + static bool is_empty(Pointer pointer) + { + return (uintptr_t)pointer == UINTPTR_MAX; + } + + static bool is_removed(Pointer pointer) + { + return (uintptr_t)pointer == UINTPTR_MAX - 1; + } + + static bool is_not_empty_or_removed(Pointer pointer) + { + return (uintptr_t)pointer < UINTPTR_MAX - 1; + } +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Hash Table Stats + * + * A utility class that makes it easier for hash table implementations to provide statistics to the + * developer. These statistics can be helpful when trying to figure out why a hash table is slow. + * + * To use this utility, a hash table has to implement various methods, that are mentioned below. + * + * \{ */ + +class HashTableStats { + private: + Vector m_keys_by_collision_count; + uint32_t m_total_collisions; + float m_average_collisions; + uint32_t m_size; + uint32_t m_capacity; + uint32_t m_removed_amount; + float m_load_factor; + float m_removed_load_factor; + uint32_t m_size_per_element; + uint32_t m_size_in_bytes; + const void *m_address; + + public: + /** + * Requires that the hash table has the following methods: + * - count_collisions(key) -> uint32_t + * - size() -> uint32_t + * - capacity() -> uint32_t + * - removed_amount() -> uint32_t + * - size_per_element() -> uint32_t + * - size_in_bytes() -> uint32_t + */ + template + HashTableStats(const HashTable &hash_table, const Keys &keys) + { + m_total_collisions = 0; + m_size = hash_table.size(); + m_capacity = hash_table.capacity(); + m_removed_amount = hash_table.removed_amount(); + m_size_per_element = hash_table.size_per_element(); + m_size_in_bytes = hash_table.size_in_bytes(); + m_address = (const void *)&hash_table; + + for (const auto &key : keys) { + uint32_t collisions = 
hash_table.count_collisions(key); + if (m_keys_by_collision_count.size() <= collisions) { + m_keys_by_collision_count.append_n_times( + 0, collisions - m_keys_by_collision_count.size() + 1); + } + m_keys_by_collision_count[collisions]++; + m_total_collisions += collisions; + } + + m_average_collisions = (m_size == 0) ? 0 : (float)m_total_collisions / (float)m_size; + m_load_factor = (float)m_size / (float)m_capacity; + m_removed_load_factor = (float)m_removed_amount / (float)m_capacity; + } + + void print(StringRef name = "") + { + std::cout << "Hash Table Stats: " << name << "\n"; + std::cout << " Address: " << m_address << "\n"; + std::cout << " Total Slots: " << m_capacity << "\n"; + std::cout << " Occupied Slots: " << m_size << " (" << m_load_factor * 100.0f << " %)\n"; + std::cout << " Removed Slots: " << m_removed_amount << " (" << m_removed_load_factor * 100.0f + << " %)\n"; + + char memory_size_str[15]; + BLI_str_format_byte_unit(memory_size_str, m_size_in_bytes, true); + std::cout << " Size: ~" << memory_size_str << "\n"; + std::cout << " Size per Slot: " << m_size_per_element << " bytes\n"; + + std::cout << " Average Collisions: " << m_average_collisions << "\n"; + for (uint32_t collision_count : m_keys_by_collision_count.index_range()) { + std::cout << " " << collision_count + << " Collisions: " << m_keys_by_collision_count[collision_count] << "\n"; + } + } +}; + +/** \} */ + +/** + * This struct provides an equality operator that returns true for all objects that compare equal + * when one would use the `==` operator. This is different from std::equal_to, because that + * requires the parameters to be of type T. Our hash tables support lookups using other types + * without conversion, therefore DefaultEquality needs to be more generic. 
+ */ +struct DefaultEquality { + template bool operator()(const T1 &a, const T2 &b) const + { + return a == b; + } +}; + +} // namespace BLI + +#endif /* __BLI_OPEN_ADDRESSING_HH__ */ diff --git a/source/blender/blenlib/BLI_index_range.hh b/source/blender/blenlib/BLI_index_range.hh index e24fd567810..4a64ae16589 100644 --- a/source/blender/blenlib/BLI_index_range.hh +++ b/source/blender/blenlib/BLI_index_range.hh @@ -20,9 +20,37 @@ /** \file * \ingroup bli * - * Allows passing iterators over ranges of integers without actually allocating an array or passing - * separate values. A range always has a step of one. If other step sizes are required in some - * cases, a separate data structure should be used. + * A `BLI::IndexRange` wraps an interval of non-negative integers. It can be used to reference + * consecutive elements in an array. Furthermore, it can make for loops more convenient and less + * error prone, especially when using nested loops. + * + * I'd argue that the second loop is more readable and less error prone than the first one. That is + * not necessarily always the case, but often it is. + * + * for (uint i = 0; i < 10; i++) { + * for (uint j = 0; j < 20; j++) { + * for (uint k = 0; k < 30; k++) { + * + * for (uint i : IndexRange(10)) { + * for (uint j : IndexRange(20)) { + * for (uint k : IndexRange(30)) { + * + * Some containers like BLI::Vector have an index_range() method. This will return the IndexRange + * that contains all indices that can be used to access the container. This is particularly useful + * when you want to iterate over the indices and the elements (much like Python's enumerate(), just + * worse). 
Again, I think the second example here is better: + * + * for (uint i = 0; i < my_vector_with_a_long_name.size(); i++) { + * do_something(i, my_vector_with_a_long_name[i]); + * + * for (uint i : my_vector_with_a_long_name.index_range()) { + * do_something(i, my_vector_with_a_long_name[i]); + * + * Ideally this could be could be even closer to Python's enumerate(). We might get that in the + * future with newer C++ versions. + * + * One other important feature is the as_array_ref method. This method returns an ArrayRef + * that contains the interval as individual numbers. */ #include @@ -182,13 +210,15 @@ class IndexRange { return value >= m_start && value < m_start + m_size; } + /** + * Returns a new range, that contains a subinterval of the current one. + */ IndexRange slice(uint start, uint size) const { uint new_start = m_start + start; BLI_assert(new_start + size <= m_start + m_size || size == 0); return IndexRange(new_start, size); } - IndexRange slice(IndexRange range) const { return this->slice(range.start(), range.size()); diff --git a/source/blender/blenlib/BLI_linear_allocator.hh b/source/blender/blenlib/BLI_linear_allocator.hh index 285af49f500..8b77855bb07 100644 --- a/source/blender/blenlib/BLI_linear_allocator.hh +++ b/source/blender/blenlib/BLI_linear_allocator.hh @@ -130,7 +130,7 @@ template class LinearAllocator : NonCopya template MutableArrayRef construct_array_copy(ArrayRef src) { MutableArrayRef dst = this->allocate_array(src.size()); - uninitialized_copy_n(src.begin(), src.size(), dst.begin()); + uninitialized_copy_n(src.data(), src.size(), dst.data()); return dst; } @@ -186,7 +186,7 @@ template class LinearAllocator : NonCopya m_unused_borrowed_buffers.append(ArrayRef((char *)buffer, size)); } - template + template void provide_buffer(AlignedBuffer &aligned_buffer) { this->provide_buffer(aligned_buffer.ptr(), Size); @@ -208,7 +208,7 @@ template class LinearAllocator : NonCopya uint size_in_bytes = 
power_of_2_min_u(std::max(min_allocation_size, m_next_min_alloc_size)); m_next_min_alloc_size = size_in_bytes * 2; - void *buffer = m_allocator.allocate(size_in_bytes, __func__); + void *buffer = m_allocator.allocate(size_in_bytes, 8, AT); m_owned_buffers.append(buffer); m_current_begin = (uintptr_t)buffer; m_current_end = m_current_begin + size_in_bytes; diff --git a/source/blender/blenlib/BLI_listbase_wrapper.hh b/source/blender/blenlib/BLI_listbase_wrapper.hh index 02313d9d22d..70f3135d9e3 100644 --- a/source/blender/blenlib/BLI_listbase_wrapper.hh +++ b/source/blender/blenlib/BLI_listbase_wrapper.hh @@ -20,8 +20,10 @@ /** \file * \ingroup bli * - * The purpose of this wrapper is just to make it more comfortable to iterate of ListBase - * instances, that are used in many places in Blender. + * `BLI::ListBaseWrapper` is a typed wrapper for the ListBase struct. That makes it safer and more + * convenient to use in C++ in some cases. However, if you find yourself iterating over a linked + * list a lot, consider to convert it into a vector for further processing. This improves + * performance and debugability. */ #include "BLI_listbase.h" diff --git a/source/blender/blenlib/BLI_map.hh b/source/blender/blenlib/BLI_map.hh index ea5e5da4099..54e086cc36d 100644 --- a/source/blender/blenlib/BLI_map.hh +++ b/source/blender/blenlib/BLI_map.hh @@ -20,698 +20,986 @@ /** \file * \ingroup bli * - * This file provides a map implementation that uses open addressing with probing. + * A `BLI::Map` is an unordered associative container that stores key-value pairs. The + * keys have to be unique. It is designed to be a more convenient and efficient replacement for + * `std::unordered_map`. All core operations (add, lookup, remove and contains) can be done in O(1) + * amortized expected time. * - * The key and value objects are stored directly in the hash table to avoid indirect memory - * lookups. Keys and values are stored in groups of four to avoid wasting memory due to padding. 
+ * Your default choice for a hash map in Blender should be `BLI::Map`. + * + * BLI::Map is implemented using open addressing in a slot array with a power-of-two size. Every + * slot is in one of three states: empty, occupied or removed. If a slot is occupied, it contains + * a Key and Value instance. + * + * Benchmarking and comparing hash tables is hard, because many factors influence the result. The + * performance of a hash table depends on the combination of the hash function, probing strategy, + * max load factor, data types, slot type and data distribution. This implementation is designed to + * be relatively fast by default in all cases. However, it also offers many customization points + * that allow it to be optimized for a specific use case. + * + * A rudimentary benchmark can be found in BLI_map_test.cc. The results of that benchmark are there + * as well. The numbers show that in this specific case BLI::Map outperforms std::unordered_map + * consistently by a good amount. + * + * Some noteworthy information: + * - Key and Value must be movable types. + * - Pointers to keys and values might be invalidated when the map is changed or moved. + * - The hash function can be customized. See BLI_hash.hh for details. + * - The probing strategy can be customized. See BLI_probing_strategies.hh for details. + * - The slot type can be customized. See BLI_map_slots.hh for details. + * - Small buffer optimization is enabled by default, if Key and Value are not too large. + * - The methods `add_new` and `remove_contained` should be used instead of `add` and `remove` + * whenever appropriate. Assumptions and intention are described better this way. + * - There are multiple methods to add and lookup keys for different use cases. + * - You cannot use a range-for loop on the map directly. Instead use the keys(), values() and + * items() iterators. If your map is non-const, you can also change the values through those + * iterators (but not the keys). 
+ * - Lookups can be performed using types other than Key without conversion. For that use the + * methods ending with `_as`. The template parameters Hash and IsEqual have to support the other + * key type. This can greatly improve performance when the map uses strings as keys. + * - The default constructor is cheap, even when a large InlineBufferCapacity is used. A large + * slot array will only be initialized when the first element is added. + * - The `print_stats` method can be used to get information about the distribution of keys and + * memory usage of the map. + * - The method names don't follow the std::unordered_map names in many cases. Searching for such + * names in this file will usually let you discover the new name. + * - There is a StdUnorderedMapWrapper class, that wraps std::unordered_map and gives it the same + * interface as BLI::Map. This is useful for benchmarking. */ -#include "BLI_array_ref.hh" +#include + +#include "BLI_array.hh" #include "BLI_hash.hh" -#include "BLI_open_addressing.hh" +#include "BLI_hash_tables.hh" +#include "BLI_map_slots.hh" +#include "BLI_probing_strategies.hh" namespace BLI { -// clang-format off - -#define ITER_SLOTS_BEGIN(KEY, ARRAY, OPTIONAL_CONST, R_ITEM, R_OFFSET) \ - uint32_t hash = DefaultHash{}(KEY); \ - uint32_t perturb = hash; \ - while (true) { \ - uint32_t item_index = (hash & ARRAY.slot_mask()) >> OFFSET_SHIFT; \ - uint8_t R_OFFSET = hash & OFFSET_MASK; \ - uint8_t initial_offset = R_OFFSET; \ - OPTIONAL_CONST Item &R_ITEM = ARRAY.item(item_index); \ - do { - -#define ITER_SLOTS_END(R_OFFSET) \ - R_OFFSET = (R_OFFSET + 1u) & OFFSET_MASK; \ - } while (R_OFFSET != initial_offset); \ - perturb >>= 5; \ - hash = hash * 5 + 1 + perturb; \ - } ((void)0) - -// clang-format on - -template +template< + /** + * Type of the keys stored in the map. Keys have to be movable. Furthermore, the hash and + * is-equal functions have to support it. + */ + typename Key, + /** + * Type of the value that is stored per key. 
It has to be movable as well. + */ + typename Value, + /** + * The minimum number of elements that can be stored in this Map without doing a heap + * allocation. This is useful when you expect to have many small maps. However, keep in mind + * that (unlike vector) initializing a map has a O(n) cost in the number of slots. + * + * When Key or Value are large, the small buffer optimization is disabled by default to avoid + * large unexpected allocations on the stack. It can still be enabled explicitely though. + */ + uint32_t InlineBufferCapacity = (sizeof(Key) + sizeof(Value) < 100) ? 4 : 0, + /** + * The strategy used to deal with collistions. They are defined in BLI_probing_strategies.hh. + */ + typename ProbingStrategy = DefaultProbingStrategy, + /** + * The hash function used to hash the keys. There is a default for many types. See BLI_hash.hh + * for examples on how to define a custom hash function. + */ + typename Hash = DefaultHash, + /** + * The equality operator used to compare keys. By default it will simply compare keys using the + * `==` operator. + */ + typename IsEqual = DefaultEquality, + /** + * This is what will actually be stored in the hash table array. At a minimum a slot has to be + * able to hold a key, a value and information about whether the slot is empty, occupied or + * removed. Using a non-standard slot type can improve performance or reduce the memory + * footprint for some types. Slot types are defined in BLI_map_slots.hh + */ + typename Slot = typename DefaultMapSlot::type, + /** + * The allocator used by this map. Should rarely be changed, except when you don't want that + * MEM_* is used internally. + */ + typename Allocator = GuardedAllocator> class Map { private: - static constexpr uint OFFSET_MASK = 3; - static constexpr uint OFFSET_SHIFT = 2; + /** + * Slots are either empty, occupied or removed. The number of occupied slots can be computed by + * subtracting the removed slots from the occupied-and-removed slots. 
+ */ + uint32_t m_removed_slots; + uint32_t m_occupied_and_removed_slots; - class Item { - private: - static constexpr uint8_t IS_EMPTY = 0; - static constexpr uint8_t IS_SET = 1; - static constexpr uint8_t IS_DUMMY = 2; + /** + * The maximum number of slots that can be used (either occupied or removed) until the set has to + * grow. This is the total number of slots times the max load factor. + */ + uint32_t m_usable_slots; - uint8_t m_status[4]; - AlignedBuffer<4 * sizeof(KeyT), alignof(KeyT)> m_keys_buffer; - AlignedBuffer<4 * sizeof(ValueT), alignof(ValueT)> m_values_buffer; + /** + * The number of slots minus one. This is a bit mask that can be used to turn any integer into a + * valid slot index efficiently. + */ + uint32_t m_slot_mask; - public: - static constexpr uint slots_per_item = 4; + /** This is called to hash incoming keys. */ + Hash m_hash; - Item() - { - for (uint offset = 0; offset < 4; offset++) { - m_status[offset] = IS_EMPTY; - } - } + /** This is called to check equality of two keys. */ + IsEqual m_is_equal; - ~Item() - { - for (uint offset = 0; offset < 4; offset++) { - if (m_status[offset] == IS_SET) { - this->key(offset)->~KeyT(); - this->value(offset)->~ValueT(); - } - } - } + /** The max load factor is 1/2 = 50% by default. */ +#define LOAD_FACTOR 1, 2 + LoadFactor m_max_load_factor = LoadFactor(LOAD_FACTOR); + using SlotArray = + Array; +#undef LOAD_FACTOR - Item(const Item &other) - { - for (uint offset = 0; offset < 4; offset++) { - uint8_t status = other.m_status[offset]; - m_status[offset] = status; - if (status == IS_SET) { - new (this->key(offset)) KeyT(*other.key(offset)); - new (this->value(offset)) ValueT(*other.value(offset)); - } - } - } + /** + * This is the array that contains the actual slots. There is always at least one empty slot and + * the size of the array is a power of two. 
+ */ + SlotArray m_slots; - Item(Item &&other) noexcept - { - for (uint offset = 0; offset < 4; offset++) { - uint8_t status = other.m_status[offset]; - m_status[offset] = status; - if (status == IS_SET) { - new (this->key(offset)) KeyT(std::move(*other.key(offset))); - new (this->value(offset)) ValueT(std::move(*other.value(offset))); - } - } - } + /** Iterate over a slot index sequence for a given hash. */ +#define MAP_SLOT_PROBING_BEGIN(HASH, R_SLOT) \ + SLOT_PROBING_BEGIN (ProbingStrategy, HASH, m_slot_mask, SLOT_INDEX) \ + auto &R_SLOT = m_slots[SLOT_INDEX]; +#define MAP_SLOT_PROBING_END() SLOT_PROBING_END() - bool has_key(uint offset, const KeyT &key) const - { - return m_status[offset] == IS_SET && key == *this->key(offset); - } + public: + /** + * Initialize an empty map. This is a cheap operation no matter how large the inline buffer is. + * This is necessary to avoid a high cost when no elements are added at all. An optimized grow + * operation is performed on the first insertion. 
+ */ + Map() + : m_removed_slots(0), + m_occupied_and_removed_slots(0), + m_usable_slots(0), + m_slot_mask(0), + m_hash(), + m_is_equal(), + m_slots(1) + { + } - bool is_set(uint offset) const - { - return m_status[offset] == IS_SET; - } + ~Map() = default; - bool is_empty(uint offset) const - { - return m_status[offset] == IS_EMPTY; - } + Map(const Map &other) = default; - bool is_dummy(uint offset) const - { - return m_status[offset] == IS_DUMMY; - } + Map(Map &&other) noexcept + : m_removed_slots(other.m_removed_slots), + m_occupied_and_removed_slots(other.m_occupied_and_removed_slots), + m_usable_slots(other.m_usable_slots), + m_slot_mask(other.m_slot_mask), + m_hash(std::move(other.m_hash)), + m_is_equal(std::move(other.m_is_equal)), + m_slots(std::move(other.m_slots)) + { + other.~Map(); + new (&other) Map(); + } - KeyT *key(uint offset) const - { - return (KeyT *)m_keys_buffer.ptr() + offset; + Map &operator=(const Map &other) + { + if (this == &other) { + return *this; } - ValueT *value(uint offset) const - { - return (ValueT *)m_values_buffer.ptr() + offset; - } + this->~Map(); + new (this) Map(other); - template - void store(uint offset, ForwardKeyT &&key, ForwardValueT &&value) - { - this->store_without_value(offset, std::forward(key)); - new (this->value(offset)) ValueT(std::forward(value)); - } + return *this; + } - template void store_without_value(uint offset, ForwardKeyT &&key) - { - BLI_assert(m_status[offset] != IS_SET); - m_status[offset] = IS_SET; - new (this->key(offset)) KeyT(std::forward(key)); + Map &operator=(Map &&other) + { + if (this == &other) { + return *this; } - void set_dummy(uint offset) - { - BLI_assert(m_status[offset] == IS_SET); - m_status[offset] = IS_DUMMY; - destruct(this->key(offset)); - destruct(this->value(offset)); - } - }; + this->~Map(); + new (this) Map(std::move(other)); - using ArrayType = OpenAddressingArray; - ArrayType m_array; + return *this; + } - public: - Map() = default; + /** + * Insert a new key-value-pair 
into the map. This invokes undefined behavior when the key is in + * the map already. + */ + void add_new(const Key &key, const Value &value) + { + this->add_new__impl(key, value, m_hash(key)); + } + void add_new(const Key &key, Value &&value) + { + this->add_new__impl(key, std::move(value), m_hash(key)); + } + void add_new(Key &&key, const Value &value) + { + this->add_new__impl(std::move(key), value, m_hash(key)); + } + void add_new(Key &&key, Value &&value) + { + this->add_new__impl(std::move(key), std::move(value), m_hash(key)); + } /** - * Allocate memory such that at least min_usable_slots can be added before the map has to grow - * again. + * Add a key-value-pair to the map. If the map contains the key already, nothing is changed. + * If you want to replace the currently stored value, use `add_overwrite`. + * Returns true when the key has been newly added. + * + * This is similar to std::unordered_map::insert. */ - void reserve(uint min_usable_slots) + bool add(const Key &key, const Value &value) { - if (m_array.slots_usable() < min_usable_slots) { - this->grow(min_usable_slots); - } + return this->add_as(key, value); + } + bool add(const Key &key, Value &&value) + { + return this->add_as(key, std::move(value)); + } + bool add(Key &&key, const Value &value) + { + return this->add_as(std::move(key), value); + } + bool add(Key &&key, Value &&value) + { + return this->add_as(std::move(key), std::move(value)); } /** - * Remove all elements from the map. + * Same as `add`, but accepts other key types that are supported by the hash function. */ - void clear() + template + bool add_as(ForwardKey &&key, ForwardValue &&value) { - this->~Map(); - new (this) Map(); + return this->add__impl( + std::forward(key), std::forward(value), m_hash(key)); } /** - * Insert a new key-value-pair in the map. - * Asserts when the key existed before. + * Adds a key-value-pair to the map. If the map contained the key already, the corresponding + * value will be replaced. 
+ * Returns true when the key has been newly added. + * + * This is similar to std::unordered_map::insert_or_assign. */ - void add_new(const KeyT &key, const ValueT &value) + bool add_overwrite(const Key &key, const Value &value) { - this->add_new__impl(key, value); + return this->add_overwrite_as(key, value); } - void add_new(const KeyT &key, ValueT &&value) + bool add_overwrite(const Key &key, Value &&value) { - this->add_new__impl(key, std::move(value)); + return this->add_overwrite_as(key, std::move(value)); } - void add_new(KeyT &&key, const ValueT &value) + bool add_overwrite(Key &&key, const Value &value) { - this->add_new__impl(std::move(key), value); + return this->add_overwrite_as(std::move(key), value); } - void add_new(KeyT &&key, ValueT &&value) + bool add_overwrite(Key &&key, Value &&value) { - this->add_new__impl(std::move(key), std::move(value)); + return this->add_overwrite_as(std::move(key), std::move(value)); } /** - * Insert a new key-value-pair in the map if the key does not exist yet. - * Returns true when the pair was newly inserted, otherwise false. + * Same as `add_overwrite`, but accepts other key types that are supported by the hash function. */ - bool add(const KeyT &key, const ValueT &value) + template + bool add_overwrite_as(ForwardKey &&key, ForwardValue &&value) { - return this->add__impl(key, value); + return this->add_overwrite__impl( + std::forward(key), std::forward(value), m_hash(key)); } - bool add(const KeyT &key, ValueT &&value) + + /** + * Returns true if there is a key in the map that compares equal to the given key. + * + * This is similar to std::unordered_map::contains. + */ + bool contains(const Key &key) const { - return this->add__impl(key, std::move(value)); + return this->contains_as(key); } - bool add(KeyT &&key, const ValueT &value) + + /** + * Same as `contains`, but accepts other key types that are supported by the hash function. 
+ */ + template bool contains_as(const ForwardKey &key) const { - return this->add__impl(std::move(key), value); + return this->contains__impl(key, m_hash(key)); } - bool add(KeyT &&key, ValueT &&value) + + /** + * Deletes the key-value-pair with the given key. Returns true when the key was contained and is + * now removed, otherwise false. + * + * This is similar to std::unordered_map::erase. + */ + bool remove(const Key &key) { - return this->add__impl(std::move(key), std::move(value)); + return this->remove_as(key); } /** - * Remove the key from the map. - * Asserts when the key does not exist in the map. + * Same as `remove`, but accepts other key types that are supported by the hash function. */ - void remove(const KeyT &key) + template bool remove_as(const ForwardKey &key) { - BLI_assert(this->contains(key)); - ITER_SLOTS_BEGIN (key, m_array, , item, offset) { - if (item.has_key(offset, key)) { - item.set_dummy(offset); - m_array.update__set_to_dummy(); - return; - } - } - ITER_SLOTS_END(offset); + return this->remove__impl(key, m_hash(key)); } /** - * Get the value for the given key and remove it from the map. - * Asserts when the key does not exist in the map. + * Deletes the key-value-pair with the given key. This invokes undefined behavior when the key is + * not in the map. */ - ValueT pop(const KeyT &key) + void remove_contained(const Key &key) { - BLI_assert(this->contains(key)); - ITER_SLOTS_BEGIN (key, m_array, , item, offset) { - if (item.has_key(offset, key)) { - ValueT value = std::move(*item.value(offset)); - item.set_dummy(offset); - m_array.update__set_to_dummy(); - return value; - } - } - ITER_SLOTS_END(offset); + this->remove_contained_as(key); } /** - * Returns true when the key exists in the map, otherwise false. + * Same as `remove_contained`, but accepts other key types that are supported by the hash + * function. 
*/ - bool contains(const KeyT &key) const + template void remove_contained_as(const ForwardKey &key) { - ITER_SLOTS_BEGIN (key, m_array, const, item, offset) { - if (item.is_empty(offset)) { - return false; - } - else if (item.has_key(offset, key)) { - return true; - } - } - ITER_SLOTS_END(offset); + this->remove_contained__impl(key, m_hash(key)); + } + + /** + * Get the value that is stored for the given key and remove it from the map. This invokes + * undefined behavior when the key is not in the map. + */ + Value pop(const Key &key) + { + return this->pop_as(key); + } + + /** + * Same as `pop`, but accepts other key types that are supported by the hash function. + */ + template Value pop_as(const ForwardKey &key) + { + return this->pop__impl(key, m_hash(key)); } /** - * First, checks if the key exists in the map. - * If it does exist, call the modify function with a pointer to the corresponding value. - * If it does not exist, call the create function with a pointer to where the value should be - * created. + * This method can be used to implement more complex custom behavior without having to do + * multiple lookups * - * Returns whatever is returned from one of the callback functions. Both callbacks have to return - * the same type. + * When the key did not yet exist in the map, the create_value function is called. Otherwise the + * modify_value function is called. * - * CreateValueF: Takes a pointer to where the value should be created. - * ModifyValueF: Takes a pointer to the value that should be modified. + * Both functions are expected to take a single parameter of type `Value *`. In create_value, + * this pointer will point to uninitialized memory that has to be initialized by the function. In + * modify_value, it will point to an already initialized value. + * + * The function returns whatever is returned from the create_value or modify_value callback. + * Therefore, both callbacks have to have the same return type. 
+ * + * In this example an integer is stored for every key. The initial value is five and we want to + * increase it every time the same key is used. + * map.add_or_modify(key, + * [](int *value) { *value = 5; }, + * [](int *value) { (*value)++; }); */ template - auto add_or_modify(const KeyT &key, + auto add_or_modify(const Key &key, const CreateValueF &create_value, const ModifyValueF &modify_value) -> decltype(create_value(nullptr)) { - return this->add_or_modify__impl(key, create_value, modify_value); + return this->add_or_modify_as(key, create_value, modify_value); } template - auto add_or_modify(KeyT &&key, - const CreateValueF &create_value, - const ModifyValueF &modify_value) -> decltype(create_value(nullptr)) + auto add_or_modify(Key &&key, const CreateValueF &create_value, const ModifyValueF &modify_value) + -> decltype(create_value(nullptr)) { - return this->add_or_modify__impl(std::move(key), create_value, modify_value); + return this->add_or_modify_as(std::move(key), create_value, modify_value); } /** - * Similar to add, but overrides the value for the key when it exists already. + * Same as `add_or_modify`, but accepts other key types that are supported by the hash function. */ - bool add_override(const KeyT &key, const ValueT &value) + template + auto add_or_modify_as(ForwardKey &&key, + const CreateValueF &create_value, + const ModifyValueF &modify_value) -> decltype(create_value(nullptr)) { - return this->add_override__impl(key, value); + return this->add_or_modify__impl( + std::forward(key), create_value, modify_value, m_hash(key)); } - bool add_override(const KeyT &key, ValueT &&value) + + /** + * Returns a pointer to the value that corresponds to the given key. If the key is not in the + * map, nullptr is returned. + * + * This is similar to std::unordered_map::find. 
+ */ + const Value *lookup_ptr(const Key &key) const { - return this->add_override__impl(key, std::move(value)); + return this->lookup_ptr_as(key); } - bool add_override(KeyT &&key, const ValueT &value) + Value *lookup_ptr(const Key &key) { - return this->add_override__impl(std::move(key), value); + return this->lookup_ptr_as(key); } - bool add_override(KeyT &&key, ValueT &&value) + + /** + * Same as `lookup_ptr`, but accepts other key types that are supported by the hash function. + */ + template const Value *lookup_ptr_as(const ForwardKey &key) const { - return this->add_override__impl(std::move(key), std::move(value)); + return this->lookup_ptr__impl(key, m_hash(key)); + } + template Value *lookup_ptr_as(const ForwardKey &key) + { + return const_cast(this->lookup_ptr__impl(key, m_hash(key))); } /** - * Check if the key exists in the map. - * Return a pointer to the value, when it exists. - * Otherwise return nullptr. + * Returns a reference to the value that corresponds to the given key. This invokes undefined + * behavior when the key is not in the map. */ - const ValueT *lookup_ptr(const KeyT &key) const + const Value &lookup(const Key &key) const { - ITER_SLOTS_BEGIN (key, m_array, const, item, offset) { - if (item.is_empty(offset)) { - return nullptr; - } - else if (item.has_key(offset, key)) { - return item.value(offset); - } - } - ITER_SLOTS_END(offset); + return this->lookup_as(key); } - - ValueT *lookup_ptr(const KeyT &key) + Value &lookup(const Key &key) { - const Map *const_this = this; - return const_cast(const_this->lookup_ptr(key)); + return this->lookup_as(key); } /** - * Lookup the value that corresponds to the key. - * Asserts when the key does not exist. + * Same as `lookup`, but accepts other key types that are supported by the hash function. 
*/ - const ValueT &lookup(const KeyT &key) const + template const Value &lookup_as(const ForwardKey &key) const { - const ValueT *ptr = this->lookup_ptr(key); + const Value *ptr = this->lookup_ptr_as(key); + BLI_assert(ptr != nullptr); + return *ptr; + } + template Value &lookup_as(const ForwardKey &key) + { + Value *ptr = this->lookup_ptr_as(key); BLI_assert(ptr != nullptr); return *ptr; } - ValueT &lookup(const KeyT &key) + /** + * Returns a copy of the value that corresponds to the given key. If the key is not in the + * map, the provided default_value is returned. + */ + Value lookup_default(const Key &key, const Value &default_value) const { - const Map *const_this = this; - return const_cast(const_this->lookup(key)); + return this->lookup_default_as(key, default_value); } /** - * Check if the key exists in the map. - * If it does, return a copy of the value. - * Otherwise, return the default value. + * Same as `lookup_default`, but accepts other key types that are supported by the hash function. */ - ValueT lookup_default(const KeyT &key, ValueT default_value) const + template + Value lookup_default_as(const ForwardKey &key, ForwardValue &&default_value) const { - const ValueT *ptr = this->lookup_ptr(key); + const Value *ptr = this->lookup_ptr_as(key); if (ptr != nullptr) { return *ptr; } else { - return default_value; + return std::forward(default_value); } } /** - * Return the value that corresponds to the given key. - * If it does not exist yet, create and insert it first. + * Returns a reference to the value that corresponds to the given key. If the key is not yet in + * the map, it will be newly added. + * + * The create_value callback is only called when the key did not exist yet. It is expected to + * take no parameters and return the value to be inserted. 
*/ template - ValueT &lookup_or_add(const KeyT &key, const CreateValueF &create_value) + Value &lookup_or_add(const Key &key, const CreateValueF &create_value) { - return this->lookup_or_add__impl(key, create_value); + return this->lookup_or_add_as(key, create_value); } - template - ValueT &lookup_or_add(KeyT &&key, const CreateValueF &create_value) + template Value &lookup_or_add(Key &&key, const CreateValueF &create_value) { - return this->lookup_or_add__impl(std::move(key), create_value); + return this->lookup_or_add_as(std::move(key), create_value); } /** - * Return the value that corresponds to the given key. - * If it does not exist yet, insert a new default constructed value and return that. + * Same as `lookup_or_add`, but accepts other key types that are supported by the hash function. */ - ValueT &lookup_or_add_default(const KeyT &key) + template + Value &lookup_or_add_as(ForwardKey &&key, const CreateValueF &create_value) { - return this->lookup_or_add(key, []() { return ValueT(); }); - } - ValueT &lookup_or_add_default(const KeyT &&key) - { - return this->lookup_or_add(std::move(key), []() { return ValueT(); }); + return this->lookup_or_add__impl(std::forward(key), create_value, m_hash(key)); } /** - * Get the number of elements in the map. + * Returns a reference to the value that corresponds to the given key. If the key is not yet in + * the map, it will be newly added. The newly added value will be default constructed. */ - uint32_t size() const + Value &lookup_or_add_default(const Key &key) { - return m_array.slots_set(); + return this->lookup_or_add_default_as(key); + } + Value &lookup_or_add_default(Key &&key) + { + return this->lookup_or_add_default_as(std::move(key)); } /** - * Returns true if there are no elements in the map. + * Same as `lookup_or_add_default`, but accepts other key types that are supported by the hash + * function. 
*/ - bool is_empty() const + template Value &lookup_or_add_default_as(ForwardKey &&key) { - return this->size() == 0; + return this->lookup_or_add(std::forward(key), []() { return Value(); }); } /** - * Calls the given function for each key-value-pair. + * Calls the provided callback for every key-value-pair in the map. The callback is expected + * to take a `const Key &` as first and a `const Value &` as second parameter. */ template void foreach_item(const FuncT &func) const { - for (const Item &item : m_array) { - for (uint offset = 0; offset < 4; offset++) { - if (item.is_set(offset)) { - const KeyT &key = *item.key(offset); - const ValueT &value = *item.value(offset); - func(key, value); - } + uint32_t size = this->size(); + for (uint32_t i = 0; i < size; i++) { + const Slot &slot = m_slots[i]; + if (slot.is_occupied()) { + const Key &key = *slot.key(); + const Value &value = *slot.value(); + func(key, value); } } } - void print_table() const - { - std::cout << "Hash Table:\n"; - std::cout << " Size: " << m_array.slots_set() << '\n'; - std::cout << " Capacity: " << m_array.slots_total() << '\n'; - uint32_t item_index = 0; - for (const Item &item : m_array) { - std::cout << " Item: " << item_index++ << '\n'; - for (uint offset = 0; offset < 4; offset++) { - std::cout << " " << offset << " \t"; - if (item.is_empty(offset)) { - std::cout << " \n"; - } - else if (item.is_set(offset)) { - const KeyT &key = *item.key(offset); - const ValueT &value = *item.value(offset); - uint32_t collisions = this->count_collisions(key); - std::cout << " " << key << " -> " << value << " \t Collisions: " << collisions - << '\n'; - } - else if (item.is_dummy(offset)) { - std::cout << " \n"; - } - } - } - } - - template class BaseIterator { - protected: - const Map *m_map; - uint32_t m_slot; - - public: - BaseIterator(const Map *map, uint32_t slot) : m_map(map), m_slot(slot) + /** + * A utility iterator that reduces the amount of code when implementing the actual iterators. 
+ * This uses the "curiously recurring template pattern" (CRTP). + */ + template struct BaseIterator { + Slot *m_slots; + uint32_t m_total_slots; + uint32_t m_current_slot; + + BaseIterator(const Slot *slots, uint32_t total_slots, uint32_t current_slot) + : m_slots(const_cast(slots)), + m_total_slots(total_slots), + m_current_slot(current_slot) { } BaseIterator &operator++() { - m_slot = m_map->next_slot(m_slot + 1); + while (++m_current_slot < m_total_slots) { + if (m_slots[m_current_slot].is_occupied()) { + break; + } + } return *this; } - friend bool operator==(const BaseIterator &a, const BaseIterator &b) - { - BLI_assert(a.m_map == b.m_map); - return a.m_slot == b.m_slot; - } - friend bool operator!=(const BaseIterator &a, const BaseIterator &b) { - return !(a == b); + BLI_assert(a.m_slots == b.m_slots); + BLI_assert(a.m_total_slots == b.m_total_slots); + return a.m_current_slot != b.m_current_slot; } SubIterator begin() const { - return SubIterator(m_map, m_map->next_slot(0)); + for (uint32_t i = 0; i < m_total_slots; i++) { + if (m_slots[i].is_occupied()) { + return SubIterator(m_slots, m_total_slots, i); + } + } + return this->end(); } SubIterator end() const { - return SubIterator(m_map, m_map->m_array.slots_total()); + return SubIterator(m_slots, m_total_slots, m_total_slots); + } + + Slot ¤t_slot() const + { + return m_slots[m_current_slot]; } }; class KeyIterator final : public BaseIterator { public: - KeyIterator(const Map *map, uint32_t slot) : BaseIterator(map, slot) + KeyIterator(const Slot *slots, uint32_t total_slots, uint32_t current_slot) + : BaseIterator(slots, total_slots, current_slot) { } - const KeyT &operator*() const + const Key &operator*() const { - uint32_t item_index = this->m_slot >> OFFSET_SHIFT; - uint offset = this->m_slot & OFFSET_MASK; - const Item &item = this->m_map->m_array.item(item_index); - BLI_assert(item.is_set(offset)); - return *item.key(offset); + return *this->current_slot().key(); } }; class ValueIterator final : 
public BaseIterator { public: - ValueIterator(const Map *map, uint32_t slot) : BaseIterator(map, slot) + ValueIterator(const Slot *slots, uint32_t total_slots, uint32_t current_slot) + : BaseIterator(slots, total_slots, current_slot) { } - ValueT &operator*() const + const Value &operator*() const { - uint32_t item_index = this->m_slot >> OFFSET_SHIFT; - uint offset = this->m_slot & OFFSET_MASK; - const Item &item = this->m_map->m_array.item(item_index); - BLI_assert(item.is_set(offset)); - return *item.value(offset); + return *this->current_slot().value(); } }; - class ItemIterator final : public BaseIterator { + class MutableValueIterator final : public BaseIterator { public: - ItemIterator(const Map *map, uint32_t slot) : BaseIterator(map, slot) + MutableValueIterator(const Slot *slots, uint32_t total_slots, uint32_t current_slot) + : BaseIterator(slots, total_slots, current_slot) { } - struct UserItem { - const KeyT &key; - ValueT &value; + Value &operator*() + { + return *this->current_slot().value(); + } + }; - friend std::ostream &operator<<(std::ostream &stream, const Item &item) - { - stream << item.key << " -> " << item.value; - return stream; - } + class ItemIterator final : public BaseIterator { + public: + ItemIterator(const Slot *slots, uint32_t total_slots, uint32_t current_slot) + : BaseIterator(slots, total_slots, current_slot) + { + } + + struct Item { + const Key &key; + const Value &value; }; - UserItem operator*() const + Item operator*() const { - uint32_t item_index = this->m_slot >> OFFSET_SHIFT; - uint offset = this->m_slot & OFFSET_MASK; - const Item &item = this->m_map->m_array.item(item_index); - BLI_assert(item.is_set(offset)); - return {*item.key(offset), *item.value(offset)}; + const Slot &slot = this->current_slot(); + return {*slot.key(), *slot.value()}; } }; - template friend class BaseIterator; + class MutableItemIterator final : public BaseIterator { + public: + MutableItemIterator(const Slot *slots, uint32_t total_slots, 
uint32_t current_slot) + : BaseIterator(slots, total_slots, current_slot) + { + } + + struct Item { + const Key &key; + Value &value; + }; + + Item operator*() const + { + Slot &slot = this->current_slot(); + return {*slot.key(), *slot.value()}; + } + }; /** - * Iterate over all keys in the map. + * Allows writing a range-for loop that iterates over all keys. The iterator is invalidated, when + * the map is changed. */ KeyIterator keys() const { - return KeyIterator(this, 0); + return KeyIterator(m_slots.data(), m_slots.size(), 0); } /** - * Iterate over all values in the map. + * Returns an iterator over all values in the map. The iterator is invalidated, when the map is + * changed. */ ValueIterator values() const { - return ValueIterator(this, 0); + return ValueIterator(m_slots.data(), m_slots.size(), 0); } /** - * Iterate over all key-value-pairs in the map. - * They can be accessed with item.key and item.value. + * Returns an iterator over all values in the map and allows you to change the values. The + * iterator is invalidated, when the map is changed. + */ + MutableValueIterator values() + { + return MutableValueIterator(m_slots.data(), m_slots.size(), 0); + } + + /** + * Returns an iterator over all key-value-pairs in the map. The key-value-pairs are stored in + * a temporary struct with a .key and a .value field.The iterator is invalidated, when the map is + * changed. */ ItemIterator items() const { - return ItemIterator(this, 0); + return ItemIterator(m_slots.data(), m_slots.size(), 0); } - private: - uint32_t next_slot(uint32_t slot) const - { - for (; slot < m_array.slots_total(); slot++) { - uint32_t item_index = slot >> OFFSET_SHIFT; - uint offset = slot & OFFSET_MASK; - const Item &item = m_array.item(item_index); - if (item.is_set(offset)) { - return slot; - } + /** + * Returns an iterator over all key-value-pairs in the map. The key-value-pairs are stored in + * a temporary struct with a .key and a .value field. 
The iterator is invalidated, when the map
+ * is changed.
+ *
+ * This iterator also allows you to modify the value (but not the key).
+ */
+ MutableItemIterator items()
+ {
+ return MutableItemIterator(m_slots.data(), m_slots.size(), 0);
+ }
+
+ /**
+ * Print common statistics like size and collision count. This is useful for debugging purposes.
+ */
+ void print_stats(StringRef name = "") const
+ {
+ HashTableStats stats(*this, this->keys());
+ stats.print();
+ }
+
+ /**
+ * Return the number of key-value-pairs that are stored in the map.
+ */
+ uint32_t size() const
+ {
+ return m_occupied_and_removed_slots - m_removed_slots;
+ }
+
+ /**
+ * Returns true if there are no elements in the map.
+ *
+ * This is similar to std::unordered_map::empty.
+ */
+ bool is_empty() const
+ {
+ return m_occupied_and_removed_slots == m_removed_slots;
+ }
+
+ /**
+ * Returns the number of available slots. This is mostly for debugging purposes.
+ */
+ uint32_t capacity() const
+ {
+ return m_slots.size();
+ }
+
+ /**
+ * Returns the amount of removed slots in the map. This is mostly for debugging purposes.
+ */
+ uint32_t removed_amount() const
+ {
+ return m_removed_slots;
+ }
+
+ /**
+ * Returns the bytes required per element. This is mostly for debugging purposes.
+ */
+ uint32_t size_per_element() const
+ {
+ return sizeof(Slot);
+ }
+
+ /**
+ * Returns the approximate memory requirements of the map in bytes. This becomes more exact the
+ * larger the map becomes.
+ */
+ uint32_t size_in_bytes() const
+ {
+ return sizeof(Slot) * m_slots.size();
+ }
+
+ /**
+ * Potentially resize the map such that the specified number of elements can be added without
+ * another grow operation.
+ */
+ void reserve(uint32_t n)
+ {
+ if (m_usable_slots < n) {
+ this->realloc_and_reinsert(n);
}
- uint32_t count_collisions(const KeyT &key) const
+ /**
+ * Removes all key-value-pairs from the map.
+ */ + void clear() { - uint32_t collisions = 0; - ITER_SLOTS_BEGIN (key, m_array, const, item, offset) { - if (item.is_empty(offset) || item.has_key(offset, key)) { - return collisions; + this->~Map(); + new (this) Map(); + } + + /** + * Get the number of collisions that the probing strategy has to go through to find the key or + * determine that it is not in the map. + */ + uint32_t count_collisions(const Key &key) const + { + return this->count_collisions__impl(key, m_hash(key)); + } + + private: + BLI_NOINLINE void realloc_and_reinsert(uint32_t min_usable_slots) + { + uint32_t total_slots, usable_slots; + m_max_load_factor.compute_total_and_usable_slots( + SlotArray::inline_buffer_capacity(), min_usable_slots, &total_slots, &usable_slots); + uint32_t new_slot_mask = total_slots - 1; + + /** + * Optimize the case when the map was empty beforehand. We can avoid some copies here. + */ + if (this->size() == 0) { + m_slots.~Array(); + new (&m_slots) SlotArray(total_slots); + m_removed_slots = 0; + m_occupied_and_removed_slots = 0; + m_usable_slots = usable_slots; + m_slot_mask = new_slot_mask; + return; + } + + SlotArray new_slots(total_slots); + + for (Slot &slot : m_slots) { + if (slot.is_occupied()) { + this->add_after_grow_and_destruct_old(slot, new_slots, new_slot_mask); } - collisions++; } - ITER_SLOTS_END(offset); + + /* All occupied slots have been destructed already and empty/removed slots are assumed to be + * trivially destructible. 
*/ + m_slots.clear_without_destruct(); + m_slots = std::move(new_slots); + m_occupied_and_removed_slots -= m_removed_slots; + m_usable_slots = usable_slots; + m_removed_slots = 0; + m_slot_mask = new_slot_mask; } - void ensure_can_add() + void add_after_grow_and_destruct_old(Slot &old_slot, + SlotArray &new_slots, + uint32_t new_slot_mask) { - if (UNLIKELY(m_array.should_grow())) { - this->grow(this->size() + 1); + uint32_t hash = old_slot.get_hash(Hash()); + SLOT_PROBING_BEGIN (ProbingStrategy, hash, new_slot_mask, slot_index) { + Slot &slot = new_slots[slot_index]; + if (slot.is_empty()) { + slot.relocate_occupied_here(old_slot, hash); + return; + } } + SLOT_PROBING_END(); } - BLI_NOINLINE void grow(uint32_t min_usable_slots) + template bool contains__impl(const ForwardKey &key, uint32_t hash) const { - ArrayType new_array = m_array.init_reserved(min_usable_slots); - for (Item &old_item : m_array) { - for (uint offset = 0; offset < 4; offset++) { - if (old_item.is_set(offset)) { - this->add_after_grow(*old_item.key(offset), *old_item.value(offset), new_array); - } + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + return false; + } + if (slot.contains(key, m_is_equal, hash)) { + return true; } } - m_array = std::move(new_array); + MAP_SLOT_PROBING_END(); } - void add_after_grow(KeyT &key, ValueT &value, ArrayType &new_array) + template + void add_new__impl(ForwardKey &&key, ForwardValue &&value, uint32_t hash) { - ITER_SLOTS_BEGIN (key, new_array, , item, offset) { - if (item.is_empty(offset)) { - item.store(offset, std::move(key), std::move(value)); + BLI_assert(!this->contains_as(key)); + + this->ensure_can_add(); + m_occupied_and_removed_slots++; + + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + slot.occupy(std::forward(key), std::forward(value), hash); return; } } - ITER_SLOTS_END(offset); + MAP_SLOT_PROBING_END(); } - template - bool add_override__impl(ForwardKeyT &&key, ForwardValueT &&value) + template + bool 
add__impl(ForwardKey &&key, ForwardValue &&value, uint32_t hash) { - auto create_func = [&](ValueT *dst) { - new (dst) ValueT(std::forward(value)); - return true; - }; - auto modify_func = [&](ValueT *old_value) { - *old_value = std::forward(value); - return false; - }; - return this->add_or_modify(std::forward(key), create_func, modify_func); + this->ensure_can_add(); + + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + slot.occupy(std::forward(key), std::forward(value), hash); + m_occupied_and_removed_slots++; + return true; + } + if (slot.contains(key, m_is_equal, hash)) { + return false; + } + } + MAP_SLOT_PROBING_END(); } - template - bool add__impl(ForwardKeyT &&key, ForwardValueT &&value) + template bool remove__impl(const ForwardKey &key, uint32_t hash) { - this->ensure_can_add(); - - ITER_SLOTS_BEGIN (key, m_array, , item, offset) { - if (item.is_empty(offset)) { - item.store(offset, std::forward(key), std::forward(value)); - m_array.update__empty_to_set(); + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash)) { + slot.remove(); + m_removed_slots++; return true; } - else if (item.has_key(offset, key)) { + if (slot.is_empty()) { return false; } } - ITER_SLOTS_END(offset); + MAP_SLOT_PROBING_END(); } - template - void add_new__impl(ForwardKeyT &&key, ForwardValueT &&value) + template void remove_contained__impl(const ForwardKey &key, uint32_t hash) { - BLI_assert(!this->contains(key)); - this->ensure_can_add(); + BLI_assert(this->contains_as(key)); + + m_removed_slots++; - ITER_SLOTS_BEGIN (key, m_array, , item, offset) { - if (item.is_empty(offset)) { - item.store(offset, std::forward(key), std::forward(value)); - m_array.update__empty_to_set(); + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash)) { + slot.remove(); return; } } - ITER_SLOTS_END(offset); + MAP_SLOT_PROBING_END(); } - template - auto add_or_modify__impl(ForwardKeyT &&key, + template Value pop__impl(const ForwardKey 
&key, uint32_t hash) + { + BLI_assert(this->contains_as(key)); + + m_removed_slots++; + + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash)) { + Value value = *slot.value(); + slot.remove(); + return value; + } + } + MAP_SLOT_PROBING_END(); + } + + template + auto add_or_modify__impl(ForwardKey &&key, const CreateValueF &create_value, - const ModifyValueF &modify_value) -> decltype(create_value(nullptr)) + const ModifyValueF &modify_value, + uint32_t hash) -> decltype(create_value(nullptr)) { using CreateReturnT = decltype(create_value(nullptr)); using ModifyReturnT = decltype(modify_value(nullptr)); @@ -720,42 +1008,159 @@ class Map { this->ensure_can_add(); - ITER_SLOTS_BEGIN (key, m_array, , item, offset) { - if (item.is_empty(offset)) { - m_array.update__empty_to_set(); - item.store_without_value(offset, std::forward(key)); - ValueT *value_ptr = item.value(offset); + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + m_occupied_and_removed_slots++; + slot.occupy_without_value(std::forward(key), hash); + Value *value_ptr = slot.value(); return create_value(value_ptr); } - else if (item.has_key(offset, key)) { - ValueT *value_ptr = item.value(offset); + if (slot.contains(key, m_is_equal, hash)) { + Value *value_ptr = slot.value(); return modify_value(value_ptr); } } - ITER_SLOTS_END(offset); + MAP_SLOT_PROBING_END(); } - template - ValueT &lookup_or_add__impl(ForwardKeyT &&key, const CreateValueF &create_value) + template + Value &lookup_or_add__impl(ForwardKey &&key, const CreateValueF &create_value, uint32_t hash) { this->ensure_can_add(); - ITER_SLOTS_BEGIN (key, m_array, , item, offset) { - if (item.is_empty(offset)) { - item.store(offset, std::forward(key), create_value()); - m_array.update__empty_to_set(); - return *item.value(offset); + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + slot.occupy(std::forward(key), create_value(), hash); + m_occupied_and_removed_slots++; + return *slot.value(); + 
} + if (slot.contains(key, m_is_equal, hash)) { + return *slot.value(); + } + } + MAP_SLOT_PROBING_END(); + } + + template + bool add_overwrite__impl(ForwardKey &&key, ForwardValue &&value, uint32_t hash) + { + auto create_func = [&](Value *ptr) { + new (ptr) Value(std::forward(value)); + return true; + }; + auto modify_func = [&](Value *ptr) { + *ptr = std::forward(value); + return false; + }; + return this->add_or_modify__impl( + std::forward(key), create_func, modify_func, hash); + } + + template + const Value *lookup_ptr__impl(const ForwardKey &key, uint32_t hash) const + { + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + return nullptr; + } + if (slot.contains(key, m_is_equal, hash)) { + return slot.value(); } - else if (item.has_key(offset, key)) { - return *item.value(offset); + } + MAP_SLOT_PROBING_END(); + } + + template + uint32_t count_collisions__impl(const ForwardKey &key, uint32_t hash) const + { + uint32_t collisions = 0; + + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash)) { + return collisions; + } + if (slot.is_empty()) { + return collisions; } + collisions++; + } + MAP_SLOT_PROBING_END(); + } + + void ensure_can_add() + { + if (m_occupied_and_removed_slots >= m_usable_slots) { + this->realloc_and_reinsert(this->size() + 1); + BLI_assert(m_occupied_and_removed_slots < m_usable_slots); } - ITER_SLOTS_END(offset); } }; -#undef ITER_SLOTS_BEGIN -#undef ITER_SLOTS_END +/** + * A wrapper for std::unordered_map with the API of BLI::Map. This can be used for benchmarking. 
+ */ +template class StdUnorderedMapWrapper { + private: + using MapType = std::unordered_map>; + MapType m_map; + + public: + uint32_t size() const + { + return (uint32_t)m_map.size(); + } + + bool is_empty() const + { + return m_map.empty(); + } + + void reserve(uint32_t n) + { + m_map.reserve(n); + } + + template + void add_new(ForwardKey &&key, ForwardValue &&value) + { + m_map.insert({std::forward(key), std::forward(value)}); + } + + template + bool add(ForwardKey &&key, ForwardValue &&value) + { + return m_map.insert({std::forward(key), std::forward(value)}).second; + } + + bool contains(const Key &key) const + { + return m_map.find(key) != m_map.end(); + } + + bool remove(const Key &key) + { + return (bool)m_map.erase(key); + } + + Value &lookup(const Key &key) + { + return m_map.find(key)->second; + } + + const Value &lookup(const Key &key) const + { + return m_map.find(key)->second; + } + + void clear() + { + m_map.clear(); + } + + void print_stats(StringRef UNUSED(name) = "") const + { + } +}; } // namespace BLI diff --git a/source/blender/blenlib/BLI_map_slots.hh b/source/blender/blenlib/BLI_map_slots.hh new file mode 100644 index 00000000000..dd6d706e2af --- /dev/null +++ b/source/blender/blenlib/BLI_map_slots.hh @@ -0,0 +1,361 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef __BLI_MAP_SLOTS_HH__ +#define __BLI_MAP_SLOTS_HH__ + +/** \file + * \ingroup bli + * + * This file contains slot types that are supposed to be used with BLI::Map. + * + * Every slot type has to be able to hold a value of type Key, a value of type Value and state + * information. A map slot has three possible states: empty, occupied and removed. + * + * When a slot is occupied, it stores instances of type Key and Value. + * + * A map slot type has to implement a couple of methods that are explained in SimpleMapSlot. + * A slot type is assumed to be trivially destructible, when it is not in occupied state. So the + * destructor might not be called in that case. + * + * Possible Improvements: + * - Implement slot type that stores the hash. + */ + +#include "BLI_memory_utils.hh" + +namespace BLI { + +/** + * The simplest possible map slot. It stores the slot state and the optional key and value + * instances in separate variables. Depending on the alignment requirement of the key and value, + * many bytes might be wasted. + */ +template class SimpleMapSlot { + private: + enum State : uint8_t { + Empty = 0, + Occupied = 1, + Removed = 2, + }; + + State m_state; + AlignedBuffer m_key_buffer; + AlignedBuffer m_value_buffer; + + public: + /** + * After the default constructor has run, the slot has to be in the empty state. + */ + SimpleMapSlot() + { + m_state = Empty; + } + + /** + * The destructor also has to destruct the key and value, if the slot is currently occupied. + */ + ~SimpleMapSlot() + { + if (m_state == Occupied) { + this->key()->~Key(); + this->value()->~Value(); + } + } + + /** + * The copy constructor has to copy the state. If the other slot was occupied, a copy of the key + * and value have to be made as well. 
+ */ + SimpleMapSlot(const SimpleMapSlot &other) + { + m_state = other.m_state; + if (other.m_state == Occupied) { + new (this->key()) Key(*other.key()); + new (this->value()) Value(*other.value()); + } + } + + /** + * The move construtor has to copy the state. If the other slot was occupied, the key and value + * from the other have to moved as well. The other slot stays in the state it was in before. Its + * optionally stored key and value remain in a moved-from state. + */ + SimpleMapSlot(SimpleMapSlot &&other) noexcept + { + m_state = other.m_state; + if (other.m_state == Occupied) { + new (this->key()) Key(std::move(*other.key())); + new (this->value()) Value(std::move(*other.value())); + } + } + + /** + * Returns a non-const pointer to the position where the key is stored. + */ + Key *key() + { + return (Key *)m_key_buffer.ptr(); + } + + /** + * Returns a const pointer to the position where the key is stored. + */ + const Key *key() const + { + return (const Key *)m_key_buffer.ptr(); + } + + /** + * Returns a non-const pointer to the position where the value is stored. + */ + Value *value() + { + return (Value *)m_value_buffer.ptr(); + } + + /** + * Returns a const pointer to the position where the value is stored. + */ + const Value *value() const + { + return (const Value *)m_value_buffer.ptr(); + } + + /** + * Returns true if the slot currently contains a key and a value. + */ + bool is_occupied() const + { + return m_state == Occupied; + } + + /** + * Returns true if the slot is empty, i.e. it does not contain a key and is not in removed state. + */ + bool is_empty() const + { + return m_state == Empty; + } + + /** + * Returns the hash of the currently stored key. In this simple map slot implementation, we just + * computed the hash here. Other implementations might store the hash in the slot instead. 
+ */ + template uint32_t get_hash(const Hash &hash) + { + BLI_assert(this->is_occupied()); + return hash(*this->key()); + } + + /** + * Move the other slot into this slot and destruct it. We do destruction here, because this way + * we can avoid a comparison with the state, since we know the slot is occupied. + */ + void relocate_occupied_here(SimpleMapSlot &other, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + BLI_assert(other.is_occupied()); + m_state = Occupied; + new (this->key()) Key(std::move(*other.key())); + new (this->value()) Value(std::move(*other.value())); + other.key()->~Key(); + other.value()->~Value(); + } + + /** + * Returns true, when this slot is occupied and contains a key that compares equal to the given + * key. The hash can be used by other slot implementations to determine inequality faster. + */ + template + bool contains(const ForwardKey &key, const IsEqual &is_equal, uint32_t UNUSED(hash)) const + { + if (m_state == Occupied) { + return is_equal(key, *this->key()); + } + return false; + } + + /** + * Change the state of this slot from empty/removed to occupied. The key/value has to be + * constructed by calling the constructor with the given key/value as parameter. + */ + template + void occupy(ForwardKey &&key, ForwardValue &&value, uint32_t hash) + { + BLI_assert(!this->is_occupied()); + this->occupy_without_value(std::forward(key), hash); + new (this->value()) Value(std::forward(value)); + } + + /** + * Change the state of this slot from empty/removed to occupied, but leave the value + * uninitialized. The caller is responsible to construct the value afterwards. + */ + template void occupy_without_value(ForwardKey &&key, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + m_state = Occupied; + new (this->key()) Key(std::forward(key)); + } + + /** + * Change the state of this slot from occupied to removed. The key and value have to be + * destructed as well. 
+ */ + void remove() + { + BLI_assert(this->is_occupied()); + m_state = Removed; + this->key()->~Key(); + this->value()->~Value(); + } +}; + +/** + * An IntrusiveMapSlot uses two special values of the key to indicate whether the slot is empty or + * removed. This saves some memory in all cases and is more efficient in many cases. The KeyInfo + * type indicates which specific values are used. An example for a KeyInfo implementation is + * PointerKeyInfo. + * + * The special key values are expected to be trivially destructible. + */ +template class IntrusiveMapSlot { + private: + Key m_key = KeyInfo::get_empty(); + AlignedBuffer m_value_buffer; + + public: + IntrusiveMapSlot() = default; + + ~IntrusiveMapSlot() + { + if (KeyInfo::is_not_empty_or_removed(m_key)) { + this->value()->~Value(); + } + } + + IntrusiveMapSlot(const IntrusiveMapSlot &other) : m_key(other.m_key) + { + if (KeyInfo::is_not_empty_or_removed(m_key)) { + new (this->value()) Value(*other.value()); + } + } + + IntrusiveMapSlot(IntrusiveMapSlot &&other) noexcept : m_key(other.m_key) + { + if (KeyInfo::is_not_empty_or_removed(m_key)) { + new (this->value()) Value(std::move(*other.value())); + } + } + + Key *key() + { + return &m_key; + } + + const Key *key() const + { + return &m_key; + } + + Value *value() + { + return (Value *)m_value_buffer.ptr(); + } + + const Value *value() const + { + return (const Value *)m_value_buffer.ptr(); + } + + bool is_occupied() const + { + return KeyInfo::is_not_empty_or_removed(m_key); + } + + bool is_empty() const + { + return KeyInfo::is_empty(m_key); + } + + template uint32_t get_hash(const Hash &hash) + { + BLI_assert(this->is_occupied()); + return hash(*this->key()); + } + + void relocate_occupied_here(IntrusiveMapSlot &other, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + BLI_assert(other.is_occupied()); + m_key = std::move(other.m_key); + new (this->value()) Value(std::move(*other.value())); + other.m_key.~Key(); + other.value()->~Value(); + } 
+ + template + bool contains(const ForwardKey &key, const IsEqual &is_equal, uint32_t UNUSED(hash)) const + { + BLI_assert(KeyInfo::is_not_empty_or_removed(key)); + return is_equal(key, m_key); + } + + template + void occupy(ForwardKey &&key, ForwardValue &&value, uint32_t hash) + { + BLI_assert(!this->is_occupied()); + BLI_assert(KeyInfo::is_not_empty_or_removed(key)); + this->occupy_without_value(std::forward(key), hash); + new (this->value()) Value(std::forward(value)); + } + + template void occupy_without_value(ForwardKey &&key, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + BLI_assert(KeyInfo::is_not_empty_or_removed(key)); + m_key = std::forward(key); + } + + void remove() + { + BLI_assert(this->is_occupied()); + KeyInfo::remove(m_key); + this->value()->~Value(); + } +}; + +template struct DefaultMapSlot; + +/** + * Use SimpleMapSlot by default, because it is the smallest slot type, that works for all keys. + */ +template struct DefaultMapSlot { + using type = SimpleMapSlot; +}; + +/** + * Use a special slot type for pointer keys, because we can store whether a slot is empty or + * removed with special pointer values. 
+ */ +template struct DefaultMapSlot { + using type = IntrusiveMapSlot>; +}; + +} // namespace BLI + +#endif /* __BLI_MAP_SLOTS_HH__ */ diff --git a/source/blender/blenlib/BLI_math_bits.h b/source/blender/blenlib/BLI_math_bits.h index 842fce22f91..0283622ca89 100644 --- a/source/blender/blenlib/BLI_math_bits.h +++ b/source/blender/blenlib/BLI_math_bits.h @@ -23,6 +23,7 @@ */ #include "BLI_math_inline.h" +#include "BLI_utildefines.h" #ifdef __cplusplus extern "C" { diff --git a/source/blender/blenlib/BLI_memory_utils.hh b/source/blender/blenlib/BLI_memory_utils.hh index d9acf08a43f..2f7b27240f3 100644 --- a/source/blender/blenlib/BLI_memory_utils.hh +++ b/source/blender/blenlib/BLI_memory_utils.hh @@ -21,71 +21,191 @@ * \ingroup bli */ -#include #include #include "BLI_utildefines.h" namespace BLI { -using std::copy; -using std::copy_n; -using std::uninitialized_copy; -using std::uninitialized_copy_n; -using std::uninitialized_fill; -using std::uninitialized_fill_n; +/** + * Call the default constructor on n consecutive elements. For trivially constructible types, this + * does nothing. + * + * Before: + * ptr: uninitialized + * After: + * ptr: initialized + */ +template void default_construct_n(T *ptr, uint n) +{ + /* This is not strictly necessary, because the loop below will be optimized away anyway. It is + * nice to make behavior this explicitely, though. */ + if (std::is_trivially_constructible::value) { + return; + } -template void construct_default(T *ptr) + for (uint i = 0; i < n; i++) { + new (ptr + i) T; + } +} + +/** + * Call the destructor on n consecutive values. For trivially destructible types, this does + * nothing. + * + * Before: + * ptr: initialized + * After: + * ptr: uninitialized + */ +template void destruct_n(T *ptr, uint n) { - new (ptr) T(); + /* This is not strictly necessary, because the loop below will be optimized away anyway. It is + * nice to make behavior this explicitely, though. 
*/ + if (std::is_trivially_destructible::value) { + return; + } + + for (uint i = 0; i < n; i++) { + ptr[i].~T(); + } } -template void destruct(T *ptr) +/** + * Copy n values from src to dst. + * + * Before: + * src: initialized + * dst: initialized + * After: + * src: initialized + * dst: initialized + */ +template void initialized_copy_n(const T *src, uint n, T *dst) { - ptr->~T(); + for (uint i = 0; i < n; i++) { + dst[i] = src[i]; + } } -template void destruct_n(T *ptr, uint n) +/** + * Copy n values from src to dst. + * + * Before: + * src: initialized + * dst: uninitialized + * After: + * src: initialized + * dst: initialized + */ +template void uninitialized_copy_n(const T *src, uint n, T *dst) { for (uint i = 0; i < n; i++) { - ptr[i].~T(); + new (dst + i) T(src[i]); } } -template void uninitialized_move_n(T *src, uint n, T *dst) +/** + * Move n values from src to dst. + * + * Before: + * src: initialized + * dst: initialized + * After: + * src: initialized, moved-from + * dst: initialized + */ +template void initialized_move_n(T *src, uint n, T *dst) { - std::uninitialized_copy_n(std::make_move_iterator(src), n, dst); + for (uint i = 0; i < n; i++) { + dst[i] = std::move(src[i]); + } } -template void move_n(T *src, uint n, T *dst) +/** + * Move n values from src to dst. + * + * Before: + * src: initialized + * dst: uninitialized + * After: + * src: initialized, moved-from + * dst: initialized + */ +template void uninitialized_move_n(T *src, uint n, T *dst) { - std::copy_n(std::make_move_iterator(src), n, dst); + for (uint i = 0; i < n; i++) { + new (dst + i) T(std::move(src[i])); + } } -template void uninitialized_relocate(T *src, T *dst) +/** + * Relocate n values from src to dst. Relocation is a move followed by destruction of the src + * value. 
+ * + * Before: + * src: initialized + * dst: initialized + * After: + * src: uninitialized + * dst: initialized + */ +template void initialized_relocate_n(T *src, uint n, T *dst) { - new (dst) T(std::move(*src)); - destruct(src); + initialized_move_n(src, n, dst); + destruct_n(src, n); } +/** + * Relocate n values from src to dst. Relocation is a move followed by destruction of the src + * value. + * + * Before: + * src: initialized + * dst: uinitialized + * After: + * src: uninitialized + * dst: initialized + */ template void uninitialized_relocate_n(T *src, uint n, T *dst) { uninitialized_move_n(src, n, dst); destruct_n(src, n); } -template void relocate(T *src, T *dst) +/** + * Copy the value to n consecutive elements. + * + * Before: + * dst: initialized + * After: + * dst: initialized + */ +template void initialized_fill_n(T *dst, uint n, const T &value) { - *dst = std::move(*src); - destruct(src); + for (uint i = 0; i < n; i++) { + dst[i] = value; + } } -template void relocate_n(T *src, uint n, T *dst) +/** + * Copy the value to n consecutive elements. + * + * Before: + * dst: uninitialized + * After: + * dst: initialized + */ +template void uninitialized_fill_n(T *dst, uint n, const T &value) { - move_n(src, n, dst); - destruct_n(src, n); + for (uint i = 0; i < n; i++) { + new (dst + i) T(value); + } } +/** + * The same as std::unique_ptr. This can be removed when we start using C++14. + */ template std::unique_ptr make_unique(Args &&... args) { return std::unique_ptr(new T(std::forward(args)...)); @@ -98,13 +218,24 @@ template struct DestructValueAtAddress { } }; +/** + * A destruct_ptr is like unique_ptr, but it will only call the destructor and will not free the + * memory. This is useful when using custom allocators. + */ template using destruct_ptr = std::unique_ptr>; -template class alignas(Alignment) AlignedBuffer { +/** + * An `AlignedBuffer` is simply a byte array with the given size and alignment. 
The buffer will + * not be initialized by the default constructor. + * + * This can be used to reserve memory for C++ objects whose lifetime is different from the + * lifetime of the object they are embedded in. It's used by containers with small buffer + * optimization and hash table implementations. + */ +template class alignas(Alignment) AlignedBuffer { private: /* Don't create an empty array. This causes problems with some compilers. */ - static constexpr uint ActualSize = (Size > 0) ? Size : 1; - char m_buffer[ActualSize]; + char m_buffer[(Size > 0) ? Size : 1]; public: void *ptr() diff --git a/source/blender/blenlib/BLI_open_addressing.hh b/source/blender/blenlib/BLI_open_addressing.hh deleted file mode 100644 index 3bd932350d0..00000000000 --- a/source/blender/blenlib/BLI_open_addressing.hh +++ /dev/null @@ -1,316 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef __BLI_OPEN_ADDRESSING_HH__ -#define __BLI_OPEN_ADDRESSING_HH__ - -/** \file - * \ingroup bli - * - * This class offers a useful abstraction for other containers that implement hash tables using - * open addressing. It handles the following aspects: - * - Allocation and deallocation of the open addressing array. - * - Optional small object optimization. 
- * - Keeps track of how many elements and dummies are in the table. - * - * The nice thing about this abstraction is that it does not get in the way of any performance - * optimizations. The data that is actually stored in the table is still fully defined by the - * actual hash table implementation. - */ - -#include - -#include "BLI_allocator.hh" -#include "BLI_array.hh" -#include "BLI_math_base.h" -#include "BLI_memory_utils.hh" -#include "BLI_utildefines.h" - -namespace BLI { - -/** \name Constexpr utility functions. - * \{ */ - -inline constexpr int is_power_of_2_i_constexpr(int n) -{ - return (n & (n - 1)) == 0; -} - -inline constexpr uint32_t log2_floor_u_constexpr(uint32_t x) -{ - return x <= 1 ? 0 : 1 + log2_floor_u_constexpr(x >> 1); -} - -inline constexpr uint32_t log2_ceil_u_constexpr(uint32_t x) -{ - return (is_power_of_2_i_constexpr((int)x)) ? log2_floor_u_constexpr(x) : - log2_floor_u_constexpr(x) + 1; -} - -template inline constexpr IntT ceil_division(IntT x, IntT y) -{ - BLI_STATIC_ASSERT(!std::is_signed::value, ""); - return x / y + ((x % y) != 0); -} - -template inline constexpr IntT floor_division(IntT x, IntT y) -{ - BLI_STATIC_ASSERT(!std::is_signed::value, ""); - return x / y; -} - -inline constexpr uint8_t compute_item_exponent(uint32_t min_usable_slots, - uint32_t slots_per_item, - uint32_t max_load_factor_numerator, - uint32_t max_load_factor_denominator) -{ - // uint64_t min_total_slots = ceil_division((uint64_t)min_usable_slots * - // (uint64_t)max_load_factor_denominator, - // (uint64_t)max_load_factor_numerator); - // uint32_t min_total_items = (uint32_t)ceil_division(min_total_slots, (uint64_t)slots_per_item); - // uint8_t item_exponent = (uint8_t)log2_ceil_u_constexpr(min_total_items); - // return item_exponent; - - return (uint8_t)log2_ceil_u_constexpr((uint32_t)ceil_division( - ceil_division((uint64_t)min_usable_slots * (uint64_t)max_load_factor_denominator, - (uint64_t)max_load_factor_numerator), - (uint64_t)slots_per_item)); -} - 
-/** \} */ - -template -class OpenAddressingArray { - private: - static constexpr uint32_t s_max_load_factor_numerator = 1; - static constexpr uint32_t s_max_load_factor_denominator = 2; - static constexpr uint32_t s_slots_per_item = Item::slots_per_item; - - static constexpr uint8_t s_small_storage_item_exponent = compute_item_exponent( - MinUsableSlotsInSmallStorage, - s_slots_per_item, - s_max_load_factor_numerator, - s_max_load_factor_denominator); - static constexpr uint32_t s_items_in_small_storage = 1u << s_small_storage_item_exponent; - - /* Invariants: - * 2^m_item_exponent = m_item_amount - * m_item_amount * s_slots_per_item = m_slots_total - * m_slot_mask = m_slots_total - 1 - * m_slots_set_or_dummy < m_slots_total - */ - - /* Number of items in the hash table. Must be a power of two. */ - uint32_t m_item_amount; - /* Exponent of the current item amount. */ - uint8_t m_item_exponent; - /* Number of elements that could be stored in the table when the load factor is 1. */ - uint32_t m_slots_total; - /* Number of elements that are not empty. */ - uint32_t m_slots_set_or_dummy; - /* Number of dummy entries. */ - uint32_t m_slots_dummy; - /* Max number of slots that can be non-empty according to the load factor. */ - uint32_t m_slots_usable; - /* Can be used to map a hash value into the range of valid slot indices. 
*/ - uint32_t m_slot_mask; - - Array m_items; - - public: - explicit OpenAddressingArray(uint8_t item_exponent = s_small_storage_item_exponent) - { - m_item_exponent = item_exponent; - m_item_amount = 1u << item_exponent; - m_slots_total = m_item_amount * s_slots_per_item; - m_slot_mask = m_slots_total - 1; - m_slots_set_or_dummy = 0; - m_slots_dummy = 0; - m_slots_usable = (uint32_t)floor_division((uint64_t)m_slots_total * - (uint64_t)s_max_load_factor_numerator, - (uint64_t)s_max_load_factor_denominator); - - m_items = Array(m_item_amount); - } - - ~OpenAddressingArray() = default; - - OpenAddressingArray(const OpenAddressingArray &other) = default; - - OpenAddressingArray(OpenAddressingArray &&other) noexcept - { - m_slots_total = other.m_slots_total; - m_slots_set_or_dummy = other.m_slots_set_or_dummy; - m_slots_dummy = other.m_slots_dummy; - m_slots_usable = other.m_slots_usable; - m_slot_mask = other.m_slot_mask; - m_item_amount = other.m_item_amount; - m_item_exponent = other.m_item_exponent; - m_items = std::move(other.m_items); - - other.~OpenAddressingArray(); - new (&other) OpenAddressingArray(); - } - - OpenAddressingArray &operator=(const OpenAddressingArray &other) - { - if (this == &other) { - return *this; - } - this->~OpenAddressingArray(); - new (this) OpenAddressingArray(other); - return *this; - } - - OpenAddressingArray &operator=(OpenAddressingArray &&other) - { - if (this == &other) { - return *this; - } - this->~OpenAddressingArray(); - new (this) OpenAddressingArray(std::move(other)); - return *this; - } - - Allocator &allocator() - { - return m_items.allocator(); - } - - /* Prepare a new array that can hold a minimum of min_usable_slots elements. All entries are - * empty. 
*/ - OpenAddressingArray init_reserved(uint32_t min_usable_slots) const - { - uint8_t item_exponent = compute_item_exponent(min_usable_slots, - s_slots_per_item, - s_max_load_factor_numerator, - s_max_load_factor_denominator); - OpenAddressingArray grown(item_exponent); - grown.m_slots_set_or_dummy = this->slots_set(); - return grown; - } - - /** - * Amount of items in the array times the number of slots per item. - */ - uint32_t slots_total() const - { - return m_slots_total; - } - - /** - * Amount of slots that are initialized with some value that is not empty or dummy. - */ - uint32_t slots_set() const - { - return m_slots_set_or_dummy - m_slots_dummy; - } - - /** - * Amount of slots that can be used before the array should grow. - */ - uint32_t slots_usable() const - { - return m_slots_usable; - } - - /** - * Update the counters after one empty element is used for a newly added element. - */ - void update__empty_to_set() - { - m_slots_set_or_dummy++; - } - - /** - * Update the counters after one previously dummy element becomes set. - */ - void update__dummy_to_set() - { - m_slots_dummy--; - } - - /** - * Update the counters after one previously set element becomes a dummy. - */ - void update__set_to_dummy() - { - m_slots_dummy++; - } - - /** - * Access the current slot mask for this array. - */ - uint32_t slot_mask() const - { - return m_slot_mask; - } - - /** - * Access the item for a specific item index. - * Note: The item index is not necessarily the slot index. 
- */ - const Item &item(uint32_t item_index) const - { - return m_items[item_index]; - } - - Item &item(uint32_t item_index) - { - return m_items[item_index]; - } - - uint8_t item_exponent() const - { - return m_item_exponent; - } - - uint32_t item_amount() const - { - return m_item_amount; - } - - bool should_grow() const - { - return m_slots_set_or_dummy >= m_slots_usable; - } - - Item *begin() - { - return m_items.begin(); - } - - Item *end() - { - return m_items.end(); - } - - const Item *begin() const - { - return m_items.begin(); - } - - const Item *end() const - { - return m_items.end(); - } -}; - -} // namespace BLI - -#endif /* __BLI_OPEN_ADDRESSING_HH__ */ diff --git a/source/blender/blenlib/BLI_optional.hh b/source/blender/blenlib/BLI_optional.hh index eac11293781..aeef0e7e151 100644 --- a/source/blender/blenlib/BLI_optional.hh +++ b/source/blender/blenlib/BLI_optional.hh @@ -37,16 +37,6 @@ template class Optional { bool m_set; public: - static Optional FromPointer(const T *ptr) - { - if (ptr == nullptr) { - return Optional(); - } - else { - return Optional(*ptr); - } - } - Optional() : m_set(false) { } diff --git a/source/blender/blenlib/BLI_probing_strategies.hh b/source/blender/blenlib/BLI_probing_strategies.hh new file mode 100644 index 00000000000..9fa402d4244 --- /dev/null +++ b/source/blender/blenlib/BLI_probing_strategies.hh @@ -0,0 +1,250 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __BLI_PROBING_STRATEGIES_HH__ +#define __BLI_PROBING_STRATEGIES_HH__ + +/** \file + * \ingroup bli + * + * This file implements different probing strategies. Those can be used by different hash table + * implementations like BLI::Set and BLI::Map. A probing strategy produces a sequence of values + * based on an initial hash value. + * + * A probing strategy has to implement the following methods: + * - Constructor(uint32_t hash): Start a new probing sequence based on the given hash. + * - get() const -> uint32_t: Get the current value in the sequence. + * - next() -> void: Update the internal state, so that the next value can be accessed with get(). + * - linear_steps() -> uint32_t: Returns number of linear probing steps that should be done. + * + * Using linear probing steps between larger jumps can result in better performance, due to + * improved cache usage. It's a way of getting the benefits or linear probing without the + * clustering issues. However, more linear steps can also make things slower when the initial hash + * produces many collisions. + * + * Every probing strategy has to guarantee, that every possible uint32_t is returned eventually. + * This is necessary for correctness. If this is not the case, empty slots might not be found. + * + * The SLOT_PROBING_BEGIN and SLOT_PROBING_END macros can be used to implement a loop that iterates + * over a probing sequence. + * + * Probing strategies can be evaluated with many different criteria. Different use cases often + * have different optimal strategies. Examples: + * - If the hash function generates a well distributed initial hash value, the constructor should + * be as short as possible. 
This is because the hash value can be used as slot index almost + * immediately, without too many collisions. This is also a perfect use case for linear steps. + * - If the hash function is bad, it can help if the probing strategy remixes the hash value, + * before the first slot is accessed. + * - Different next() methods can remix the hash value in different ways. Depending on which bits + * of the hash value contain the most information, different rehashing strategies work best. + * - When the hash table is very small, having a trivial hash function and then doing linear + * probing might work best. + */ + +#include "BLI_sys_types.h" + +namespace BLI { + +/** + * The simplest probing strategy. It's bad in most cases, because it produces clusters in the hash + * table, which result in many collisions. However, if the hash function is very good or the hash + * table is small, this strategy might even work best. + */ +class LinearProbingStrategy { + private: + uint32_t m_hash; + + public: + LinearProbingStrategy(uint32_t hash) : m_hash(hash) + { + } + + void next() + { + m_hash++; + } + + uint32_t get() const + { + return m_hash; + } + + uint32_t linear_steps() const + { + return UINT32_MAX; + } +}; + +/** + * A slightly adapted quadratic probing strategy. The distance to the original slot increases + * quadratically. This method also leads to clustering. Another disadvantage is that not all bits + * of the original hash are used. + * + * The distance i * i is not used, because it does not guarantee, that every slot is hit. + * Instead (i * i + i) / 2 is used, which has this desired property. + * + * In the first few steps, this strategy can have good cache performance. It largely depends on how + * many keys fit into a cache line in the hash table. 
+ */ +class QuadraticProbingStrategy { + private: + uint32_t m_original_hash; + uint32_t m_current_hash; + uint32_t m_iteration; + + public: + QuadraticProbingStrategy(uint32_t hash) + : m_original_hash(hash), m_current_hash(hash), m_iteration(1) + { + } + + void next() + { + m_current_hash = m_original_hash + ((m_iteration * m_iteration + m_iteration) >> 1); + m_iteration++; + } + + uint32_t get() const + { + return m_current_hash; + } + + uint32_t linear_steps() const + { + return 1; + } +}; + +/** + * This is the probing strategy used by CPython (in 2020). + * + * It is very fast when the original hash value is good. If there are collisions, more bits of the + * hash value are taken into account. + * + * LinearSteps: Can be set to something larger than 1 for improved cache performance in some cases. + * PreShuffle: When true, the initial call to next() will be done to the constructor. This can help + * when the hash function has put little information into the lower bits. + */ +template class PythonProbingStrategy { + private: + uint32_t m_hash; + uint32_t m_perturb; + + public: + PythonProbingStrategy(uint32_t hash) : m_hash(hash), m_perturb(hash) + { + if (PreShuffle) { + this->next(); + } + } + + void next() + { + m_perturb >>= 5; + m_hash = 5 * m_hash + 1 + m_perturb; + } + + uint32_t get() const + { + return m_hash; + } + + uint32_t linear_steps() const + { + return LinearSteps; + } +}; + +/** + * Similar to the Python probing strategy. However, it does a bit more shuffling in the next() + * method. This way more bits are taken into account earlier. After a couple of collisions (that + * should happen rarely), it will fallback to a sequence that hits every slot. 
+ */ +template class ShuffleProbingStrategy { + private: + uint32_t m_hash; + uint32_t m_perturb; + + public: + ShuffleProbingStrategy(uint32_t hash) : m_hash(hash), m_perturb(hash) + { + if (PreShuffle) { + this->next(); + } + } + + void next() + { + if (m_perturb != 0) { + m_perturb >>= 10; + m_hash = ((m_hash >> 16) ^ m_hash) * 0x45d9f3b + m_perturb; + } + else { + m_hash = 5 * m_hash + 1; + } + } + + uint32_t get() const + { + return m_hash; + } + + uint32_t linear_steps() const + { + return LinearSteps; + } +}; + +/** + * Having a specified default is convenient. + */ +using DefaultProbingStrategy = PythonProbingStrategy<>; + +/* Turning off clang format here, because otherwise it will mess up the alignment between the + * macros. */ +// clang-format off + +/** + * Both macros together form a loop that iterates over slot indices in a hash table with a + * power-of-two size. + * + * You must not `break` out of this loop. Only `return` is permitted. If you don't return + * out of the loop, it will be an infinite loop. These loops should not be nested within the + * same function. + * + * PROBING_STRATEGY: Class describing the probing strategy. + * HASH: The initial hash as produced by a hash function. + * MASK: A bit mask such that (hash & MASK) is a valid slot index. + * R_SLOT_INDEX: Name of the variable that will contain the slot index. 
+ */ +#define SLOT_PROBING_BEGIN(PROBING_STRATEGY, HASH, MASK, R_SLOT_INDEX) \ + PROBING_STRATEGY probing_strategy(HASH); \ + do { \ + uint32_t linear_offset = 0; \ + uint32_t current_hash = probing_strategy.get(); \ + do { \ + uint32_t R_SLOT_INDEX = (current_hash + linear_offset) & MASK; + +#define SLOT_PROBING_END() \ + } while (++linear_offset < probing_strategy.linear_steps()); \ + probing_strategy.next(); \ + } while (true) + +// clang-format on + +} // namespace BLI + +#endif /* __BLI_PROBING_STRATEGIES_HH__ */ diff --git a/source/blender/blenlib/BLI_set.hh b/source/blender/blenlib/BLI_set.hh index dc9df5d116a..d991c97a629 100644 --- a/source/blender/blenlib/BLI_set.hh +++ b/source/blender/blenlib/BLI_set.hh @@ -20,270 +20,411 @@ /** \file * \ingroup bli * - * This file provides a set implementation that uses open addressing with probing. + * A `BLI::Set` is an unordered container for unique elements of type `Key`. It is designed to + * be a more convenient and efficient replacement for `std::unordered_set`. All core operations + * (add, remove and contains) can be done in O(1) amortized expected time. + * + * In most cases, your default choice for a hash set in Blender should be `BLI::Set`. + * + * BLI::Set is implemented using open addressing in a slot array with a power-of-two size. Every + * slot is in one of three states: empty, occupied or removed. If a slot is occupied, it contains + * an instance of the key type. + * + * Benchmarking and comparing hash tables is hard, because many factors influence the result. The + * performance of a hash table depends on the combination of the hash function, probing strategy, + * max load factor, key type, slot type and the data distribution. This implementation is designed + * to be relatively fast by default in all cases. However, it also offers many customization + * points that allow it to be optimized for a specific use case. + * + * A rudimentary benchmark can be found in BLI_set_test.cc. 
The results of that benchmark are + * there as well. The numbers show that in this specific case BLI::Set outperforms + * std::unordered_set consistently by a good amount. + * + * Some noteworthy information: + * - Key must be a movable type. + * - Pointers to keys might be invalidated when the set is changed or moved. + * - The hash function can be customized. See BLI_hash.hh for details. + * - The probing strategy can be customized. See BLI_probing_stragies.hh for details. + * - The slot type can be customized. See BLI_set_slots.hh for details. + * - Small buffer optimization is enabled by default, if the key is not too large. + * - The methods `add_new` and `remove_contained` should be used instead of `add` and `remove` + * whenever appropriate. Assumptions and intention are described better this way. + * - Lookups can be performed using types other than Key without conversion. For that use the + * methods ending with `_as`. The template parameters Hash and IsEqual have to support the other + * key type. This can greatly improve performance when the set contains strings. + * - The default constructor is cheap, even when a large InlineBufferCapacity is used. A large + * slot array will only be initialized when the first key is added. + * - The `print_stats` method can be used to get information about the distribution of keys and + * memory usage of the set. + * - The method names don't follow the std::unordered_set names in many cases. Searching for such + * names in this file will usually let you discover the new name. + * - There is a StdUnorderedSetWrapper class, that wraps std::unordered_set and gives it the same + * interface as BLI::Set. This is useful for benchmarking. + * + * Possible Improvements: + * - Use a branchless loop over slots in grow function (measured ~10% performance improvement when + * the distribution of occupied slots is suffiently random). + * - Support max load factor customization. 
+ * - Improve performance with large data sets through software prefetching. I got fairly + * significant improvements in simple tests (~30% faster). It still needs to be investigated how + * to make a nice interface for this functionality. */ +#include + +#include "BLI_array.hh" #include "BLI_hash.hh" -#include "BLI_open_addressing.hh" -#include "BLI_vector.hh" +#include "BLI_hash_tables.hh" +#include "BLI_probing_strategies.hh" +#include "BLI_set_slots.hh" namespace BLI { -// clang-format off - -#define ITER_SLOTS_BEGIN(VALUE, ARRAY, OPTIONAL_CONST, R_ITEM, R_OFFSET) \ - uint32_t hash = DefaultHash{}(VALUE); \ - uint32_t perturb = hash; \ - while (true) { \ - uint32_t item_index = (hash & ARRAY.slot_mask()) >> OFFSET_SHIFT; \ - uint8_t R_OFFSET = hash & OFFSET_MASK; \ - uint8_t initial_offset = R_OFFSET; \ - OPTIONAL_CONST Item &R_ITEM = ARRAY.item(item_index); \ - do { - -#define ITER_SLOTS_END(R_OFFSET) \ - R_OFFSET = (R_OFFSET + 1) & OFFSET_MASK; \ - } while (R_OFFSET != initial_offset); \ - perturb >>= 5; \ - hash = hash * 5 + 1 + perturb; \ - } ((void)0) - -// clang-format on - -template +template< + /** Type of the elements that are stored in this set. It has to be movable. Furthermore, the + * hash and is-equal functions have to support it. + */ + typename Key, + /** + * The minimum number of elements that can be stored in this Set without doing a heap + * allocation. This is useful when you expect to have many small sets. However, keep in mind + * that (unlike vector) initializing a set has a O(n) cost in the number of slots. + * + * When Key is large, the small buffer optimization is disabled by default to avoid large + * unexpected allocations on the stack. It can still be enabled explicitely though. + */ + uint32_t InlineBufferCapacity = (sizeof(Key) < 100) ? 4 : 0, + /** + * The strategy used to deal with collisions. They are defined in BLI_probing_strategies.hh. 
+ */ + typename ProbingStrategy = DefaultProbingStrategy, + /** + * The hash function used to hash the keys. There is a default for many types. See BLI_hash.hh + * for examples on how to define a custom hash function. + */ + typename Hash = DefaultHash, + /** + * The equality operator used to compare keys. By default it will simply compare keys using the + * `==` operator. + */ + typename IsEqual = DefaultEquality, + /** + * This is what will actually be stored in the hash table array. At a minimum a slot has to + * be able to hold a key and information about whether the slot is empty, occupied or removed. + * Using a non-standard slot type can improve performance or reduce the memory footprint. For + * example, a hash can be stored in the slot, to make inequality checks more efficient. Some + * types have special values that can represent an empty or removed state, eliminating the need + * for an additional variable. Also see BLI_set_slots.hh. + */ + typename Slot = typename DefaultSetSlot::type, + /** + * The allocator used by this set. Should rarely be changed, except when you don't want that + * MEM_* is used internally. + */ + typename Allocator = GuardedAllocator> class Set { private: - static constexpr uint OFFSET_MASK = 3; - static constexpr uint OFFSET_SHIFT = 2; + /** + * Slots are either empty, occupied or removed. The number of occupied slots can be computed by + * subtracting the removed slots from the occupied-and-removed slots. + */ + uint32_t m_removed_slots; + uint32_t m_occupied_and_removed_slots; - class Item { - private: - static constexpr uint8_t IS_EMPTY = 0; - static constexpr uint8_t IS_SET = 1; - static constexpr uint8_t IS_DUMMY = 2; + /** + * The maximum number of slots that can be used (either occupied or removed) until the set has to + * grow. This is the total number of slots times the max load factor. 
+ */ + uint32_t m_usable_slots; - uint8_t m_status[4]; - AlignedBuffer<4 * sizeof(T), alignof(T)> m_buffer; + /** + * The number of slots minus one. This is a bit mask that can be used to turn any integer into a + * valid slot index efficiently. + */ + uint32_t m_slot_mask; - public: - static constexpr uint slots_per_item = 4; + /** This is called to hash incoming keys. */ + Hash m_hash; - Item() - { - for (uint offset = 0; offset < 4; offset++) { - m_status[offset] = IS_EMPTY; - } - } + /** This is called to check equality of two keys. */ + IsEqual m_is_equal; - ~Item() - { - for (uint offset = 0; offset < 4; offset++) { - if (m_status[offset] == IS_SET) { - destruct(this->value(offset)); - } - } - } + /** The max load factor is 1/2 = 50% by default. */ +#define LOAD_FACTOR 1, 2 + LoadFactor m_max_load_factor = LoadFactor(LOAD_FACTOR); + using SlotArray = + Array; +#undef LOAD_FACTOR - Item(const Item &other) - { - for (uint offset = 0; offset < 4; offset++) { - uint8_t status = other.m_status[offset]; - m_status[offset] = status; - if (status == IS_SET) { - T *src = other.value(offset); - T *dst = this->value(offset); - new (dst) T(*src); - } - } - } + /** + * This is the array that contains the actual slots. There is always at least one empty slot and + * the size of the array is a power of two. + */ + SlotArray m_slots; - Item(Item &&other) noexcept - { - for (uint offset = 0; offset < 4; offset++) { - uint8_t status = other.m_status[offset]; - m_status[offset] = status; - if (status == IS_SET) { - T *src = other.value(offset); - T *dst = this->value(offset); - new (dst) T(std::move(*src)); - } - } - } + /** Iterate over a slot index sequence for a given hash. 
*/ +#define SET_SLOT_PROBING_BEGIN(HASH, R_SLOT) \ + SLOT_PROBING_BEGIN (ProbingStrategy, HASH, m_slot_mask, SLOT_INDEX) \ + auto &R_SLOT = m_slots[SLOT_INDEX]; +#define SET_SLOT_PROBING_END() SLOT_PROBING_END() - Item &operator=(const Item &other) = delete; - Item &operator=(Item &&other) = delete; + public: + /** + * Initialize an empty set. This is a cheap operation no matter how large the inline buffer + * is. This is necessary to avoid a high cost when no elements are added at all. An optimized + * grow operation is performed on the first insertion. + */ + Set() + : m_removed_slots(0), + m_occupied_and_removed_slots(0), + m_usable_slots(0), + m_slot_mask(0), + m_slots(1) + { + } - T *value(uint offset) const - { - return (T *)m_buffer.ptr() + offset; - } + ~Set() = default; - template void store(uint offset, ForwardT &&value) - { - BLI_assert(m_status[offset] != IS_SET); - m_status[offset] = IS_SET; - T *dst = this->value(offset); - new (dst) T(std::forward(value)); - } + /** + * Construct a set that contains the given keys. Duplicates will be removed automatically. 
+ */ + Set(const std::initializer_list &list) : Set() + { + this->add_multiple(list); + } - void set_dummy(uint offset) - { - BLI_assert(m_status[offset] == IS_SET); - m_status[offset] = IS_DUMMY; - destruct(this->value(offset)); - } + Set(const Set &other) = default; - bool is_empty(uint offset) const - { - return m_status[offset] == IS_EMPTY; - } + Set(Set &&other) noexcept + : m_removed_slots(other.m_removed_slots), + m_occupied_and_removed_slots(other.m_occupied_and_removed_slots), + m_usable_slots(other.m_usable_slots), + m_slot_mask(other.m_slot_mask), + m_hash(std::move(other.m_hash)), + m_is_equal(std::move(other.m_is_equal)), + m_slots(std::move(other.m_slots)) + { + other.~Set(); + new (&other) Set(); + } - bool is_set(uint offset) const - { - return m_status[offset] == IS_SET; + Set &operator=(const Set &other) + { + if (this == &other) { + return *this; } - bool is_dummy(uint offset) const - { - return m_status[offset] == IS_DUMMY; - } + this->~Set(); + new (this) Set(other); - bool has_value(uint offset, const T &value) const - { - return m_status[offset] == IS_SET && *this->value(offset) == value; + return *this; + } + + Set &operator=(Set &&other) + { + if (this == &other) { + return *this; } - }; - using ArrayType = OpenAddressingArray; - ArrayType m_array; + this->~Set(); + new (this) Set(std::move(other)); - public: - Set() = default; + return *this; + } /** - * Create a new set that contains the given elements. + * Add a new key to the set. This invokes undefined behavior when the key is in the set already. + * When you know for certain that a key is not in the set yet, use this method for better + * performance. This also expresses the intent better. 
*/ - Set(ArrayRef values) + void add_new(const Key &key) { - this->reserve(values.size()); - for (const T &value : values) { - this->add(value); - } + this->add_new__impl(key, m_hash(key)); + } + void add_new(Key &&key) + { + this->add_new__impl(std::move(key), m_hash(key)); + } + + /** + * Add a key to the set. If the key exists in the set already, nothing is done. The return value + * is true if the key was newly added to the set. + * + * This is similar to std::unordered_set::insert. + */ + bool add(const Key &key) + { + return this->add_as(key); + } + bool add(Key &&key) + { + return this->add_as(std::move(key)); } /** - * Create a new set from an initializer list. + * Same as `add`, but accepts other key types that are supported by the hash function. */ - Set(std::initializer_list values) : Set(ArrayRef(values)) + template bool add_as(ForwardKey &&key) { + return this->add__impl(std::forward(key), m_hash(key)); } /** - * Make the set large enough to hold the given amount of elements. + * Convenience function to add many keys to the set at once. Duplicates are removed + * automatically. + * + * We might be able to make this faster than sequentially adding all keys, but that is not + * implemented yet. */ - void reserve(uint32_t min_usable_slots) + void add_multiple(ArrayRef keys) { - if (m_array.slots_usable() < min_usable_slots) { - this->grow(min_usable_slots); + for (const Key &key : keys) { + this->add(key); } } /** - * Add a new element to the set. - * Asserts that the element did not exist in the set before. + * Convenience function to add many new keys to the set at once. The keys must not exist in the + * set before and there must not be duplicates in the array. */ - void add_new(const T &value) + void add_multiple_new(ArrayRef keys) { - this->add_new__impl(value); + for (const Key &key : keys) { + this->add_new(key); + } } - void add_new(T &&value) + + /** + * Returns true if the key is in the set. 
+ * + * This is similar to std::unordered_set::find() != std::unordered_set::end(). + */ + bool contains(const Key &key) const { - this->add_new__impl(std::move(value)); + return this->contains_as(key); } /** - * Add a new value to the set if it does not exist yet. - * Returns true of the value has been newly added. + * Same as `contains`, but accepts other key types that are supported by the hash function. */ - bool add(const T &value) + template bool contains_as(const ForwardKey &key) const { - return this->add__impl(value); + return this->contains__impl(key, m_hash(key)); } - bool add(T &&value) + + /** + * Deletes the key from the set. Returns true when the key did exist beforehand, otherwise false. + * + * This is similar to std::unordered_set::erase. + */ + bool remove(const Key &key) { - return this->add__impl(std::move(value)); + return this->remove_as(key); } /** - * Add multiple elements to the set. + * Same as `remove`, but accepts other key types that are supported by the hash function. */ - void add_multiple(ArrayRef values) + template bool remove_as(const ForwardKey &key) { - for (const T &value : values) { - this->add(value); - } + return this->remove__impl(key, m_hash(key)); } /** - * Add multiple new elements to the set. - * Asserts that none of the elements existed in the set before. + * Deletes the key from the set. This invokes undefined behavior when the key is not in the map. */ - void add_multiple_new(ArrayRef values) + void remove_contained(const Key &key) { - for (const T &value : values) { - this->add_new(value); - } + this->remove_contained_as(key); } /** - * Returns true when the value is in the set, otherwise false. + * Same as `remove_contained`, but accepts other key types that are supported by the hash + * function. 
*/ - bool contains(const T &value) const + template void remove_contained_as(const ForwardKey &key) { - ITER_SLOTS_BEGIN (value, m_array, const, item, offset) { - if (item.is_empty(offset)) { - return false; - } - else if (item.has_value(offset, value)) { - return true; - } - } - ITER_SLOTS_END(offset); + this->remove_contained__impl(key, m_hash(key)); } /** - * Remove the value from the set. - * Asserts that the value exists in the set currently. + * An iterator that can iterate over all keys in the set. The iterator is invalidated when the + * set is moved or when it is grown. + * + * Keys returned by this iterator are always const. They should not change, because this might + * also change their hash. */ - void remove(const T &value) + class Iterator { + private: + const Slot *m_slots; + uint32_t m_total_slots; + uint32_t m_current_slot; + + public: + Iterator(const Slot *slots, uint32_t total_slots, uint32_t current_slot) + : m_slots(slots), m_total_slots(total_slots), m_current_slot(current_slot) + { + } + + Iterator &operator++() + { + while (++m_current_slot < m_total_slots) { + if (m_slots[m_current_slot].is_occupied()) { + break; + } + } + return *this; + } + + const Key &operator*() const + { + return *m_slots[m_current_slot].key(); + } + + friend bool operator!=(const Iterator &a, const Iterator &b) + { + BLI_assert(a.m_slots == b.m_slots); + BLI_assert(a.m_total_slots == b.m_total_slots); + return a.m_current_slot != b.m_current_slot; + } + }; + + Iterator begin() const { - BLI_assert(this->contains(value)); - ITER_SLOTS_BEGIN (value, m_array, , item, offset) { - if (item.has_value(offset, value)) { - item.set_dummy(offset); - m_array.update__set_to_dummy(); - return; + for (uint32_t i = 0; i < m_slots.size(); i++) { + if (m_slots[i].is_occupied()) { + return Iterator(m_slots.data(), m_slots.size(), i); } } - ITER_SLOTS_END(offset); + return this->end(); + } + + Iterator end() const + { + return Iterator(m_slots.data(), m_slots.size(), m_slots.size()); 
} /** - * Get the amount of values stored in the set. + * Print common statistics like size and collision count. This is useful for debugging purposes. */ - uint32_t size() const + void print_stats(StringRef name = "") const { - return m_array.slots_set(); + HashTableStats stats(*this, *this); + stats.print(); } /** - * Return true if this set contains no elements. + * Get the number of collisions that the probing strategy has to go through to find the key or + * determine that it is not in the set. */ - bool is_empty() const + uint32_t count_collisions(const Key &key) const { - return this->size() == 0; + return this->count_collisions__impl(key, m_hash(key)); } + /** + * Remove all elements from the set. + */ void clear() { this->~Set(); @@ -291,8 +432,76 @@ class Set { } /** - * Returns true when there is at least one element that is in both sets. - * Otherwise false. + * Creates a new slot array and reinserts all keys inside of that. This method can be used to get + * rid of dummy slots. Also this is useful for benchmarking the grow function. + */ + void rehash() + { + this->realloc_and_reinsert(this->size()); + } + + /** + * Returns the number of keys stored in the set. + */ + uint32_t size() const + { + return m_occupied_and_removed_slots - m_removed_slots; + } + + /** + * Returns true if no keys are stored. + */ + bool is_empty() const + { + return m_occupied_and_removed_slots == m_removed_slots; + } + + /** + * Returns the number of available slots. This is mostly for debugging purposes. + */ + uint32_t capacity() const + { + return m_slots.size(); + } + + /** + * Returns the amount of removed slots in the set. This is mostly for debugging purposes. + */ + uint32_t removed_amount() const + { + return m_removed_slots; + } + + /** + * Returns the bytes required per element. This is mostly for debugging purposes. + */ + uint32_t size_per_element() const + { + return sizeof(Slot); + } + + /** + * Returns the approximate memory requirements of the set in bytes. 
This is more correct for + * larger sets. + */ + uint32_t size_in_bytes() const + { + return sizeof(Slot) * m_slots.size(); + } + + /** + * Potentially resize the set such that it can hold the specified number of keys without another + * grow operation. + */ + void reserve(uint32_t n) + { + if (m_usable_slots < n) { + this->realloc_and_reinsert(n); + } + } + + /** + * Returns true if there is a key that exists in both sets. */ static bool Intersects(const Set &a, const Set &b) { @@ -301,8 +510,8 @@ class Set { return Intersects(b, a); } - for (const T &value : a) { - if (b.contains(value)) { + for (const Key &key : a) { + if (b.contains(key)) { return true; } } @@ -310,182 +519,249 @@ class Set { } /** - * Returns true when there is no value that is in both sets. - * Otherwise false. + * Returns true if no key from a is also in b and vice versa. */ static bool Disjoint(const Set &a, const Set &b) { return !Intersects(a, b); } - void print_table() const + private: + BLI_NOINLINE void realloc_and_reinsert(uint32_t min_usable_slots) { - std::cout << "Hash Table:\n"; - std::cout << " Size: " << m_array.slots_set() << '\n'; - std::cout << " Capacity: " << m_array.slots_total() << '\n'; - uint32_t item_index = 0; - for (const Item &item : m_array) { - std::cout << " Item: " << item_index++ << '\n'; - for (uint offset = 0; offset < 4; offset++) { - std::cout << " " << offset << " \t"; - if (item.is_empty(offset)) { - std::cout << " \n"; - } - else if (item.is_set(offset)) { - const T &value = *item.value(offset); - uint32_t collisions = this->count_collisions(value); - std::cout << " " << value << " \t Collisions: " << collisions << '\n'; - } - else if (item.is_dummy(offset)) { - std::cout << " \n"; - } + uint32_t total_slots, usable_slots; + m_max_load_factor.compute_total_and_usable_slots( + SlotArray::inline_buffer_capacity(), min_usable_slots, &total_slots, &usable_slots); + uint32_t new_slot_mask = total_slots - 1; + + /** + * Optimize the case when the set was empty 
beforehand. We can avoid some copies here. + */ + if (this->size() == 0) { + m_slots.~Array(); + new (&m_slots) SlotArray(total_slots); + m_removed_slots = 0; + m_occupied_and_removed_slots = 0; + m_usable_slots = usable_slots; + m_slot_mask = new_slot_mask; + return; + } + + /* The grown array that we insert the keys into. */ + SlotArray new_slots(total_slots); + + for (Slot &slot : m_slots) { + if (slot.is_occupied()) { + this->add_after_grow_and_destruct_old(slot, new_slots, new_slot_mask); } } + + /* All occupied slots have been destructed already and empty/removed slots are assumed to be + * trivially destructible. */ + m_slots.clear_without_destruct(); + m_slots = std::move(new_slots); + m_occupied_and_removed_slots -= m_removed_slots; + m_usable_slots = usable_slots; + m_removed_slots = 0; + m_slot_mask = new_slot_mask; } - class Iterator { - private: - const Set *m_set; - uint32_t m_slot; + void add_after_grow_and_destruct_old(Slot &old_slot, + SlotArray &new_slots, + uint32_t new_slot_mask) + { + uint32_t hash = old_slot.get_hash(Hash()); - public: - Iterator(const Set *set, uint32_t slot) : m_set(set), m_slot(slot) - { + SLOT_PROBING_BEGIN (ProbingStrategy, hash, new_slot_mask, slot_index) { + Slot &slot = new_slots[slot_index]; + if (slot.is_empty()) { + slot.relocate_occupied_here(old_slot, hash); + return; + } } + SLOT_PROBING_END(); + } - Iterator &operator++() - { - m_slot = m_set->next_slot(m_slot + 1); - return *this; + template bool contains__impl(const ForwardKey &key, uint32_t hash) const + { + SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + return false; + } + if (slot.contains(key, m_is_equal, hash)) { + return true; + } } + SET_SLOT_PROBING_END(); + } - const T &operator*() const - { - uint32_t item_index = m_slot >> OFFSET_SHIFT; - uint offset = m_slot & OFFSET_MASK; - const Item &item = m_set->m_array.item(item_index); - BLI_assert(item.is_set(offset)); - return *item.value(offset); - } + template void 
add_new__impl(ForwardKey &&key, uint32_t hash) + { + BLI_assert(!this->contains_as(key)); - friend bool operator==(const Iterator &a, const Iterator &b) - { - BLI_assert(a.m_set == b.m_set); - return a.m_slot == b.m_slot; - } + this->ensure_can_add(); - friend bool operator!=(const Iterator &a, const Iterator &b) - { - return !(a == b); + SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + slot.occupy(std::forward(key), hash); + m_occupied_and_removed_slots++; + return; + } } - }; + SET_SLOT_PROBING_END(); + } - friend Iterator; + template bool add__impl(ForwardKey &&key, uint32_t hash) + { + this->ensure_can_add(); - Iterator begin() const + SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + slot.occupy(std::forward(key), hash); + m_occupied_and_removed_slots++; + return true; + } + if (slot.contains(key, m_is_equal, hash)) { + return false; + } + } + SET_SLOT_PROBING_END(); + } + + template bool remove__impl(const ForwardKey &key, uint32_t hash) { - return Iterator(this, this->next_slot(0)); + SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash)) { + slot.remove(); + m_removed_slots++; + return true; + } + if (slot.is_empty()) { + return false; + } + } + SET_SLOT_PROBING_END(); } - Iterator end() const + template void remove_contained__impl(const ForwardKey &key, uint32_t hash) { - return Iterator(this, m_array.slots_total()); + BLI_assert(this->contains_as(key)); + m_removed_slots++; + + SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash)) { + slot.remove(); + return; + } + } + SET_SLOT_PROBING_END(); } - private: - uint32_t next_slot(uint32_t slot) const - { - for (; slot < m_array.slots_total(); slot++) { - uint32_t item_index = slot >> OFFSET_SHIFT; - uint offset = slot & OFFSET_MASK; - const Item &item = m_array.item(item_index); - if (item.is_set(offset)) { - return slot; + template + uint32_t count_collisions__impl(const ForwardKey &key, uint32_t hash) const + { + uint32_t 
collisions = 0; + + SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash)) { + return collisions; } + if (slot.is_empty()) { + return collisions; + } + collisions++; } - return slot; + SET_SLOT_PROBING_END(); } void ensure_can_add() { - if (UNLIKELY(m_array.should_grow())) { - this->grow(this->size() + 1); + if (m_occupied_and_removed_slots >= m_usable_slots) { + this->realloc_and_reinsert(this->size() + 1); + BLI_assert(m_occupied_and_removed_slots < m_usable_slots); } } +}; - BLI_NOINLINE void grow(uint32_t min_usable_slots) +/** + * A wrapper for std::unordered_set with the API of BLI::Set. This can be used for benchmarking. + */ +template class StdUnorderedSetWrapper { + private: + using SetType = std::unordered_set>; + SetType m_set; + + public: + uint32_t size() const { - ArrayType new_array = m_array.init_reserved(min_usable_slots); + return (uint32_t)m_set.size(); + } - for (Item &old_item : m_array) { - for (uint8_t offset = 0; offset < 4; offset++) { - if (old_item.is_set(offset)) { - this->add_after_grow(*old_item.value(offset), new_array); - } - } - } + bool is_empty() const + { + return m_set.empty(); + } - m_array = std::move(new_array); + void reserve(uint32_t n) + { + m_set.reserve(n); } - void add_after_grow(T &old_value, ArrayType &new_array) + void add_new(const Key &key) { - ITER_SLOTS_BEGIN (old_value, new_array, , item, offset) { - if (item.is_empty(offset)) { - item.store(offset, std::move(old_value)); - return; - } - } - ITER_SLOTS_END(offset); + m_set.insert(key); + } + void add_new(Key &&key) + { + m_set.insert(std::move(key)); } - uint32_t count_collisions(const T &value) const + bool add(const Key &key) { - uint32_t collisions = 0; - ITER_SLOTS_BEGIN (value, m_array, const, item, offset) { - if (item.is_empty(offset) || item.has_value(offset, value)) { - return collisions; - } - collisions++; + return m_set.insert(key).second; + } + bool add(Key &&key) + { + return m_set.insert(std::move(key)).second; + } + + 
void add_multiple(ArrayRef keys) + { + for (const Key &key : keys) { + m_set.insert(key); } - ITER_SLOTS_END(offset); } - template void add_new__impl(ForwardT &&value) + bool contains(const Key &key) const { - BLI_assert(!this->contains(value)); - this->ensure_can_add(); + return m_set.find(key) != m_set.end(); + } - ITER_SLOTS_BEGIN (value, m_array, , item, offset) { - if (item.is_empty(offset)) { - item.store(offset, std::forward(value)); - m_array.update__empty_to_set(); - return; - } - } - ITER_SLOTS_END(offset); + bool remove(const Key &key) + { + return (bool)m_set.erase(key); } - template bool add__impl(ForwardT &&value) + void remove_contained(const Key &key) { - this->ensure_can_add(); + return m_set.erase(key); + } - ITER_SLOTS_BEGIN (value, m_array, , item, offset) { - if (item.is_empty(offset)) { - item.store(offset, std::forward(value)); - m_array.update__empty_to_set(); - return true; - } - else if (item.has_value(offset, value)) { - return false; - } - } - ITER_SLOTS_END(offset); + void clear() + { + m_set.clear(); + } + + typename SetType::iterator begin() const + { + return m_set.begin(); } -}; -#undef ITER_SLOTS_BEGIN -#undef ITER_SLOTS_END + typename SetType::iterator end() const + { + return m_set.end(); + } +}; } // namespace BLI diff --git a/source/blender/blenlib/BLI_set_slots.hh b/source/blender/blenlib/BLI_set_slots.hh new file mode 100644 index 00000000000..21493524bd8 --- /dev/null +++ b/source/blender/blenlib/BLI_set_slots.hh @@ -0,0 +1,415 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __BLI_SET_SLOTS_HH__ +#define __BLI_SET_SLOTS_HH__ + +/** \file + * \ingroup bli + * + * This file contains different slot types that are supposed to be used with BLI::Set. + * + * Every slot type has to be able to hold a value of the Key type and state information. + * A set slot has three possible states: empty, occupied and removed. + * + * Only when a slot is occupied, it stores an instance of type Key. + * + * A set slot type has to implement a couple of methods that are explained in SimpleSetSlot. + * A slot type is assumed to be trivially destructible, when it is not in occupied state. So the + * destructor might not be called in that case. + */ + +#include "BLI_memory_utils.hh" +#include "BLI_string_ref.hh" + +namespace BLI { + +/** + * The simplest possible set slot. It stores the slot state and the optional key instance in + * separate variables. Depending on the alignment requirement of the key, many bytes might be + * wasted. + */ +template class SimpleSetSlot { + private: + enum State : uint8_t { + Empty = 0, + Occupied = 1, + Removed = 2, + }; + + State m_state; + AlignedBuffer m_buffer; + + public: + /** + * After the default constructor has run, the slot has to be in the empty state. + */ + SimpleSetSlot() + { + m_state = Empty; + } + + /** + * The destructor also has to destruct the key, if the slot is currently occupied. + */ + ~SimpleSetSlot() + { + if (m_state == Occupied) { + this->key()->~Key(); + } + } + + /** + * The copy constructor has to copy the state. If the other slot was occupied, a copy of the key + * has to be made as well. 
+ */ + SimpleSetSlot(const SimpleSetSlot &other) + { + m_state = other.m_state; + if (other.m_state == Occupied) { + new (this->key()) Key(*other.key()); + } + } + + /** + * The move constructor has to copy the state. If the other slot was occupied, the key from the + * other slot has to be moved as well. The other slot stays in the state it was in before. Its + * optionally stored key remains in a moved-from state. + */ + SimpleSetSlot(SimpleSetSlot &&other) noexcept + { + m_state = other.m_state; + if (other.m_state == Occupied) { + new (this->key()) Key(std::move(*other.key())); + } + } + + /** + * Get a non-const pointer to the position where the key is stored. + */ + Key *key() + { + return (Key *)m_buffer.ptr(); + } + + /** + * Get a const pointer to the position where the key is stored. + */ + const Key *key() const + { + return (const Key *)m_buffer.ptr(); + } + + /** + * Return true if the slot currently contains a key. + */ + bool is_occupied() const + { + return m_state == Occupied; + } + + /** + * Return true if the slot is empty, i.e. it does not contain a key and is not in removed state. + */ + bool is_empty() const + { + return m_state == Empty; + } + + /** + * Return the hash of the currently stored key. In this simple set slot implementation, we just + * compute the hash here. Other implementations might store the hash in the slot instead. + */ + template uint32_t get_hash(const Hash &hash) const + { + BLI_assert(this->is_occupied()); + return hash(*this->key()); + } + + /** + * Move the other slot into this slot and destruct it. We do destruction here, because this way + * we can avoid a comparison with the state, since we know the slot is occupied. 
+ */ + void relocate_occupied_here(SimpleSetSlot &other, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + BLI_assert(other.is_occupied()); + m_state = Occupied; + new (this->key()) Key(std::move(*other.key())); + other.key()->~Key(); + } + + /** + * Return true, when this slot is occupied and contains a key that compares equal to the given + * key. The hash is used by other slot implementations to determine inequality faster. + */ + template + bool contains(const ForwardKey &key, const IsEqual &is_equal, uint32_t UNUSED(hash)) const + { + if (m_state == Occupied) { + return is_equal(key, *this->key()); + } + return false; + } + + /** + * Change the state of this slot from empty/removed to occupied. The key has to be constructed + * by calling the constructor with the given key as parameter. + */ + template void occupy(ForwardKey &&key, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + m_state = Occupied; + new (this->key()) Key(std::forward(key)); + } + + /** + * Change the state of this slot from occupied to removed. The key has to be destructed as well. + */ + void remove() + { + BLI_assert(this->is_occupied()); + m_state = Removed; + this->key()->~Key(); + } +}; + +/** + * This set slot implementation stores the hash of the key within the slot. This helps when + * computing the hash or an equality check is expensive. 
+ */ +template class HashedSetSlot { + private: + enum State : uint8_t { + Empty = 0, + Occupied = 1, + Removed = 2, + }; + + uint32_t m_hash; + State m_state; + AlignedBuffer m_buffer; + + public: + HashedSetSlot() + { + m_state = Empty; + } + + ~HashedSetSlot() + { + if (m_state == Occupied) { + this->key()->~Key(); + } + } + + HashedSetSlot(const HashedSetSlot &other) + { + m_state = other.m_state; + if (other.m_state == Occupied) { + m_hash = other.m_hash; + new (this->key()) Key(*other.key()); + } + } + + HashedSetSlot(HashedSetSlot &&other) noexcept + { + m_state = other.m_state; + if (other.m_state == Occupied) { + m_hash = other.m_hash; + new (this->key()) Key(std::move(*other.key())); + } + } + + Key *key() + { + return (Key *)m_buffer.ptr(); + } + + const Key *key() const + { + return (const Key *)m_buffer.ptr(); + } + + bool is_occupied() const + { + return m_state == Occupied; + } + + bool is_empty() const + { + return m_state == Empty; + } + + template uint32_t get_hash(const Hash &UNUSED(hash)) const + { + BLI_assert(this->is_occupied()); + return m_hash; + } + + void relocate_occupied_here(HashedSetSlot &other, uint32_t hash) + { + BLI_assert(!this->is_occupied()); + BLI_assert(other.is_occupied()); + m_state = Occupied; + m_hash = hash; + new (this->key()) Key(std::move(*other.key())); + other.key()->~Key(); + } + + template + bool contains(const ForwardKey &key, const IsEqual &is_equal, uint32_t hash) const + { + /* m_hash might be uninitialized here, but that is ok. 
*/ + if (m_hash == hash) { + if (m_state == Occupied) { + return is_equal(key, *this->key()); + } + } + return false; + } + + template void occupy(ForwardKey &&key, uint32_t hash) + { + BLI_assert(!this->is_occupied()); + m_state = Occupied; + m_hash = hash; + new (this->key()) Key(std::forward(key)); + } + + void remove() + { + BLI_assert(this->is_occupied()); + m_state = Removed; + this->key()->~Key(); + } +}; + +/** + * An IntrusiveSetSlot uses two special values of the key to indicate whether the slot is empty or + * removed. This saves some memory in all cases and is more efficient in many cases. The KeyInfo + * type indicates which specific values are used. An example for a KeyInfo implementation is + * PointerKeyInfo. + * + * The special key values are expected to be trivially destructible. + */ +template class IntrusiveSetSlot { + private: + Key m_key = KeyInfo::get_empty(); + + public: + IntrusiveSetSlot() = default; + ~IntrusiveSetSlot() = default; + IntrusiveSetSlot(const IntrusiveSetSlot &other) = default; + IntrusiveSetSlot(IntrusiveSetSlot &&other) noexcept = default; + + Key *key() + { + return &m_key; + } + + const Key *key() const + { + return &m_key; + } + + bool is_occupied() const + { + return KeyInfo::is_not_empty_or_removed(m_key); + } + + bool is_empty() const + { + return KeyInfo::is_empty(m_key); + } + + template uint32_t get_hash(const Hash &hash) const + { + BLI_assert(this->is_occupied()); + return hash(m_key); + } + + void relocate_occupied_here(IntrusiveSetSlot &other, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + BLI_assert(other.is_occupied()); + m_key = std::move(other.m_key); + other.m_key.~Key(); + } + + template + bool contains(const ForwardKey &key, const IsEqual &is_equal, uint32_t UNUSED(hash)) const + { + BLI_assert(KeyInfo::is_not_empty_or_removed(key)); + return is_equal(m_key, key); + } + + template void occupy(ForwardKey &&key, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + 
BLI_assert(KeyInfo::is_not_empty_or_removed(key)); + + m_key = std::forward(key); + } + + void remove() + { + BLI_assert(this->is_occupied()); + KeyInfo::remove(m_key); + } +}; + +/** + * This exists just to make it more convenient to define which special integer values can be used + * to indicate an empty and removed value. + */ +template +using IntegerSetSlot = IntrusiveSetSlot>; + +template struct DefaultSetSlot; + +/** + * Use SimpleSetSlot by default, because it is the smallest slot type that works for all key types. + */ +template struct DefaultSetSlot { + using type = SimpleSetSlot; +}; + +/** + * Store the hash of a string in the slot by default. Recomputing the hash or doing string + * comparisons can be relatively costly. + */ +template<> struct DefaultSetSlot { + using type = HashedSetSlot; +}; +template<> struct DefaultSetSlot { + using type = HashedSetSlot; +}; +template<> struct DefaultSetSlot { + using type = HashedSetSlot; +}; + +/** + * Use a special slot type for pointer keys, because we can store whether a slot is empty or + * removed with special pointer values. + */ +template struct DefaultSetSlot { + using type = IntrusiveSetSlot>; +}; + +} // namespace BLI + +#endif /* __BLI_SET_SLOTS_HH__ */ diff --git a/source/blender/blenlib/BLI_stack.hh b/source/blender/blenlib/BLI_stack.hh index 7f1f9f9cc10..4645535876d 100644 --- a/source/blender/blenlib/BLI_stack.hh +++ b/source/blender/blenlib/BLI_stack.hh @@ -20,48 +20,208 @@ /** \file * \ingroup bli * - * Basic stack implementation with support for small object optimization. + * A `BLI::Stack` is a dynamically growing FILO (first-in, last-out) data structure. It is + * designed to be a more convenient and efficient replacement for `std::stack`. + * + * The improved efficiency is mainly achieved by supporting small buffer optimization. As long as + * the number of elements added to the stack stays below InlineBufferCapacity, no heap allocation + * is done. 
Consequently, values stored in the stack have to be movable and they might be moved, + * when the stack is moved. + * + * A Vector can be used to emulate a stack. However, this stack implementation is more efficient + * when all you have to do is to push and pop elements. That is because a vector guarantees that + * all elements are in a contiguous array. Therefore, it has to copy all elements to a new buffer + * when it grows. This stack implementation does not have to copy all previously pushed elements + * when it grows. + * + * BLI::Stack is implemented using a double linked list of chunks. Each chunk contains an array of + * elements. The chunk size increases exponentially with every new chunk that is required. The + * lowest chunk, i.e. the one that is used for the first few pushed elements, is embedded into the + * stack. */ -#include "BLI_vector.hh" +#include "BLI_allocator.hh" +#include "BLI_array_ref.hh" +#include "BLI_memory_utils.hh" namespace BLI { -template +/** + * A StackChunk references a contiguous memory buffer. Multiple StackChunk instances are linked in + * a double linked list. + */ +template struct StackChunk { + /** The below chunk contains the elements that have been pushed on the stack before. */ + StackChunk *below; + /** The above chunk contains the elements that have been pushed on the stack afterwards. */ + StackChunk *above; + /** Pointer to the first element of the referenced buffer. */ + T *begin; + /** Pointer to one element past the end of the referenced buffer. */ + T *capacity_end; + + uint capacity() const + { + return capacity_end - begin; + } +}; + +template< + /** Type of the elements that are stored in the stack. */ + typename T, + /** + * The number of values that can be stored in this stack, without doing a heap allocation. + * Sometimes it can make sense to increase this value a lot. The memory in the inline buffer is + * not initialized when it is not needed. 
+ * + * When T is large, the small buffer optimization is disabled by default to avoid large + * unexpected allocations on the stack. It can still be enabled explicitely though. + */ + uint InlineBufferCapacity = (sizeof(T) < 100) ? 4 : 0, + /** + * The allocator used by this stack. Should rarely be changed, except when you don't want that + * MEM_* is used internally. + */ + typename Allocator = GuardedAllocator> class Stack { private: - Vector m_elements; + using Chunk = StackChunk; - public: - Stack() = default; + /** + * Points to one element after top-most value in the stack. + * + * Invariant: + * If m_size == 0 + * then: m_top == m_inline_chunk.begin + * else: &peek() == m_top - 1; + */ + T *m_top; + + /** Points to the chunk that references the memory pointed to by m_top. */ + Chunk *m_top_chunk; /** - * Construct a stack from an array ref. The elements will be pushed in the same order they are in - * the array. + * Number of elements in the entire stack. The sum of initialized element counts in the chunks. */ - Stack(ArrayRef values) : m_elements(values) - { - } + uint m_size; + + /** The buffer used to implement small object optimization. */ + AlignedBuffer m_inline_buffer; + + /** + * A chunk referencing the inline buffer. This is always the bottom-most chunk. + * So m_inline_chunk.below == nullptr. + */ + Chunk m_inline_chunk; - operator ArrayRef() + /** Used for allocations when the inline buffer is not large enough. */ + Allocator m_allocator; + + public: + /** + * Initialize an empty stack. No heap allocation is done. + */ + Stack(Allocator allocator = {}) : m_allocator(allocator) { - return m_elements; + T *inline_buffer = this->inline_buffer(); + + m_inline_chunk.below = nullptr; + m_inline_chunk.above = nullptr; + m_inline_chunk.begin = inline_buffer; + m_inline_chunk.capacity_end = inline_buffer + InlineBufferCapacity; + + m_top = inline_buffer; + m_top_chunk = &m_inline_chunk; + m_size = 0; } /** - * Return the number of elements in the stack. 
+ * Create a new stack that contains the given elements. The values are pushed to the stack in + * the order they are in the array. */ - uint size() const + Stack(ArrayRef values) : Stack() { - return m_elements.size(); + this->push_multiple(values); } /** - * Return true when the stack is empty, otherwise false. + * Create a new stack that contains the given elements. The values are pushed to the stack in the + * order they are in the array. + * + * Example: + * Stack stack = {4, 5, 6}; + * assert(stack.pop() == 6); + * assert(stack.pop() == 5); */ - bool is_empty() const + Stack(const std::initializer_list &values) : Stack(ArrayRef(values)) + { + } + + Stack(const Stack &other) : Stack(other.m_allocator) + { + for (const Chunk *chunk = &other.m_inline_chunk; chunk; chunk = chunk->above) { + const T *begin = chunk->begin; + const T *end = (chunk == other.m_top_chunk) ? other.m_top : chunk->capacity_end; + this->push_multiple(ArrayRef(begin, end - begin)); + } + } + + Stack(Stack &&other) noexcept : Stack(other.m_allocator) + { + uninitialized_relocate_n(other.inline_buffer(), + std::min(other.m_size, InlineBufferCapacity), + this->inline_buffer()); + + m_inline_chunk.above = other.m_inline_chunk.above; + m_size = other.m_size; + + if (m_size <= InlineBufferCapacity) { + m_top_chunk = &m_inline_chunk; + m_top = this->inline_buffer() + m_size; + } + else { + m_top_chunk = other.m_top_chunk; + m_top = other.m_top; + } + + other.m_size = 0; + other.m_inline_chunk.above = nullptr; + other.m_top_chunk = &other.m_inline_chunk; + other.m_top = other.m_top_chunk->begin; + } + + ~Stack() { - return this->size() == 0; + this->destruct_all_elements(); + Chunk *above_chunk; + for (Chunk *chunk = m_inline_chunk.above; chunk; chunk = above_chunk) { + above_chunk = chunk->above; + m_allocator.deallocate(chunk); + } + } + + Stack &operator=(const Stack &stack) + { + if (this == &stack) { + return *this; + } + + this->~Stack(); + new (this) Stack(stack); + + return *this; + } + + 
Stack &operator=(Stack &&stack) + { + if (this == &stack) { + return *this; + } + + this->~Stack(); + new (this) Stack(std::move(stack)); + + return *this; } /** @@ -69,80 +229,159 @@ class Stack { */ void push(const T &value) { - m_elements.append(value); + if (m_top == m_top_chunk->capacity_end) { + this->activate_next_chunk(1); + } + new (m_top) T(value); + m_top++; + m_size++; } - void push(T &&value) { - m_elements.append(std::move(value)); - } - - void push_multiple(ArrayRef values) - { - m_elements.extend(values); + if (m_top == m_top_chunk->capacity_end) { + this->activate_next_chunk(1); + } + new (m_top) T(std::move(value)); + m_top++; + m_size++; } /** - * Remove the element from the top of the stack and return it. - * This will assert when the stack is empty. + * Remove and return the top-most element from the stack. This invokes undefined behavior when + * the stack is empty. */ T pop() { - return m_elements.pop_last(); + BLI_assert(m_size > 0); + m_top--; + T value = std::move(*m_top); + m_top->~T(); + m_size--; + + if (m_top == m_top_chunk->begin) { + if (m_top_chunk->below != nullptr) { + m_top_chunk = m_top_chunk->below; + m_top = m_top_chunk->capacity_end; + } + } + return value; } /** - * Return a reference to the value a the top of the stack. - * This will assert when the stack is empty. + * Get a reference to the top-most element without removing it from the stack. This invokes + * undefined behavior when the stack is empty. */ T &peek() { - BLI_assert(!this->is_empty()); - return m_elements[this->size() - 1]; + BLI_assert(m_size > 0); + BLI_assert(m_top > m_top_chunk->begin); + return *(m_top - 1); } - - T *begin() + const T &peek() const { - return m_elements.begin(); + BLI_assert(m_size > 0); + BLI_assert(m_top > m_top_chunk->begin); + return *(m_top - 1); } - T *end() + /** + * Add multiple elements to the stack. The values are pushed in the order they are in the array. 
+ * This method is more efficient than pushing multiple elements individually and might cause less + * heap allocations. + */ + void push_multiple(ArrayRef values) { - return m_elements.end(); + ArrayRef remaining_values = values; + while (!remaining_values.is_empty()) { + if (m_top == m_top_chunk->capacity_end) { + this->activate_next_chunk(remaining_values.size()); + } + + uint remaining_capacity = m_top_chunk->capacity_end - m_top; + uint amount = std::min(remaining_values.size(), remaining_capacity); + uninitialized_copy_n(remaining_values.data(), amount, m_top); + m_top += amount; + + remaining_values = remaining_values.drop_front(amount); + } + + m_size += values.size(); } - const T *begin() const + /** + * Returns true when the size is zero. + */ + bool is_empty() const { - return m_elements.begin(); + return m_size == 0; } - const T *end() const + /** + * Returns the number of elements in the stack. + */ + uint size() const { - return m_elements.end(); + return m_size; } /** - * Remove all elements from the stack but keep the memory. + * Removes all elements from the stack. The memory is not freed, so it is more efficient to reuse + * the stack than to create a new one. */ void clear() { - m_elements.clear(); + this->destruct_all_elements(); + m_top_chunk = &m_inline_chunk; + m_top = m_top_chunk->begin; } - /** - * Remove all elements and free any allocated memory. - */ - void clear_and_make_small() + private: + T *inline_buffer() const { - m_elements.clear_and_make_small(); + return (T *)m_inline_buffer.ptr(); } /** - * Does a linear search to check if the value is in the stack. + * Changes m_top_chunk to point to a new chunk that is above the current one. The new chunk might + * be smaller than the given size_hint. This happens when a chunk that has been allocated before + * is reused. The size of the new chunk will be at least one. + * + * This invokes undefined behavior when the currently active chunk is not full. 
*/ - bool contains(const T &value) + void activate_next_chunk(uint size_hint) + { + BLI_assert(m_top == m_top_chunk->capacity_end); + if (m_top_chunk->above == nullptr) { + uint new_capacity = std::max(size_hint, m_top_chunk->capacity() * 2 + 10); + + /* Do a single memory allocation for the Chunk and the array it references. */ + void *buffer = m_allocator.allocate( + sizeof(Chunk) + sizeof(T) * new_capacity + alignof(T), alignof(Chunk), AT); + void *chunk_buffer = buffer; + void *data_buffer = (void *)(((uintptr_t)buffer + sizeof(Chunk) + alignof(T) - 1) & + ~(alignof(T) - 1)); + + Chunk *new_chunk = new (chunk_buffer) Chunk(); + new_chunk->begin = (T *)data_buffer; + new_chunk->capacity_end = new_chunk->begin + new_capacity; + new_chunk->above = nullptr; + new_chunk->below = m_top_chunk; + m_top_chunk->above = new_chunk; + } + m_top_chunk = m_top_chunk->above; + m_top = m_top_chunk->begin; + } + + void destruct_all_elements() { - return m_elements.contains(value); + for (T *value = m_top_chunk->begin; value != m_top; value++) { + value->~T(); + } + for (Chunk *chunk = m_top_chunk->below; chunk; chunk = chunk->below) { + for (T *value = chunk->begin; value != chunk->capacity_end; value++) { + value->~T(); + } + } } }; diff --git a/source/blender/blenlib/BLI_string_map.hh b/source/blender/blenlib/BLI_string_map.hh deleted file mode 100644 index caa7e16d1f3..00000000000 --- a/source/blender/blenlib/BLI_string_map.hh +++ /dev/null @@ -1,540 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef __BLI_STRING_MAP_HH__ -#define __BLI_STRING_MAP_HH__ - -/** \file - * \ingroup bli - * - * This tries to solve the issue that a normal map with std::string as key might do many - * allocations when the keys are longer than 16 bytes (the usual small string optimization size). - * - * For now this still uses std::string, but having this abstraction in place will make it easier to - * make it more efficient later on. Also, even if we will never implement this optimization, having - * a special map with string keys can be quite handy. */ - -#include "BLI_map.hh" -#include "BLI_optional.hh" -#include "BLI_string_ref.hh" -#include "BLI_vector.hh" - -namespace BLI { - -// clang-format off - -#define ITER_SLOTS_BEGIN(HASH, ARRAY, OPTIONAL_CONST, R_ITEM, R_OFFSET) \ - uint32_t hash_copy = HASH; \ - uint32_t perturb = HASH; \ - while (true) { \ - uint32_t item_index = (hash_copy & ARRAY.slot_mask()) >> OFFSET_SHIFT; \ - uint8_t R_OFFSET = hash_copy & OFFSET_MASK; \ - uint8_t initial_offset = R_OFFSET; \ - OPTIONAL_CONST Item &R_ITEM = ARRAY.item(item_index); \ - do { - -#define ITER_SLOTS_END(R_OFFSET) \ - R_OFFSET = (R_OFFSET + 1) & OFFSET_MASK; \ - } while (R_OFFSET != initial_offset); \ - perturb >>= 5; \ - hash_copy = hash_copy * 5 + 1 + perturb; \ - } ((void)0) - -// clang-format on - -template class StringMap { - private: - static constexpr uint32_t OFFSET_MASK = 3; - static constexpr uint32_t OFFSET_SHIFT = 2; - - class Item { - private: - static constexpr int32_t IS_EMPTY = -1; - - uint32_t m_hashes[4]; - int32_t m_indices[4]; - char m_values[sizeof(T) * 4]; - - public: - static constexpr uint slots_per_item = 4; - - Item() - { - for (uint offset = 0; offset < 4; offset++) { - m_indices[offset] = IS_EMPTY; - } - } - - ~Item() - { - 
for (uint offset = 0; offset < 4; offset++) { - if (this->is_set(offset)) { - destruct(this->value(offset)); - } - } - } - - Item(const Item &other) - { - for (uint offset = 0; offset < 4; offset++) { - m_indices[offset] = other.m_indices[offset]; - if (other.is_set(offset)) { - m_hashes[offset] = other.m_hashes[offset]; - new (this->value(offset)) T(*other.value(offset)); - } - } - } - - Item(Item &&other) noexcept - { - for (uint offset = 0; offset < 4; offset++) { - m_indices[offset] = other.m_indices[offset]; - if (other.is_set(offset)) { - m_hashes[offset] = other.m_hashes[offset]; - new (this->value(offset)) T(std::move(*other.value(offset))); - } - } - } - - uint32_t index(uint offset) const - { - return m_indices[offset]; - } - - uint32_t hash(uint offset) const - { - return m_hashes[offset]; - } - - T *value(uint offset) const - { - return (T *)POINTER_OFFSET(m_values, offset * sizeof(T)); - } - - bool is_set(uint offset) const - { - return m_indices[offset] >= 0; - } - - bool is_empty(uint offset) const - { - return m_indices[offset] == IS_EMPTY; - } - - bool has_hash(uint offset, uint32_t hash) const - { - BLI_assert(this->is_set(offset)); - return m_hashes[offset] == hash; - } - - bool has_exact_key(uint offset, StringRef key, const Vector &chars) const - { - return key == this->get_key(offset, chars); - } - - StringRefNull get_key(uint offset, const Vector &chars) const - { - const char *ptr = chars.begin() + m_indices[offset]; - uint length = *(uint *)ptr; - const char *start = ptr + sizeof(uint); - return StringRefNull(start, length); - } - - template - void store(uint offset, uint32_t hash, uint32_t index, ForwardT &&value) - { - this->store_without_value(offset, hash, index); - new (this->value(offset)) T(std::forward(value)); - } - - void store_without_value(uint offset, uint32_t hash, uint32_t index) - { - BLI_assert(!this->is_set(offset)); - m_hashes[offset] = hash; - m_indices[offset] = index; - } - }; - - using ArrayType = OpenAddressingArray; 
- ArrayType m_array; - Vector m_chars; - - public: - StringMap() = default; - - /** - * Get the number of key-value pairs in the map. - */ - uint size() const - { - return m_array.slots_set(); - } - - /** - * Add a new element to the map. It is assumed that the key did not exist before. - */ - void add_new(StringRef key, const T &value) - { - this->add_new__impl(key, value); - } - void add_new(StringRef key, T &&value) - { - this->add_new__impl(key, std::move(value)); - } - - /** - * Add a new element to the map if the key does not exist yet. - */ - void add(StringRef key, const T &value) - { - this->add__impl(key, value); - } - void add(StringRef key, T &&value) - { - this->add__impl(key, std::move(value)); - } - - /** - * First, checks if the key exists in the map. - * If it does exist, call the modify function with a pointer to the corresponding value. - * If it does not exist, call the create function with a pointer to where the value should be - * created. - * - * Returns whatever is returned from one of the callback functions. Both callbacks have to return - * the same type. - * - * CreateValueF: Takes a pointer to where the value should be created. - * ModifyValueF: Takes a pointer to the value that should be modified. 
- */ - template - auto add_or_modify(StringRef key, - const CreateValueF &create_value, - const ModifyValueF &modify_value) -> decltype(create_value(nullptr)) - { - using CreateReturnT = decltype(create_value(nullptr)); - using ModifyReturnT = decltype(modify_value(nullptr)); - BLI_STATIC_ASSERT((std::is_same::value), - "Both callbacks should return the same type."); - - this->ensure_can_add(); - uint32_t hash = this->compute_string_hash(key); - ITER_SLOTS_BEGIN (hash, m_array, , item, offset) { - if (item.is_empty(offset)) { - m_array.update__empty_to_set(); - uint32_t index = this->save_key_in_array(key); - item.store_without_value(offset, hash, index); - T *value_ptr = item.value(offset); - return create_value(value_ptr); - } - else if (item.has_hash(offset, hash) && item.has_exact_key(offset, key, m_chars)) { - T *value_ptr = item.value(offset); - return modify_value(value_ptr); - } - } - ITER_SLOTS_END(offset); - } - - /** - * Return true when the key exists in the map, otherwise false. - */ - bool contains(StringRef key) const - { - uint32_t hash = this->compute_string_hash(key); - ITER_SLOTS_BEGIN (hash, m_array, const, item, offset) { - if (item.is_empty(offset)) { - return false; - } - else if (item.has_hash(offset, hash) && item.has_exact_key(offset, key, m_chars)) { - return true; - } - } - ITER_SLOTS_END(offset); - } - - /** - * Get a reference to the value corresponding to a key. It is assumed that the key does exist. - */ - const T &lookup(StringRef key) const - { - BLI_assert(this->contains(key)); - T *found_value = nullptr; - uint32_t hash = this->compute_string_hash(key); - ITER_SLOTS_BEGIN (hash, m_array, const, item, offset) { - if (item.is_empty(offset)) { - return *found_value; - } - else if (item.has_hash(offset, hash)) { - if (found_value == nullptr) { - /* Common case: the first slot with the correct hash contains the key. 
- * However, still need to iterate until the next empty slot to make sure there is no - * other key with the exact same hash. */ - /* TODO: Check if we can guarantee that every hash only exists once in some cases. */ - found_value = item.value(offset); - } - else if (item.has_exact_key(offset, key, m_chars)) { - /* Found the hash more than once, now check for actual string equality. */ - return *item.value(offset); - } - } - } - ITER_SLOTS_END(offset); - } - - T &lookup(StringRef key) - { - return const_cast(const_cast(this)->lookup(key)); - } - - /** - * Get a pointer to the value corresponding to the key. Return nullptr, if the key does not - * exist. - */ - const T *lookup_ptr(StringRef key) const - { - uint32_t hash = this->compute_string_hash(key); - ITER_SLOTS_BEGIN (hash, m_array, const, item, offset) { - if (item.is_empty(offset)) { - return nullptr; - } - else if (item.has_hash(offset, hash) && item.has_exact_key(offset, key, m_chars)) { - return item.value(offset); - } - } - ITER_SLOTS_END(offset); - } - - T *lookup_ptr(StringRef key) - { - return const_cast(const_cast(this)->lookup_ptr(key)); - } - - Optional try_lookup(StringRef key) const - { - return Optional::FromPointer(this->lookup_ptr(key)); - } - - /** - * Get a copy of the value corresponding to the key. If the key does not exist, return the - * default value. - */ - T lookup_default(StringRef key, const T &default_value) const - { - const T *ptr = this->lookup_ptr(key); - if (ptr != nullptr) { - return *ptr; - } - else { - return default_value; - } - } - - /** - * Return the value that corresponds to the given key. - * If it does not exist yet, create and insert it first. - */ - template T &lookup_or_add(StringRef key, const CreateValueF &create_value) - { - return *this->add_or_modify( - key, - [&](T *value) { return new (value) T(create_value()); }, - [](T *value) { return value; }); - } - - /** - * Return the value that corresponds to the given key. 
- * If it does not exist yet, insert a new default constructed value and return that. - */ - T &lookup_or_add_default(StringRef key) - { - return this->lookup_or_add(key, []() { return T(); }); - } - - /** - * Do a linear search over all items to find a key for a value. - */ - StringRefNull find_key_for_value(const T &value) const - { - for (const Item &item : m_array) { - for (uint offset = 0; offset < 4; offset++) { - if (item.is_set(offset) && value == *item.value(offset)) { - return item.get_key(offset, m_chars); - } - } - } - BLI_assert(false); - return {}; - } - - /** - * Run a function for every value in the map. - */ - template void foreach_value(const FuncT &func) - { - for (Item &item : m_array) { - for (uint offset = 0; offset < 4; offset++) { - if (item.is_set(offset)) { - func(*item.value(offset)); - } - } - } - } - - /** - * Run a function for every key in the map. - */ - template void foreach_key(const FuncT &func) - { - for (Item &item : m_array) { - for (uint offset = 0; offset < 4; offset++) { - if (item.is_set(offset)) { - StringRefNull key = item.get_key(offset, m_chars); - func(key); - } - } - } - } - - /** - * Run a function for every key-value-pair in the map. - */ - template void foreach_item(const FuncT &func) - { - for (Item &item : m_array) { - for (uint offset = 0; offset < 4; offset++) { - if (item.is_set(offset)) { - StringRefNull key = item.get_key(offset, m_chars); - T &value = *item.value(offset); - func(key, value); - } - } - } - } - - template void foreach_item(const FuncT &func) const - { - for (const Item &item : m_array) { - for (uint offset = 0; offset < 4; offset++) { - if (item.is_set(offset)) { - StringRefNull key = item.get_key(offset, m_chars); - const T &value = *item.value(offset); - func(key, value); - } - } - } - } - - private: - uint32_t compute_string_hash(StringRef key) const - { - /* TODO: check if this can be optimized more because we know the key length already. 
*/ - uint32_t hash = 5381; - for (char c : key) { - hash = hash * 33 + c; - } - return hash; - } - - uint32_t save_key_in_array(StringRef key) - { - uint index = m_chars.size(); - uint string_size = key.size(); - m_chars.extend(ArrayRef((char *)&string_size, sizeof(uint))); - m_chars.extend(key); - m_chars.append('\0'); - return index; - } - - StringRefNull key_from_index(uint32_t index) const - { - const char *ptr = m_chars.begin() + index; - uint length = *(uint *)ptr; - const char *start = ptr + sizeof(uint); - return StringRefNull(start, length); - } - - void ensure_can_add() - { - if (UNLIKELY(m_array.should_grow())) { - this->grow(this->size() + 1); - } - } - - BLI_NOINLINE void grow(uint min_usable_slots) - { - ArrayType new_array = m_array.init_reserved(min_usable_slots); - for (Item &old_item : m_array) { - for (uint offset = 0; offset < 4; offset++) { - if (old_item.is_set(offset)) { - this->add_after_grow( - *old_item.value(offset), old_item.hash(offset), old_item.index(offset), new_array); - } - } - } - m_array = std::move(new_array); - } - - void add_after_grow(T &value, uint32_t hash, uint32_t index, ArrayType &new_array) - { - ITER_SLOTS_BEGIN (hash, new_array, , item, offset) { - if (item.is_empty(offset)) { - item.store(offset, hash, index, std::move(value)); - return; - } - } - ITER_SLOTS_END(offset); - } - - template bool add__impl(StringRef key, ForwardT &&value) - { - this->ensure_can_add(); - uint32_t hash = this->compute_string_hash(key); - ITER_SLOTS_BEGIN (hash, m_array, , item, offset) { - if (item.is_empty(offset)) { - uint32_t index = this->save_key_in_array(key); - item.store(offset, hash, index, std::forward(value)); - m_array.update__empty_to_set(); - return true; - } - else if (item.has_hash(offset, hash) && item.has_exact_key(offset, key, m_chars)) { - return false; - } - } - ITER_SLOTS_END(offset); - } - - template void add_new__impl(StringRef key, ForwardT &&value) - { - BLI_assert(!this->contains(key)); - this->ensure_can_add(); 
- uint32_t hash = this->compute_string_hash(key); - ITER_SLOTS_BEGIN (hash, m_array, , item, offset) { - if (item.is_empty(offset)) { - uint32_t index = this->save_key_in_array(key); - item.store(offset, hash, index, std::forward(value)); - m_array.update__empty_to_set(); - return; - } - } - ITER_SLOTS_END(offset); - } -}; - -#undef ITER_SLOTS_BEGIN -#undef ITER_SLOTS_END - -} // namespace BLI - -#endif /* __BLI_STRING_MAP_HH__ */ diff --git a/source/blender/blenlib/BLI_string_ref.hh b/source/blender/blenlib/BLI_string_ref.hh index eacc501375a..6a569bee1aa 100644 --- a/source/blender/blenlib/BLI_string_ref.hh +++ b/source/blender/blenlib/BLI_string_ref.hh @@ -20,12 +20,26 @@ /** \file * \ingroup bli * - * A StringRef is a pointer to a string somewhere in memory. It should not be used to transfer - * ownership of that string. When a function gets a StringRef as input, it cannot expect, that - * the string will still exist after the function ends. + * A `BLI::StringRef` references a const char array owned by someone else. It is just a pointer and + * a size. Since the memory is not owned, StringRef should not be used to transfer ownership of the + * string. The data referenced by a StringRef cannot be mutated through it. * - * There are two types of string references: One that guarantees null termination and one that does - * not. + * A StringRef is NOT null-terminated. This makes it much more powerful within C++, because we can + * also cut off parts of the end without creating a copy. When interfacing with C code that expects + * null-terminated strings, `BLI::StringRefNull` can be used. It is essentially the same as + * StringRef, but with the restriction that the string has to be null-terminated. + * + * Whenever possible, string parameters should be of type StringRef and the string return type + * should be StringRefNull. Don't forget that the StringRefNull does not own the string, so don't + * return it when the string exists only in the scope of the function. 
This convention makes + * functions usable in the most contexts. + * + * BLI::StringRef vs. std::string_view: + * Both types are certainly very similar. The main benefit of using StringRef in Blender is that + * this allows us to add convenience methods at any time. Especially, when doing a lot of string + * manipulation, this helps to keep the code clean. Furthermore, we need StringRefNull anyway, + * because there is a lot of C code that expects null-terminated strings. Once we use C++17, + * implicit conversions to and from string_view can be added. */ #include @@ -39,15 +53,16 @@ namespace BLI { class StringRef; +/** + * A common base class for StringRef and StringRefNull. This should never be used in other files. + * It only exists to avoid some code duplication. + */ class StringRefBase { - public: - using size_type = size_t; - protected: const char *m_data; - size_type m_size; + uint m_size; - StringRefBase(const char *data, size_type size) : m_data(data), m_size(size) + StringRefBase(const char *data, uint size) : m_data(data), m_size(size) { } @@ -55,7 +70,7 @@ class StringRefBase { /** * Return the (byte-)length of the referenced string, without any null-terminator. */ - size_type size() const + uint size() const { return m_size; } @@ -68,17 +83,15 @@ class StringRefBase { return m_data; } - char operator[](size_type index) const - { - BLI_assert(index <= m_size); - return m_data[index]; - } - operator ArrayRef() const { return ArrayRef(m_data, m_size); } + /** + * Implicitely convert to std::string. This is convenient in most cases, but you have to be a bit + * careful not to convert to std::string accidentally. + */ operator std::string() const { return std::string(m_data, m_size); @@ -94,12 +107,21 @@ class StringRefBase { return m_data + m_size; } + /** + * Copy the string into a buffer. The buffer has to be one byte larger than the size of the + * string, because the copied string will be null-terminated. 
Only use this when you are + * absolutely sure that the buffer is large enough. + */ void unsafe_copy(char *dst) const { memcpy(dst, m_data, m_size); dst[m_size] = '\0'; } + /** + * Copy the string into a buffer. The copied string will be null-terminated. This invokes + * undefined behavior when dst_size is too small. (Should we define the behavior?) + */ void copy(char *dst, uint dst_size) const { if (m_size < dst_size) { @@ -111,6 +133,10 @@ class StringRefBase { } } + /** + * Copy the string into a char array. The copied string will be null-terminated. This invokes + * undefined behavior when dst is too small. + */ template void copy(char (&dst)[N]) { this->copy(dst, N); @@ -130,7 +156,7 @@ class StringRefBase { }; /** - * References a null-terminated char array. + * References a null-terminated const char array. */ class StringRefNull : public StringRefBase { @@ -139,24 +165,45 @@ class StringRefNull : public StringRefBase { { } - StringRefNull(const char *str) : StringRefBase(str, strlen(str)) + /** + * Construct a StringRefNull from a null terminated c-string. The pointer must not point to NULL. + */ + StringRefNull(const char *str) : StringRefBase(str, (uint)strlen(str)) { BLI_assert(str != NULL); BLI_assert(m_data[m_size] == '\0'); } - StringRefNull(const char *str, size_type size) : StringRefBase(str, size) + /** + * Construct a StringRefNull from a null terminated c-string. This invokes undefined behavior + * when the given size is not the correct size of the string. + */ + StringRefNull(const char *str, uint size) : StringRefBase(str, size) { - BLI_assert(str[size] == '\0'); + BLI_assert((uint)strlen(str) == size); } + /** + * Reference a std::string. Remember that when the std::string is destructed, the StringRefNull + * will point to uninitialized memory. + */ StringRefNull(const std::string &str) : StringRefNull(str.data()) { } + + /** + * Get the char at the given index. 
+ */ + char operator[](uint index) const + { + /* Use '<=' instead of just '<', so that the null character can be accessed as well. */ + BLI_assert(index <= m_size); + return m_data[index]; + } }; /** - * References a char array. It might not be null terminated. + * References a const char array. It might not be null terminated. */ class StringRef : public StringRefBase { public: @@ -164,19 +211,29 @@ class StringRef : public StringRefBase { { } + /** + * StringRefNull can be converted into StringRef, but not the other way around. + */ StringRef(StringRefNull other) : StringRefBase(other.data(), other.size()) { } - StringRef(const char *str) : StringRefBase(str, str ? strlen(str) : 0) + /** + * Create a StringRef from a null-terminated c-string. + */ + StringRef(const char *str) : StringRefBase(str, str ? (uint)strlen(str) : 0) { } - StringRef(const char *str, size_type length) : StringRefBase(str, length) + StringRef(const char *str, uint length) : StringRefBase(str, length) { } - StringRef(const std::string &str) : StringRefBase(str.data(), str.size()) + /** + * Reference a std::string. Remember that when the std::string is destructed, the StringRef + * will point to uninitialized memory. + */ + StringRef(const std::string &str) : StringRefBase(str.data(), (uint)str.size()) { } @@ -198,6 +255,15 @@ class StringRef : public StringRefBase { BLI_assert(this->startswith(prefix)); return this->drop_prefix(prefix.size()); } + + /** + * Get the char at the given index. + */ + char operator[](uint index) const + { + BLI_assert(index < m_size); + return m_data[index]; + } }; /* More inline functions @@ -215,6 +281,10 @@ inline std::ostream &operator<<(std::ostream &stream, StringRefNull ref) return stream; } +/** + * Adding two StringRefs will allocate an std::string. This is not efficient, but convenient in + * most cases. 
+ */ inline std::string operator+(StringRef a, StringRef b) { return std::string(a) + std::string(b); @@ -233,6 +303,9 @@ inline bool operator!=(StringRef a, StringRef b) return !(a == b); } +/** + * Return true when the string starts with the given prefix. + */ inline bool StringRefBase::startswith(StringRef prefix) const { if (m_size < prefix.m_size) { @@ -246,6 +319,9 @@ inline bool StringRefBase::startswith(StringRef prefix) const return true; } +/** + * Return true when the string ends with the given suffix. + */ inline bool StringRefBase::endswith(StringRef suffix) const { if (m_size < suffix.m_size) { @@ -260,6 +336,9 @@ inline bool StringRefBase::endswith(StringRef suffix) const return true; } +/** + * Return a new StringRef containing only a substring of the original string. + */ inline StringRef StringRefBase::substr(uint start, uint size) const { BLI_assert(start + size <= m_size); diff --git a/source/blender/blenlib/BLI_utility_mixins.hh b/source/blender/blenlib/BLI_utility_mixins.hh index 441575f9111..31d28792865 100644 --- a/source/blender/blenlib/BLI_utility_mixins.hh +++ b/source/blender/blenlib/BLI_utility_mixins.hh @@ -23,6 +23,9 @@ namespace BLI { +/** + * A type that inherits from NonCopyable cannot be copied anymore. + */ class NonCopyable { public: /* Disable copy construction and assignment. */ @@ -35,6 +38,9 @@ class NonCopyable { NonCopyable &operator=(NonCopyable &&other) = default; }; +/** + * A type that inherits from NonMovable cannot be moved anymore. + */ class NonMovable { public: /* Disable move construction and assignment. */ diff --git a/source/blender/blenlib/BLI_vector.hh b/source/blender/blenlib/BLI_vector.hh index 49cf41c2005..052cee23028 100644 --- a/source/blender/blenlib/BLI_vector.hh +++ b/source/blender/blenlib/BLI_vector.hh @@ -20,9 +20,21 @@ /** \file * \ingroup bli * - * This vector wraps a dynamically sized array of a specific type. It supports small object - * optimization. 
That means, when the vector only contains a few elements, no memory allocation is - * performed. Instead, those elements are stored directly in the vector. + * A `BLI::Vector` is a dynamically growing contiguous array for values of type T. It is + * designed to be a more convenient and efficient replacement for `std::vector`. Note that the term + * "vector" has nothing to do with a vector from computer graphics here. + * + * A vector supports efficient insertion and removal at the end (O(1) amortized). Removal in other + * places takes O(n) time, because all elements afterwards have to be moved. If the order of + * elements is not important, `remove_and_reorder` can be used instead of `remove` for better + * performance. + * + * The improved efficiency is mainly achieved by supporting small buffer optimization. As long as + * the number of elements in the vector does not become larger than InlineBufferCapacity, no memory + * allocation is done. As a consequence, iterators are invalidated when a BLI::Vector is moved + * (iterators of std::vector remain valid when the vector is moved). + * + * `BLI::Vector` should be your default choice for a vector data structure in Blender. */ #include @@ -37,30 +49,69 @@ #include "BLI_listbase_wrapper.hh" #include "BLI_math_base.h" #include "BLI_memory_utils.hh" +#include "BLI_string.h" +#include "BLI_string_ref.hh" #include "BLI_utildefines.h" #include "MEM_guardedalloc.h" namespace BLI { -template +template< + /** + * Type of the values stored in this vector. It has to be movable. + */ + typename T, + /** + * The number of values that can be stored in this vector, without doing a heap allocation. + * Sometimes it makes sense to increase this value a lot. The memory in the inline buffer is + * not initialized when it is not needed. + * + * When T is large, the small buffer optimization is disabled by default to avoid large + * unexpected allocations on the stack. It can still be enabled explicitely though. 
+ */ + uint InlineBufferCapacity = (sizeof(T) < 100) ? 4 : 0, + /** + * The allocator used by this vector. Should rarely be changed, except when you don't want that + * MEM_* is used internally. + */ + typename Allocator = GuardedAllocator> class Vector { private: + /** + * Use pointers instead of storing the size explicitely. This reduces the number of instructions + * in `append`. + * + * The pointers might point to the memory in the inline buffer. + */ T *m_begin; T *m_end; T *m_capacity_end; + + /** Used for allocations when the inline buffer is too small. */ Allocator m_allocator; - AlignedBuffer<(uint)sizeof(T) * InlineBufferCapacity, (uint)alignof(T)> m_small_buffer; + /** A placeholder buffer that will remain uninitialized until it is used. */ + AlignedBuffer<(uint)sizeof(T) * InlineBufferCapacity, (uint)alignof(T)> m_inline_buffer; + + /** + * Store the size of the vector explicitely in debug builds. Otherwise you'd always have to call + * the `size` function or do the math to compute it from the pointers manually. This is rather + * annoying. Knowing the size of a vector is often quite essential when debugging some code. + */ #ifndef NDEBUG - /* Storing size in debug builds, because it makes debugging much easier sometimes. */ uint m_debug_size; # define UPDATE_VECTOR_SIZE(ptr) (ptr)->m_debug_size = (uint)((ptr)->m_end - (ptr)->m_begin) #else # define UPDATE_VECTOR_SIZE(ptr) ((void)0) #endif - template friend class Vector; + /** + * Be a friend with other vector instanciations. This is necessary to implement some memory + * management logic. + */ + template + friend class Vector; public: /** @@ -69,7 +120,7 @@ class Vector { */ Vector() { - m_begin = this->small_buffer(); + m_begin = this->inline_buffer(); m_end = m_begin; m_capacity_end = m_begin + InlineBufferCapacity; UPDATE_VECTOR_SIZE(this); @@ -77,15 +128,12 @@ class Vector { /** * Create a vector with a specific size. - * The elements will be default initialized. 
+ * The elements will be default constructed. + * If T is trivially constructible, the elements in the vector are not touched. */ explicit Vector(uint size) : Vector() { - this->reserve(size); - this->increase_size_unchecked(size); - for (T *current = m_begin; current != m_end; current++) { - new (current) T(); - } + this->resize(size); } /** @@ -94,25 +142,29 @@ class Vector { Vector(uint size, const T &value) : Vector() { this->reserve(size); - this->increase_size_unchecked(size); + this->increase_size_by_unchecked(size); BLI::uninitialized_fill_n(m_begin, size, value); } /** - * Create a vector from an initializer list. + * Create a vector that contains copys of the values in the initialized list. + * + * This allows you to write code like: + * Vector vec = {3, 4, 5}; */ - Vector(std::initializer_list values) : Vector(ArrayRef(values)) + Vector(const std::initializer_list &values) : Vector(ArrayRef(values)) { } /** - * Create a vector from an array ref. + * Create a vector from an array ref. The values in the vector are copy constructed. */ Vector(ArrayRef values) : Vector() { - this->reserve(values.size()); - this->increase_size_unchecked(values.size()); - BLI::uninitialized_copy_n(values.begin(), values.size(), this->begin()); + uint size = values.size(); + this->reserve(size); + this->increase_size_by_unchecked(size); + BLI::uninitialized_copy_n(values.data(), size, m_begin); } /** @@ -129,45 +181,53 @@ class Vector { } /** - * Create a vector from a ListBase. + * Create a vector from a ListBase. The caller has to make sure that the values in the linked + * list have the correct type. + * + * Example Usage: + * Vector modifiers(ob->modifiers); */ Vector(ListBase &values) : Vector() { - for (T value : ListBaseWrapper::type>(values)) { + LISTBASE_FOREACH (T, value, &values) { this->append(value); } } /** - * Create a copy of another vector. - * The other vector will not be changed. 
- * If the other vector has less than InlineBufferCapacity elements, no allocation will be made. + * Create a copy of another vector. The other vector will not be changed. If the other vector has + * less than InlineBufferCapacity elements, no allocation will be made. */ Vector(const Vector &other) : m_allocator(other.m_allocator) { this->init_copy_from_other_vector(other); } - template - Vector(const Vector &other) : m_allocator(other.m_allocator) + /** + * Create a copy of a vector with a different InlineBufferCapacity. This needs to be handled + * separately, so that the other one is a valid copy constructor. + */ + template + Vector(const Vector &other) + : m_allocator(other.m_allocator) { this->init_copy_from_other_vector(other); } /** - * Steal the elements from another vector. - * This does not do an allocation. - * The other vector will have zero elements afterwards. + * Steal the elements from another vector. This does not do an allocation. The other vector will + * have zero elements afterwards. */ - template - Vector(Vector &&other) noexcept : m_allocator(other.m_allocator) + template + Vector(Vector &&other) noexcept + : m_allocator(other.m_allocator) { uint size = other.size(); - if (other.is_small()) { + if (other.is_inline()) { if (size <= InlineBufferCapacity) { /* Copy between inline buffers. */ - m_begin = this->small_buffer(); + m_begin = this->inline_buffer(); m_end = m_begin + size; m_capacity_end = m_begin + InlineBufferCapacity; uninitialized_relocate_n(other.m_begin, size, m_begin); @@ -175,8 +235,7 @@ class Vector { else { /* Copy from inline buffer to newly allocated buffer. 
*/ uint capacity = size; - m_begin = (T *)m_allocator.allocate_aligned( - sizeof(T) * capacity, std::alignment_of::value, __func__); + m_begin = (T *)m_allocator.allocate(sizeof(T) * capacity, alignof(T), AT); m_end = m_begin + size; m_capacity_end = m_begin + capacity; uninitialized_relocate_n(other.m_begin, size, m_begin); @@ -189,9 +248,9 @@ class Vector { m_capacity_end = other.m_capacity_end; } - other.m_begin = other.small_buffer(); + other.m_begin = other.inline_buffer(); other.m_end = other.m_begin; - other.m_capacity_end = other.m_begin + OtherN; + other.m_capacity_end = other.m_begin + OtherInlineBufferCapacity; UPDATE_VECTOR_SIZE(this); UPDATE_VECTOR_SIZE(&other); } @@ -199,7 +258,7 @@ class Vector { ~Vector() { destruct_n(m_begin, this->size()); - if (!this->is_small()) { + if (!this->is_inline()) { m_allocator.deallocate(m_begin); } } @@ -242,8 +301,8 @@ class Vector { return *this; } - /* This can fail, when the vector is used to build a recursive data structure. - See https://youtu.be/7Qgd9B1KuMQ?t=840. */ + /* This can be incorrect, when the vector is used to build a recursive data structure. However, + we don't take care of it at this low level. See https://youtu.be/7Qgd9B1KuMQ?t=840. */ this->~Vector(); new (this) Vector(std::move(other)); @@ -251,13 +310,55 @@ class Vector { } /** - * Make sure that enough memory is allocated to hold size elements. - * This won't necessarily make an allocation when size is small. + * Make sure that enough memory is allocated to hold min_capacity elements. + * This won't necessarily make an allocation when min_capacity is small. * The actual size of the vector does not change. */ - void reserve(uint size) + void reserve(uint min_capacity) + { + if (min_capacity > this->capacity()) { + this->realloc_to_at_least(min_capacity); + } + } + + /** + * Change the size of the vector so that it contains new_size elements. + * If new_size is smaller than the old size, the elements at the end of the vector are + * destructed. 
If new_size is larger than the old size, the new elements at the end are default + * constructed. If T is trivially constructible, the memory is not touched by this function. + */ + void resize(uint new_size) { - this->grow(size); + uint old_size = this->size(); + if (new_size > old_size) { + this->reserve(new_size); + default_construct_n(m_begin + old_size, new_size - old_size); + } + else { + destruct_n(m_begin + new_size, old_size - new_size); + } + m_end = m_begin + new_size; + UPDATE_VECTOR_SIZE(this); + } + + /** + * Change the size of the vector so that it contains new_size elements. + * If new_size is smaller than the old size, the elements at the end of the vector are + * destructed. If new_size is larger than the old size, the new elements will be copy constructed + * from the given value. + */ + void resize(uint new_size, const T &value) + { + uint old_size = this->size(); + if (new_size > old_size) { + this->reserve(new_size); + uninitialized_fill_n(m_begin + old_size, new_size - old_size, value); + } + else { + destruct_n(m_begin + new_size, old_size - new_size); + } + m_end = m_begin + new_size; + UPDATE_VECTOR_SIZE(this); } /** @@ -275,14 +376,14 @@ class Vector { * Afterwards the vector has 0 elements and any allocated memory * will be freed. */ - void clear_and_make_small() + void clear_and_make_inline() { destruct_n(m_begin, this->size()); - if (!this->is_small()) { + if (!this->is_inline()) { m_allocator.deallocate(m_begin); } - m_begin = this->small_buffer(); + m_begin = this->inline_buffer(); m_end = m_begin; m_capacity_end = m_begin + InlineBufferCapacity; UPDATE_VECTOR_SIZE(this); @@ -291,19 +392,24 @@ class Vector { /** * Insert a new element at the end of the vector. * This might cause a reallocation with the capacity is exceeded. + * + * This is similar to std::vector::push_back. 
*/ void append(const T &value) { this->ensure_space_for_one(); this->append_unchecked(value); } - void append(T &&value) { this->ensure_space_for_one(); this->append_unchecked(std::move(value)); } + /** + * Append the value to the vector and return the index that can be used to access the newly + * added value. + */ uint append_and_get_index(const T &value) { uint index = this->size(); @@ -311,6 +417,11 @@ class Vector { return index; } + /** + * Append the value if it is not yet in the vector. This has to do a linear search to check if + * the value is in the vector. Therefore, this should only be called when it is known that the + * vector is small. + */ void append_non_duplicates(const T &value) { if (!this->contains(value)) { @@ -318,6 +429,11 @@ class Vector { } } + /** + * Append the value and assume that vector has enough memory reserved. This invokes undefined + * behavior when not enough capacity has been reserved beforehand. Only use this in performance + * critical code. + */ void append_unchecked(const T &value) { BLI_assert(m_end < m_capacity_end); @@ -325,7 +441,6 @@ class Vector { m_end++; UPDATE_VECTOR_SIZE(this); } - void append_unchecked(T &&value) { BLI_assert(m_end < m_capacity_end); @@ -342,10 +457,16 @@ class Vector { { this->reserve(this->size() + n); BLI::uninitialized_fill_n(m_end, n, value); - this->increase_size_unchecked(n); + this->increase_size_by_unchecked(n); } - void increase_size_unchecked(uint n) + /** + * Enlarges the size of the internal buffer that is considered to be initialized. This invokes + * undefined behavior when when the new size is larger than the capacity. The method can be + * useful when you want to call constructors in the vector yourself. This should only be done in + * very rare cases and has to be justified every time. 
+ */ + void increase_size_by_unchecked(uint n) { BLI_assert(m_end + n <= m_capacity_end); m_end += n; @@ -354,18 +475,24 @@ class Vector { /** * Copy the elements of another array to the end of this vector. + * + * This can be used to emulate parts of std::vector::insert. */ void extend(ArrayRef array) { - this->extend(array.begin(), array.size()); + this->extend(array.data(), array.size()); } - void extend(const T *start, uint amount) { this->reserve(this->size() + amount); this->extend_unchecked(start, amount); } + /** + * Adds all elements from the array that are not already in the vector. This is an expensive + * operation when the vector is large, but can be very cheap when it is known that the vector is + * small. + */ void extend_non_duplicates(ArrayRef array) { for (const T &value : array) { @@ -373,11 +500,14 @@ class Vector { } } + /** + * Extend the vector without bounds checking. It is assumed that enough memory has been reserved + * beforehand. Only use this in performance critical code. + */ void extend_unchecked(ArrayRef array) { - this->extend_unchecked(array.begin(), array.size()); + this->extend_unchecked(array.data(), array.size()); } - void extend_unchecked(const T *start, uint amount) { BLI_assert(m_begin + amount <= m_capacity_end); @@ -395,7 +525,6 @@ class Vector { BLI_assert(this->size() > 0); return *(m_end - 1); } - T &last() { BLI_assert(this->size() > 0); @@ -407,9 +536,12 @@ class Vector { */ void fill(const T &value) { - std::fill(m_begin, m_end, value); + initialized_fill_n(m_begin, this->size(), value); } + /** + * Copy the value to all positions specified by the indices array. + */ void fill_indices(ArrayRef indices, const T &value) { MutableArrayRef(*this).fill_indices(indices, value); @@ -426,6 +558,8 @@ class Vector { /** * Returns true when the vector contains no elements, otherwise false. + * + * This is the same as std::vector::empty. 
*/ bool is_empty() const { @@ -433,33 +567,36 @@ class Vector { } /** - * Deconstructs the last element and decreases the size by one. - * This will assert when the vector is empty. + * Destructs the last element and decreases the size by one. This invokes undefined behavior when + * the vector is empty. */ void remove_last() { BLI_assert(!this->is_empty()); m_end--; - destruct(m_end); + m_end->~T(); UPDATE_VECTOR_SIZE(this); } /** - * Remove the last element from the vector and return it. + * Remove the last element from the vector and return it. This invokes undefined behavior when + * the vector is empty. + * + * This is similar to std::vector::pop_back. */ T pop_last() { BLI_assert(!this->is_empty()); m_end--; T value = std::move(*m_end); - destruct(m_end); + m_end->~T(); UPDATE_VECTOR_SIZE(this); return value; } /** - * Delete any element in the vector. - * The empty space will be filled by the previously last element. + * Delete any element in the vector. The empty space will be filled by the previously last + * element. This takes O(1) time. */ void remove_and_reorder(uint index) { @@ -469,37 +606,60 @@ class Vector { if (element_to_remove < m_end) { *element_to_remove = std::move(*m_end); } - destruct(m_end); + m_end->~T(); UPDATE_VECTOR_SIZE(this); } + /** + * Finds the first occurence of the value, removes it and copies the last element to the hole in + * the vector. This takes O(n) time. + */ void remove_first_occurrence_and_reorder(const T &value) { - uint index = this->index(value); + uint index = this->first_index_of(value); this->remove_and_reorder((uint)index); } + /** + * Remove the element at the given index and move all values coming after it one towards the + * front. This takes O(n) time. If the order is not important, remove_and_reorder should be used + * instead. + * + * This is similar to std::vector::erase. 
+ */ + void remove(uint index) + { + BLI_assert(index < this->size()); + uint last_index = this->size() - 1; + for (uint i = index; i < last_index; i++) { + m_begin[i] = std::move(m_begin[i + 1]); + } + m_begin[last_index].~T(); + m_end--; + UPDATE_VECTOR_SIZE(this); + } + /** * Do a linear search to find the value in the vector. * When found, return the first index, otherwise return -1. */ - int index_try(const T &value) const + int first_index_of_try(const T &value) const { for (T *current = m_begin; current != m_end; current++) { if (*current == value) { - return current - m_begin; + return (int)(current - m_begin); } } return -1; } /** - * Do a linear search to find the value in the vector. - * When found, return the first index, otherwise fail. + * Do a linear search to find the value in the vector and return the found index. This invokes + * undefined behavior when the value is not in the vector. */ - uint index(const T &value) const + uint first_index_of(const T &value) const { - int index = this->index_try(value); + int index = this->first_index_of_try(value); BLI_assert(index >= 0); return (uint)index; } @@ -510,27 +670,13 @@ class Vector { */ bool contains(const T &value) const { - return this->index_try(value) != -1; + return this->first_index_of_try(value) != -1; } /** - * Compare vectors element-wise. - * Return true when they have the same length and all elements - * compare equal, otherwise false. + * Get the value at the given index. This invokes undefined behavior when the index is out of + * bounds. */ - static bool all_equal(const Vector &a, const Vector &b) - { - if (a.size() != b.size()) { - return false; - } - for (uint i = 0; i < a.size(); i++) { - if (a[i] != b[i]) { - return false; - } - } - return true; - } - const T &operator[](uint index) const { BLI_assert(index < this->size()); @@ -543,6 +689,22 @@ class Vector { return m_begin[index]; } + /** + * Get access to the underlying array. 
+ */ + T *data() + { + return m_begin; + } + + /** + * Get access to the underlying array. + */ + const T *data() const + { + return m_begin; + } + T *begin() { return m_begin; @@ -562,74 +724,94 @@ class Vector { } /** - * Get the current capacity of the vector. + * Get the current capacity of the vector, i.e. the maximum number of elements the vector can + * hold, before it has to reallocate. */ uint capacity() const { return (uint)(m_capacity_end - m_begin); } + /** + * Get an index range that makes looping over all indices more convenient and less error prone. + * Obviously, this should only be used when you actually need the index in the loop. + * + * Example: + * for (uint i : myvector.index_range()) { + * do_something(i, my_vector[i]); + * } + */ IndexRange index_range() const { return IndexRange(this->size()); } - void print_stats() const + /** + * Print some debug information about the vector. + */ + void print_stats(StringRef name = "") const { - std::cout << "Small Vector at " << (void *)this << ":" << std::endl; - std::cout << " Elements: " << this->size() << std::endl; - std::cout << " Capacity: " << (m_capacity_end - m_begin) << std::endl; - std::cout << " Small Elements: " << InlineBufferCapacity - << " Size on Stack: " << sizeof(*this) << std::endl; + std::cout << "Vector Stats: " << name << "\n"; + std::cout << " Address: " << this << "\n"; + std::cout << " Elements: " << this->size() << "\n"; + std::cout << " Capacity: " << (m_capacity_end - m_begin) << "\n"; + std::cout << " Inline Capacity: " << InlineBufferCapacity << "\n"; + + char memory_size_str[15]; + BLI_str_format_byte_unit(memory_size_str, sizeof(*this), true); + std::cout << " Size on Stack: " << memory_size_str << "\n"; } private: - T *small_buffer() const + T *inline_buffer() const { - return (T *)m_small_buffer.ptr(); + return (T *)m_inline_buffer.ptr(); } - bool is_small() const + bool is_inline() const { - return m_begin == this->small_buffer(); + return m_begin == 
this->inline_buffer(); } void ensure_space_for_one() { if (UNLIKELY(m_end >= m_capacity_end)) { - this->grow(std::max(this->size() * 2, (uint)1)); + this->realloc_to_at_least(this->size() + 1); } + std::vector a; + a.push_back(4); } - BLI_NOINLINE void grow(uint min_capacity) + BLI_NOINLINE void realloc_to_at_least(uint min_capacity) { if (this->capacity() >= min_capacity) { return; } - /* Round up to the next power of two. Otherwise consecutive calls to grow can cause a - * reallocation every time even though the min_capacity only increments. */ - min_capacity = power_of_2_max_u(min_capacity); + /* At least double the size of the previous allocation. Otherwise consecutive calls to grow can + * cause a reallocation every time even though min_capacity only increments. */ + uint min_new_capacity = this->capacity() * 2; + uint new_capacity = std::max(min_capacity, min_new_capacity); uint size = this->size(); - T *new_array = (T *)m_allocator.allocate_aligned( - min_capacity * (uint)sizeof(T), std::alignment_of::value, "grow BLI::Vector"); + T *new_array = (T *)m_allocator.allocate(new_capacity * (uint)sizeof(T), alignof(T), AT); uninitialized_relocate_n(m_begin, size, new_array); - if (!this->is_small()) { + if (!this->is_inline()) { m_allocator.deallocate(m_begin); } m_begin = new_array; m_end = m_begin + size; - m_capacity_end = m_begin + min_capacity; + m_capacity_end = m_begin + new_capacity; } /** * Initialize all properties, except for m_allocator, which has to be initialized beforehand. 
*/ - template void init_copy_from_other_vector(const Vector &other) + template + void init_copy_from_other_vector(const Vector &other) { m_allocator = other.m_allocator; @@ -637,19 +819,18 @@ class Vector { uint capacity = size; if (size <= InlineBufferCapacity) { - m_begin = this->small_buffer(); + m_begin = this->inline_buffer(); capacity = InlineBufferCapacity; } else { - m_begin = (T *)m_allocator.allocate_aligned( - sizeof(T) * size, std::alignment_of::value, __func__); + m_begin = (T *)m_allocator.allocate(sizeof(T) * size, alignof(T), AT); capacity = size; } m_end = m_begin + size; m_capacity_end = m_begin + capacity; - uninitialized_copy(other.begin(), other.end(), m_begin); + uninitialized_copy_n(other.data(), size, m_begin); UPDATE_VECTOR_SIZE(this); } }; diff --git a/source/blender/blenlib/BLI_vector_set.hh b/source/blender/blenlib/BLI_vector_set.hh index f402f47c357..7e6a7de6ee6 100644 --- a/source/blender/blenlib/BLI_vector_set.hh +++ b/source/blender/blenlib/BLI_vector_set.hh @@ -20,138 +20,195 @@ /** \file * \ingroup bli * - * A VectorSet is a set built on top of a vector. The elements are stored in a continuous array, - * but every element exists at most once. The insertion order is maintained, as long as there are - * no deletes. The expected time to check if a value is in the VectorSet is O(1). + * A `BLI::VectorSet` is an ordered container for elements of type `Key`. It has the same + * interface as `BLI::Set` with the following extensions: + * - The insertion order of keys is maintained as long as no elements are removed. + * - The keys are stored in a contiguous array. + * + * All core operations (add, remove and contains) can be done in O(1) amortized expected time. + * + * Using a VectorSet instead of a normal Set can be benefitial in any of the following + * circumstances: + * - The insertion order is important. + * - Iteration over all keys has to be fast. 
+ * - The keys in the set are supposed to be passed to a function that does not have to know that + * the keys are stored in a set. With a VectorSet, one can get an ArrayRef containing all keys + * without additional copies. + * + * BLI::VectorSet is implemented using open addressing in a slot array with a power-of-two size. + * Other than in BLI::Set, a slot does not contain the key though. Instead it only contains an + * index into an array of keys that is stored separately. + * + * Some noteworthy information: + * - Key must be a movable type. + * - Pointers to keys might be invalidated, when the vector set is changed or moved. + * - The hash function can be customized. See BLI_hash.hh for details. + * - The probing strategy can be customized. See BLI_probing_strategies.hh for details. + * - The slot type can be customized. See BLI_vector_set_slots.hh for details. + * - The methods `add_new` and `remove_contained` should be used instead of `add` and `remove` + * whenever appropriate. Assumptions and intention are described better this way. + * - Using a range-for loop over a vector set, is as efficient as iterating over an array (because + * it is the same thing). + * - Lookups can be performed using types other than Key without conversion. For that use the + * methods ending with `_as`. The template parameters Hash and IsEqual have to support the other + * key type. This can greatly improve performance when the strings are used as keys. + * - The default constructor is cheap. + * - The `print_stats` method can be used to get information about the distribution of keys and + * memory usage. + * + * Possible Improvements: + * - Small buffer optimization for the keys. 
*/ +#include "BLI_array.hh" #include "BLI_hash.hh" -#include "BLI_open_addressing.hh" -#include "BLI_vector.hh" +#include "BLI_hash_tables.hh" +#include "BLI_probing_strategies.hh" +#include "BLI_vector_set_slots.hh" namespace BLI { -// clang-format off - -#define ITER_SLOTS_BEGIN(VALUE, ARRAY, OPTIONAL_CONST, R_SLOT) \ - uint32_t hash = DefaultHash{}(VALUE); \ - uint32_t perturb = hash; \ - while (true) { \ - for (uint i = 0; i < 4; i++) {\ - uint32_t slot_index = (hash + i) & ARRAY.slot_mask(); \ - OPTIONAL_CONST Slot &R_SLOT = ARRAY.item(slot_index); - -#define ITER_SLOTS_END \ - } \ - perturb >>= 5; \ - hash = hash * 5 + 1 + perturb; \ - } ((void)0) - -// clang-format on - -template class VectorSet { +template< + /** + * Type of the elements that are stored in this set. It has to be movable. Furthermore, the + * hash and is-equal functions have to support it. + */ + typename Key, + /** + * The strategy used to deal with collisions. They are defined in BLI_probing_strategies.hh. + */ + typename ProbingStrategy = DefaultProbingStrategy, + /** + * The hash function used to hash the keys. There is a default for many types. See BLI_hash.hh + * for examples on how to define a custom hash function. + */ + typename Hash = DefaultHash, + /** + * The equality operator used to compare keys. By default it will simply compare keys using the + * `==` operator. + */ + typename IsEqual = DefaultEquality, + /** + * This is what will actually be stored in the hash table array. At a minimum a slot has to be + * able to hold an array index and information about whether the slot is empty, occupied or + * removed. Using a non-standard slot type can improve performance for some types. + * Also see BLI_vector_set_slots.hh. + */ + typename Slot = typename DefaultVectorSetSlot::type, + /** + * The allocator used by this set. Should rarely be changed, except when you don't want that + * MEM_* etc. is used internally. 
+ */ + typename Allocator = GuardedAllocator> +class VectorSet { private: - static constexpr int32_t IS_EMPTY = -1; - static constexpr int32_t IS_DUMMY = -2; - - class Slot { - private: - int32_t m_value = IS_EMPTY; - - public: - static constexpr uint slots_per_item = 1; - - bool is_set() const - { - return m_value >= 0; - } - - bool is_empty() const - { - return m_value == IS_EMPTY; - } - - bool is_dummy() const - { - return m_value == IS_DUMMY; - } + /** + * Slots are either empty, occupied or removed. The number of occupied slots can be computed by + * subtracting the removed slots from the occupied-and-removed slots. + */ + uint32_t m_removed_slots; + uint32_t m_occupied_and_removed_slots; - bool has_value(const T &value, const T *elements) const - { - return this->is_set() && elements[this->index()] == value; - } + /** + * The maximum number of slots that can be used (either occupied or removed) until the set has to + * grow. This is the total number of slots times the max load factor. + */ + uint32_t m_usable_slots; - bool has_index(uint index) const - { - return m_value == (int32_t)index; - } + /** + * The number of slots minus one. This is a bit mask that can be used to turn any integer into a + * valid slot index efficiently. + */ + uint32_t m_slot_mask; - uint index() const - { - BLI_assert(this->is_set()); - return (uint)m_value; - } + /** This is called to hash incoming keys. */ + Hash m_hash; - int32_t &index_ref() - { - return m_value; - } + /** This is called to check equality of two keys. */ + IsEqual m_is_equal; - void set_index(uint index) - { - BLI_assert(!this->is_set()); - m_value = (int32_t)index; - } + /** The max load factor is 1/2 = 50% by default. */ +#define LOAD_FACTOR 1, 2 + LoadFactor m_max_load_factor = LoadFactor(LOAD_FACTOR); + using SlotArray = Array; +#undef LOAD_FACTOR - void set_dummy() - { - BLI_assert(this->is_set()); - m_value = IS_DUMMY; - } - }; + /** + * This is the array that contains the actual slots. 
There is always at least one empty slot and + * the size of the array is a power of two. + */ + SlotArray m_slots; - using ArrayType = OpenAddressingArray; - ArrayType m_array; + /** + * Pointer to an array that contains all keys. The keys are sorted by insertion order as long as + * no keys are removed. The first set->size() elements in this array are initialized. The + * capacity of the array is m_usable_slots. + */ + Key *m_keys; - /* The capacity of the array should always be at least m_array.slots_usable(). */ - T *m_elements = nullptr; + /** Iterate over a slot index sequence for a given hash. */ +#define VECTOR_SET_SLOT_PROBING_BEGIN(HASH, R_SLOT) \ + SLOT_PROBING_BEGIN (ProbingStrategy, HASH, m_slot_mask, SLOT_INDEX) \ + auto &R_SLOT = m_slots[SLOT_INDEX]; +#define VECTOR_SET_SLOT_PROBING_END() SLOT_PROBING_END() public: + /** + * Initialize an empty vector set. This is a cheap operation and won't do an allocation. This is + * necessary to avoid a high cost when no elements are added at all. An optimized grow operation + * is performed on the first insertion. + */ VectorSet() + : m_removed_slots(0), + m_occupied_and_removed_slots(0), + m_usable_slots(0), + m_slot_mask(0), + m_slots(1), + m_keys(nullptr) { - m_elements = this->allocate_elements_array(m_array.slots_usable()); } - VectorSet(ArrayRef values) : VectorSet() - { - this->add_multiple(values); - } - - VectorSet(const std::initializer_list &values) : VectorSet() - { - this->add_multiple(values); - } - - VectorSet(const Vector &values) : VectorSet() + /** + * Construct a vector set that contains the given keys. Duplicates will be removed automatically. 
+ */ + VectorSet(const std::initializer_list &keys) : VectorSet() { - this->add_multiple(values); + this->add_multiple(keys); } - VectorSet(const VectorSet &other) : m_array(other.m_array) + ~VectorSet() { - m_elements = this->allocate_elements_array(m_array.slots_usable()); - uninitialized_copy_n(other.m_elements, m_array.slots_set(), m_elements); + destruct_n(m_keys, this->size()); + if (m_keys != nullptr) { + this->deallocate_keys_array(m_keys); + } } - VectorSet(VectorSet &&other) : m_array(std::move(other.m_array)), m_elements(other.m_elements) + VectorSet(const VectorSet &other) + : m_removed_slots(other.m_removed_slots), + m_occupied_and_removed_slots(other.m_occupied_and_removed_slots), + m_usable_slots(other.m_usable_slots), + m_slot_mask(other.m_slot_mask), + m_slots(other.m_slots) { - other.m_elements = other.allocate_elements_array(other.m_array.slots_usable()); + m_keys = this->allocate_keys_array(m_usable_slots); + uninitialized_copy_n(other.m_keys, other.size(), m_keys); } - ~VectorSet() + VectorSet(VectorSet &&other) noexcept + : m_removed_slots(other.m_removed_slots), + m_occupied_and_removed_slots(other.m_occupied_and_removed_slots), + m_usable_slots(other.m_usable_slots), + m_slot_mask(other.m_slot_mask), + m_slots(std::move(other.m_slots)), + m_keys(other.m_keys) { - destruct_n(m_elements, this->size()); - this->deallocate_elements_array(m_elements); + other.m_removed_slots = 0; + other.m_occupied_and_removed_slots = 0; + other.m_usable_slots = 0; + other.m_slot_mask = 0; + other.m_slots = SlotArray(1); + other.m_keys = nullptr; } VectorSet &operator=(const VectorSet &other) @@ -159,8 +216,10 @@ template class VectorSet { if (this == &other) { return *this; } + this->~VectorSet(); new (this) VectorSet(other); + return *this; } @@ -169,318 +228,529 @@ template class VectorSet { if (this == &other) { return *this; } + this->~VectorSet(); new (this) VectorSet(std::move(other)); + return *this; } /** - * Allocate memory such that at least 
min_usable_slots can be added without having to grow again. + * Add a new key to the vector set. This invokes undefined behavior when the key is in the set + * already. When you know for certain that a key is not in the set yet, use this method for + * better performance. This also expresses the intent better. */ - void reserve(uint min_usable_slots) + void add_new(const Key &key) { - if (m_array.slots_usable() < min_usable_slots) { - this->grow(min_usable_slots); - } + this->add_new__impl(key, m_hash(key)); + } + void add_new(Key &&key) + { + this->add_new__impl(std::move(key), m_hash(key)); } /** - * Add a new element. The method assumes that the value did not exist before. + * Add a key to the vector set. If the key exists in the set already, nothing is done. The return + * value is true if the key was newly added. + * + * This is similar to std::unordered_set::insert. */ - void add_new(const T &value) + bool add(const Key &key) { - this->add_new__impl(value); + return this->add_as(key); } - void add_new(T &&value) + bool add(Key &&key) { - this->add_new__impl(std::move(value)); + return this->add_as(std::move(key)); } /** - * Add a new element if it does not exist yet. Does not add the value again if it exists already. + * Same as `add`, but accepts other key types that are supported by the hash function. */ - bool add(const T &value) + template bool add_as(ForwardKey &&key) { - return this->add__impl(value); + return this->add__impl(std::forward(key), m_hash(key)); } - bool add(T &&value) + + /** + * Convenience function to add many keys to the vector set at once. Duplicates are removed + * automatically. + * + * We might be able to make this faster than sequentially adding all keys, but that is not + * implemented yet. + */ + void add_multiple(ArrayRef keys) { - return this->add__impl(std::move(value)); + for (const Key &key : keys) { + this->add(key); + } } /** - * Add multiple values. Duplicates will not be inserted. 
+ * Returns true if the key is in the vector set. + * + * This is similar to std::unordered_set::find() != std::unordered_set::end(). */ - void add_multiple(ArrayRef values) + bool contains(const Key &key) const { - for (const T &value : values) { - this->add(value); - } + return this->contains_as(key); } /** - * Returns true when the value is in the set-vector, otherwise false. + * Same as `contains`, but accepts other key types that are supported by the hash function. */ - bool contains(const T &value) const + template bool contains_as(const ForwardKey &key) const { - ITER_SLOTS_BEGIN (value, m_array, const, slot) { - if (slot.is_empty()) { - return false; - } - else if (slot.has_value(value, m_elements)) { - return true; - } - } - ITER_SLOTS_END; + return this->contains__impl(key, m_hash(key)); } /** - * Remove a value from the set-vector. The method assumes that the value exists. + * Deletes the key from the set. Returns true when the key existed in the set and is now removed. + * This might change the order of elements in the vector. + * + * This is similar to std::unordered_set::erase. */ - void remove(const T &value) + bool remove(const Key &key) { - BLI_assert(this->contains(value)); - ITER_SLOTS_BEGIN (value, m_array, , slot) { - if (slot.has_value(value, m_elements)) { - uint old_index = this->size() - 1; - uint new_index = slot.index(); + return this->remove_as(key); + } - if (new_index < old_index) { - m_elements[new_index] = std::move(m_elements[old_index]); - this->update_slot_index(m_elements[new_index], old_index, new_index); - } + /** + * Same as `remove`, but accepts other key types that are supported by the hash function. + */ + template bool remove_as(const ForwardKey &key) + { + return this->remove__impl(key, m_hash(key)); + } - destruct(m_elements + old_index); - slot.set_dummy(); - m_array.update__set_to_dummy(); - return; - } - } - ITER_SLOTS_END; + /** + * Deletes the key from the set. 
This invokes undefined behavior when the key is not in the set.
+   * It might change the order of elements in the vector.
+   */
+  void remove_contained(const Key &key)
+  {
+    this->remove_contained_as(key);
   }
 
   /**
-   * Get and remove the last element of the vector.
+   * Same as `remove_contained`, but accepts other key types that are supported by the hash
+   * function.
    */
-  T pop()
+  template void remove_contained_as(const ForwardKey &key)
   {
-    BLI_assert(this->size() > 0);
-    uint index_to_pop = this->size() - 1;
-    T value = std::move(m_elements[index_to_pop]);
-    destruct(m_elements + index_to_pop);
+    this->remove_contained__impl(key, m_hash(key));
+  }
 
-    ITER_SLOTS_BEGIN (value, m_array, , slot) {
-      if (slot.has_index(index_to_pop)) {
-        slot.set_dummy();
-        m_array.update__set_to_dummy();
-        return value;
-      }
-    }
-    ITER_SLOTS_END;
+  /**
+   * Delete and return a key from the set. This will remove the last element in the vector. The
+   * order of the remaining elements in the set is not changed.
+   */
+  Key pop()
+  {
+    return this->pop__impl();
   }
 
   /**
-   * Get the index of the value in the vector. It is assumed that the value is in the vector.
+   * Return the location of the key in the vector. It is assumed that the key is in the vector
+   * set. If this is not necessarily the case, use `index_of_try`.
    */
-  uint index(const T &value) const
+  uint32_t index_of(const Key &key) const
   {
-    BLI_assert(this->contains(value));
-    ITER_SLOTS_BEGIN (value, m_array, const, slot) {
-      if (slot.has_value(value, m_elements)) {
-        return slot.index();
-      }
-    }
-    ITER_SLOTS_END;
+    return this->index_of_as(key);
   }
 
   /**
-   * Get the index of the value in the vector. If it does not exist return -1.
+   * Same as `index_of`, but accepts other key types that are supported by the hash function.
*/ - int index_try(const T &value) const + template uint32_t index_of_as(const ForwardKey &key) const { - ITER_SLOTS_BEGIN (value, m_array, const, slot) { - if (slot.has_value(value, m_elements)) { - return slot.index(); - } - else if (slot.is_empty()) { - return -1; - } - } - ITER_SLOTS_END; + return this->index_of__impl(key, m_hash(key)); } /** - * Get the number of elements in the set-vector. + * Return the location of the key in the vector. If the key is not in the set, -1 is returned. + * If you know for sure that the key is in the set, it is better to use `index_of` instead. */ - uint size() const + int32_t index_of_try(const Key &key) const { - return m_array.slots_set(); + return (int32_t)this->index_of_try_as(key); } - bool is_empty() const + /** + * Same as `index_of_try`, but accepts other key types that are supported by the hash function. + */ + template int32_t index_of_try_as(const ForwardKey &key) const { - return this->size() == 0; + return this->index_of_try__impl(key, m_hash(key)); } - const T *begin() const + /** + * Get a pointer to the beginning of the array containing all keys. + */ + const Key *data() const { - return m_elements; + return m_keys; } - const T *end() const + const Key *begin() const { - return m_elements + this->size(); + return m_keys; } - const T &operator[](uint index) const + const Key *end() const + { + return m_keys + this->size(); + } + + /** + * Get the key stored at the given position in the vector. + */ + const Key &operator[](uint32_t index) const { BLI_assert(index <= this->size()); - return m_elements[index]; + return m_keys[index]; } - ArrayRef as_ref() const + operator ArrayRef() const + { + return ArrayRef(m_keys, this->size()); + } + + /** + * Get an ArrayRef referencing the keys vector. The referenced memory buffer is only valid as + * long as the vector set is not changed. + * + * The keys must not be changed, because this would change their hash value. 
+ */ + ArrayRef as_ref() const { return *this; } - operator ArrayRef() const + /** + * Print common statistics like size and collision count. This is useful for debugging purposes. + */ + void print_stats(StringRef name = "") const { - return ArrayRef(m_elements, this->size()); + HashTableStats stats(*this, this->as_ref()); + stats.print(); } - void print_stats() const + /** + * Returns the number of keys stored in the vector set. + */ + uint32_t size() const { - std::cout << "VectorSet at " << (void *)this << ":\n"; - std::cout << " Size: " << this->size() << "\n"; - std::cout << " Usable Slots: " << m_array.slots_usable() << "\n"; - std::cout << " Total Slots: " << m_array.slots_total() << "\n"; - std::cout << " Average Collisions: " << this->compute_average_collisions() << "\n"; + return m_occupied_and_removed_slots - m_removed_slots; } - private: - void update_slot_index(T &value, uint old_index, uint new_index) + /** + * Returns true if no keys are stored. + */ + bool is_empty() const { - ITER_SLOTS_BEGIN (value, m_array, , slot) { - int32_t &stored_index = slot.index_ref(); - if (stored_index == old_index) { - stored_index = new_index; - return; - } - } - ITER_SLOTS_END; + return m_occupied_and_removed_slots == m_removed_slots; } - template void add_new_in_slot(Slot &slot, ForwardT &&value) + /** + * Returns the number of available slots. This is mostly for debugging purposes. + */ + uint32_t capacity() const { - uint index = this->size(); - slot.set_index(index); - new (m_elements + index) T(std::forward(value)); - m_array.update__empty_to_set(); + return m_slots.size(); } - void ensure_can_add() + /** + * Returns the amount of removed slots in the set. This is mostly for debugging purposes. + */ + uint32_t removed_amount() const { - if (UNLIKELY(m_array.should_grow())) { - this->grow(this->size() + 1); + return m_removed_slots; + } + + /** + * Returns the bytes required per element. This is mostly for debugging purposes. 
+ */ + uint32_t size_per_element() const + { + return sizeof(Slot) + sizeof(Key); + } + + /** + * Returns the approximate memory requirements of the set in bytes. This is more correct for + * larger sets. + */ + uint32_t size_in_bytes() const + { + return sizeof(Slot) * m_slots.size() + sizeof(Key) * m_usable_slots; + } + + /** + * Potentially resize the vector set such that it can hold n elements without doing another grow. + */ + void reserve(uint32_t n) + { + if (m_usable_slots < n) { + this->realloc_and_reinsert(n); } } - BLI_NOINLINE void grow(uint min_usable_slots) + /** + * Get the number of collisions that the probing strategy has to go through to find the key or + * determine that it is not in the set. + */ + uint32_t count_collisions(const Key &key) const { - uint size = this->size(); + return this->count_collisions__impl(key, m_hash(key)); + } + + private: + BLI_NOINLINE void realloc_and_reinsert(uint32_t min_usable_slots) + { + uint32_t total_slots, usable_slots; + m_max_load_factor.compute_total_and_usable_slots( + SlotArray::inline_buffer_capacity(), min_usable_slots, &total_slots, &usable_slots); + uint32_t new_slot_mask = total_slots - 1; + + /* Optimize the case when the set was empty beforehand. We can avoid some copies here. 
*/ + if (this->size() == 0) { + m_slots.~Array(); + new (&m_slots) SlotArray(total_slots); + m_removed_slots = 0; + m_occupied_and_removed_slots = 0; + m_usable_slots = usable_slots; + m_slot_mask = new_slot_mask; + m_keys = this->allocate_keys_array(usable_slots); + return; + } - ArrayType new_array = m_array.init_reserved(min_usable_slots); - T *new_elements = this->allocate_elements_array(new_array.slots_usable()); + SlotArray new_slots(total_slots); - for (uint i : IndexRange(size)) { - this->add_after_grow(i, new_array); + for (Slot &slot : m_slots) { + if (slot.is_occupied()) { + this->add_after_grow_and_destruct_old(slot, new_slots, new_slot_mask); + } } - uninitialized_relocate_n(m_elements, size, new_elements); - this->deallocate_elements_array(m_elements); + Key *new_keys = this->allocate_keys_array(usable_slots); + uninitialized_relocate_n(m_keys, this->size(), new_keys); + this->deallocate_keys_array(m_keys); - m_array = std::move(new_array); - m_elements = new_elements; + /* All occupied slots have been destructed already and empty/removed slots are assumed to be + * trivially destructible. 
*/ + m_slots.clear_without_destruct(); + m_slots = std::move(new_slots); + m_keys = new_keys; + m_occupied_and_removed_slots -= m_removed_slots; + m_usable_slots = usable_slots; + m_removed_slots = 0; + m_slot_mask = new_slot_mask; } - void add_after_grow(uint index, ArrayType &new_array) + void add_after_grow_and_destruct_old(Slot &old_slot, + SlotArray &new_slots, + uint32_t new_slot_mask) { - const T &value = m_elements[index]; - ITER_SLOTS_BEGIN (value, new_array, , slot) { + const Key &key = m_keys[old_slot.index()]; + uint32_t hash = old_slot.get_hash(key, Hash()); + + SLOT_PROBING_BEGIN (ProbingStrategy, hash, new_slot_mask, slot_index) { + Slot &slot = new_slots[slot_index]; if (slot.is_empty()) { - slot.set_index(index); + slot.relocate_occupied_here(old_slot, hash); return; } } - ITER_SLOTS_END; + SLOT_PROBING_END(); } - float compute_average_collisions() const + template bool contains__impl(const ForwardKey &key, uint32_t hash) const { - if (this->size() == 0) { - return 0.0f; - } - - uint collisions_sum = 0; - for (const T &value : this->as_ref()) { - collisions_sum += this->count_collisions(value); + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + return false; + } + if (slot.contains(key, m_is_equal, hash, m_keys)) { + return true; + } } - return (float)collisions_sum / (float)this->size(); + VECTOR_SET_SLOT_PROBING_END(); } - uint count_collisions(const T &value) const + template void add_new__impl(ForwardKey &&key, uint32_t hash) { - uint collisions = 0; - ITER_SLOTS_BEGIN (value, m_array, const, slot) { - if (slot.is_empty() || slot.has_value(value, m_elements)) { - return collisions; + BLI_assert(!this->contains_as(key)); + + this->ensure_can_add(); + + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + uint32_t index = this->size(); + new (m_keys + index) Key(std::forward(key)); + slot.occupy(index, hash); + m_occupied_and_removed_slots++; + return; } - collisions++; } - ITER_SLOTS_END; + 
VECTOR_SET_SLOT_PROBING_END(); } - template void add_new__impl(ForwardT &&value) + template bool add__impl(ForwardKey &&key, uint32_t hash) { - BLI_assert(!this->contains(value)); this->ensure_can_add(); - ITER_SLOTS_BEGIN (value, m_array, , slot) { + + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { if (slot.is_empty()) { - this->add_new_in_slot(slot, std::forward(value)); - return; + uint32_t index = this->size(); + new (m_keys + index) Key(std::forward(key)); + m_occupied_and_removed_slots++; + slot.occupy(index, hash); + return true; + } + if (slot.contains(key, m_is_equal, hash, m_keys)) { + return false; } } - ITER_SLOTS_END; + VECTOR_SET_SLOT_PROBING_END(); } - template bool add__impl(ForwardT &&value) + template uint32_t index_of__impl(const ForwardKey &key, uint32_t hash) const { - this->ensure_can_add(); - ITER_SLOTS_BEGIN (value, m_array, , slot) { + BLI_assert(this->contains_as(key)); + + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash, m_keys)) { + return slot.index(); + } + } + VECTOR_SET_SLOT_PROBING_END(); + } + + template + int32_t index_of_try__impl(const ForwardKey &key, uint32_t hash) const + { + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash, m_keys)) { + return (int32_t)slot.index(); + } if (slot.is_empty()) { - this->add_new_in_slot(slot, std::forward(value)); + return -1; + } + } + VECTOR_SET_SLOT_PROBING_END(); + } + + Key pop__impl() + { + BLI_assert(this->size() > 0); + + uint32_t index_to_pop = this->size() - 1; + Key key = std::move(m_keys[index_to_pop]); + m_keys[index_to_pop].~Key(); + uint32_t hash = m_hash(key); + + m_removed_slots++; + + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.has_index(index_to_pop)) { + slot.remove(); + return key; + } + } + VECTOR_SET_SLOT_PROBING_END(); + } + + template bool remove__impl(const ForwardKey &key, uint32_t hash) + { + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash, 
m_keys)) { + this->remove_key_internal(slot); return true; } - else if (slot.has_value(value, m_elements)) { + if (slot.is_empty()) { return false; } } - ITER_SLOTS_END; + VECTOR_SET_SLOT_PROBING_END(); } - T *allocate_elements_array(uint size) + template void remove_contained__impl(const ForwardKey &key, uint32_t hash) { - return (T *)m_array.allocator().allocate_aligned((uint)sizeof(T) * size, alignof(T), __func__); + BLI_assert(this->contains_as(key)); + + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash, m_keys)) { + this->remove_key_internal(slot); + return; + } + } + VECTOR_SET_SLOT_PROBING_END(); } - void deallocate_elements_array(T *elements) + void remove_key_internal(Slot &slot) { - m_array.allocator().deallocate(elements); + uint32_t index_to_remove = slot.index(); + uint32_t size = this->size(); + uint32_t last_element_index = size - 1; + + if (index_to_remove < last_element_index) { + m_keys[index_to_remove] = std::move(m_keys[last_element_index]); + this->update_slot_index(m_keys[index_to_remove], last_element_index, index_to_remove); + } + + m_keys[last_element_index].~Key(); + slot.remove(); + m_removed_slots++; + return; } -}; -#undef ITER_SLOTS_BEGIN -#undef ITER_SLOTS_END + void update_slot_index(const Key &key, uint32_t old_index, uint32_t new_index) + { + uint32_t hash = m_hash(key); + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.has_index(old_index)) { + slot.update_index(new_index); + return; + } + } + VECTOR_SET_SLOT_PROBING_END(); + } + + template + uint32_t count_collisions__impl(const ForwardKey &key, uint32_t hash) const + { + uint32_t collisions = 0; + + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash, m_keys)) { + return collisions; + } + if (slot.is_empty()) { + return collisions; + } + collisions++; + } + VECTOR_SET_SLOT_PROBING_END(); + } + + void ensure_can_add() + { + if (m_occupied_and_removed_slots >= m_usable_slots) { + 
this->realloc_and_reinsert(this->size() + 1); + BLI_assert(m_occupied_and_removed_slots < m_usable_slots); + } + } + + Key *allocate_keys_array(uint32_t size) + { + return (Key *)m_slots.allocator().allocate((uint32_t)sizeof(Key) * size, alignof(Key), AT); + } + + void deallocate_keys_array(Key *keys) + { + m_slots.allocator().deallocate(keys); + } +}; } // namespace BLI diff --git a/source/blender/blenlib/BLI_vector_set_slots.hh b/source/blender/blenlib/BLI_vector_set_slots.hh new file mode 100644 index 00000000000..13d6acb05c5 --- /dev/null +++ b/source/blender/blenlib/BLI_vector_set_slots.hh @@ -0,0 +1,171 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __BLI_VECTOR_SET_SLOTS_HH__ +#define __BLI_VECTOR_SET_SLOTS_HH__ + +/** \file + * \ingroup bli + * + * This file contains slot types that are supposed to be used with BLI::VectorSet. + * + * Every slot type has to be able to hold an integer index and state information. + * A vector set slot has three possible states: empty, occupied and removed. + * + * A vector slot type has to implement a couple of methods that are explained in + * SimpleVectorSetSlot. + * A vector slot type is assumed to be trivially destructible, when it is in empty or removed + * state. 
+ *
+ * Possible Improvements:
+ * - Implement a slot type that stores the hash.
+ * - Implement a slot type that stores the key. That means that the key would be stored in two
+ *   places: the key vector and the slot itself. Maybe storing the key in the slot as well, can
+ *   result in better performance, due to better cache utilization.
+ */
+
+#include "BLI_sys_types.h"
+
+namespace BLI {
+
+/**
+ * The simplest possible vector set slot. It stores the index and state in a signed integer. If the
+ * value is negative, it represents empty or removed state. Otherwise it represents the index.
+ */
+template class SimpleVectorSetSlot {
+ private:
+#define s_is_empty -1
+#define s_is_removed -2
+
+  /**
+   * After the default constructor has run, the slot has to be in the empty state.
+   */
+  int32_t m_state = s_is_empty;
+
+ public:
+  /**
+   * Return true if this slot contains an index to a key.
+   */
+  bool is_occupied() const
+  {
+    return m_state >= 0;
+  }
+
+  /**
+   * Return true if the slot is empty, i.e. it does not contain an index.
+   */
+  bool is_empty() const
+  {
+    return m_state == s_is_empty;
+  }
+
+  /**
+   * Return the stored index. It is assumed that the slot is occupied.
+   */
+  uint32_t index() const
+  {
+    BLI_assert(this->is_occupied());
+    return (uint32_t)m_state;
+  }
+
+  /**
+   * Return true if the slot contains the given key, i.e. its index points to a key that compares
+   * equal to it. The hash can be used by other implementations to determine inequality faster.
+   */
+  template
+  bool contains(const ForwardKey &key,
+                const IsEqual &is_equal,
+                uint32_t UNUSED(hash),
+                const Key *keys) const
+  {
+    if (m_state >= 0) {
+      return is_equal(key, keys[m_state]);
+    }
+    return false;
+  }
+
+  /**
+   * Move the other slot into this slot and destruct it. We do destruction here, because this way
+   * we can avoid a comparison with the state, since we know the slot is occupied. For this
+   * specific slot implementation, this does not make a difference.
+ */ + void relocate_occupied_here(SimpleVectorSetSlot &other, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + BLI_assert(other.is_occupied()); + m_state = other.m_state; + } + + /** + * Change the state of this slot from empty/removed to occupied. The hash can be used by other + * slot implementations. + */ + void occupy(uint32_t index, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + m_state = (int32_t)index; + } + + /** + * The key has changed its position in the vector, so the index has to be updated. This method + * can assume that the slot is currently occupied. + */ + void update_index(uint32_t index) + { + BLI_assert(this->is_occupied()); + m_state = (int32_t)index; + } + + /** + * Change the state of this slot from occupied to removed. + */ + void remove() + { + BLI_assert(this->is_occupied()); + m_state = s_is_removed; + } + + /** + * Return true if this slot is currently occupied and its corresponding key has the given index. + */ + bool has_index(uint32_t index) const + { + return (uint32_t)m_state == index; + } + + /** + * Return the hash of the currently stored key. In this simple set slot implementation, we just + * compute the hash here. Other implementations might store the hash in the slot instead. 
+ */ + template uint32_t get_hash(const Key &key, const Hash &hash) const + { + BLI_assert(this->is_occupied()); + return hash(key); + } + +#undef s_is_empty +#undef s_is_removed +}; + +template struct DefaultVectorSetSlot; + +template struct DefaultVectorSetSlot { + using type = SimpleVectorSetSlot; +}; + +} // namespace BLI + +#endif /* __BLI_VECTOR_SET_SLOTS_HH__ */ diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt index 73b35a17ec2..79a65bbb98d 100644 --- a/source/blender/blenlib/CMakeLists.txt +++ b/source/blender/blenlib/CMakeLists.txt @@ -187,6 +187,7 @@ set(SRC BLI_hash_md5.h BLI_hash_mm2a.h BLI_hash_mm3.h + BLI_hash_tables.hh BLI_heap.h BLI_heap_simple.h BLI_index_mask.hh @@ -204,6 +205,7 @@ set(SRC BLI_listbase.h BLI_listbase_wrapper.hh BLI_map.hh + BLI_map_slots.hh BLI_math.h BLI_math_base.h BLI_math_bits.h @@ -224,16 +226,17 @@ set(SRC BLI_memory_utils.hh BLI_mempool.h BLI_noise.h - BLI_open_addressing.hh BLI_optional.hh BLI_path_util.h BLI_polyfill_2d.h BLI_polyfill_2d_beautify.h + BLI_probing_strategies.hh BLI_quadric.h BLI_rand.h BLI_rect.h BLI_scanfill.h BLI_set.hh + BLI_set_slots.hh BLI_smallhash.h BLI_sort.h BLI_sort_utils.h @@ -242,7 +245,6 @@ set(SRC BLI_strict_flags.h BLI_string.h BLI_string_cursor_utf8.h - BLI_string_map.hh BLI_string_ref.hh BLI_string_utf8.h BLI_string_utils.h @@ -261,6 +263,7 @@ set(SRC BLI_uvproject.h BLI_vector.hh BLI_vector_set.hh + BLI_vector_set_slots.hh BLI_vfontdata.h BLI_voronoi_2d.h BLI_voxel.h diff --git a/source/blender/blenlib/intern/BLI_index_range.cc b/source/blender/blenlib/intern/BLI_index_range.cc index fefb6e6598e..0fa87cf854d 100644 --- a/source/blender/blenlib/intern/BLI_index_range.cc +++ b/source/blender/blenlib/intern/BLI_index_range.cc @@ -50,7 +50,7 @@ ArrayRef IndexRange::as_array_ref() const } arrays.append(std::move(new_array)); - current_array = arrays.last().begin(); + current_array = arrays.last().data(); 
std::atomic_thread_fence(std::memory_order_seq_cst); current_array_size = new_size; diff --git a/source/blender/blenlib/intern/math_bits_inline.c b/source/blender/blenlib/intern/math_bits_inline.c index a6883c2aaba..8f8f257f1e7 100644 --- a/source/blender/blenlib/intern/math_bits_inline.c +++ b/source/blender/blenlib/intern/math_bits_inline.c @@ -77,8 +77,7 @@ MINLINE int bitscan_reverse_i(int a) MINLINE unsigned int bitscan_reverse_clear_uint(unsigned int *a) { unsigned int i = bitscan_reverse_uint(*a); - /* TODO(sergey): This could probably be optimized. */ - *a &= ~(1 << (sizeof(unsigned int) * 8 - i - 1)); + *a &= ~(0x80000000 >> i); return i; } @@ -97,10 +96,10 @@ MINLINE unsigned int highest_order_bit_uint(unsigned int n) MINLINE unsigned short highest_order_bit_s(unsigned short n) { - n |= (n >> 1); - n |= (n >> 2); - n |= (n >> 4); - n |= (n >> 8); + n |= (unsigned short)(n >> 1); + n |= (unsigned short)(n >> 2); + n |= (unsigned short)(n >> 4); + n |= (unsigned short)(n >> 8); return (unsigned short)(n - (n >> 1)); } diff --git a/source/blender/depsgraph/intern/node/deg_node_id.cc b/source/blender/depsgraph/intern/node/deg_node_id.cc index 4a7e5c4568b..984873fbcac 100644 --- a/source/blender/depsgraph/intern/node/deg_node_id.cc +++ b/source/blender/depsgraph/intern/node/deg_node_id.cc @@ -66,6 +66,13 @@ bool IDNode::ComponentIDKey::operator==(const ComponentIDKey &other) const return type == other.type && STREQ(name, other.name); } +uint32_t IDNode::ComponentIDKey::hash() const +{ + const int type_as_int = static_cast(type); + return BLI_ghashutil_combine_hash(BLI_ghashutil_uinthash(type_as_int), + BLI_ghashutil_strhash_p(name)); +} + /* Initialize 'id' node - from pointer data given. 
*/ void IDNode::init(const ID *id, const char *UNUSED(subdata)) { diff --git a/source/blender/depsgraph/intern/node/deg_node_id.h b/source/blender/depsgraph/intern/node/deg_node_id.h index c7663b50c6f..1e315195c1a 100644 --- a/source/blender/depsgraph/intern/node/deg_node_id.h +++ b/source/blender/depsgraph/intern/node/deg_node_id.h @@ -50,6 +50,7 @@ const char *linkedStateAsString(eDepsNode_LinkedState_Type linked_state); struct IDNode : public Node { struct ComponentIDKey { ComponentIDKey(NodeType type, const char *name = ""); + uint32_t hash() const; bool operator==(const ComponentIDKey &other) const; NodeType type; @@ -115,16 +116,3 @@ struct IDNode : public Node { }; } // namespace DEG - -namespace BLI { - -template<> struct DefaultHash { - uint32_t operator()(const DEG::IDNode::ComponentIDKey &key) const - { - const int type_as_int = static_cast(key.type); - return BLI_ghashutil_combine_hash(BLI_ghashutil_uinthash(type_as_int), - BLI_ghashutil_strhash_p(key.name)); - } -}; - -} // namespace BLI diff --git a/source/blender/functions/FN_cpp_type.hh b/source/blender/functions/FN_cpp_type.hh index 10e95c0341e..df6da7a3f47 100644 --- a/source/blender/functions/FN_cpp_type.hh +++ b/source/blender/functions/FN_cpp_type.hh @@ -534,22 +534,20 @@ namespace CPPTypeUtil { template void construct_default_cb(void *ptr) { - BLI::construct_default((T *)ptr); + new (ptr) T; } template void construct_default_n_cb(void *ptr, uint n) { - for (uint i = 0; i < n; i++) { - BLI::construct_default((T *)ptr + i); - } + BLI::default_construct_n((T *)ptr, n); } template void construct_default_indices_cb(void *ptr, IndexMask index_mask) { - index_mask.foreach_index([&](uint i) { BLI::construct_default((T *)ptr + i); }); + index_mask.foreach_index([&](uint i) { new ((T *)ptr + i) T; }); } template void destruct_cb(void *ptr) { - BLI::destruct((T *)ptr); + ((T *)ptr)->~T(); } template void destruct_n_cb(void *ptr, uint n) { @@ -557,7 +555,8 @@ template void destruct_n_cb(void *ptr, uint n) 
} template void destruct_indices_cb(void *ptr, IndexMask index_mask) { - index_mask.foreach_index([&](uint i) { BLI::destruct((T *)ptr + i); }); + T *ptr_ = (T *)ptr; + index_mask.foreach_index([&](uint i) { ptr_[i].~T(); }); } template void copy_to_initialized_cb(const void *src, void *dst) @@ -601,11 +600,15 @@ void copy_to_uninitialized_indices_cb(const void *src, void *dst, IndexMask inde template void relocate_to_initialized_cb(void *src, void *dst) { - BLI::relocate((T *)src, (T *)dst); + T *src_ = (T *)src; + T *dst_ = (T *)dst; + + *dst_ = std::move(*src_); + src_->~T(); } template void relocate_to_initialized_n_cb(void *src, void *dst, uint n) { - BLI::relocate_n((T *)src, n, (T *)dst); + BLI::initialized_relocate_n((T *)src, n, (T *)dst); } template void relocate_to_initialized_indices_cb(void *src, void *dst, IndexMask index_mask) @@ -621,7 +624,11 @@ void relocate_to_initialized_indices_cb(void *src, void *dst, IndexMask index_ma template void relocate_to_uninitialized_cb(void *src, void *dst) { - BLI::uninitialized_relocate((T *)src, (T *)dst); + T *src_ = (T *)src; + T *dst_ = (T *)dst; + + new (dst_) T(std::move(*src_)); + src_->~T(); } template void relocate_to_uninitialized_n_cb(void *src, void *dst, uint n) { -- cgit v1.2.3