From d8678e02ecec9375bec1dcf1388c6fc8b4ce3ad2 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Tue, 9 Jun 2020 10:10:56 +0200 Subject: BLI: generally improve C++ data structures The main focus here was to improve the docs significantly. Furthermore, I reimplemented `Set`, `Map` and `VectorSet`. They are now (usually) faster, simpler and more customizable. I also rewrote `Stack` to make it more efficient by avoiding unnecessary copies. Thanks to everyone who helped with constructive feedback. Approved by brecht and sybren. Differential Revision: https://developer.blender.org/D7931 --- source/blender/blenlib/BLI_allocator.hh | 45 +- source/blender/blenlib/BLI_array.hh | 158 ++- source/blender/blenlib/BLI_array_ref.hh | 239 ++-- source/blender/blenlib/BLI_dot_export.hh | 3 +- source/blender/blenlib/BLI_hash.hh | 107 +- source/blender/blenlib/BLI_hash_tables.hh | 350 ++++++ source/blender/blenlib/BLI_index_range.hh | 38 +- source/blender/blenlib/BLI_linear_allocator.hh | 6 +- source/blender/blenlib/BLI_listbase_wrapper.hh | 6 +- source/blender/blenlib/BLI_map.hh | 1289 +++++++++++++------- source/blender/blenlib/BLI_map_slots.hh | 361 ++++++ source/blender/blenlib/BLI_math_bits.h | 1 + source/blender/blenlib/BLI_memory_utils.hh | 189 ++- source/blender/blenlib/BLI_open_addressing.hh | 316 ----- source/blender/blenlib/BLI_optional.hh | 10 - source/blender/blenlib/BLI_probing_strategies.hh | 250 ++++ source/blender/blenlib/BLI_set.hh | 872 ++++++++----- source/blender/blenlib/BLI_set_slots.hh | 415 +++++++ source/blender/blenlib/BLI_stack.hh | 345 +++++- source/blender/blenlib/BLI_string_map.hh | 540 -------- source/blender/blenlib/BLI_string_ref.hh | 129 +- source/blender/blenlib/BLI_utility_mixins.hh | 6 + source/blender/blenlib/BLI_vector.hh | 413 +++++-- source/blender/blenlib/BLI_vector_set.hh | 814 +++++++----- source/blender/blenlib/BLI_vector_set_slots.hh | 171 +++ source/blender/blenlib/CMakeLists.txt | 7 +- source/blender/blenlib/intern/BLI_index_range.cc | 2 
+- source/blender/blenlib/intern/math_bits_inline.c | 11 +- .../blender/depsgraph/intern/node/deg_node_id.cc | 7 + source/blender/depsgraph/intern/node/deg_node_id.h | 14 +- source/blender/functions/FN_cpp_type.hh | 27 +- 31 files changed, 4852 insertions(+), 2289 deletions(-) create mode 100644 source/blender/blenlib/BLI_hash_tables.hh create mode 100644 source/blender/blenlib/BLI_map_slots.hh delete mode 100644 source/blender/blenlib/BLI_open_addressing.hh create mode 100644 source/blender/blenlib/BLI_probing_strategies.hh create mode 100644 source/blender/blenlib/BLI_set_slots.hh delete mode 100644 source/blender/blenlib/BLI_string_map.hh create mode 100644 source/blender/blenlib/BLI_vector_set_slots.hh (limited to 'source') diff --git a/source/blender/blenlib/BLI_allocator.hh b/source/blender/blenlib/BLI_allocator.hh index c52db4aab53..cf81b024277 100644 --- a/source/blender/blenlib/BLI_allocator.hh +++ b/source/blender/blenlib/BLI_allocator.hh @@ -19,14 +19,23 @@ /** \file * \ingroup bli * - * This file offers a couple of memory allocators that can be used with containers such as Vector - * and Map. Note that these allocators are not designed to work with standard containers like + * An `Allocator` can allocate and deallocate memory. It is used by containers such as BLI::Vector. + * The allocators defined in this file do not work with standard library containers such as * std::vector. * - * Also see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2271.html for why the standard - * allocators are not a good fit applications like Blender. The current implementations in this - * file are fairly simple still, more complexity can be added when necessary. For now they do their - * job good enough. 
+ * Every allocator has to implement two methods: + * void *allocate(size_t size, size_t alignment, const char *name); + * void deallocate(void *ptr); + * + * We don't use the std::allocator interface, because it does more than is really necessary for an + * allocator and has some other quirks. It mixes the concepts of allocation and construction. It is + * essentially forced to be a template, even though the allocator should not care about the type. + * Also see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2271.html#std_allocator. Some + * of these aspects have been improved in new versions of C++, so we might have to reevaluate the + * strategy later on. + * + * The allocator interface dictated by this file is very simplistic, but for now that is all we + * need. More complexity can be added when it seems necessary. */ #include @@ -40,18 +49,14 @@ namespace BLI { /** - * Use Blenders guarded allocator (aka MEM_malloc). This should always be used except there is a + * Use Blender's guarded allocator (aka MEM_*). This should always be used except there is a * good reason not to use it. */ class GuardedAllocator { public: - void *allocate(uint size, const char *name) - { - return MEM_mallocN(size, name); - } - - void *allocate_aligned(uint size, uint alignment, const char *name) + void *allocate(size_t size, size_t alignment, const char *name) { + /* Should we use MEM_mallocN, when alignment is small? If yes, how small must alignment be? */ return MEM_mallocN_aligned(size, alignment, name); } @@ -62,8 +67,9 @@ class GuardedAllocator { }; /** - * This is a simple wrapper around malloc/free. Only use this when the GuardedAllocator cannot be - * used. This can be the case when the allocated element might live longer than Blenders Allocator. + * This is a wrapper around malloc/free. Only use this when the GuardedAllocator cannot be + * used. This can be the case when the allocated memory might live longer than Blender's + * allocator. 
For example, when the memory is owned by a static variable. */ class RawAllocator { private: @@ -72,14 +78,7 @@ class RawAllocator { }; public: - void *allocate(uint size, const char *UNUSED(name)) - { - void *ptr = malloc(size + sizeof(MemHead)); - ((MemHead *)ptr)->offset = sizeof(MemHead); - return POINTER_OFFSET(ptr, sizeof(MemHead)); - } - - void *allocate_aligned(uint size, uint alignment, const char *UNUSED(name)) + void *allocate(size_t size, size_t alignment, const char *UNUSED(name)) { BLI_assert(is_power_of_2_i((int)alignment)); void *ptr = malloc(size + alignment + sizeof(MemHead)); diff --git a/source/blender/blenlib/BLI_array.hh b/source/blender/blenlib/BLI_array.hh index 9dd8341aa76..09eeb321abf 100644 --- a/source/blender/blenlib/BLI_array.hh +++ b/source/blender/blenlib/BLI_array.hh @@ -19,8 +19,23 @@ /** \file * \ingroup bli * - * This is a container that contains a fixed size array. Note however, the size of the array is not - * a template argument. Instead it can be specified at the construction time. + * A `BLI::Array` is a container for a fixed size array the size of which is NOT known at + * compile time. + * + * If the size is known at compile time, `std::array` should be used instead. + * + * BLI::Array should usually be used instead of BLI::Vector whenever the number of elements is + * known at construction time. Note however, that BLI::Array will default construct all elements + * when initialized with the size-constructor. For trivial types, this does nothing. In all other + * cases, this adds overhead. If this becomes a problem, a different constructor which does not do + * default construction can be added. + * + * A main benefit of using Array over Vector is that it expresses the intent of the developer + * better. It indicates that the size of the data structure is not expected to change. Furthermore, + * you can be more certain that an array does not overallocate. 
+ * + * BLI::Array supports small object optimization to improve performance when the size turns out to + * be small at run-time. */ #include "BLI_allocator.hh" @@ -31,42 +46,83 @@ namespace BLI { -template +template< + /** + * The type of the values stored in the array. + */ + typename T, + /** + * The number of values that can be stored in the array, without doing a heap allocation. + * + * When T is large, the small buffer optimization is disabled by default to avoid large + * unexpected allocations on the stack. It can still be enabled explicitely though. + */ + uint InlineBufferCapacity = (sizeof(T) < 100) ? 4 : 0, + /** + * The allocator used by this array. Should rarely be changed, except when you don't want that + * MEM_* functions are used internally. + */ + typename Allocator = GuardedAllocator> class Array { private: + /** The beginning of the array. It might point into the inline buffer. */ T *m_data; + + /** Number of elements in the array. */ uint m_size; + + /** Used for allocations when the inline buffer is too small. */ Allocator m_allocator; - AlignedBuffer m_inline_storage; + + /** A placeholder buffer that will remain uninitialized until it is used. */ + AlignedBuffer m_inline_buffer; public: + /** + * By default an empty array is created. + */ Array() { - m_data = this->inline_storage(); + m_data = this->inline_buffer(); m_size = 0; } + /** + * Create a new array that contains copies of all values. + */ Array(ArrayRef values) { m_size = values.size(); m_data = this->get_buffer_for_size(values.size()); - uninitialized_copy_n(values.begin(), m_size, m_data); + uninitialized_copy_n(values.data(), m_size, m_data); } + /** + * Create a new array that contains copies of all values. + */ Array(const std::initializer_list &values) : Array(ArrayRef(values)) { } + /** + * Create a new array with the given size. All values will be default constructed. For trivial + * types like int, default construction does nothing. 
+ * + * We might want another version of this in the future, that does not do default construction + * even for non-trivial types. This should not be the default though, because one can easily mess + * up when dealing with uninitialized memory. + */ explicit Array(uint size) { m_size = size; m_data = this->get_buffer_for_size(size); - - for (uint i = 0; i < m_size; i++) { - new (m_data + i) T(); - } + default_construct_n(m_data, size); } + /** + * Create a new array with the given size. All values will be initialized by copying the given + * default. + */ Array(uint size, const T &value) { m_size = size; @@ -74,21 +130,19 @@ class Array { uninitialized_fill_n(m_data, m_size, value); } - Array(const Array &other) + Array(const Array &other) : m_allocator(other.m_allocator) { m_size = other.size(); - m_allocator = other.m_allocator; m_data = this->get_buffer_for_size(other.size()); - uninitialized_copy_n(other.begin(), m_size, m_data); + uninitialized_copy_n(other.data(), m_size, m_data); } - Array(Array &&other) noexcept + Array(Array &&other) noexcept : m_allocator(other.m_allocator) { m_size = other.m_size; - m_allocator = other.m_allocator; - if (!other.uses_inline_storage()) { + if (!other.uses_inline_buffer()) { m_data = other.m_data; } else { @@ -96,14 +150,14 @@ class Array { uninitialized_relocate_n(other.m_data, m_size, m_data); } - other.m_data = other.inline_storage(); + other.m_data = other.inline_buffer(); other.m_size = 0; } ~Array() { destruct_n(m_data, m_size); - if (!this->uses_inline_storage()) { + if (!this->uses_inline_buffer()) { m_allocator.deallocate((void *)m_data); } } @@ -162,21 +216,50 @@ class Array { return m_data[index]; } + /** + * Returns the number of elements in the array. + */ uint size() const { return m_size; } + /** + * Returns true when the number of elements in the array is zero. + */ + bool is_empty() const + { + return m_size == 0; + } + + /** + * Copies the value to all indices in the array. 
+ */ void fill(const T &value) { - MutableArrayRef(*this).fill(value); + initialized_fill_n(m_data, m_size, value); } + /** + * Copies the value to the given indices in the array. + */ void fill_indices(ArrayRef indices, const T &value) { MutableArrayRef(*this).fill_indices(indices, value); } + /** + * Get a pointer to the beginning of the array. + */ + const T *data() const + { + return m_data; + } + T *data() + { + return m_data; + } + const T *begin() const { return m_data; @@ -197,41 +280,64 @@ class Array { return m_data + m_size; } + /** + * Get an index range containing all valid indices for this array. + */ IndexRange index_range() const { return IndexRange(m_size); } + /** + * Sets the size to zero. This should only be used when you have manually destructed all elements + * in the array beforehand. Use with care. + */ + void clear_without_destruct() + { + m_size = 0; + } + + /** + * Access the allocator used by this array. + */ Allocator &allocator() { return m_allocator; } + /** + * Get the value of the InlineBufferCapacity template argument. This is the number of elements + * that can be stored without doing an allocation. 
+ */ + static uint inline_buffer_capacity() + { + return InlineBufferCapacity; + } + private: T *get_buffer_for_size(uint size) { if (size <= InlineBufferCapacity) { - return this->inline_storage(); + return this->inline_buffer(); } else { return this->allocate(size); } } - T *inline_storage() const + T *inline_buffer() const { - return (T *)m_inline_storage.ptr(); + return (T *)m_inline_buffer.ptr(); } T *allocate(uint size) { - return (T *)m_allocator.allocate_aligned( - size * sizeof(T), std::alignment_of::value, __func__); + return (T *)m_allocator.allocate(size * sizeof(T), alignof(T), AT); } - bool uses_inline_storage() const + bool uses_inline_buffer() const { - return m_data == this->inline_storage(); + return m_data == this->inline_buffer(); } }; diff --git a/source/blender/blenlib/BLI_array_ref.hh b/source/blender/blenlib/BLI_array_ref.hh index c0484493bda..34745c0af83 100644 --- a/source/blender/blenlib/BLI_array_ref.hh +++ b/source/blender/blenlib/BLI_array_ref.hh @@ -20,19 +20,38 @@ /** \file * \ingroup bli * - * These classes offer a convenient way to work with continuous chunks of memory of a certain type. - * We differentiate #ArrayRef and #MutableArrayRef. The elements in the former are const while the - * elements in the other are not. + * A `BLI::ArrayRef` references an array that is owned by someone else. It is just a pointer and + * a size. Since the memory is not owned, ArrayRef should not be used to transfer ownership. The + * array cannot be modified through the ArrayRef. However, if T is a non-const pointer, the + * pointed-to elements can be modified. * - * Passing array references as parameters has multiple benefits: - * - Less templates are used because the function does not have to work with different - * container types. - * - It encourages an Struct-of-Arrays data layout which is often beneficial when - * writing high performance code. Also it makes it easier to reuse code. 
- * - Array references offer convenient ways of slicing and other operations. + * There is also `BLI::MutableArrayRef`. It is mostly the same as ArrayRef, but allows the array + * to be modified. * - * The instances of #ArrayRef and #MutableArrayRef are very small and should be passed by value. - * Since array references do not own any memory, it is generally not save to store them. + * An (Mutable)ArrayRef can refer to data owned by many different data structures including + * BLI::Vector, BLI::Array, BLI::VectorSet, std::vector, std::array, std::string, + * std::initializer_list and c-style array. + * + * `BLI::ArrayRef` should be your default choice when you have to pass a read-only array into a + * function. It is better than passing a `const Vector &`, because then the function only works for + * vectors and not for e.g. arrays. Using ArrayRef as function parameter makes it usable in more + * contexts, better expresses the intent and does not sacrifice performance. It is also better than + * passing a raw pointer and size separately, because it is more convenient and safe. + * + * `BLI::MutableArrayRef` can be used when a function is supposed to return an array, the size + * of which is known before the function is called. One advantage of this approach is that the + * caller is responsible for allocation and deallocation. Furthermore, the function can focus on + * its task, without having to worry about memory allocation. Alternatively, a function could + * return an Array or Vector. + * + * Note: When a function has a MutableArrayRef output parameter and T is not a trivial type, + * then the function has to specify whether the referenced array is expected to be initialized or + * not. + * + * Since the arrays are only referenced, it is generally unsafe to store an ArrayRef. When you + * store one, you should know who owns the memory. + * + * Instances of ArrayRef and MutableArrayRef are small and should be passed by value. 
*/ #include @@ -48,7 +67,8 @@ namespace BLI { /** - * References an array of data. The elements in the array should not be changed. + * References an array of type T that is owned by someone else. The data in the array cannot be + * modified. */ template class ArrayRef { private: @@ -58,7 +78,6 @@ template class ArrayRef { public: /** * Create a reference to an empty array. - * The pointer is allowed to be nullptr. */ ArrayRef() = default; @@ -66,11 +85,22 @@ template class ArrayRef { { } - ArrayRef(const std::initializer_list &list) : ArrayRef(list.begin(), list.size()) + /** + * Reference an initializer_list. Note that the data in the initializer_list is only valid until + * the expression containing it is fully computed. + * + * Do: + * call_function_with_array({1, 2, 3, 4}); + * + * Don't: + * ArrayRef ref = {1, 2, 3, 4}; + * call_function_with_array(ref); + */ + ArrayRef(const std::initializer_list &list) : ArrayRef(list.begin(), (uint)list.size()) { } - ArrayRef(const std::vector &vector) : ArrayRef(vector.data(), vector.size()) + ArrayRef(const std::vector &vector) : ArrayRef(vector.data(), (uint)vector.size()) { } @@ -79,18 +109,19 @@ template class ArrayRef { } /** - * ArrayRef -> ArrayRef - * ArrayRef -> ArrayRef + * Support implicit conversions like the ones below: + * ArrayRef -> ArrayRef + * ArrayRef -> ArrayRef */ template::value>::type * = nullptr> - ArrayRef(ArrayRef array) : ArrayRef((T *)array.begin(), array.size()) + ArrayRef(ArrayRef array) : ArrayRef((T *)array.data(), array.size()) { } /** - * Return a continuous part of the array. - * Asserts that the slice stays within the array. + * Returns a contiguous part of the array. This invokes undefined behavior when the slice does + * not stay within the bounds of the array. */ ArrayRef slice(uint start, uint size) const { @@ -104,28 +135,28 @@ template class ArrayRef { } /** - * Return a new ArrayRef with n elements removed from the beginning. - * Asserts that the array contains enough elements. 
+ * Returns a new ArrayRef with n elements removed from the beginning. This invokes undefined + * behavior when the array is too small. */ - ArrayRef drop_front(uint n = 1) const + ArrayRef drop_front(uint n) const { BLI_assert(n <= this->size()); return this->slice(n, this->size() - n); } /** - * Return a new ArrayRef with n elements removed from the beginning. - * Asserts that the array contains enough elements. + * Returns a new ArrayRef with n elements removed from the beginning. This invokes undefined + * behavior when the array is too small. */ - ArrayRef drop_back(uint n = 1) const + ArrayRef drop_back(uint n) const { BLI_assert(n <= this->size()); return this->slice(0, this->size() - n); } /** - * Return a new ArrayRef that only contains the first n elements. - * Asserts that the array contains enough elements. + * Returns a new ArrayRef that only contains the first n elements. This invokes undefined + * behavior when the array is too small. */ ArrayRef take_front(uint n) const { @@ -134,8 +165,8 @@ template class ArrayRef { } /** - * Return a new ArrayRef that only contains the last n elements. - * Asserts that the array contains enough elements. + * Returns a new ArrayRef that only contains the last n elements. This invokes undefined + * behavior when the array is too small. */ ArrayRef take_back(uint n) const { @@ -144,11 +175,12 @@ template class ArrayRef { } /** - * Copy the values in this array to another array. + * Returns the pointer to the beginning of the referenced array. This may be nullptr when the + * size is zero. */ - void copy_to(T *ptr) const + const T *data() const { - BLI::copy_n(m_start, m_size, ptr); + return m_start; } const T *begin() const @@ -162,8 +194,8 @@ template class ArrayRef { } /** - * Access an element in the array. - * Asserts that the index is in the bounds of the array. + * Access an element in the array. This invokes undefined behavior when the index is out of + * bounds. 
*/ const T &operator[](uint index) const { @@ -172,7 +204,7 @@ template class ArrayRef { } /** - * Return the number of elements in the referenced array. + * Returns the number of elements in the referenced array. */ uint size() const { @@ -180,16 +212,24 @@ template class ArrayRef { } /** - * Return the number of bytes referenced by this ArrayRef. + * Returns true if the size is zero. */ - uint byte_size() const + bool is_empty() const + { + return m_size == 0; + } + + /** + * Returns the number of bytes referenced by this ArrayRef. + */ + uint size_in_bytes() const { return sizeof(T) * m_size; } /** * Does a linear search to see of the value is in the array. - * Return true if it is, otherwise false. + * Returns true if it is, otherwise false. */ bool contains(const T &value) const { @@ -202,7 +242,7 @@ template class ArrayRef { } /** - * Does a constant time check to see if the pointer is within the referenced array. + * Does a constant time check to see if the pointer points to a value in the referenced array. * Return true if it is, otherwise false. */ bool contains_ptr(const T *ptr) const @@ -226,8 +266,8 @@ template class ArrayRef { } /** - * Return a reference to the first element in the array. - * Asserts that the array is not empty. + * Return a reference to the first element in the array. This invokes undefined behavior when the + * array is empty. */ const T &first() const { @@ -236,8 +276,8 @@ template class ArrayRef { } /** - * Return a reference to the last element in the array. - * Asserts that the array is not empty. + * Returns a reference to the last element in the array. This invokes undefined behavior when the + * array is empty. */ const T &last() const { @@ -246,7 +286,8 @@ template class ArrayRef { } /** - * Get element at the given index. If the index is out of range, return the fallback value. + * Returns the element at the given index. If the index is out of range, return the fallback + * value. 
*/ T get(uint index, const T &fallback) const { @@ -277,6 +318,11 @@ template class ArrayRef { return false; } + /** + * Returns true when this and the other array have an element in common. This should only be + * called on small arrays, because it has a running time of O(n*m) where n and m are the sizes of + * the arrays. + */ bool intersects__linear_search(ArrayRef other) const { /* The size should really be smaller than that. If it is not, the calling code should be @@ -292,6 +338,10 @@ template class ArrayRef { return false; } + /** + * Returns the index of the first occurrence of the given value. This invokes undefined behavior + * when the value is not in the array. + */ uint first_index(const T &search_value) const { int index = this->first_index_try(search_value); @@ -299,6 +349,9 @@ template class ArrayRef { return (uint)index; } + /** + * Returns the index of the first occurrence of the given value or -1 if it does not exist. + */ int first_index_try(const T &search_value) const { for (uint i = 0; i < m_size; i++) { @@ -309,16 +362,6 @@ template class ArrayRef { return -1; } - template bool any(const PredicateT predicate) - { - for (uint i = 0; i < m_size; i++) { - if (predicate(m_start[i])) { - return true; - } - } - return false; - } - /** * Utility to make it more convenient to iterate over all indices that can be used with this * array. @@ -329,7 +372,7 @@ template class ArrayRef { } /** - * Get a new array ref to the same underlying memory buffer. No conversions are done. + * Returns a new ArrayRef to the same underlying memory buffer. No conversions are done. */ template ArrayRef cast() const { @@ -339,7 +382,7 @@ template class ArrayRef { } /** - * A debug utility to print the content of the array ref. Every element will be printed on a + * A debug utility to print the content of the ArrayRef. Every element will be printed on a * separate line using the given callback. 
*/ template void print_as_lines(std::string name, PrintLineF print_line) const @@ -352,6 +395,10 @@ template class ArrayRef { } } + /** + * A debug utility to print the content of the array ref. Every element be printed on a separate + * line. + */ void print_as_lines(std::string name) const { this->print_as_lines(name, [](const T &value) { std::cout << value; }); @@ -359,7 +406,8 @@ template class ArrayRef { }; /** - * Mostly the same as ArrayRef, except that one can change the array elements via this reference. + * Mostly the same as ArrayRef, except that one can change the array elements through a + * MutableArrayRef. */ template class MutableArrayRef { private: @@ -373,6 +421,17 @@ template class MutableArrayRef { { } + /** + * Reference an initializer_list. Note that the data in the initializer_list is only valid until + * the expression containing it is fully computed. + * + * Do: + * call_function_with_array({1, 2, 3, 4}); + * + * Don't: + * MutableArrayRef ref = {1, 2, 3, 4}; + * call_function_with_array(ref); + */ MutableArrayRef(std::initializer_list &list) : MutableArrayRef(list.begin(), list.size()) { } @@ -392,7 +451,7 @@ template class MutableArrayRef { } /** - * Get the number of elements in the array. + * Returns the number of elements in the array. */ uint size() const { @@ -402,33 +461,30 @@ template class MutableArrayRef { /** * Replace all elements in the referenced array with the given value. */ - void fill(const T &element) + void fill(const T &value) { - std::fill_n(m_start, m_size, element); + initialized_fill_n(m_start, m_size, value); } /** - * Replace a subset of all elements with the given value. + * Replace a subset of all elements with the given value. This invokes undefined behavior when + * one of the indices is out of bounds. 
*/ - void fill_indices(ArrayRef indices, const T &element) + void fill_indices(ArrayRef indices, const T &value) { for (uint i : indices) { - m_start[i] = element; + BLI_assert(i < m_size); + m_start[i] = value; } } /** - * Copy the values from another array into the references array. + * Returns a pointer to the beginning of the referenced array. This may be nullptr, when the size + * is zero. */ - void copy_from(const T *ptr) - { - BLI::copy_n(ptr, m_size, m_start); - } - - void copy_from(ArrayRef other) + T *data() const { - BLI_assert(this->size() == other.size()); - this->copy_from(other.begin()); + return m_start; } T *begin() const @@ -448,8 +504,8 @@ template class MutableArrayRef { } /** - * Return a continuous part of the array. - * Asserts that the slice stays in the array bounds. + * Returns a contiguous part of the array. This invokes undefined behavior when the slice would + * go out of bounds. */ MutableArrayRef slice(uint start, uint length) const { @@ -458,25 +514,28 @@ template class MutableArrayRef { } /** - * Return a new MutableArrayRef with n elements removed from the beginning. + * Returns a new MutableArrayRef with n elements removed from the beginning. This invokes + * undefined behavior when the array is too small. */ - MutableArrayRef drop_front(uint n = 1) const + MutableArrayRef drop_front(uint n) const { BLI_assert(n <= this->size()); return this->slice(n, this->size() - n); } /** - * Return a new MutableArrayRef with n elements removed from the beginning. + * Returns a new MutableArrayRef with n elements removed from the end. This invokes undefined + * behavior when the array is too small. */ - MutableArrayRef drop_back(uint n = 1) const + MutableArrayRef drop_back(uint n) const { BLI_assert(n <= this->size()); return this->slice(0, this->size() - n); } /** - * Return a new MutableArrayRef that only contains the first n elements. + * Returns a new MutableArrayRef that only contains the first n elements. 
This invokes undefined + * behavior when the array is too small. */ MutableArrayRef take_front(uint n) const { @@ -485,7 +544,8 @@ template class MutableArrayRef { } /** - * Return a new MutableArrayRef that only contains the last n elements. + * Return a new MutableArrayRef that only contains the last n elements. This invokes undefined + * behavior when the array is too small. */ MutableArrayRef take_back(uint n) const { @@ -493,16 +553,28 @@ template class MutableArrayRef { return this->slice(this->size() - n, n); } + /** + * Returns an (immutable) ArrayRef that references the same array. This is usually not needed, + * due to implicit conversions. However, sometimes automatic type deduction needs some help. + */ ArrayRef as_ref() const { return ArrayRef(m_start, m_size); } + /** + * Utility to make it more convenient to iterate over all indices that can be used with this + * array. + */ IndexRange index_range() const { return IndexRange(m_size); } + /** + * Returns a reference to the last element. This invokes undefined behavior when the array is + * empty. + */ const T &last() const { BLI_assert(m_size > 0); @@ -510,7 +582,7 @@ template class MutableArrayRef { } /** - * Get a new array ref to the same underlying memory buffer. No conversions are done. + * Returns a new array ref to the same underlying memory buffer. No conversions are done. */ template MutableArrayRef cast() const { @@ -528,6 +600,9 @@ template ArrayRef ref_c_array(const T *array, uint size) return ArrayRef(array, size); } +/** + * Utilities to check that arrays have the same size in debug builds. 
+ */ template void assert_same_size(const T1 &v1, const T2 &v2) { UNUSED_VARS_NDEBUG(v1, v2); diff --git a/source/blender/blenlib/BLI_dot_export.hh b/source/blender/blenlib/BLI_dot_export.hh index 08c37fec01e..8da0ed6df14 100644 --- a/source/blender/blenlib/BLI_dot_export.hh +++ b/source/blender/blenlib/BLI_dot_export.hh @@ -27,7 +27,6 @@ #include "BLI_map.hh" #include "BLI_optional.hh" #include "BLI_set.hh" -#include "BLI_string_map.hh" #include "BLI_utility_mixins.hh" #include "BLI_vector.hh" @@ -57,7 +56,7 @@ class AttributeList { void set(StringRef key, StringRef value) { - m_attributes.add_override(key, value); + m_attributes.add_overwrite(key, value); } }; diff --git a/source/blender/blenlib/BLI_hash.hh b/source/blender/blenlib/BLI_hash.hh index 3b3448f66b1..029fa32553a 100644 --- a/source/blender/blenlib/BLI_hash.hh +++ b/source/blender/blenlib/BLI_hash.hh @@ -20,8 +20,58 @@ /** \file * \ingroup bli * - * This file provides default hash functions for some primitive types. The hash functions can be - * used by containers such as Map and Set. + * A specialization of `BLI::DefaultHash` provides a hash function for values of type T. This + * hash function is used by default in hash table implementations in blenlib. + * + * The actual hash function is in the `operator()` method of DefaultHash. The following code + * computes the hash of some value using DefaultHash. + * + * T value = ...; + * DefaultHash hash_function; + * uint32_t hash = hash_function(value); + * + * Hash table implementations like BLI::Set support heterogeneous key lookups. That means that one + * can do a lookup with a key of type A in a hash table that stores keys of type B. This is + * commonly done when B is std::string, because the conversion from e.g. a StringRef to std::string + * can be costly and is unnecessary. To make this work, values of type A and B that compare equal + * have to have the same hash value. 
This is achieved by defining potentially multiple `operator()` + * in a specialization of DefaultHash. All those methods have to compute the same hash for values + * that compare equal. + * + * The computed hash is an unsigned 32 bit integer. Ideally, the hash function would generate + * uniformly random hash values for a set of keys. However, in many cases trivial hash functions + * are faster and produce a good enough distribution. In general it is better when more information + * is in the lower bits of the hash. By choosing a good probing strategy, the effects of a bad hash + * function are less noticable though. In this context a good probing strategy is one that takes + * all bits of the hash into account eventually. One has to check on a case by case basis to see if + * a better but more expensive or trivial hash function works better. + * + * There are three main ways to provide a hash table implementation with a custom hash function. + * + * - When you want to provide a default hash function for your own custom type: Add a `hash` + * member function to it. The function should return `uint32_t` and take no arguments. This + * method will be called by the default implementation of DefaultHash. It will automatically be + * used by hash table implementations. + * + * - When you want to provide a default hash function for a type that you cannot modify: Add a new + * specialization to the DefaultHash struct. This can be done by writing code like below in + * either global or BLI namespace. + * + * template<> struct BLI::DefaultHash { + * uint32_t operator()(const TheType &value) const { + * return ...; + * } + * }; + * + * - When you want to provide a different hash function for a type that already has a default hash + * function: Implement a struct like the one below and pass it as template parameter to the hash + * table explicitely. 
+ * + * struct MyCustomHash { + * uint32_t operator()(const TheType &value) const { + * return ...; + * } + * }; */ #include @@ -35,7 +85,16 @@ namespace BLI { +/** + * If there is no other specialization of DefaultHash for a given type, try to call `hash()` on the + * value. If there is no such method, this will result in a compiler error. Usually that means that + * you have to implement a hash function using one of three strategies listed above. + */ template struct DefaultHash { + uint32_t operator()(const T &value) const + { + return value.hash(); + } }; #define TRIVIAL_DEFAULT_INT_HASH(TYPE) \ @@ -47,9 +106,9 @@ template struct DefaultHash { } /** - * Cannot make any assumptions about the distribution of keys, so use a trivial hash function by - * default. The hash table implementations are designed to take all bits of the hash into account - * to avoid really bad behavior when the lower bits are all zero. Special hash functions can be + * We cannot make any assumptions about the distribution of keys, so use a trivial hash function by + * default. The default probing strategy is designed to take all bits of the hash into account + * to avoid worst case behavior when the lower bits are all zero. Special hash functions can be * implemented when more knowledge about a specific key distribution is available. 
*/ TRIVIAL_DEFAULT_INT_HASH(int8_t); @@ -58,9 +117,26 @@ TRIVIAL_DEFAULT_INT_HASH(int16_t); TRIVIAL_DEFAULT_INT_HASH(uint16_t); TRIVIAL_DEFAULT_INT_HASH(int32_t); TRIVIAL_DEFAULT_INT_HASH(uint32_t); -TRIVIAL_DEFAULT_INT_HASH(int64_t); -TRIVIAL_DEFAULT_INT_HASH(uint64_t); +template<> struct DefaultHash { + uint32_t operator()(uint64_t value) const + { + uint32_t low = (uint32_t)value; + uint32_t high = (uint32_t)(value >> 32); + return low ^ (high * 0x45d9f3b); + } +}; + +template<> struct DefaultHash { + uint32_t operator()(uint64_t value) const + { + return DefaultHash{}((uint64_t)value); + } +}; + +/** + * One should try to avoid using floats as keys in hash tables, but sometimes it is convenient. + */ template<> struct DefaultHash { uint32_t operator()(float value) const { @@ -78,35 +154,38 @@ inline uint32_t hash_string(StringRef str) } template<> struct DefaultHash { - uint32_t operator()(const std::string &value) const + /** + * Take a StringRef as parameter to support heterogeneous lookups in hash table implementations + * when std::string is used as key. + */ + uint32_t operator()(StringRef value) const { return hash_string(value); } }; template<> struct DefaultHash { - uint32_t operator()(const StringRef &value) const + uint32_t operator()(StringRef value) const { return hash_string(value); } }; template<> struct DefaultHash { - uint32_t operator()(const StringRefNull &value) const + uint32_t operator()(StringRef value) const { return hash_string(value); } }; /** - * While we cannot guarantee that the lower 3 bits or a pointer are zero, it is safe to assume - * this in the general case. MEM_malloc only returns 8 byte aligned addresses on 64-bit systems. + * While we cannot guarantee that the lower 4 bits of a pointer are zero, it is often the case. 
*/ template struct DefaultHash { uint32_t operator()(const T *value) const { - uintptr_t ptr = POINTER_AS_UINT(value); - uint32_t hash = (uint32_t)(ptr >> 3); + uintptr_t ptr = (uintptr_t)value; + uint32_t hash = (uint32_t)(ptr >> 4); return hash; } }; diff --git a/source/blender/blenlib/BLI_hash_tables.hh b/source/blender/blenlib/BLI_hash_tables.hh new file mode 100644 index 00000000000..c171b26f770 --- /dev/null +++ b/source/blender/blenlib/BLI_hash_tables.hh @@ -0,0 +1,350 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __BLI_OPEN_ADDRESSING_HH__ +#define __BLI_OPEN_ADDRESSING_HH__ + +/** \file + * \ingroup bli + * + * This file contains code that can be shared between different hash table implementations. + */ + +#include + +#include "BLI_allocator.hh" +#include "BLI_array.hh" +#include "BLI_math_base.h" +#include "BLI_memory_utils.hh" +#include "BLI_string.h" +#include "BLI_utildefines.h" +#include "BLI_vector.hh" + +namespace BLI { + +/* -------------------------------------------------------------------- */ +/** \name Constexpr Utility Functions + * + * Those should eventually be deduplicated with functions in BLI_math_base.h. 
+ * \{ */ + +inline constexpr int is_power_of_2_i_constexpr(int n) +{ + return (n & (n - 1)) == 0; +} + +inline constexpr uint32_t log2_floor_u_constexpr(uint32_t x) +{ + return x <= 1 ? 0 : 1 + log2_floor_u_constexpr(x >> 1); +} + +inline constexpr uint32_t log2_ceil_u_constexpr(uint32_t x) +{ + return (is_power_of_2_i_constexpr((int)x)) ? log2_floor_u_constexpr(x) : + log2_floor_u_constexpr(x) + 1; +} + +inline constexpr uint32_t power_of_2_max_u_constexpr(uint32_t x) +{ + return 1 << log2_ceil_u_constexpr(x); +} + +template inline constexpr IntT ceil_division(IntT x, IntT y) +{ + BLI_STATIC_ASSERT(!std::is_signed::value, ""); + return x / y + ((x % y) != 0); +} + +template inline constexpr IntT floor_division(IntT x, IntT y) +{ + BLI_STATIC_ASSERT(!std::is_signed::value, ""); + return x / y; +} + +inline constexpr uint32_t ceil_division_by_fraction(uint32_t x, + uint32_t numerator, + uint32_t denominator) +{ + return (uint32_t)ceil_division((uint64_t)x * (uint64_t)denominator, (uint64_t)numerator); +} + +inline constexpr uint32_t floor_multiplication_with_fraction(uint32_t x, + uint32_t numerator, + uint32_t denominator) +{ + return (uint32_t)((uint64_t)x * (uint64_t)numerator / (uint64_t)denominator); +} + +inline constexpr uint32_t total_slot_amount_for_usable_slots(uint32_t min_usable_slots, + uint32_t max_load_factor_numerator, + uint32_t max_load_factor_denominator) +{ + return power_of_2_max_u_constexpr(ceil_division_by_fraction( + min_usable_slots, max_load_factor_numerator, max_load_factor_denominator)); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Load Factor + * + * This is an abstraction for a fractional load factor. The hash table using this class is assumed + * to use arrays with a size that is a power of two. 
+ * + * \{ */ + +class LoadFactor { + private: + uint8_t m_numerator; + uint8_t m_denominator; + + public: + LoadFactor(uint8_t numerator, uint8_t denominator) + : m_numerator(numerator), m_denominator(denominator) + { + BLI_assert(numerator > 0); + BLI_assert(numerator < denominator); + } + + void compute_total_and_usable_slots(uint32_t min_total_slots, + uint32_t min_usable_slots, + uint32_t *r_total_slots, + uint32_t *r_usable_slots) const + { + BLI_assert(is_power_of_2_i((int)min_total_slots)); + + uint32_t total_slots = this->compute_total_slots(min_usable_slots, m_numerator, m_denominator); + total_slots = std::max(total_slots, min_total_slots); + uint32_t usable_slots = floor_multiplication_with_fraction( + total_slots, m_numerator, m_denominator); + BLI_assert(min_usable_slots <= usable_slots); + + *r_total_slots = total_slots; + *r_usable_slots = usable_slots; + } + + static constexpr uint32_t compute_total_slots(uint32_t min_usable_slots, + uint8_t numerator, + uint8_t denominator) + { + return total_slot_amount_for_usable_slots(min_usable_slots, numerator, denominator); + } +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Intrusive Key Info + * + * A hash table slot has to maintain state about whether the slot is empty, occupied or removed. + * Usually, this state information is stored in its own variable. While it only needs two bits in + * theory, in practice often 4 or 8 bytes are used, due to alignment requirements. + * + * One solution to deal with this problem is to embed the state information in the key. That means, + * two values of the key type are selected to indicate whether the slot is empty or removed. + * + * The classes below tell a slot implementation which special key values it can use. They can be + * used as KeyInfo in slot types like IntrusiveSetSlot and IntrusiveMapSlot. 
+ * + * A KeyInfo type has to implement a couple of static methods that are descriped in + * TemplatedKeyInfo. + * + * \{ */ + +/** + * The template arguments EmptyValue and RemovedValue define which special are used. This can be + * used when a hash table has integer keys and there are two specific integers that will never be + * used as keys. + */ +template struct TemplatedKeyInfo { + /** + * Get the value that indicates that the slot is empty. This is used to indicate new slots. + */ + static Key get_empty() + { + return EmptyValue; + } + + /** + * Modify the given key so that it represents a removed slot. + */ + static void remove(Key &key) + { + key = RemovedValue; + } + + /** + * Return true, when the given key indicates that the slot is empty. + */ + static bool is_empty(const Key &key) + { + return key == EmptyValue; + } + + /** + * Return true, when the given key indicates that the slot is removed. + */ + static bool is_removed(const Key &key) + { + return key == RemovedValue; + } + + /** + * Return true, when the key is valid, i.e. it can be contained in an occupied slot. + */ + static bool is_not_empty_or_removed(const Key &key) + { + return key != EmptyValue && key != RemovedValue; + } +}; + +/** + * 0xffff...ffff indicates an empty slot. + * 0xffff...fffe indicates a removed slot. + * + * Those specific values are used, because with them a single comparison is enough to check whether + * a slot is occupied. The keys 0x0000...0000 and 0x0000...0001 also satisfy this constraint. + * However, nullptr is much more likely to be used as valid key. 
+ */ +template struct PointerKeyInfo { + static Pointer get_empty() + { + return (Pointer)UINTPTR_MAX; + } + + static void remove(Pointer &pointer) + { + pointer = (Pointer)(UINTPTR_MAX - 1); + } + + static bool is_empty(Pointer pointer) + { + return (uintptr_t)pointer == UINTPTR_MAX; + } + + static bool is_removed(Pointer pointer) + { + return (uintptr_t)pointer == UINTPTR_MAX - 1; + } + + static bool is_not_empty_or_removed(Pointer pointer) + { + return (uintptr_t)pointer < UINTPTR_MAX - 1; + } +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Hash Table Stats + * + * A utility class that makes it easier for hash table implementations to provide statistics to the + * developer. These statistics can be helpful when trying to figure out why a hash table is slow. + * + * To use this utility, a hash table has to implement various methods, that are mentioned below. + * + * \{ */ + +class HashTableStats { + private: + Vector m_keys_by_collision_count; + uint32_t m_total_collisions; + float m_average_collisions; + uint32_t m_size; + uint32_t m_capacity; + uint32_t m_removed_amount; + float m_load_factor; + float m_removed_load_factor; + uint32_t m_size_per_element; + uint32_t m_size_in_bytes; + const void *m_address; + + public: + /** + * Requires that the hash table has the following methods: + * - count_collisions(key) -> uint32_t + * - size() -> uint32_t + * - capacity() -> uint32_t + * - removed_amount() -> uint32_t + * - size_per_element() -> uint32_t + * - size_in_bytes() -> uint32_t + */ + template + HashTableStats(const HashTable &hash_table, const Keys &keys) + { + m_total_collisions = 0; + m_size = hash_table.size(); + m_capacity = hash_table.capacity(); + m_removed_amount = hash_table.removed_amount(); + m_size_per_element = hash_table.size_per_element(); + m_size_in_bytes = hash_table.size_in_bytes(); + m_address = (const void *)&hash_table; + + for (const auto &key : keys) { + uint32_t collisions = 
hash_table.count_collisions(key); + if (m_keys_by_collision_count.size() <= collisions) { + m_keys_by_collision_count.append_n_times( + 0, collisions - m_keys_by_collision_count.size() + 1); + } + m_keys_by_collision_count[collisions]++; + m_total_collisions += collisions; + } + + m_average_collisions = (m_size == 0) ? 0 : (float)m_total_collisions / (float)m_size; + m_load_factor = (float)m_size / (float)m_capacity; + m_removed_load_factor = (float)m_removed_amount / (float)m_capacity; + } + + void print(StringRef name = "") + { + std::cout << "Hash Table Stats: " << name << "\n"; + std::cout << " Address: " << m_address << "\n"; + std::cout << " Total Slots: " << m_capacity << "\n"; + std::cout << " Occupied Slots: " << m_size << " (" << m_load_factor * 100.0f << " %)\n"; + std::cout << " Removed Slots: " << m_removed_amount << " (" << m_removed_load_factor * 100.0f + << " %)\n"; + + char memory_size_str[15]; + BLI_str_format_byte_unit(memory_size_str, m_size_in_bytes, true); + std::cout << " Size: ~" << memory_size_str << "\n"; + std::cout << " Size per Slot: " << m_size_per_element << " bytes\n"; + + std::cout << " Average Collisions: " << m_average_collisions << "\n"; + for (uint32_t collision_count : m_keys_by_collision_count.index_range()) { + std::cout << " " << collision_count + << " Collisions: " << m_keys_by_collision_count[collision_count] << "\n"; + } + } +}; + +/** \} */ + +/** + * This struct provides an equality operator that returns true for all objects that compare equal + * when one would use the `==` operator. This is different from std::equal_to, because that + * requires the parameters to be of type T. Our hash tables support lookups using other types + * without conversion, therefore DefaultEquality needs to be more generic. 
+ */ +struct DefaultEquality { + template bool operator()(const T1 &a, const T2 &b) const + { + return a == b; + } +}; + +} // namespace BLI + +#endif /* __BLI_OPEN_ADDRESSING_HH__ */ diff --git a/source/blender/blenlib/BLI_index_range.hh b/source/blender/blenlib/BLI_index_range.hh index e24fd567810..4a64ae16589 100644 --- a/source/blender/blenlib/BLI_index_range.hh +++ b/source/blender/blenlib/BLI_index_range.hh @@ -20,9 +20,37 @@ /** \file * \ingroup bli * - * Allows passing iterators over ranges of integers without actually allocating an array or passing - * separate values. A range always has a step of one. If other step sizes are required in some - * cases, a separate data structure should be used. + * A `BLI::IndexRange` wraps an interval of non-negative integers. It can be used to reference + * consecutive elements in an array. Furthermore, it can make for loops more convenient and less + * error prone, especially when using nested loops. + * + * I'd argue that the second loop is more readable and less error prone than the first one. That is + * not necessarily always the case, but often it is. + * + * for (uint i = 0; i < 10; i++) { + * for (uint j = 0; j < 20; j++) { + * for (uint k = 0; k < 30; k++) { + * + * for (uint i : IndexRange(10)) { + * for (uint j : IndexRange(20)) { + * for (uint k : IndexRange(30)) { + * + * Some containers like BLI::Vector have an index_range() method. This will return the IndexRange + * that contains all indices that can be used to access the container. This is particularly useful + * when you want to iterate over the indices and the elements (much like Python's enumerate(), just + * worse). 
Again, I think the second example here is better: + * + * for (uint i = 0; i < my_vector_with_a_long_name.size(); i++) { + * do_something(i, my_vector_with_a_long_name[i]); + * + * for (uint i : my_vector_with_a_long_name.index_range()) { + * do_something(i, my_vector_with_a_long_name[i]); + * + * Ideally this could be could be even closer to Python's enumerate(). We might get that in the + * future with newer C++ versions. + * + * One other important feature is the as_array_ref method. This method returns an ArrayRef + * that contains the interval as individual numbers. */ #include @@ -182,13 +210,15 @@ class IndexRange { return value >= m_start && value < m_start + m_size; } + /** + * Returns a new range, that contains a subinterval of the current one. + */ IndexRange slice(uint start, uint size) const { uint new_start = m_start + start; BLI_assert(new_start + size <= m_start + m_size || size == 0); return IndexRange(new_start, size); } - IndexRange slice(IndexRange range) const { return this->slice(range.start(), range.size()); diff --git a/source/blender/blenlib/BLI_linear_allocator.hh b/source/blender/blenlib/BLI_linear_allocator.hh index 285af49f500..8b77855bb07 100644 --- a/source/blender/blenlib/BLI_linear_allocator.hh +++ b/source/blender/blenlib/BLI_linear_allocator.hh @@ -130,7 +130,7 @@ template class LinearAllocator : NonCopya template MutableArrayRef construct_array_copy(ArrayRef src) { MutableArrayRef dst = this->allocate_array(src.size()); - uninitialized_copy_n(src.begin(), src.size(), dst.begin()); + uninitialized_copy_n(src.data(), src.size(), dst.data()); return dst; } @@ -186,7 +186,7 @@ template class LinearAllocator : NonCopya m_unused_borrowed_buffers.append(ArrayRef((char *)buffer, size)); } - template + template void provide_buffer(AlignedBuffer &aligned_buffer) { this->provide_buffer(aligned_buffer.ptr(), Size); @@ -208,7 +208,7 @@ template class LinearAllocator : NonCopya uint size_in_bytes = 
power_of_2_min_u(std::max(min_allocation_size, m_next_min_alloc_size)); m_next_min_alloc_size = size_in_bytes * 2; - void *buffer = m_allocator.allocate(size_in_bytes, __func__); + void *buffer = m_allocator.allocate(size_in_bytes, 8, AT); m_owned_buffers.append(buffer); m_current_begin = (uintptr_t)buffer; m_current_end = m_current_begin + size_in_bytes; diff --git a/source/blender/blenlib/BLI_listbase_wrapper.hh b/source/blender/blenlib/BLI_listbase_wrapper.hh index 02313d9d22d..70f3135d9e3 100644 --- a/source/blender/blenlib/BLI_listbase_wrapper.hh +++ b/source/blender/blenlib/BLI_listbase_wrapper.hh @@ -20,8 +20,10 @@ /** \file * \ingroup bli * - * The purpose of this wrapper is just to make it more comfortable to iterate of ListBase - * instances, that are used in many places in Blender. + * `BLI::ListBaseWrapper` is a typed wrapper for the ListBase struct. That makes it safer and more + * convenient to use in C++ in some cases. However, if you find yourself iterating over a linked + * list a lot, consider to convert it into a vector for further processing. This improves + * performance and debugability. */ #include "BLI_listbase.h" diff --git a/source/blender/blenlib/BLI_map.hh b/source/blender/blenlib/BLI_map.hh index ea5e5da4099..54e086cc36d 100644 --- a/source/blender/blenlib/BLI_map.hh +++ b/source/blender/blenlib/BLI_map.hh @@ -20,698 +20,986 @@ /** \file * \ingroup bli * - * This file provides a map implementation that uses open addressing with probing. + * A `BLI::Map` is an unordered associative container that stores key-value pairs. The + * keys have to be unique. It is designed to be a more convenient and efficient replacement for + * `std::unordered_map`. All core operations (add, lookup, remove and contains) can be done in O(1) + * amortized expected time. * - * The key and value objects are stored directly in the hash table to avoid indirect memory - * lookups. Keys and values are stored in groups of four to avoid wasting memory due to padding. 
+ * Your default choice for a hash map in Blender should be `BLI::Map`. + * + * BLI::Map is implemented using open addressing in a slot array with a power-of-two size. Every + * slot is in one of three states: empty, occupied or removed. If a slot is occupied, it contains + * a Key and Value instance. + * + * Benchmarking and comparing hash tables is hard, because many factors influence the result. The + * performance of a hash table depends on the combination of the hash function, probing strategy, + * max load factor, data types, slot type and data distribution. This implementation is designed to + * be relatively fast by default in all cases. However, it also offers many customization points + * that allow it to be optimized for a specific use case. + * + * A rudimentary benchmark can be found in BLI_map_test.cc. The results of that benchmark are there + * as well. The numbers show that in this specific case BLI::Map outperforms std::unordered_map + * consistently by a good amount. + * + * Some noteworthy information: + * - Key and Value must be movable types. + * - Pointers to keys and values might be invalidated when the map is changed or moved. + * - The hash function can be customized. See BLI_hash.hh for details. + * - The probing strategy can be customized. See BLI_probing_strategies.hh for details. + * - The slot type can be customized. See BLI_map_slots.hh for details. + * - Small buffer optimization is enabled by default, if Key and Value are not too large. + * - The methods `add_new` and `remove_contained` should be used instead of `add` and `remove` + * whenever appropriate. Assumptions and intention are described better this way. + * - There are multiple methods to add and lookup keys for different use cases. + * - You cannot use a range-for loop on the map directly. Instead use the keys(), values() and + * items() iterators. If your map is non-const, you can also change the values through those + * iterators (but not the keys). 
+ * - Lookups can be performed using types other than Key without conversion. For that use the + * methods ending with `_as`. The template parameters Hash and IsEqual have to support the other + * key type. This can greatly improve performance when the map uses strings as keys. + * - The default constructor is cheap, even when a large InlineBufferCapacity is used. A large + * slot array will only be initialized when the first element is added. + * - The `print_stats` method can be used to get information about the distribution of keys and + * memory usage of the map. + * - The method names don't follow the std::unordered_map names in many cases. Searching for such + * names in this file will usually let you discover the new name. + * - There is a StdUnorderedMapWrapper class, that wraps std::unordered_map and gives it the same + * interface as BLI::Map. This is useful for benchmarking. */ -#include "BLI_array_ref.hh" +#include + +#include "BLI_array.hh" #include "BLI_hash.hh" -#include "BLI_open_addressing.hh" +#include "BLI_hash_tables.hh" +#include "BLI_map_slots.hh" +#include "BLI_probing_strategies.hh" namespace BLI { -// clang-format off - -#define ITER_SLOTS_BEGIN(KEY, ARRAY, OPTIONAL_CONST, R_ITEM, R_OFFSET) \ - uint32_t hash = DefaultHash{}(KEY); \ - uint32_t perturb = hash; \ - while (true) { \ - uint32_t item_index = (hash & ARRAY.slot_mask()) >> OFFSET_SHIFT; \ - uint8_t R_OFFSET = hash & OFFSET_MASK; \ - uint8_t initial_offset = R_OFFSET; \ - OPTIONAL_CONST Item &R_ITEM = ARRAY.item(item_index); \ - do { - -#define ITER_SLOTS_END(R_OFFSET) \ - R_OFFSET = (R_OFFSET + 1u) & OFFSET_MASK; \ - } while (R_OFFSET != initial_offset); \ - perturb >>= 5; \ - hash = hash * 5 + 1 + perturb; \ - } ((void)0) - -// clang-format on - -template +template< + /** + * Type of the keys stored in the map. Keys have to be movable. Furthermore, the hash and + * is-equal functions have to support it. + */ + typename Key, + /** + * Type of the value that is stored per key. 
It has to be movable as well. + */ + typename Value, + /** + * The minimum number of elements that can be stored in this Map without doing a heap + * allocation. This is useful when you expect to have many small maps. However, keep in mind + * that (unlike vector) initializing a map has a O(n) cost in the number of slots. + * + * When Key or Value are large, the small buffer optimization is disabled by default to avoid + * large unexpected allocations on the stack. It can still be enabled explicitely though. + */ + uint32_t InlineBufferCapacity = (sizeof(Key) + sizeof(Value) < 100) ? 4 : 0, + /** + * The strategy used to deal with collistions. They are defined in BLI_probing_strategies.hh. + */ + typename ProbingStrategy = DefaultProbingStrategy, + /** + * The hash function used to hash the keys. There is a default for many types. See BLI_hash.hh + * for examples on how to define a custom hash function. + */ + typename Hash = DefaultHash, + /** + * The equality operator used to compare keys. By default it will simply compare keys using the + * `==` operator. + */ + typename IsEqual = DefaultEquality, + /** + * This is what will actually be stored in the hash table array. At a minimum a slot has to be + * able to hold a key, a value and information about whether the slot is empty, occupied or + * removed. Using a non-standard slot type can improve performance or reduce the memory + * footprint for some types. Slot types are defined in BLI_map_slots.hh + */ + typename Slot = typename DefaultMapSlot::type, + /** + * The allocator used by this map. Should rarely be changed, except when you don't want that + * MEM_* is used internally. + */ + typename Allocator = GuardedAllocator> class Map { private: - static constexpr uint OFFSET_MASK = 3; - static constexpr uint OFFSET_SHIFT = 2; + /** + * Slots are either empty, occupied or removed. The number of occupied slots can be computed by + * subtracting the removed slots from the occupied-and-removed slots. 
+ */ + uint32_t m_removed_slots; + uint32_t m_occupied_and_removed_slots; - class Item { - private: - static constexpr uint8_t IS_EMPTY = 0; - static constexpr uint8_t IS_SET = 1; - static constexpr uint8_t IS_DUMMY = 2; + /** + * The maximum number of slots that can be used (either occupied or removed) until the set has to + * grow. This is the total number of slots times the max load factor. + */ + uint32_t m_usable_slots; - uint8_t m_status[4]; - AlignedBuffer<4 * sizeof(KeyT), alignof(KeyT)> m_keys_buffer; - AlignedBuffer<4 * sizeof(ValueT), alignof(ValueT)> m_values_buffer; + /** + * The number of slots minus one. This is a bit mask that can be used to turn any integer into a + * valid slot index efficiently. + */ + uint32_t m_slot_mask; - public: - static constexpr uint slots_per_item = 4; + /** This is called to hash incoming keys. */ + Hash m_hash; - Item() - { - for (uint offset = 0; offset < 4; offset++) { - m_status[offset] = IS_EMPTY; - } - } + /** This is called to check equality of two keys. */ + IsEqual m_is_equal; - ~Item() - { - for (uint offset = 0; offset < 4; offset++) { - if (m_status[offset] == IS_SET) { - this->key(offset)->~KeyT(); - this->value(offset)->~ValueT(); - } - } - } + /** The max load factor is 1/2 = 50% by default. */ +#define LOAD_FACTOR 1, 2 + LoadFactor m_max_load_factor = LoadFactor(LOAD_FACTOR); + using SlotArray = + Array; +#undef LOAD_FACTOR - Item(const Item &other) - { - for (uint offset = 0; offset < 4; offset++) { - uint8_t status = other.m_status[offset]; - m_status[offset] = status; - if (status == IS_SET) { - new (this->key(offset)) KeyT(*other.key(offset)); - new (this->value(offset)) ValueT(*other.value(offset)); - } - } - } + /** + * This is the array that contains the actual slots. There is always at least one empty slot and + * the size of the array is a power of two. 
+ */ + SlotArray m_slots; - Item(Item &&other) noexcept - { - for (uint offset = 0; offset < 4; offset++) { - uint8_t status = other.m_status[offset]; - m_status[offset] = status; - if (status == IS_SET) { - new (this->key(offset)) KeyT(std::move(*other.key(offset))); - new (this->value(offset)) ValueT(std::move(*other.value(offset))); - } - } - } + /** Iterate over a slot index sequence for a given hash. */ +#define MAP_SLOT_PROBING_BEGIN(HASH, R_SLOT) \ + SLOT_PROBING_BEGIN (ProbingStrategy, HASH, m_slot_mask, SLOT_INDEX) \ + auto &R_SLOT = m_slots[SLOT_INDEX]; +#define MAP_SLOT_PROBING_END() SLOT_PROBING_END() - bool has_key(uint offset, const KeyT &key) const - { - return m_status[offset] == IS_SET && key == *this->key(offset); - } + public: + /** + * Initialize an empty map. This is a cheap operation no matter how large the inline buffer is. + * This is necessary to avoid a high cost when no elements are added at all. An optimized grow + * operation is performed on the first insertion. 
+ */ + Map() + : m_removed_slots(0), + m_occupied_and_removed_slots(0), + m_usable_slots(0), + m_slot_mask(0), + m_hash(), + m_is_equal(), + m_slots(1) + { + } - bool is_set(uint offset) const - { - return m_status[offset] == IS_SET; - } + ~Map() = default; - bool is_empty(uint offset) const - { - return m_status[offset] == IS_EMPTY; - } + Map(const Map &other) = default; - bool is_dummy(uint offset) const - { - return m_status[offset] == IS_DUMMY; - } + Map(Map &&other) noexcept + : m_removed_slots(other.m_removed_slots), + m_occupied_and_removed_slots(other.m_occupied_and_removed_slots), + m_usable_slots(other.m_usable_slots), + m_slot_mask(other.m_slot_mask), + m_hash(std::move(other.m_hash)), + m_is_equal(std::move(other.m_is_equal)), + m_slots(std::move(other.m_slots)) + { + other.~Map(); + new (&other) Map(); + } - KeyT *key(uint offset) const - { - return (KeyT *)m_keys_buffer.ptr() + offset; + Map &operator=(const Map &other) + { + if (this == &other) { + return *this; } - ValueT *value(uint offset) const - { - return (ValueT *)m_values_buffer.ptr() + offset; - } + this->~Map(); + new (this) Map(other); - template - void store(uint offset, ForwardKeyT &&key, ForwardValueT &&value) - { - this->store_without_value(offset, std::forward(key)); - new (this->value(offset)) ValueT(std::forward(value)); - } + return *this; + } - template void store_without_value(uint offset, ForwardKeyT &&key) - { - BLI_assert(m_status[offset] != IS_SET); - m_status[offset] = IS_SET; - new (this->key(offset)) KeyT(std::forward(key)); + Map &operator=(Map &&other) + { + if (this == &other) { + return *this; } - void set_dummy(uint offset) - { - BLI_assert(m_status[offset] == IS_SET); - m_status[offset] = IS_DUMMY; - destruct(this->key(offset)); - destruct(this->value(offset)); - } - }; + this->~Map(); + new (this) Map(std::move(other)); - using ArrayType = OpenAddressingArray; - ArrayType m_array; + return *this; + } - public: - Map() = default; + /** + * Insert a new key-value-pair 
into the map. This invokes undefined behavior when the key is in + * the map already. + */ + void add_new(const Key &key, const Value &value) + { + this->add_new__impl(key, value, m_hash(key)); + } + void add_new(const Key &key, Value &&value) + { + this->add_new__impl(key, std::move(value), m_hash(key)); + } + void add_new(Key &&key, const Value &value) + { + this->add_new__impl(std::move(key), value, m_hash(key)); + } + void add_new(Key &&key, Value &&value) + { + this->add_new__impl(std::move(key), std::move(value), m_hash(key)); + } /** - * Allocate memory such that at least min_usable_slots can be added before the map has to grow - * again. + * Add a key-value-pair to the map. If the map contains the key already, nothing is changed. + * If you want to replace the currently stored value, use `add_overwrite`. + * Returns true when the key has been newly added. + * + * This is similar to std::unordered_map::insert. */ - void reserve(uint min_usable_slots) + bool add(const Key &key, const Value &value) { - if (m_array.slots_usable() < min_usable_slots) { - this->grow(min_usable_slots); - } + return this->add_as(key, value); + } + bool add(const Key &key, Value &&value) + { + return this->add_as(key, std::move(value)); + } + bool add(Key &&key, const Value &value) + { + return this->add_as(std::move(key), value); + } + bool add(Key &&key, Value &&value) + { + return this->add_as(std::move(key), std::move(value)); } /** - * Remove all elements from the map. + * Same as `add`, but accepts other key types that are supported by the hash function. */ - void clear() + template + bool add_as(ForwardKey &&key, ForwardValue &&value) { - this->~Map(); - new (this) Map(); + return this->add__impl( + std::forward(key), std::forward(value), m_hash(key)); } /** - * Insert a new key-value-pair in the map. - * Asserts when the key existed before. + * Adds a key-value-pair to the map. If the map contained the key already, the corresponding + * value will be replaced. 
+ * Returns true when the key has been newly added. + * + * This is similar to std::unordered_map::insert_or_assign. */ - void add_new(const KeyT &key, const ValueT &value) + bool add_overwrite(const Key &key, const Value &value) { - this->add_new__impl(key, value); + return this->add_overwrite_as(key, value); } - void add_new(const KeyT &key, ValueT &&value) + bool add_overwrite(const Key &key, Value &&value) { - this->add_new__impl(key, std::move(value)); + return this->add_overwrite_as(key, std::move(value)); } - void add_new(KeyT &&key, const ValueT &value) + bool add_overwrite(Key &&key, const Value &value) { - this->add_new__impl(std::move(key), value); + return this->add_overwrite_as(std::move(key), value); } - void add_new(KeyT &&key, ValueT &&value) + bool add_overwrite(Key &&key, Value &&value) { - this->add_new__impl(std::move(key), std::move(value)); + return this->add_overwrite_as(std::move(key), std::move(value)); } /** - * Insert a new key-value-pair in the map if the key does not exist yet. - * Returns true when the pair was newly inserted, otherwise false. + * Same as `add_overwrite`, but accepts other key types that are supported by the hash function. */ - bool add(const KeyT &key, const ValueT &value) + template + bool add_overwrite_as(ForwardKey &&key, ForwardValue &&value) { - return this->add__impl(key, value); + return this->add_overwrite__impl( + std::forward(key), std::forward(value), m_hash(key)); } - bool add(const KeyT &key, ValueT &&value) + + /** + * Returns true if there is a key in the map that compares equal to the given key. + * + * This is similar to std::unordered_map::contains. + */ + bool contains(const Key &key) const { - return this->add__impl(key, std::move(value)); + return this->contains_as(key); } - bool add(KeyT &&key, const ValueT &value) + + /** + * Same as `contains`, but accepts other key types that are supported by the hash function. 
+ */ + template bool contains_as(const ForwardKey &key) const { - return this->add__impl(std::move(key), value); + return this->contains__impl(key, m_hash(key)); } - bool add(KeyT &&key, ValueT &&value) + + /** + * Deletes the key-value-pair with the given key. Returns true when the key was contained and is + * now removed, otherwise false. + * + * This is similar to std::unordered_map::erase. + */ + bool remove(const Key &key) { - return this->add__impl(std::move(key), std::move(value)); + return this->remove_as(key); } /** - * Remove the key from the map. - * Asserts when the key does not exist in the map. + * Same as `remove`, but accepts other key types that are supported by the hash function. */ - void remove(const KeyT &key) + template bool remove_as(const ForwardKey &key) { - BLI_assert(this->contains(key)); - ITER_SLOTS_BEGIN (key, m_array, , item, offset) { - if (item.has_key(offset, key)) { - item.set_dummy(offset); - m_array.update__set_to_dummy(); - return; - } - } - ITER_SLOTS_END(offset); + return this->remove__impl(key, m_hash(key)); } /** - * Get the value for the given key and remove it from the map. - * Asserts when the key does not exist in the map. + * Deletes the key-value-pair with the given key. This invokes undefined behavior when the key is + * not in the map. */ - ValueT pop(const KeyT &key) + void remove_contained(const Key &key) { - BLI_assert(this->contains(key)); - ITER_SLOTS_BEGIN (key, m_array, , item, offset) { - if (item.has_key(offset, key)) { - ValueT value = std::move(*item.value(offset)); - item.set_dummy(offset); - m_array.update__set_to_dummy(); - return value; - } - } - ITER_SLOTS_END(offset); + this->remove_contained_as(key); } /** - * Returns true when the key exists in the map, otherwise false. + * Same as `remove_contained`, but accepts other key types that are supported by the hash + * function. 
*/ - bool contains(const KeyT &key) const + template void remove_contained_as(const ForwardKey &key) { - ITER_SLOTS_BEGIN (key, m_array, const, item, offset) { - if (item.is_empty(offset)) { - return false; - } - else if (item.has_key(offset, key)) { - return true; - } - } - ITER_SLOTS_END(offset); + this->remove_contained__impl(key, m_hash(key)); + } + + /** + * Get the value that is stored for the given key and remove it from the map. This invokes + * undefined behavior when the key is not in the map. + */ + Value pop(const Key &key) + { + return this->pop_as(key); + } + + /** + * Same as `pop`, but accepts other key types that are supported by the hash function. + */ + template Value pop_as(const ForwardKey &key) + { + return this->pop__impl(key, m_hash(key)); } /** - * First, checks if the key exists in the map. - * If it does exist, call the modify function with a pointer to the corresponding value. - * If it does not exist, call the create function with a pointer to where the value should be - * created. + * This method can be used to implement more complex custom behavior without having to do + * multiple lookups * - * Returns whatever is returned from one of the callback functions. Both callbacks have to return - * the same type. + * When the key did not yet exist in the map, the create_value function is called. Otherwise the + * modify_value function is called. * - * CreateValueF: Takes a pointer to where the value should be created. - * ModifyValueF: Takes a pointer to the value that should be modified. + * Both functions are expected to take a single parameter of type `Value *`. In create_value, + * this pointer will point to uninitialized memory that has to be initialized by the function. In + * modify_value, it will point to an already initialized value. + * + * The function returns whatever is returned from the create_value or modify_value callback. + * Therefore, both callbacks have to have the same return type. 
+ * + * In this example an integer is stored for every key. The initial value is five and we want to + * increase it every time the same key is used. + * map.add_or_modify(key, + * [](int *value) { *value = 5; }, + * [](int *value) { (*value)++; }); */ template - auto add_or_modify(const KeyT &key, + auto add_or_modify(const Key &key, const CreateValueF &create_value, const ModifyValueF &modify_value) -> decltype(create_value(nullptr)) { - return this->add_or_modify__impl(key, create_value, modify_value); + return this->add_or_modify_as(key, create_value, modify_value); } template - auto add_or_modify(KeyT &&key, - const CreateValueF &create_value, - const ModifyValueF &modify_value) -> decltype(create_value(nullptr)) + auto add_or_modify(Key &&key, const CreateValueF &create_value, const ModifyValueF &modify_value) + -> decltype(create_value(nullptr)) { - return this->add_or_modify__impl(std::move(key), create_value, modify_value); + return this->add_or_modify_as(std::move(key), create_value, modify_value); } /** - * Similar to add, but overrides the value for the key when it exists already. + * Same as `add_or_modify`, but accepts other key types that are supported by the hash function. */ - bool add_override(const KeyT &key, const ValueT &value) + template + auto add_or_modify_as(ForwardKey &&key, + const CreateValueF &create_value, + const ModifyValueF &modify_value) -> decltype(create_value(nullptr)) { - return this->add_override__impl(key, value); + return this->add_or_modify__impl( + std::forward(key), create_value, modify_value, m_hash(key)); } - bool add_override(const KeyT &key, ValueT &&value) + + /** + * Returns a pointer to the value that corresponds to the given key. If the key is not in the + * map, nullptr is returned. + * + * This is similar to std::unordered_map::find. 
+ */ + const Value *lookup_ptr(const Key &key) const { - return this->add_override__impl(key, std::move(value)); + return this->lookup_ptr_as(key); } - bool add_override(KeyT &&key, const ValueT &value) + Value *lookup_ptr(const Key &key) { - return this->add_override__impl(std::move(key), value); + return this->lookup_ptr_as(key); } - bool add_override(KeyT &&key, ValueT &&value) + + /** + * Same as `lookup_ptr`, but accepts other key types that are supported by the hash function. + */ + template const Value *lookup_ptr_as(const ForwardKey &key) const { - return this->add_override__impl(std::move(key), std::move(value)); + return this->lookup_ptr__impl(key, m_hash(key)); + } + template Value *lookup_ptr_as(const ForwardKey &key) + { + return const_cast(this->lookup_ptr__impl(key, m_hash(key))); } /** - * Check if the key exists in the map. - * Return a pointer to the value, when it exists. - * Otherwise return nullptr. + * Returns a reference to the value that corresponds to the given key. This invokes undefined + * behavior when the key is not in the map. */ - const ValueT *lookup_ptr(const KeyT &key) const + const Value &lookup(const Key &key) const { - ITER_SLOTS_BEGIN (key, m_array, const, item, offset) { - if (item.is_empty(offset)) { - return nullptr; - } - else if (item.has_key(offset, key)) { - return item.value(offset); - } - } - ITER_SLOTS_END(offset); + return this->lookup_as(key); } - - ValueT *lookup_ptr(const KeyT &key) + Value &lookup(const Key &key) { - const Map *const_this = this; - return const_cast(const_this->lookup_ptr(key)); + return this->lookup_as(key); } /** - * Lookup the value that corresponds to the key. - * Asserts when the key does not exist. + * Same as `lookup`, but accepts other key types that are supported by the hash function. 
*/ - const ValueT &lookup(const KeyT &key) const + template const Value &lookup_as(const ForwardKey &key) const { - const ValueT *ptr = this->lookup_ptr(key); + const Value *ptr = this->lookup_ptr_as(key); + BLI_assert(ptr != nullptr); + return *ptr; + } + template Value &lookup_as(const ForwardKey &key) + { + Value *ptr = this->lookup_ptr_as(key); BLI_assert(ptr != nullptr); return *ptr; } - ValueT &lookup(const KeyT &key) + /** + * Returns a copy of the value that corresponds to the given key. If the key is not in the + * map, the provided default_value is returned. + */ + Value lookup_default(const Key &key, const Value &default_value) const { - const Map *const_this = this; - return const_cast(const_this->lookup(key)); + return this->lookup_default_as(key, default_value); } /** - * Check if the key exists in the map. - * If it does, return a copy of the value. - * Otherwise, return the default value. + * Same as `lookup_default`, but accepts other key types that are supported by the hash function. */ - ValueT lookup_default(const KeyT &key, ValueT default_value) const + template + Value lookup_default_as(const ForwardKey &key, ForwardValue &&default_value) const { - const ValueT *ptr = this->lookup_ptr(key); + const Value *ptr = this->lookup_ptr_as(key); if (ptr != nullptr) { return *ptr; } else { - return default_value; + return std::forward(default_value); } } /** - * Return the value that corresponds to the given key. - * If it does not exist yet, create and insert it first. + * Returns a reference to the value that corresponds to the given key. If the key is not yet in + * the map, it will be newly added. + * + * The create_value callback is only called when the key did not exist yet. It is expected to + * take no parameters and return the value to be inserted. 
*/ template - ValueT &lookup_or_add(const KeyT &key, const CreateValueF &create_value) + Value &lookup_or_add(const Key &key, const CreateValueF &create_value) { - return this->lookup_or_add__impl(key, create_value); + return this->lookup_or_add_as(key, create_value); } - template - ValueT &lookup_or_add(KeyT &&key, const CreateValueF &create_value) + template Value &lookup_or_add(Key &&key, const CreateValueF &create_value) { - return this->lookup_or_add__impl(std::move(key), create_value); + return this->lookup_or_add_as(std::move(key), create_value); } /** - * Return the value that corresponds to the given key. - * If it does not exist yet, insert a new default constructed value and return that. + * Same as `lookup_or_add`, but accepts other key types that are supported by the hash function. */ - ValueT &lookup_or_add_default(const KeyT &key) + template + Value &lookup_or_add_as(ForwardKey &&key, const CreateValueF &create_value) { - return this->lookup_or_add(key, []() { return ValueT(); }); - } - ValueT &lookup_or_add_default(const KeyT &&key) - { - return this->lookup_or_add(std::move(key), []() { return ValueT(); }); + return this->lookup_or_add__impl(std::forward(key), create_value, m_hash(key)); } /** - * Get the number of elements in the map. + * Returns a reference to the value that corresponds to the given key. If the key is not yet in + * the map, it will be newly added. The newly added value will be default constructed. */ - uint32_t size() const + Value &lookup_or_add_default(const Key &key) { - return m_array.slots_set(); + return this->lookup_or_add_default_as(key); + } + Value &lookup_or_add_default(Key &&key) + { + return this->lookup_or_add_default_as(std::move(key)); } /** - * Returns true if there are no elements in the map. + * Same as `lookup_or_add_default`, but accepts other key types that are supported by the hash + * function. 
*/ - bool is_empty() const + template Value &lookup_or_add_default_as(ForwardKey &&key) { - return this->size() == 0; + return this->lookup_or_add(std::forward(key), []() { return Value(); }); } /** - * Calls the given function for each key-value-pair. + * Calls the provided callback for every key-value-pair in the map. The callback is expected + * to take a `const Key &` as first and a `const Value &` as second parameter. */ template void foreach_item(const FuncT &func) const { - for (const Item &item : m_array) { - for (uint offset = 0; offset < 4; offset++) { - if (item.is_set(offset)) { - const KeyT &key = *item.key(offset); - const ValueT &value = *item.value(offset); - func(key, value); - } + uint32_t size = this->size(); + for (uint32_t i = 0; i < size; i++) { + const Slot &slot = m_slots[i]; + if (slot.is_occupied()) { + const Key &key = *slot.key(); + const Value &value = *slot.value(); + func(key, value); } } } - void print_table() const - { - std::cout << "Hash Table:\n"; - std::cout << " Size: " << m_array.slots_set() << '\n'; - std::cout << " Capacity: " << m_array.slots_total() << '\n'; - uint32_t item_index = 0; - for (const Item &item : m_array) { - std::cout << " Item: " << item_index++ << '\n'; - for (uint offset = 0; offset < 4; offset++) { - std::cout << " " << offset << " \t"; - if (item.is_empty(offset)) { - std::cout << " \n"; - } - else if (item.is_set(offset)) { - const KeyT &key = *item.key(offset); - const ValueT &value = *item.value(offset); - uint32_t collisions = this->count_collisions(key); - std::cout << " " << key << " -> " << value << " \t Collisions: " << collisions - << '\n'; - } - else if (item.is_dummy(offset)) { - std::cout << " \n"; - } - } - } - } - - template class BaseIterator { - protected: - const Map *m_map; - uint32_t m_slot; - - public: - BaseIterator(const Map *map, uint32_t slot) : m_map(map), m_slot(slot) + /** + * A utility iterator that reduces the amount of code when implementing the actual iterators. 
+ * This uses the "curiously recurring template pattern" (CRTP). + */ + template struct BaseIterator { + Slot *m_slots; + uint32_t m_total_slots; + uint32_t m_current_slot; + + BaseIterator(const Slot *slots, uint32_t total_slots, uint32_t current_slot) + : m_slots(const_cast(slots)), + m_total_slots(total_slots), + m_current_slot(current_slot) { } BaseIterator &operator++() { - m_slot = m_map->next_slot(m_slot + 1); + while (++m_current_slot < m_total_slots) { + if (m_slots[m_current_slot].is_occupied()) { + break; + } + } return *this; } - friend bool operator==(const BaseIterator &a, const BaseIterator &b) - { - BLI_assert(a.m_map == b.m_map); - return a.m_slot == b.m_slot; - } - friend bool operator!=(const BaseIterator &a, const BaseIterator &b) { - return !(a == b); + BLI_assert(a.m_slots == b.m_slots); + BLI_assert(a.m_total_slots == b.m_total_slots); + return a.m_current_slot != b.m_current_slot; } SubIterator begin() const { - return SubIterator(m_map, m_map->next_slot(0)); + for (uint32_t i = 0; i < m_total_slots; i++) { + if (m_slots[i].is_occupied()) { + return SubIterator(m_slots, m_total_slots, i); + } + } + return this->end(); } SubIterator end() const { - return SubIterator(m_map, m_map->m_array.slots_total()); + return SubIterator(m_slots, m_total_slots, m_total_slots); + } + + Slot ¤t_slot() const + { + return m_slots[m_current_slot]; } }; class KeyIterator final : public BaseIterator { public: - KeyIterator(const Map *map, uint32_t slot) : BaseIterator(map, slot) + KeyIterator(const Slot *slots, uint32_t total_slots, uint32_t current_slot) + : BaseIterator(slots, total_slots, current_slot) { } - const KeyT &operator*() const + const Key &operator*() const { - uint32_t item_index = this->m_slot >> OFFSET_SHIFT; - uint offset = this->m_slot & OFFSET_MASK; - const Item &item = this->m_map->m_array.item(item_index); - BLI_assert(item.is_set(offset)); - return *item.key(offset); + return *this->current_slot().key(); } }; class ValueIterator final : 
public BaseIterator { public: - ValueIterator(const Map *map, uint32_t slot) : BaseIterator(map, slot) + ValueIterator(const Slot *slots, uint32_t total_slots, uint32_t current_slot) + : BaseIterator(slots, total_slots, current_slot) { } - ValueT &operator*() const + const Value &operator*() const { - uint32_t item_index = this->m_slot >> OFFSET_SHIFT; - uint offset = this->m_slot & OFFSET_MASK; - const Item &item = this->m_map->m_array.item(item_index); - BLI_assert(item.is_set(offset)); - return *item.value(offset); + return *this->current_slot().value(); } }; - class ItemIterator final : public BaseIterator { + class MutableValueIterator final : public BaseIterator { public: - ItemIterator(const Map *map, uint32_t slot) : BaseIterator(map, slot) + MutableValueIterator(const Slot *slots, uint32_t total_slots, uint32_t current_slot) + : BaseIterator(slots, total_slots, current_slot) { } - struct UserItem { - const KeyT &key; - ValueT &value; + Value &operator*() + { + return *this->current_slot().value(); + } + }; - friend std::ostream &operator<<(std::ostream &stream, const Item &item) - { - stream << item.key << " -> " << item.value; - return stream; - } + class ItemIterator final : public BaseIterator { + public: + ItemIterator(const Slot *slots, uint32_t total_slots, uint32_t current_slot) + : BaseIterator(slots, total_slots, current_slot) + { + } + + struct Item { + const Key &key; + const Value &value; }; - UserItem operator*() const + Item operator*() const { - uint32_t item_index = this->m_slot >> OFFSET_SHIFT; - uint offset = this->m_slot & OFFSET_MASK; - const Item &item = this->m_map->m_array.item(item_index); - BLI_assert(item.is_set(offset)); - return {*item.key(offset), *item.value(offset)}; + const Slot &slot = this->current_slot(); + return {*slot.key(), *slot.value()}; } }; - template friend class BaseIterator; + class MutableItemIterator final : public BaseIterator { + public: + MutableItemIterator(const Slot *slots, uint32_t total_slots, 
uint32_t current_slot) + : BaseIterator(slots, total_slots, current_slot) + { + } + + struct Item { + const Key &key; + Value &value; + }; + + Item operator*() const + { + Slot &slot = this->current_slot(); + return {*slot.key(), *slot.value()}; + } + }; /** - * Iterate over all keys in the map. + * Allows writing a range-for loop that iterates over all keys. The iterator is invalidated, when + * the map is changed. */ KeyIterator keys() const { - return KeyIterator(this, 0); + return KeyIterator(m_slots.data(), m_slots.size(), 0); } /** - * Iterate over all values in the map. + * Returns an iterator over all values in the map. The iterator is invalidated, when the map is + * changed. */ ValueIterator values() const { - return ValueIterator(this, 0); + return ValueIterator(m_slots.data(), m_slots.size(), 0); } /** - * Iterate over all key-value-pairs in the map. - * They can be accessed with item.key and item.value. + * Returns an iterator over all values in the map and allows you to change the values. The + * iterator is invalidated, when the map is changed. + */ + MutableValueIterator values() + { + return MutableValueIterator(m_slots.data(), m_slots.size(), 0); + } + + /** + * Returns an iterator over all key-value-pairs in the map. The key-value-pairs are stored in + * a temporary struct with a .key and a .value field.The iterator is invalidated, when the map is + * changed. */ ItemIterator items() const { - return ItemIterator(this, 0); + return ItemIterator(m_slots.data(), m_slots.size(), 0); } - private: - uint32_t next_slot(uint32_t slot) const - { - for (; slot < m_array.slots_total(); slot++) { - uint32_t item_index = slot >> OFFSET_SHIFT; - uint offset = slot & OFFSET_MASK; - const Item &item = m_array.item(item_index); - if (item.is_set(offset)) { - return slot; - } + /** + * Returns an iterator over all key-value-pairs in the map. The key-value-pairs are stored in + * a temporary struct with a .key and a .value field. 
The iterator is invalidated, when the map
+ * is changed.
+ *
+ * This iterator also allows you to modify the value (but not the key).
+ */
+ MutableItemIterator items()
+ {
+ return MutableItemIterator(m_slots.data(), m_slots.size(), 0);
+ }
+
+ /**
+ * Print common statistics like size and collision count. This is useful for debugging purposes.
+ */
+ void print_stats(StringRef name = "") const
+ {
+ HashTableStats stats(*this, this->keys());
+ stats.print();
+ }
+
+ /**
+ * Return the number of key-value-pairs that are stored in the map.
+ */
+ uint32_t size() const
+ {
+ return m_occupied_and_removed_slots - m_removed_slots;
+ }
+
+ /**
+ * Returns true if there are no elements in the map.
+ *
+ * This is similar to std::unordered_map::empty.
+ */
+ bool is_empty() const
+ {
+ return m_occupied_and_removed_slots == m_removed_slots;
+ }
+
+ /**
+ * Returns the number of available slots. This is mostly for debugging purposes.
+ */
+ uint32_t capacity() const
+ {
+ return m_slots.size();
+ }
+
+ /**
+ * Returns the amount of removed slots in the map. This is mostly for debugging purposes.
+ */
+ uint32_t removed_amount() const
+ {
+ return m_removed_slots;
+ }
+
+ /**
+ * Returns the bytes required per element. This is mostly for debugging purposes.
+ */
+ uint32_t size_per_element() const
+ {
+ return sizeof(Slot);
+ }
+
+ /**
+ * Returns the approximate memory requirements of the map in bytes. This becomes more exact the
+ * larger the map becomes.
+ */
+ uint32_t size_in_bytes() const
+ {
+ return sizeof(Slot) * m_slots.size();
+ }
+
+ /**
+ * Potentially resize the map such that the specified number of elements can be added without
+ * another grow operation.
+ */
+ void reserve(uint32_t n)
+ {
+ if (m_usable_slots < n) {
+ this->realloc_and_reinsert(n);
}
- uint32_t count_collisions(const KeyT &key) const
+ /**
+ * Removes all key-value-pairs from the map.
+ */ + void clear() { - uint32_t collisions = 0; - ITER_SLOTS_BEGIN (key, m_array, const, item, offset) { - if (item.is_empty(offset) || item.has_key(offset, key)) { - return collisions; + this->~Map(); + new (this) Map(); + } + + /** + * Get the number of collisions that the probing strategy has to go through to find the key or + * determine that it is not in the map. + */ + uint32_t count_collisions(const Key &key) const + { + return this->count_collisions__impl(key, m_hash(key)); + } + + private: + BLI_NOINLINE void realloc_and_reinsert(uint32_t min_usable_slots) + { + uint32_t total_slots, usable_slots; + m_max_load_factor.compute_total_and_usable_slots( + SlotArray::inline_buffer_capacity(), min_usable_slots, &total_slots, &usable_slots); + uint32_t new_slot_mask = total_slots - 1; + + /** + * Optimize the case when the map was empty beforehand. We can avoid some copies here. + */ + if (this->size() == 0) { + m_slots.~Array(); + new (&m_slots) SlotArray(total_slots); + m_removed_slots = 0; + m_occupied_and_removed_slots = 0; + m_usable_slots = usable_slots; + m_slot_mask = new_slot_mask; + return; + } + + SlotArray new_slots(total_slots); + + for (Slot &slot : m_slots) { + if (slot.is_occupied()) { + this->add_after_grow_and_destruct_old(slot, new_slots, new_slot_mask); } - collisions++; } - ITER_SLOTS_END(offset); + + /* All occupied slots have been destructed already and empty/removed slots are assumed to be + * trivially destructible. 
*/ + m_slots.clear_without_destruct(); + m_slots = std::move(new_slots); + m_occupied_and_removed_slots -= m_removed_slots; + m_usable_slots = usable_slots; + m_removed_slots = 0; + m_slot_mask = new_slot_mask; } - void ensure_can_add() + void add_after_grow_and_destruct_old(Slot &old_slot, + SlotArray &new_slots, + uint32_t new_slot_mask) { - if (UNLIKELY(m_array.should_grow())) { - this->grow(this->size() + 1); + uint32_t hash = old_slot.get_hash(Hash()); + SLOT_PROBING_BEGIN (ProbingStrategy, hash, new_slot_mask, slot_index) { + Slot &slot = new_slots[slot_index]; + if (slot.is_empty()) { + slot.relocate_occupied_here(old_slot, hash); + return; + } } + SLOT_PROBING_END(); } - BLI_NOINLINE void grow(uint32_t min_usable_slots) + template bool contains__impl(const ForwardKey &key, uint32_t hash) const { - ArrayType new_array = m_array.init_reserved(min_usable_slots); - for (Item &old_item : m_array) { - for (uint offset = 0; offset < 4; offset++) { - if (old_item.is_set(offset)) { - this->add_after_grow(*old_item.key(offset), *old_item.value(offset), new_array); - } + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + return false; + } + if (slot.contains(key, m_is_equal, hash)) { + return true; } } - m_array = std::move(new_array); + MAP_SLOT_PROBING_END(); } - void add_after_grow(KeyT &key, ValueT &value, ArrayType &new_array) + template + void add_new__impl(ForwardKey &&key, ForwardValue &&value, uint32_t hash) { - ITER_SLOTS_BEGIN (key, new_array, , item, offset) { - if (item.is_empty(offset)) { - item.store(offset, std::move(key), std::move(value)); + BLI_assert(!this->contains_as(key)); + + this->ensure_can_add(); + m_occupied_and_removed_slots++; + + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + slot.occupy(std::forward(key), std::forward(value), hash); return; } } - ITER_SLOTS_END(offset); + MAP_SLOT_PROBING_END(); } - template - bool add_override__impl(ForwardKeyT &&key, ForwardValueT &&value) + template + bool 
add__impl(ForwardKey &&key, ForwardValue &&value, uint32_t hash) { - auto create_func = [&](ValueT *dst) { - new (dst) ValueT(std::forward(value)); - return true; - }; - auto modify_func = [&](ValueT *old_value) { - *old_value = std::forward(value); - return false; - }; - return this->add_or_modify(std::forward(key), create_func, modify_func); + this->ensure_can_add(); + + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + slot.occupy(std::forward(key), std::forward(value), hash); + m_occupied_and_removed_slots++; + return true; + } + if (slot.contains(key, m_is_equal, hash)) { + return false; + } + } + MAP_SLOT_PROBING_END(); } - template - bool add__impl(ForwardKeyT &&key, ForwardValueT &&value) + template bool remove__impl(const ForwardKey &key, uint32_t hash) { - this->ensure_can_add(); - - ITER_SLOTS_BEGIN (key, m_array, , item, offset) { - if (item.is_empty(offset)) { - item.store(offset, std::forward(key), std::forward(value)); - m_array.update__empty_to_set(); + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash)) { + slot.remove(); + m_removed_slots++; return true; } - else if (item.has_key(offset, key)) { + if (slot.is_empty()) { return false; } } - ITER_SLOTS_END(offset); + MAP_SLOT_PROBING_END(); } - template - void add_new__impl(ForwardKeyT &&key, ForwardValueT &&value) + template void remove_contained__impl(const ForwardKey &key, uint32_t hash) { - BLI_assert(!this->contains(key)); - this->ensure_can_add(); + BLI_assert(this->contains_as(key)); + + m_removed_slots++; - ITER_SLOTS_BEGIN (key, m_array, , item, offset) { - if (item.is_empty(offset)) { - item.store(offset, std::forward(key), std::forward(value)); - m_array.update__empty_to_set(); + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash)) { + slot.remove(); return; } } - ITER_SLOTS_END(offset); + MAP_SLOT_PROBING_END(); } - template - auto add_or_modify__impl(ForwardKeyT &&key, + template Value pop__impl(const ForwardKey 
&key, uint32_t hash) + { + BLI_assert(this->contains_as(key)); + + m_removed_slots++; + + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash)) { + Value value = *slot.value(); + slot.remove(); + return value; + } + } + MAP_SLOT_PROBING_END(); + } + + template + auto add_or_modify__impl(ForwardKey &&key, const CreateValueF &create_value, - const ModifyValueF &modify_value) -> decltype(create_value(nullptr)) + const ModifyValueF &modify_value, + uint32_t hash) -> decltype(create_value(nullptr)) { using CreateReturnT = decltype(create_value(nullptr)); using ModifyReturnT = decltype(modify_value(nullptr)); @@ -720,42 +1008,159 @@ class Map { this->ensure_can_add(); - ITER_SLOTS_BEGIN (key, m_array, , item, offset) { - if (item.is_empty(offset)) { - m_array.update__empty_to_set(); - item.store_without_value(offset, std::forward(key)); - ValueT *value_ptr = item.value(offset); + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + m_occupied_and_removed_slots++; + slot.occupy_without_value(std::forward(key), hash); + Value *value_ptr = slot.value(); return create_value(value_ptr); } - else if (item.has_key(offset, key)) { - ValueT *value_ptr = item.value(offset); + if (slot.contains(key, m_is_equal, hash)) { + Value *value_ptr = slot.value(); return modify_value(value_ptr); } } - ITER_SLOTS_END(offset); + MAP_SLOT_PROBING_END(); } - template - ValueT &lookup_or_add__impl(ForwardKeyT &&key, const CreateValueF &create_value) + template + Value &lookup_or_add__impl(ForwardKey &&key, const CreateValueF &create_value, uint32_t hash) { this->ensure_can_add(); - ITER_SLOTS_BEGIN (key, m_array, , item, offset) { - if (item.is_empty(offset)) { - item.store(offset, std::forward(key), create_value()); - m_array.update__empty_to_set(); - return *item.value(offset); + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + slot.occupy(std::forward(key), create_value(), hash); + m_occupied_and_removed_slots++; + return *slot.value(); + 
} + if (slot.contains(key, m_is_equal, hash)) { + return *slot.value(); + } + } + MAP_SLOT_PROBING_END(); + } + + template + bool add_overwrite__impl(ForwardKey &&key, ForwardValue &&value, uint32_t hash) + { + auto create_func = [&](Value *ptr) { + new (ptr) Value(std::forward(value)); + return true; + }; + auto modify_func = [&](Value *ptr) { + *ptr = std::forward(value); + return false; + }; + return this->add_or_modify__impl( + std::forward(key), create_func, modify_func, hash); + } + + template + const Value *lookup_ptr__impl(const ForwardKey &key, uint32_t hash) const + { + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + return nullptr; + } + if (slot.contains(key, m_is_equal, hash)) { + return slot.value(); } - else if (item.has_key(offset, key)) { - return *item.value(offset); + } + MAP_SLOT_PROBING_END(); + } + + template + uint32_t count_collisions__impl(const ForwardKey &key, uint32_t hash) const + { + uint32_t collisions = 0; + + MAP_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash)) { + return collisions; + } + if (slot.is_empty()) { + return collisions; } + collisions++; + } + MAP_SLOT_PROBING_END(); + } + + void ensure_can_add() + { + if (m_occupied_and_removed_slots >= m_usable_slots) { + this->realloc_and_reinsert(this->size() + 1); + BLI_assert(m_occupied_and_removed_slots < m_usable_slots); } - ITER_SLOTS_END(offset); } }; -#undef ITER_SLOTS_BEGIN -#undef ITER_SLOTS_END +/** + * A wrapper for std::unordered_map with the API of BLI::Map. This can be used for benchmarking. 
+ */ +template class StdUnorderedMapWrapper { + private: + using MapType = std::unordered_map>; + MapType m_map; + + public: + uint32_t size() const + { + return (uint32_t)m_map.size(); + } + + bool is_empty() const + { + return m_map.empty(); + } + + void reserve(uint32_t n) + { + m_map.reserve(n); + } + + template + void add_new(ForwardKey &&key, ForwardValue &&value) + { + m_map.insert({std::forward(key), std::forward(value)}); + } + + template + bool add(ForwardKey &&key, ForwardValue &&value) + { + return m_map.insert({std::forward(key), std::forward(value)}).second; + } + + bool contains(const Key &key) const + { + return m_map.find(key) != m_map.end(); + } + + bool remove(const Key &key) + { + return (bool)m_map.erase(key); + } + + Value &lookup(const Key &key) + { + return m_map.find(key)->second; + } + + const Value &lookup(const Key &key) const + { + return m_map.find(key)->second; + } + + void clear() + { + m_map.clear(); + } + + void print_stats(StringRef UNUSED(name) = "") const + { + } +}; } // namespace BLI diff --git a/source/blender/blenlib/BLI_map_slots.hh b/source/blender/blenlib/BLI_map_slots.hh new file mode 100644 index 00000000000..dd6d706e2af --- /dev/null +++ b/source/blender/blenlib/BLI_map_slots.hh @@ -0,0 +1,361 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef __BLI_MAP_SLOTS_HH__ +#define __BLI_MAP_SLOTS_HH__ + +/** \file + * \ingroup bli + * + * This file contains slot types that are supposed to be used with BLI::Map. + * + * Every slot type has to be able to hold a value of type Key, a value of type Value and state + * information. A map slot has three possible states: empty, occupied and removed. + * + * When a slot is occupied, it stores instances of type Key and Value. + * + * A map slot type has to implement a couple of methods that are explained in SimpleMapSlot. + * A slot type is assumed to be trivially destructible, when it is not in occupied state. So the + * destructor might not be called in that case. + * + * Possible Improvements: + * - Implement slot type that stores the hash. + */ + +#include "BLI_memory_utils.hh" + +namespace BLI { + +/** + * The simplest possible map slot. It stores the slot state and the optional key and value + * instances in separate variables. Depending on the alignment requirement of the key and value, + * many bytes might be wasted. + */ +template class SimpleMapSlot { + private: + enum State : uint8_t { + Empty = 0, + Occupied = 1, + Removed = 2, + }; + + State m_state; + AlignedBuffer m_key_buffer; + AlignedBuffer m_value_buffer; + + public: + /** + * After the default constructor has run, the slot has to be in the empty state. + */ + SimpleMapSlot() + { + m_state = Empty; + } + + /** + * The destructor also has to destruct the key and value, if the slot is currently occupied. + */ + ~SimpleMapSlot() + { + if (m_state == Occupied) { + this->key()->~Key(); + this->value()->~Value(); + } + } + + /** + * The copy constructor has to copy the state. If the other slot was occupied, a copy of the key + * and value have to be made as well. 
+ */ + SimpleMapSlot(const SimpleMapSlot &other) + { + m_state = other.m_state; + if (other.m_state == Occupied) { + new (this->key()) Key(*other.key()); + new (this->value()) Value(*other.value()); + } + } + + /** + * The move construtor has to copy the state. If the other slot was occupied, the key and value + * from the other have to moved as well. The other slot stays in the state it was in before. Its + * optionally stored key and value remain in a moved-from state. + */ + SimpleMapSlot(SimpleMapSlot &&other) noexcept + { + m_state = other.m_state; + if (other.m_state == Occupied) { + new (this->key()) Key(std::move(*other.key())); + new (this->value()) Value(std::move(*other.value())); + } + } + + /** + * Returns a non-const pointer to the position where the key is stored. + */ + Key *key() + { + return (Key *)m_key_buffer.ptr(); + } + + /** + * Returns a const pointer to the position where the key is stored. + */ + const Key *key() const + { + return (const Key *)m_key_buffer.ptr(); + } + + /** + * Returns a non-const pointer to the position where the value is stored. + */ + Value *value() + { + return (Value *)m_value_buffer.ptr(); + } + + /** + * Returns a const pointer to the position where the value is stored. + */ + const Value *value() const + { + return (const Value *)m_value_buffer.ptr(); + } + + /** + * Returns true if the slot currently contains a key and a value. + */ + bool is_occupied() const + { + return m_state == Occupied; + } + + /** + * Returns true if the slot is empty, i.e. it does not contain a key and is not in removed state. + */ + bool is_empty() const + { + return m_state == Empty; + } + + /** + * Returns the hash of the currently stored key. In this simple map slot implementation, we just + * computed the hash here. Other implementations might store the hash in the slot instead. 
+ */ + template uint32_t get_hash(const Hash &hash) + { + BLI_assert(this->is_occupied()); + return hash(*this->key()); + } + + /** + * Move the other slot into this slot and destruct it. We do destruction here, because this way + * we can avoid a comparison with the state, since we know the slot is occupied. + */ + void relocate_occupied_here(SimpleMapSlot &other, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + BLI_assert(other.is_occupied()); + m_state = Occupied; + new (this->key()) Key(std::move(*other.key())); + new (this->value()) Value(std::move(*other.value())); + other.key()->~Key(); + other.value()->~Value(); + } + + /** + * Returns true, when this slot is occupied and contains a key that compares equal to the given + * key. The hash can be used by other slot implementations to determine inequality faster. + */ + template + bool contains(const ForwardKey &key, const IsEqual &is_equal, uint32_t UNUSED(hash)) const + { + if (m_state == Occupied) { + return is_equal(key, *this->key()); + } + return false; + } + + /** + * Change the state of this slot from empty/removed to occupied. The key/value has to be + * constructed by calling the constructor with the given key/value as parameter. + */ + template + void occupy(ForwardKey &&key, ForwardValue &&value, uint32_t hash) + { + BLI_assert(!this->is_occupied()); + this->occupy_without_value(std::forward(key), hash); + new (this->value()) Value(std::forward(value)); + } + + /** + * Change the state of this slot from empty/removed to occupied, but leave the value + * uninitialized. The caller is responsible to construct the value afterwards. + */ + template void occupy_without_value(ForwardKey &&key, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + m_state = Occupied; + new (this->key()) Key(std::forward(key)); + } + + /** + * Change the state of this slot from occupied to removed. The key and value have to be + * destructed as well. 
+ */ + void remove() + { + BLI_assert(this->is_occupied()); + m_state = Removed; + this->key()->~Key(); + this->value()->~Value(); + } +}; + +/** + * An IntrusiveMapSlot uses two special values of the key to indicate whether the slot is empty or + * removed. This saves some memory in all cases and is more efficient in many cases. The KeyInfo + * type indicates which specific values are used. An example for a KeyInfo implementation is + * PointerKeyInfo. + * + * The special key values are expected to be trivially destructible. + */ +template class IntrusiveMapSlot { + private: + Key m_key = KeyInfo::get_empty(); + AlignedBuffer m_value_buffer; + + public: + IntrusiveMapSlot() = default; + + ~IntrusiveMapSlot() + { + if (KeyInfo::is_not_empty_or_removed(m_key)) { + this->value()->~Value(); + } + } + + IntrusiveMapSlot(const IntrusiveMapSlot &other) : m_key(other.m_key) + { + if (KeyInfo::is_not_empty_or_removed(m_key)) { + new (this->value()) Value(*other.value()); + } + } + + IntrusiveMapSlot(IntrusiveMapSlot &&other) noexcept : m_key(other.m_key) + { + if (KeyInfo::is_not_empty_or_removed(m_key)) { + new (this->value()) Value(std::move(*other.value())); + } + } + + Key *key() + { + return &m_key; + } + + const Key *key() const + { + return &m_key; + } + + Value *value() + { + return (Value *)m_value_buffer.ptr(); + } + + const Value *value() const + { + return (const Value *)m_value_buffer.ptr(); + } + + bool is_occupied() const + { + return KeyInfo::is_not_empty_or_removed(m_key); + } + + bool is_empty() const + { + return KeyInfo::is_empty(m_key); + } + + template uint32_t get_hash(const Hash &hash) + { + BLI_assert(this->is_occupied()); + return hash(*this->key()); + } + + void relocate_occupied_here(IntrusiveMapSlot &other, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + BLI_assert(other.is_occupied()); + m_key = std::move(other.m_key); + new (this->value()) Value(std::move(*other.value())); + other.m_key.~Key(); + other.value()->~Value(); + } 
+ + template + bool contains(const ForwardKey &key, const IsEqual &is_equal, uint32_t UNUSED(hash)) const + { + BLI_assert(KeyInfo::is_not_empty_or_removed(key)); + return is_equal(key, m_key); + } + + template + void occupy(ForwardKey &&key, ForwardValue &&value, uint32_t hash) + { + BLI_assert(!this->is_occupied()); + BLI_assert(KeyInfo::is_not_empty_or_removed(key)); + this->occupy_without_value(std::forward(key), hash); + new (this->value()) Value(std::forward(value)); + } + + template void occupy_without_value(ForwardKey &&key, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + BLI_assert(KeyInfo::is_not_empty_or_removed(key)); + m_key = std::forward(key); + } + + void remove() + { + BLI_assert(this->is_occupied()); + KeyInfo::remove(m_key); + this->value()->~Value(); + } +}; + +template struct DefaultMapSlot; + +/** + * Use SimpleMapSlot by default, because it is the smallest slot type, that works for all keys. + */ +template struct DefaultMapSlot { + using type = SimpleMapSlot; +}; + +/** + * Use a special slot type for pointer keys, because we can store whether a slot is empty or + * removed with special pointer values. 
+ */ +template struct DefaultMapSlot { + using type = IntrusiveMapSlot>; +}; + +} // namespace BLI + +#endif /* __BLI_MAP_SLOTS_HH__ */ diff --git a/source/blender/blenlib/BLI_math_bits.h b/source/blender/blenlib/BLI_math_bits.h index 842fce22f91..0283622ca89 100644 --- a/source/blender/blenlib/BLI_math_bits.h +++ b/source/blender/blenlib/BLI_math_bits.h @@ -23,6 +23,7 @@ */ #include "BLI_math_inline.h" +#include "BLI_utildefines.h" #ifdef __cplusplus extern "C" { diff --git a/source/blender/blenlib/BLI_memory_utils.hh b/source/blender/blenlib/BLI_memory_utils.hh index d9acf08a43f..2f7b27240f3 100644 --- a/source/blender/blenlib/BLI_memory_utils.hh +++ b/source/blender/blenlib/BLI_memory_utils.hh @@ -21,71 +21,191 @@ * \ingroup bli */ -#include #include #include "BLI_utildefines.h" namespace BLI { -using std::copy; -using std::copy_n; -using std::uninitialized_copy; -using std::uninitialized_copy_n; -using std::uninitialized_fill; -using std::uninitialized_fill_n; +/** + * Call the default constructor on n consecutive elements. For trivially constructible types, this + * does nothing. + * + * Before: + * ptr: uninitialized + * After: + * ptr: initialized + */ +template void default_construct_n(T *ptr, uint n) +{ + /* This is not strictly necessary, because the loop below will be optimized away anyway. It is + * nice to make behavior this explicitely, though. */ + if (std::is_trivially_constructible::value) { + return; + } -template void construct_default(T *ptr) + for (uint i = 0; i < n; i++) { + new (ptr + i) T; + } +} + +/** + * Call the destructor on n consecutive values. For trivially destructible types, this does + * nothing. + * + * Before: + * ptr: initialized + * After: + * ptr: uninitialized + */ +template void destruct_n(T *ptr, uint n) { - new (ptr) T(); + /* This is not strictly necessary, because the loop below will be optimized away anyway. It is + * nice to make behavior this explicitely, though. 
*/ + if (std::is_trivially_destructible::value) { + return; + } + + for (uint i = 0; i < n; i++) { + ptr[i].~T(); + } } -template void destruct(T *ptr) +/** + * Copy n values from src to dst. + * + * Before: + * src: initialized + * dst: initialized + * After: + * src: initialized + * dst: initialized + */ +template void initialized_copy_n(const T *src, uint n, T *dst) { - ptr->~T(); + for (uint i = 0; i < n; i++) { + dst[i] = src[i]; + } } -template void destruct_n(T *ptr, uint n) +/** + * Copy n values from src to dst. + * + * Before: + * src: initialized + * dst: uninitialized + * After: + * src: initialized + * dst: initialized + */ +template void uninitialized_copy_n(const T *src, uint n, T *dst) { for (uint i = 0; i < n; i++) { - ptr[i].~T(); + new (dst + i) T(src[i]); } } -template void uninitialized_move_n(T *src, uint n, T *dst) +/** + * Move n values from src to dst. + * + * Before: + * src: initialized + * dst: initialized + * After: + * src: initialized, moved-from + * dst: initialized + */ +template void initialized_move_n(T *src, uint n, T *dst) { - std::uninitialized_copy_n(std::make_move_iterator(src), n, dst); + for (uint i = 0; i < n; i++) { + dst[i] = std::move(src[i]); + } } -template void move_n(T *src, uint n, T *dst) +/** + * Move n values from src to dst. + * + * Before: + * src: initialized + * dst: uninitialized + * After: + * src: initialized, moved-from + * dst: initialized + */ +template void uninitialized_move_n(T *src, uint n, T *dst) { - std::copy_n(std::make_move_iterator(src), n, dst); + for (uint i = 0; i < n; i++) { + new (dst + i) T(std::move(src[i])); + } } -template void uninitialized_relocate(T *src, T *dst) +/** + * Relocate n values from src to dst. Relocation is a move followed by destruction of the src + * value. 
+ * + * Before: + * src: initialized + * dst: initialized + * After: + * src: uninitialized + * dst: initialized + */ +template void initialized_relocate_n(T *src, uint n, T *dst) { - new (dst) T(std::move(*src)); - destruct(src); + initialized_move_n(src, n, dst); + destruct_n(src, n); } +/** + * Relocate n values from src to dst. Relocation is a move followed by destruction of the src + * value. + * + * Before: + * src: initialized + * dst: uinitialized + * After: + * src: uninitialized + * dst: initialized + */ template void uninitialized_relocate_n(T *src, uint n, T *dst) { uninitialized_move_n(src, n, dst); destruct_n(src, n); } -template void relocate(T *src, T *dst) +/** + * Copy the value to n consecutive elements. + * + * Before: + * dst: initialized + * After: + * dst: initialized + */ +template void initialized_fill_n(T *dst, uint n, const T &value) { - *dst = std::move(*src); - destruct(src); + for (uint i = 0; i < n; i++) { + dst[i] = value; + } } -template void relocate_n(T *src, uint n, T *dst) +/** + * Copy the value to n consecutive elements. + * + * Before: + * dst: uninitialized + * After: + * dst: initialized + */ +template void uninitialized_fill_n(T *dst, uint n, const T &value) { - move_n(src, n, dst); - destruct_n(src, n); + for (uint i = 0; i < n; i++) { + new (dst + i) T(value); + } } +/** + * The same as std::unique_ptr. This can be removed when we start using C++14. + */ template std::unique_ptr make_unique(Args &&... args) { return std::unique_ptr(new T(std::forward(args)...)); @@ -98,13 +218,24 @@ template struct DestructValueAtAddress { } }; +/** + * A destruct_ptr is like unique_ptr, but it will only call the destructor and will not free the + * memory. This is useful when using custom allocators. + */ template using destruct_ptr = std::unique_ptr>; -template class alignas(Alignment) AlignedBuffer { +/** + * An `AlignedBuffer` is simply a byte array with the given size and alignment. 
The buffer will + * not be initialized by the default constructor. + * + * This can be used to reserve memory for C++ objects whose lifetime is different from the + * lifetime of the object they are embedded in. It's used by containers with small buffer + * optimization and hash table implementations. + */ +template class alignas(Alignment) AlignedBuffer { private: /* Don't create an empty array. This causes problems with some compilers. */ - static constexpr uint ActualSize = (Size > 0) ? Size : 1; - char m_buffer[ActualSize]; + char m_buffer[(Size > 0) ? Size : 1]; public: void *ptr() diff --git a/source/blender/blenlib/BLI_open_addressing.hh b/source/blender/blenlib/BLI_open_addressing.hh deleted file mode 100644 index 3bd932350d0..00000000000 --- a/source/blender/blenlib/BLI_open_addressing.hh +++ /dev/null @@ -1,316 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef __BLI_OPEN_ADDRESSING_HH__ -#define __BLI_OPEN_ADDRESSING_HH__ - -/** \file - * \ingroup bli - * - * This class offers a useful abstraction for other containers that implement hash tables using - * open addressing. It handles the following aspects: - * - Allocation and deallocation of the open addressing array. - * - Optional small object optimization. 
- * - Keeps track of how many elements and dummies are in the table. - * - * The nice thing about this abstraction is that it does not get in the way of any performance - * optimizations. The data that is actually stored in the table is still fully defined by the - * actual hash table implementation. - */ - -#include - -#include "BLI_allocator.hh" -#include "BLI_array.hh" -#include "BLI_math_base.h" -#include "BLI_memory_utils.hh" -#include "BLI_utildefines.h" - -namespace BLI { - -/** \name Constexpr utility functions. - * \{ */ - -inline constexpr int is_power_of_2_i_constexpr(int n) -{ - return (n & (n - 1)) == 0; -} - -inline constexpr uint32_t log2_floor_u_constexpr(uint32_t x) -{ - return x <= 1 ? 0 : 1 + log2_floor_u_constexpr(x >> 1); -} - -inline constexpr uint32_t log2_ceil_u_constexpr(uint32_t x) -{ - return (is_power_of_2_i_constexpr((int)x)) ? log2_floor_u_constexpr(x) : - log2_floor_u_constexpr(x) + 1; -} - -template inline constexpr IntT ceil_division(IntT x, IntT y) -{ - BLI_STATIC_ASSERT(!std::is_signed::value, ""); - return x / y + ((x % y) != 0); -} - -template inline constexpr IntT floor_division(IntT x, IntT y) -{ - BLI_STATIC_ASSERT(!std::is_signed::value, ""); - return x / y; -} - -inline constexpr uint8_t compute_item_exponent(uint32_t min_usable_slots, - uint32_t slots_per_item, - uint32_t max_load_factor_numerator, - uint32_t max_load_factor_denominator) -{ - // uint64_t min_total_slots = ceil_division((uint64_t)min_usable_slots * - // (uint64_t)max_load_factor_denominator, - // (uint64_t)max_load_factor_numerator); - // uint32_t min_total_items = (uint32_t)ceil_division(min_total_slots, (uint64_t)slots_per_item); - // uint8_t item_exponent = (uint8_t)log2_ceil_u_constexpr(min_total_items); - // return item_exponent; - - return (uint8_t)log2_ceil_u_constexpr((uint32_t)ceil_division( - ceil_division((uint64_t)min_usable_slots * (uint64_t)max_load_factor_denominator, - (uint64_t)max_load_factor_numerator), - (uint64_t)slots_per_item)); -} - 
-/** \} */ - -template -class OpenAddressingArray { - private: - static constexpr uint32_t s_max_load_factor_numerator = 1; - static constexpr uint32_t s_max_load_factor_denominator = 2; - static constexpr uint32_t s_slots_per_item = Item::slots_per_item; - - static constexpr uint8_t s_small_storage_item_exponent = compute_item_exponent( - MinUsableSlotsInSmallStorage, - s_slots_per_item, - s_max_load_factor_numerator, - s_max_load_factor_denominator); - static constexpr uint32_t s_items_in_small_storage = 1u << s_small_storage_item_exponent; - - /* Invariants: - * 2^m_item_exponent = m_item_amount - * m_item_amount * s_slots_per_item = m_slots_total - * m_slot_mask = m_slots_total - 1 - * m_slots_set_or_dummy < m_slots_total - */ - - /* Number of items in the hash table. Must be a power of two. */ - uint32_t m_item_amount; - /* Exponent of the current item amount. */ - uint8_t m_item_exponent; - /* Number of elements that could be stored in the table when the load factor is 1. */ - uint32_t m_slots_total; - /* Number of elements that are not empty. */ - uint32_t m_slots_set_or_dummy; - /* Number of dummy entries. */ - uint32_t m_slots_dummy; - /* Max number of slots that can be non-empty according to the load factor. */ - uint32_t m_slots_usable; - /* Can be used to map a hash value into the range of valid slot indices. 
*/ - uint32_t m_slot_mask; - - Array m_items; - - public: - explicit OpenAddressingArray(uint8_t item_exponent = s_small_storage_item_exponent) - { - m_item_exponent = item_exponent; - m_item_amount = 1u << item_exponent; - m_slots_total = m_item_amount * s_slots_per_item; - m_slot_mask = m_slots_total - 1; - m_slots_set_or_dummy = 0; - m_slots_dummy = 0; - m_slots_usable = (uint32_t)floor_division((uint64_t)m_slots_total * - (uint64_t)s_max_load_factor_numerator, - (uint64_t)s_max_load_factor_denominator); - - m_items = Array(m_item_amount); - } - - ~OpenAddressingArray() = default; - - OpenAddressingArray(const OpenAddressingArray &other) = default; - - OpenAddressingArray(OpenAddressingArray &&other) noexcept - { - m_slots_total = other.m_slots_total; - m_slots_set_or_dummy = other.m_slots_set_or_dummy; - m_slots_dummy = other.m_slots_dummy; - m_slots_usable = other.m_slots_usable; - m_slot_mask = other.m_slot_mask; - m_item_amount = other.m_item_amount; - m_item_exponent = other.m_item_exponent; - m_items = std::move(other.m_items); - - other.~OpenAddressingArray(); - new (&other) OpenAddressingArray(); - } - - OpenAddressingArray &operator=(const OpenAddressingArray &other) - { - if (this == &other) { - return *this; - } - this->~OpenAddressingArray(); - new (this) OpenAddressingArray(other); - return *this; - } - - OpenAddressingArray &operator=(OpenAddressingArray &&other) - { - if (this == &other) { - return *this; - } - this->~OpenAddressingArray(); - new (this) OpenAddressingArray(std::move(other)); - return *this; - } - - Allocator &allocator() - { - return m_items.allocator(); - } - - /* Prepare a new array that can hold a minimum of min_usable_slots elements. All entries are - * empty. 
*/ - OpenAddressingArray init_reserved(uint32_t min_usable_slots) const - { - uint8_t item_exponent = compute_item_exponent(min_usable_slots, - s_slots_per_item, - s_max_load_factor_numerator, - s_max_load_factor_denominator); - OpenAddressingArray grown(item_exponent); - grown.m_slots_set_or_dummy = this->slots_set(); - return grown; - } - - /** - * Amount of items in the array times the number of slots per item. - */ - uint32_t slots_total() const - { - return m_slots_total; - } - - /** - * Amount of slots that are initialized with some value that is not empty or dummy. - */ - uint32_t slots_set() const - { - return m_slots_set_or_dummy - m_slots_dummy; - } - - /** - * Amount of slots that can be used before the array should grow. - */ - uint32_t slots_usable() const - { - return m_slots_usable; - } - - /** - * Update the counters after one empty element is used for a newly added element. - */ - void update__empty_to_set() - { - m_slots_set_or_dummy++; - } - - /** - * Update the counters after one previously dummy element becomes set. - */ - void update__dummy_to_set() - { - m_slots_dummy--; - } - - /** - * Update the counters after one previously set element becomes a dummy. - */ - void update__set_to_dummy() - { - m_slots_dummy++; - } - - /** - * Access the current slot mask for this array. - */ - uint32_t slot_mask() const - { - return m_slot_mask; - } - - /** - * Access the item for a specific item index. - * Note: The item index is not necessarily the slot index. 
- */ - const Item &item(uint32_t item_index) const - { - return m_items[item_index]; - } - - Item &item(uint32_t item_index) - { - return m_items[item_index]; - } - - uint8_t item_exponent() const - { - return m_item_exponent; - } - - uint32_t item_amount() const - { - return m_item_amount; - } - - bool should_grow() const - { - return m_slots_set_or_dummy >= m_slots_usable; - } - - Item *begin() - { - return m_items.begin(); - } - - Item *end() - { - return m_items.end(); - } - - const Item *begin() const - { - return m_items.begin(); - } - - const Item *end() const - { - return m_items.end(); - } -}; - -} // namespace BLI - -#endif /* __BLI_OPEN_ADDRESSING_HH__ */ diff --git a/source/blender/blenlib/BLI_optional.hh b/source/blender/blenlib/BLI_optional.hh index eac11293781..aeef0e7e151 100644 --- a/source/blender/blenlib/BLI_optional.hh +++ b/source/blender/blenlib/BLI_optional.hh @@ -37,16 +37,6 @@ template class Optional { bool m_set; public: - static Optional FromPointer(const T *ptr) - { - if (ptr == nullptr) { - return Optional(); - } - else { - return Optional(*ptr); - } - } - Optional() : m_set(false) { } diff --git a/source/blender/blenlib/BLI_probing_strategies.hh b/source/blender/blenlib/BLI_probing_strategies.hh new file mode 100644 index 00000000000..9fa402d4244 --- /dev/null +++ b/source/blender/blenlib/BLI_probing_strategies.hh @@ -0,0 +1,250 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __BLI_PROBING_STRATEGIES_HH__ +#define __BLI_PROBING_STRATEGIES_HH__ + +/** \file + * \ingroup bli + * + * This file implements different probing strategies. Those can be used by different hash table + * implementations like BLI::Set and BLI::Map. A probing strategy produces a sequence of values + * based on an initial hash value. + * + * A probing strategy has to implement the following methods: + * - Constructor(uint32_t hash): Start a new probing sequence based on the given hash. + * - get() const -> uint32_t: Get the current value in the sequence. + * - next() -> void: Update the internal state, so that the next value can be accessed with get(). + * - linear_steps() -> uint32_t: Returns number of linear probing steps that should be done. + * + * Using linear probing steps between larger jumps can result in better performance, due to + * improved cache usage. It's a way of getting the benefits or linear probing without the + * clustering issues. However, more linear steps can also make things slower when the initial hash + * produces many collisions. + * + * Every probing strategy has to guarantee, that every possible uint32_t is returned eventually. + * This is necessary for correctness. If this is not the case, empty slots might not be found. + * + * The SLOT_PROBING_BEGIN and SLOT_PROBING_END macros can be used to implement a loop that iterates + * over a probing sequence. + * + * Probing strategies can be evaluated with many different criteria. Different use cases often + * have different optimal strategies. Examples: + * - If the hash function generates a well distributed initial hash value, the constructor should + * be as short as possible. 
This is because the hash value can be used as slot index almost + * immediately, without too many collisions. This is also a perfect use case for linear steps. + * - If the hash function is bad, it can help if the probing strategy remixes the hash value, + * before the first slot is accessed. + * - Different next() methods can remix the hash value in different ways. Depending on which bits + * of the hash value contain the most information, different rehashing strategies work best. + * - When the hash table is very small, having a trivial hash function and then doing linear + * probing might work best. + */ + +#include "BLI_sys_types.h" + +namespace BLI { + +/** + * The simplest probing strategy. It's bad in most cases, because it produces clusters in the hash + * table, which result in many collisions. However, if the hash function is very good or the hash + * table is small, this strategy might even work best. + */ +class LinearProbingStrategy { + private: + uint32_t m_hash; + + public: + LinearProbingStrategy(uint32_t hash) : m_hash(hash) + { + } + + void next() + { + m_hash++; + } + + uint32_t get() const + { + return m_hash; + } + + uint32_t linear_steps() const + { + return UINT32_MAX; + } +}; + +/** + * A slightly adapted quadratic probing strategy. The distance to the original slot increases + * quadratically. This method also leads to clustering. Another disadvantage is that not all bits + * of the original hash are used. + * + * The distance i * i is not used, because it does not guarantee, that every slot is hit. + * Instead (i * i + i) / 2 is used, which has this desired property. + * + * In the first few steps, this strategy can have good cache performance. It largely depends on how + * many keys fit into a cache line in the hash table. 
+ */ +class QuadraticProbingStrategy { + private: + uint32_t m_original_hash; + uint32_t m_current_hash; + uint32_t m_iteration; + + public: + QuadraticProbingStrategy(uint32_t hash) + : m_original_hash(hash), m_current_hash(hash), m_iteration(1) + { + } + + void next() + { + m_current_hash = m_original_hash + ((m_iteration * m_iteration + m_iteration) >> 1); + m_iteration++; + } + + uint32_t get() const + { + return m_current_hash; + } + + uint32_t linear_steps() const + { + return 1; + } +}; + +/** + * This is the probing strategy used by CPython (in 2020). + * + * It is very fast when the original hash value is good. If there are collisions, more bits of the + * hash value are taken into account. + * + * LinearSteps: Can be set to something larger than 1 for improved cache performance in some cases. + * PreShuffle: When true, the initial call to next() will be done to the constructor. This can help + * when the hash function has put little information into the lower bits. + */ +template class PythonProbingStrategy { + private: + uint32_t m_hash; + uint32_t m_perturb; + + public: + PythonProbingStrategy(uint32_t hash) : m_hash(hash), m_perturb(hash) + { + if (PreShuffle) { + this->next(); + } + } + + void next() + { + m_perturb >>= 5; + m_hash = 5 * m_hash + 1 + m_perturb; + } + + uint32_t get() const + { + return m_hash; + } + + uint32_t linear_steps() const + { + return LinearSteps; + } +}; + +/** + * Similar to the Python probing strategy. However, it does a bit more shuffling in the next() + * method. This way more bits are taken into account earlier. After a couple of collisions (that + * should happen rarely), it will fallback to a sequence that hits every slot. 
+ */ +template class ShuffleProbingStrategy { + private: + uint32_t m_hash; + uint32_t m_perturb; + + public: + ShuffleProbingStrategy(uint32_t hash) : m_hash(hash), m_perturb(hash) + { + if (PreShuffle) { + this->next(); + } + } + + void next() + { + if (m_perturb != 0) { + m_perturb >>= 10; + m_hash = ((m_hash >> 16) ^ m_hash) * 0x45d9f3b + m_perturb; + } + else { + m_hash = 5 * m_hash + 1; + } + } + + uint32_t get() const + { + return m_hash; + } + + uint32_t linear_steps() const + { + return LinearSteps; + } +}; + +/** + * Having a specified default is convenient. + */ +using DefaultProbingStrategy = PythonProbingStrategy<>; + +/* Turning off clang format here, because otherwise it will mess up the alignment between the + * macros. */ +// clang-format off + +/** + * Both macros together form a loop that iterates over slot indices in a hash table with a + * power-of-two size. + * + * You must not `break` out of this loop. Only `return` is permitted. If you don't return + * out of the loop, it will be an infinite loop. These loops should not be nested within the + * same function. + * + * PROBING_STRATEGY: Class describing the probing strategy. + * HASH: The initial hash as produced by a hash function. + * MASK: A bit mask such that (hash & MASK) is a valid slot index. + * R_SLOT_INDEX: Name of the variable that will contain the slot index. 
+ */ +#define SLOT_PROBING_BEGIN(PROBING_STRATEGY, HASH, MASK, R_SLOT_INDEX) \ + PROBING_STRATEGY probing_strategy(HASH); \ + do { \ + uint32_t linear_offset = 0; \ + uint32_t current_hash = probing_strategy.get(); \ + do { \ + uint32_t R_SLOT_INDEX = (current_hash + linear_offset) & MASK; + +#define SLOT_PROBING_END() \ + } while (++linear_offset < probing_strategy.linear_steps()); \ + probing_strategy.next(); \ + } while (true) + +// clang-format on + +} // namespace BLI + +#endif /* __BLI_PROBING_STRATEGIES_HH__ */ diff --git a/source/blender/blenlib/BLI_set.hh b/source/blender/blenlib/BLI_set.hh index dc9df5d116a..d991c97a629 100644 --- a/source/blender/blenlib/BLI_set.hh +++ b/source/blender/blenlib/BLI_set.hh @@ -20,270 +20,411 @@ /** \file * \ingroup bli * - * This file provides a set implementation that uses open addressing with probing. + * A `BLI::Set` is an unordered container for unique elements of type `Key`. It is designed to + * be a more convenient and efficient replacement for `std::unordered_set`. All core operations + * (add, remove and contains) can be done in O(1) amortized expected time. + * + * In most cases, your default choice for a hash set in Blender should be `BLI::Set`. + * + * BLI::Set is implemented using open addressing in a slot array with a power-of-two size. Every + * slot is in one of three states: empty, occupied or removed. If a slot is occupied, it contains + * an instance of the key type. + * + * Benchmarking and comparing hash tables is hard, because many factors influence the result. The + * performance of a hash table depends on the combination of the hash function, probing strategy, + * max load factor, key type, slot type and the data distribution. This implementation is designed + * to be relatively fast by default in all cases. However, it also offers many customization + * points that allow it to be optimized for a specific use case. + * + * A rudimentary benchmark can be found in BLI_set_test.cc. 
The results of that benchmark are + * there as well. The numbers show that in this specific case BLI::Set outperforms + * std::unordered_set consistently by a good amount. + * + * Some noteworthy information: + * - Key must be a movable type. + * - Pointers to keys might be invalidated when the set is changed or moved. + * - The hash function can be customized. See BLI_hash.hh for details. + * - The probing strategy can be customized. See BLI_probing_stragies.hh for details. + * - The slot type can be customized. See BLI_set_slots.hh for details. + * - Small buffer optimization is enabled by default, if the key is not too large. + * - The methods `add_new` and `remove_contained` should be used instead of `add` and `remove` + * whenever appropriate. Assumptions and intention are described better this way. + * - Lookups can be performed using types other than Key without conversion. For that use the + * methods ending with `_as`. The template parameters Hash and IsEqual have to support the other + * key type. This can greatly improve performance when the set contains strings. + * - The default constructor is cheap, even when a large InlineBufferCapacity is used. A large + * slot array will only be initialized when the first key is added. + * - The `print_stats` method can be used to get information about the distribution of keys and + * memory usage of the set. + * - The method names don't follow the std::unordered_set names in many cases. Searching for such + * names in this file will usually let you discover the new name. + * - There is a StdUnorderedSetWrapper class, that wraps std::unordered_set and gives it the same + * interface as BLI::Set. This is useful for benchmarking. + * + * Possible Improvements: + * - Use a branchless loop over slots in grow function (measured ~10% performance improvement when + * the distribution of occupied slots is suffiently random). + * - Support max load factor customization. 
+ * - Improve performance with large data sets through software prefetching. I got fairly + * significant improvements in simple tests (~30% faster). It still needs to be investigated how + * to make a nice interface for this functionality. */ +#include + +#include "BLI_array.hh" #include "BLI_hash.hh" -#include "BLI_open_addressing.hh" -#include "BLI_vector.hh" +#include "BLI_hash_tables.hh" +#include "BLI_probing_strategies.hh" +#include "BLI_set_slots.hh" namespace BLI { -// clang-format off - -#define ITER_SLOTS_BEGIN(VALUE, ARRAY, OPTIONAL_CONST, R_ITEM, R_OFFSET) \ - uint32_t hash = DefaultHash{}(VALUE); \ - uint32_t perturb = hash; \ - while (true) { \ - uint32_t item_index = (hash & ARRAY.slot_mask()) >> OFFSET_SHIFT; \ - uint8_t R_OFFSET = hash & OFFSET_MASK; \ - uint8_t initial_offset = R_OFFSET; \ - OPTIONAL_CONST Item &R_ITEM = ARRAY.item(item_index); \ - do { - -#define ITER_SLOTS_END(R_OFFSET) \ - R_OFFSET = (R_OFFSET + 1) & OFFSET_MASK; \ - } while (R_OFFSET != initial_offset); \ - perturb >>= 5; \ - hash = hash * 5 + 1 + perturb; \ - } ((void)0) - -// clang-format on - -template +template< + /** Type of the elements that are stored in this set. It has to be movable. Furthermore, the + * hash and is-equal functions have to support it. + */ + typename Key, + /** + * The minimum number of elements that can be stored in this Set without doing a heap + * allocation. This is useful when you expect to have many small sets. However, keep in mind + * that (unlike vector) initializing a set has a O(n) cost in the number of slots. + * + * When Key is large, the small buffer optimization is disabled by default to avoid large + * unexpected allocations on the stack. It can still be enabled explicitely though. + */ + uint32_t InlineBufferCapacity = (sizeof(Key) < 100) ? 4 : 0, + /** + * The strategy used to deal with collisions. They are defined in BLI_probing_strategies.hh. 
+ */ + typename ProbingStrategy = DefaultProbingStrategy, + /** + * The hash function used to hash the keys. There is a default for many types. See BLI_hash.hh + * for examples on how to define a custom hash function. + */ + typename Hash = DefaultHash, + /** + * The equality operator used to compare keys. By default it will simply compare keys using the + * `==` operator. + */ + typename IsEqual = DefaultEquality, + /** + * This is what will actually be stored in the hash table array. At a minimum a slot has to + * be able to hold a key and information about whether the slot is empty, occupied or removed. + * Using a non-standard slot type can improve performance or reduce the memory footprint. For + * example, a hash can be stored in the slot, to make inequality checks more efficient. Some + * types have special values that can represent an empty or removed state, eliminating the need + * for an additional variable. Also see BLI_set_slots.hh. + */ + typename Slot = typename DefaultSetSlot::type, + /** + * The allocator used by this set. Should rarely be changed, except when you don't want that + * MEM_* is used internally. + */ + typename Allocator = GuardedAllocator> class Set { private: - static constexpr uint OFFSET_MASK = 3; - static constexpr uint OFFSET_SHIFT = 2; + /** + * Slots are either empty, occupied or removed. The number of occupied slots can be computed by + * subtracting the removed slots from the occupied-and-removed slots. + */ + uint32_t m_removed_slots; + uint32_t m_occupied_and_removed_slots; - class Item { - private: - static constexpr uint8_t IS_EMPTY = 0; - static constexpr uint8_t IS_SET = 1; - static constexpr uint8_t IS_DUMMY = 2; + /** + * The maximum number of slots that can be used (either occupied or removed) until the set has to + * grow. This is the total number of slots times the max load factor. 
+ */ + uint32_t m_usable_slots; - uint8_t m_status[4]; - AlignedBuffer<4 * sizeof(T), alignof(T)> m_buffer; + /** + * The number of slots minus one. This is a bit mask that can be used to turn any integer into a + * valid slot index efficiently. + */ + uint32_t m_slot_mask; - public: - static constexpr uint slots_per_item = 4; + /** This is called to hash incoming keys. */ + Hash m_hash; - Item() - { - for (uint offset = 0; offset < 4; offset++) { - m_status[offset] = IS_EMPTY; - } - } + /** This is called to check equality of two keys. */ + IsEqual m_is_equal; - ~Item() - { - for (uint offset = 0; offset < 4; offset++) { - if (m_status[offset] == IS_SET) { - destruct(this->value(offset)); - } - } - } + /** The max load factor is 1/2 = 50% by default. */ +#define LOAD_FACTOR 1, 2 + LoadFactor m_max_load_factor = LoadFactor(LOAD_FACTOR); + using SlotArray = + Array; +#undef LOAD_FACTOR - Item(const Item &other) - { - for (uint offset = 0; offset < 4; offset++) { - uint8_t status = other.m_status[offset]; - m_status[offset] = status; - if (status == IS_SET) { - T *src = other.value(offset); - T *dst = this->value(offset); - new (dst) T(*src); - } - } - } + /** + * This is the array that contains the actual slots. There is always at least one empty slot and + * the size of the array is a power of two. + */ + SlotArray m_slots; - Item(Item &&other) noexcept - { - for (uint offset = 0; offset < 4; offset++) { - uint8_t status = other.m_status[offset]; - m_status[offset] = status; - if (status == IS_SET) { - T *src = other.value(offset); - T *dst = this->value(offset); - new (dst) T(std::move(*src)); - } - } - } + /** Iterate over a slot index sequence for a given hash. 
*/ +#define SET_SLOT_PROBING_BEGIN(HASH, R_SLOT) \ + SLOT_PROBING_BEGIN (ProbingStrategy, HASH, m_slot_mask, SLOT_INDEX) \ + auto &R_SLOT = m_slots[SLOT_INDEX]; +#define SET_SLOT_PROBING_END() SLOT_PROBING_END() - Item &operator=(const Item &other) = delete; - Item &operator=(Item &&other) = delete; + public: + /** + * Initialize an empty set. This is a cheap operation no matter how large the inline buffer + * is. This is necessary to avoid a high cost when no elements are added at all. An optimized + * grow operation is performed on the first insertion. + */ + Set() + : m_removed_slots(0), + m_occupied_and_removed_slots(0), + m_usable_slots(0), + m_slot_mask(0), + m_slots(1) + { + } - T *value(uint offset) const - { - return (T *)m_buffer.ptr() + offset; - } + ~Set() = default; - template void store(uint offset, ForwardT &&value) - { - BLI_assert(m_status[offset] != IS_SET); - m_status[offset] = IS_SET; - T *dst = this->value(offset); - new (dst) T(std::forward(value)); - } + /** + * Construct a set that contains the given keys. Duplicates will be removed automatically. 
+ */ + Set(const std::initializer_list &list) : Set() + { + this->add_multiple(list); + } - void set_dummy(uint offset) - { - BLI_assert(m_status[offset] == IS_SET); - m_status[offset] = IS_DUMMY; - destruct(this->value(offset)); - } + Set(const Set &other) = default; - bool is_empty(uint offset) const - { - return m_status[offset] == IS_EMPTY; - } + Set(Set &&other) noexcept + : m_removed_slots(other.m_removed_slots), + m_occupied_and_removed_slots(other.m_occupied_and_removed_slots), + m_usable_slots(other.m_usable_slots), + m_slot_mask(other.m_slot_mask), + m_hash(std::move(other.m_hash)), + m_is_equal(std::move(other.m_is_equal)), + m_slots(std::move(other.m_slots)) + { + other.~Set(); + new (&other) Set(); + } - bool is_set(uint offset) const - { - return m_status[offset] == IS_SET; + Set &operator=(const Set &other) + { + if (this == &other) { + return *this; } - bool is_dummy(uint offset) const - { - return m_status[offset] == IS_DUMMY; - } + this->~Set(); + new (this) Set(other); - bool has_value(uint offset, const T &value) const - { - return m_status[offset] == IS_SET && *this->value(offset) == value; + return *this; + } + + Set &operator=(Set &&other) + { + if (this == &other) { + return *this; } - }; - using ArrayType = OpenAddressingArray; - ArrayType m_array; + this->~Set(); + new (this) Set(std::move(other)); - public: - Set() = default; + return *this; + } /** - * Create a new set that contains the given elements. + * Add a new key to the set. This invokes undefined behavior when the key is in the set already. + * When you know for certain that a key is not in the set yet, use this method for better + * performance. This also expresses the intent better. 
*/ - Set(ArrayRef values) + void add_new(const Key &key) { - this->reserve(values.size()); - for (const T &value : values) { - this->add(value); - } + this->add_new__impl(key, m_hash(key)); + } + void add_new(Key &&key) + { + this->add_new__impl(std::move(key), m_hash(key)); + } + + /** + * Add a key to the set. If the key exists in the set already, nothing is done. The return value + * is true if the key was newly added to the set. + * + * This is similar to std::unordered_set::insert. + */ + bool add(const Key &key) + { + return this->add_as(key); + } + bool add(Key &&key) + { + return this->add_as(std::move(key)); } /** - * Create a new set from an initializer list. + * Same as `add`, but accepts other key types that are supported by the hash function. */ - Set(std::initializer_list values) : Set(ArrayRef(values)) + template bool add_as(ForwardKey &&key) { + return this->add__impl(std::forward(key), m_hash(key)); } /** - * Make the set large enough to hold the given amount of elements. + * Convenience function to add many keys to the set at once. Duplicates are removed + * automatically. + * + * We might be able to make this faster than sequentially adding all keys, but that is not + * implemented yet. */ - void reserve(uint32_t min_usable_slots) + void add_multiple(ArrayRef keys) { - if (m_array.slots_usable() < min_usable_slots) { - this->grow(min_usable_slots); + for (const Key &key : keys) { + this->add(key); } } /** - * Add a new element to the set. - * Asserts that the element did not exist in the set before. + * Convenience function to add many new keys to the set at once. The keys must not exist in the + * set before and there must not be duplicates in the array. */ - void add_new(const T &value) + void add_multiple_new(ArrayRef keys) { - this->add_new__impl(value); + for (const Key &key : keys) { + this->add_new(key); + } } - void add_new(T &&value) + + /** + * Returns true if the key is in the set. 
+ * + * This is similar to std::unordered_set::find() != std::unordered_set::end(). + */ + bool contains(const Key &key) const { - this->add_new__impl(std::move(value)); + return this->contains_as(key); } /** - * Add a new value to the set if it does not exist yet. - * Returns true of the value has been newly added. + * Same as `contains`, but accepts other key types that are supported by the hash function. */ - bool add(const T &value) + template bool contains_as(const ForwardKey &key) const { - return this->add__impl(value); + return this->contains__impl(key, m_hash(key)); } - bool add(T &&value) + + /** + * Deletes the key from the set. Returns true when the key did exist beforehand, otherwise false. + * + * This is similar to std::unordered_set::erase. + */ + bool remove(const Key &key) { - return this->add__impl(std::move(value)); + return this->remove_as(key); } /** - * Add multiple elements to the set. + * Same as `remove`, but accepts other key types that are supported by the hash function. */ - void add_multiple(ArrayRef values) + template bool remove_as(const ForwardKey &key) { - for (const T &value : values) { - this->add(value); - } + return this->remove__impl(key, m_hash(key)); } /** - * Add multiple new elements to the set. - * Asserts that none of the elements existed in the set before. + * Deletes the key from the set. This invokes undefined behavior when the key is not in the map. */ - void add_multiple_new(ArrayRef values) + void remove_contained(const Key &key) { - for (const T &value : values) { - this->add_new(value); - } + this->remove_contained_as(key); } /** - * Returns true when the value is in the set, otherwise false. + * Same as `remove_contained`, but accepts other key types that are supported by the hash + * function. 
*/ - bool contains(const T &value) const + template void remove_contained_as(const ForwardKey &key) { - ITER_SLOTS_BEGIN (value, m_array, const, item, offset) { - if (item.is_empty(offset)) { - return false; - } - else if (item.has_value(offset, value)) { - return true; - } - } - ITER_SLOTS_END(offset); + this->remove_contained__impl(key, m_hash(key)); } /** - * Remove the value from the set. - * Asserts that the value exists in the set currently. + * An iterator that can iterate over all keys in the set. The iterator is invalidated when the + * set is moved or when it is grown. + * + * Keys returned by this iterator are always const. They should not change, because this might + * also change their hash. */ - void remove(const T &value) + class Iterator { + private: + const Slot *m_slots; + uint32_t m_total_slots; + uint32_t m_current_slot; + + public: + Iterator(const Slot *slots, uint32_t total_slots, uint32_t current_slot) + : m_slots(slots), m_total_slots(total_slots), m_current_slot(current_slot) + { + } + + Iterator &operator++() + { + while (++m_current_slot < m_total_slots) { + if (m_slots[m_current_slot].is_occupied()) { + break; + } + } + return *this; + } + + const Key &operator*() const + { + return *m_slots[m_current_slot].key(); + } + + friend bool operator!=(const Iterator &a, const Iterator &b) + { + BLI_assert(a.m_slots == b.m_slots); + BLI_assert(a.m_total_slots == b.m_total_slots); + return a.m_current_slot != b.m_current_slot; + } + }; + + Iterator begin() const { - BLI_assert(this->contains(value)); - ITER_SLOTS_BEGIN (value, m_array, , item, offset) { - if (item.has_value(offset, value)) { - item.set_dummy(offset); - m_array.update__set_to_dummy(); - return; + for (uint32_t i = 0; i < m_slots.size(); i++) { + if (m_slots[i].is_occupied()) { + return Iterator(m_slots.data(), m_slots.size(), i); } } - ITER_SLOTS_END(offset); + return this->end(); + } + + Iterator end() const + { + return Iterator(m_slots.data(), m_slots.size(), m_slots.size()); 
} /** - * Get the amount of values stored in the set. + * Print common statistics like size and collision count. This is useful for debugging purposes. */ - uint32_t size() const + void print_stats(StringRef name = "") const { - return m_array.slots_set(); + HashTableStats stats(*this, *this); + stats.print(); } /** - * Return true if this set contains no elements. + * Get the number of collisions that the probing strategy has to go through to find the key or + * determine that it is not in the set. */ - bool is_empty() const + uint32_t count_collisions(const Key &key) const { - return this->size() == 0; + return this->count_collisions__impl(key, m_hash(key)); } + /** + * Remove all elements from the set. + */ void clear() { this->~Set(); @@ -291,8 +432,76 @@ class Set { } /** - * Returns true when there is at least one element that is in both sets. - * Otherwise false. + * Creates a new slot array and reinserts all keys inside of that. This method can be used to get + * rid of dummy slots. Also this is useful for benchmarking the grow function. + */ + void rehash() + { + this->realloc_and_reinsert(this->size()); + } + + /** + * Returns the number of keys stored in the set. + */ + uint32_t size() const + { + return m_occupied_and_removed_slots - m_removed_slots; + } + + /** + * Returns true if no keys are stored. + */ + bool is_empty() const + { + return m_occupied_and_removed_slots == m_removed_slots; + } + + /** + * Returns the number of available slots. This is mostly for debugging purposes. + */ + uint32_t capacity() const + { + return m_slots.size(); + } + + /** + * Returns the amount of removed slots in the set. This is mostly for debugging purposes. + */ + uint32_t removed_amount() const + { + return m_removed_slots; + } + + /** + * Returns the bytes required per element. This is mostly for debugging purposes. + */ + uint32_t size_per_element() const + { + return sizeof(Slot); + } + + /** + * Returns the approximate memory requirements of the set in bytes. 
This is more correct for + * larger sets. + */ + uint32_t size_in_bytes() const + { + return sizeof(Slot) * m_slots.size(); + } + + /** + * Potentially resize the set such that it can hold the specified number of keys without another + * grow operation. + */ + void reserve(uint32_t n) + { + if (m_usable_slots < n) { + this->realloc_and_reinsert(n); + } + } + + /** + * Returns true if there is a key that exists in both sets. */ static bool Intersects(const Set &a, const Set &b) { @@ -301,8 +510,8 @@ class Set { return Intersects(b, a); } - for (const T &value : a) { - if (b.contains(value)) { + for (const Key &key : a) { + if (b.contains(key)) { return true; } } @@ -310,182 +519,249 @@ class Set { } /** - * Returns true when there is no value that is in both sets. - * Otherwise false. + * Returns true if no key from a is also in b and vice versa. */ static bool Disjoint(const Set &a, const Set &b) { return !Intersects(a, b); } - void print_table() const + private: + BLI_NOINLINE void realloc_and_reinsert(uint32_t min_usable_slots) { - std::cout << "Hash Table:\n"; - std::cout << " Size: " << m_array.slots_set() << '\n'; - std::cout << " Capacity: " << m_array.slots_total() << '\n'; - uint32_t item_index = 0; - for (const Item &item : m_array) { - std::cout << " Item: " << item_index++ << '\n'; - for (uint offset = 0; offset < 4; offset++) { - std::cout << " " << offset << " \t"; - if (item.is_empty(offset)) { - std::cout << " \n"; - } - else if (item.is_set(offset)) { - const T &value = *item.value(offset); - uint32_t collisions = this->count_collisions(value); - std::cout << " " << value << " \t Collisions: " << collisions << '\n'; - } - else if (item.is_dummy(offset)) { - std::cout << " \n"; - } + uint32_t total_slots, usable_slots; + m_max_load_factor.compute_total_and_usable_slots( + SlotArray::inline_buffer_capacity(), min_usable_slots, &total_slots, &usable_slots); + uint32_t new_slot_mask = total_slots - 1; + + /** + * Optimize the case when the set was empty 
beforehand. We can avoid some copies here. + */ + if (this->size() == 0) { + m_slots.~Array(); + new (&m_slots) SlotArray(total_slots); + m_removed_slots = 0; + m_occupied_and_removed_slots = 0; + m_usable_slots = usable_slots; + m_slot_mask = new_slot_mask; + return; + } + + /* The grown array that we insert the keys into. */ + SlotArray new_slots(total_slots); + + for (Slot &slot : m_slots) { + if (slot.is_occupied()) { + this->add_after_grow_and_destruct_old(slot, new_slots, new_slot_mask); } } + + /* All occupied slots have been destructed already and empty/removed slots are assumed to be + * trivially destructible. */ + m_slots.clear_without_destruct(); + m_slots = std::move(new_slots); + m_occupied_and_removed_slots -= m_removed_slots; + m_usable_slots = usable_slots; + m_removed_slots = 0; + m_slot_mask = new_slot_mask; } - class Iterator { - private: - const Set *m_set; - uint32_t m_slot; + void add_after_grow_and_destruct_old(Slot &old_slot, + SlotArray &new_slots, + uint32_t new_slot_mask) + { + uint32_t hash = old_slot.get_hash(Hash()); - public: - Iterator(const Set *set, uint32_t slot) : m_set(set), m_slot(slot) - { + SLOT_PROBING_BEGIN (ProbingStrategy, hash, new_slot_mask, slot_index) { + Slot &slot = new_slots[slot_index]; + if (slot.is_empty()) { + slot.relocate_occupied_here(old_slot, hash); + return; + } } + SLOT_PROBING_END(); + } - Iterator &operator++() - { - m_slot = m_set->next_slot(m_slot + 1); - return *this; + template bool contains__impl(const ForwardKey &key, uint32_t hash) const + { + SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + return false; + } + if (slot.contains(key, m_is_equal, hash)) { + return true; + } } + SET_SLOT_PROBING_END(); + } - const T &operator*() const - { - uint32_t item_index = m_slot >> OFFSET_SHIFT; - uint offset = m_slot & OFFSET_MASK; - const Item &item = m_set->m_array.item(item_index); - BLI_assert(item.is_set(offset)); - return *item.value(offset); - } + template void 
add_new__impl(ForwardKey &&key, uint32_t hash) + { + BLI_assert(!this->contains_as(key)); - friend bool operator==(const Iterator &a, const Iterator &b) - { - BLI_assert(a.m_set == b.m_set); - return a.m_slot == b.m_slot; - } + this->ensure_can_add(); - friend bool operator!=(const Iterator &a, const Iterator &b) - { - return !(a == b); + SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + slot.occupy(std::forward(key), hash); + m_occupied_and_removed_slots++; + return; + } } - }; + SET_SLOT_PROBING_END(); + } - friend Iterator; + template bool add__impl(ForwardKey &&key, uint32_t hash) + { + this->ensure_can_add(); - Iterator begin() const + SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + slot.occupy(std::forward(key), hash); + m_occupied_and_removed_slots++; + return true; + } + if (slot.contains(key, m_is_equal, hash)) { + return false; + } + } + SET_SLOT_PROBING_END(); + } + + template bool remove__impl(const ForwardKey &key, uint32_t hash) { - return Iterator(this, this->next_slot(0)); + SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash)) { + slot.remove(); + m_removed_slots++; + return true; + } + if (slot.is_empty()) { + return false; + } + } + SET_SLOT_PROBING_END(); } - Iterator end() const + template void remove_contained__impl(const ForwardKey &key, uint32_t hash) { - return Iterator(this, m_array.slots_total()); + BLI_assert(this->contains_as(key)); + m_removed_slots++; + + SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash)) { + slot.remove(); + return; + } + } + SET_SLOT_PROBING_END(); } - private: - uint32_t next_slot(uint32_t slot) const - { - for (; slot < m_array.slots_total(); slot++) { - uint32_t item_index = slot >> OFFSET_SHIFT; - uint offset = slot & OFFSET_MASK; - const Item &item = m_array.item(item_index); - if (item.is_set(offset)) { - return slot; + template + uint32_t count_collisions__impl(const ForwardKey &key, uint32_t hash) const + { + uint32_t 
collisions = 0; + + SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash)) { + return collisions; } + if (slot.is_empty()) { + return collisions; + } + collisions++; } - return slot; + SET_SLOT_PROBING_END(); } void ensure_can_add() { - if (UNLIKELY(m_array.should_grow())) { - this->grow(this->size() + 1); + if (m_occupied_and_removed_slots >= m_usable_slots) { + this->realloc_and_reinsert(this->size() + 1); + BLI_assert(m_occupied_and_removed_slots < m_usable_slots); } } +}; - BLI_NOINLINE void grow(uint32_t min_usable_slots) +/** + * A wrapper for std::unordered_set with the API of BLI::Set. This can be used for benchmarking. + */ +template class StdUnorderedSetWrapper { + private: + using SetType = std::unordered_set>; + SetType m_set; + + public: + uint32_t size() const { - ArrayType new_array = m_array.init_reserved(min_usable_slots); + return (uint32_t)m_set.size(); + } - for (Item &old_item : m_array) { - for (uint8_t offset = 0; offset < 4; offset++) { - if (old_item.is_set(offset)) { - this->add_after_grow(*old_item.value(offset), new_array); - } - } - } + bool is_empty() const + { + return m_set.empty(); + } - m_array = std::move(new_array); + void reserve(uint32_t n) + { + m_set.reserve(n); } - void add_after_grow(T &old_value, ArrayType &new_array) + void add_new(const Key &key) { - ITER_SLOTS_BEGIN (old_value, new_array, , item, offset) { - if (item.is_empty(offset)) { - item.store(offset, std::move(old_value)); - return; - } - } - ITER_SLOTS_END(offset); + m_set.insert(key); + } + void add_new(Key &&key) + { + m_set.insert(std::move(key)); } - uint32_t count_collisions(const T &value) const + bool add(const Key &key) { - uint32_t collisions = 0; - ITER_SLOTS_BEGIN (value, m_array, const, item, offset) { - if (item.is_empty(offset) || item.has_value(offset, value)) { - return collisions; - } - collisions++; + return m_set.insert(key).second; + } + bool add(Key &&key) + { + return m_set.insert(std::move(key)).second; + } + + 
void add_multiple(ArrayRef keys) + { + for (const Key &key : keys) { + m_set.insert(key); } - ITER_SLOTS_END(offset); } - template void add_new__impl(ForwardT &&value) + bool contains(const Key &key) const { - BLI_assert(!this->contains(value)); - this->ensure_can_add(); + return m_set.find(key) != m_set.end(); + } - ITER_SLOTS_BEGIN (value, m_array, , item, offset) { - if (item.is_empty(offset)) { - item.store(offset, std::forward(value)); - m_array.update__empty_to_set(); - return; - } - } - ITER_SLOTS_END(offset); + bool remove(const Key &key) + { + return (bool)m_set.erase(key); } - template bool add__impl(ForwardT &&value) + void remove_contained(const Key &key) { - this->ensure_can_add(); + return m_set.erase(key); + } - ITER_SLOTS_BEGIN (value, m_array, , item, offset) { - if (item.is_empty(offset)) { - item.store(offset, std::forward(value)); - m_array.update__empty_to_set(); - return true; - } - else if (item.has_value(offset, value)) { - return false; - } - } - ITER_SLOTS_END(offset); + void clear() + { + m_set.clear(); + } + + typename SetType::iterator begin() const + { + return m_set.begin(); } -}; -#undef ITER_SLOTS_BEGIN -#undef ITER_SLOTS_END + typename SetType::iterator end() const + { + return m_set.end(); + } +}; } // namespace BLI diff --git a/source/blender/blenlib/BLI_set_slots.hh b/source/blender/blenlib/BLI_set_slots.hh new file mode 100644 index 00000000000..21493524bd8 --- /dev/null +++ b/source/blender/blenlib/BLI_set_slots.hh @@ -0,0 +1,415 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __BLI_SET_SLOTS_HH__ +#define __BLI_SET_SLOTS_HH__ + +/** \file + * \ingroup bli + * + * This file contains different slot types that are supposed to be used with BLI::Set. + * + * Every slot type has to be able to hold a value of the Key type and state information. + * A set slot has three possible states: empty, occupied and removed. + * + * Only when a slot is occupied, it stores an instance of type Key. + * + * A set slot type has to implement a couple of methods that are explained in SimpleSetSlot. + * A slot type is assumed to be trivially destructible, when it is not in occupied state. So the + * destructor might not be called in that case. + */ + +#include "BLI_memory_utils.hh" +#include "BLI_string_ref.hh" + +namespace BLI { + +/** + * The simplest possible set slot. It stores the slot state and the optional key instance in + * separate variables. Depending on the alignment requirement of the key, many bytes might be + * wasted. + */ +template class SimpleSetSlot { + private: + enum State : uint8_t { + Empty = 0, + Occupied = 1, + Removed = 2, + }; + + State m_state; + AlignedBuffer m_buffer; + + public: + /** + * After the default constructor has run, the slot has to be in the empty state. + */ + SimpleSetSlot() + { + m_state = Empty; + } + + /** + * The destructor also has to destruct the key, if the slot is currently occupied. + */ + ~SimpleSetSlot() + { + if (m_state == Occupied) { + this->key()->~Key(); + } + } + + /** + * The copy constructor has to copy the state. If the other slot was occupied, a copy of the key + * has to be made as well. 
+ */ + SimpleSetSlot(const SimpleSetSlot &other) + { + m_state = other.m_state; + if (other.m_state == Occupied) { + new (this->key()) Key(*other.key()); + } + } + + /** + * The move constructor has to copy the state. If the other slot was occupied, the key from the + * other slot has to be moved as well. The other slot stays in the state it was in before. Its + * optionally stored key remains in a moved-from state. + */ + SimpleSetSlot(SimpleSetSlot &&other) noexcept + { + m_state = other.m_state; + if (other.m_state == Occupied) { + new (this->key()) Key(std::move(*other.key())); + } + } + + /** + * Get a non-const pointer to the position where the key is stored. + */ + Key *key() + { + return (Key *)m_buffer.ptr(); + } + + /** + * Get a const pointer to the position where the key is stored. + */ + const Key *key() const + { + return (const Key *)m_buffer.ptr(); + } + + /** + * Return true if the slot currently contains a key. + */ + bool is_occupied() const + { + return m_state == Occupied; + } + + /** + * Return true if the slot is empty, i.e. it does not contain a key and is not in removed state. + */ + bool is_empty() const + { + return m_state == Empty; + } + + /** + * Return the hash of the currently stored key. In this simple set slot implementation, we just + * compute the hash here. Other implementations might store the hash in the slot instead. + */ + template uint32_t get_hash(const Hash &hash) const + { + BLI_assert(this->is_occupied()); + return hash(*this->key()); + } + + /** + * Move the other slot into this slot and destruct it. We do destruction here, because this way + * we can avoid a comparison with the state, since we know the slot is occupied. 
+ */ + void relocate_occupied_here(SimpleSetSlot &other, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + BLI_assert(other.is_occupied()); + m_state = Occupied; + new (this->key()) Key(std::move(*other.key())); + other.key()->~Key(); + } + + /** + * Return true, when this slot is occupied and contains a key that compares equal to the given + * key. The hash is used by other slot implementations to determine inequality faster. + */ + template + bool contains(const ForwardKey &key, const IsEqual &is_equal, uint32_t UNUSED(hash)) const + { + if (m_state == Occupied) { + return is_equal(key, *this->key()); + } + return false; + } + + /** + * Change the state of this slot from empty/removed to occupied. The key has to be constructed + * by calling the constructor with the given key as parameter. + */ + template void occupy(ForwardKey &&key, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + m_state = Occupied; + new (this->key()) Key(std::forward(key)); + } + + /** + * Change the state of this slot from occupied to removed. The key has to be destructed as well. + */ + void remove() + { + BLI_assert(this->is_occupied()); + m_state = Removed; + this->key()->~Key(); + } +}; + +/** + * This set slot implementation stores the hash of the key within the slot. This helps when + * computing the hash or an equality check is expensive. 
+ */ +template class HashedSetSlot { + private: + enum State : uint8_t { + Empty = 0, + Occupied = 1, + Removed = 2, + }; + + uint32_t m_hash; + State m_state; + AlignedBuffer m_buffer; + + public: + HashedSetSlot() + { + m_state = Empty; + } + + ~HashedSetSlot() + { + if (m_state == Occupied) { + this->key()->~Key(); + } + } + + HashedSetSlot(const HashedSetSlot &other) + { + m_state = other.m_state; + if (other.m_state == Occupied) { + m_hash = other.m_hash; + new (this->key()) Key(*other.key()); + } + } + + HashedSetSlot(HashedSetSlot &&other) noexcept + { + m_state = other.m_state; + if (other.m_state == Occupied) { + m_hash = other.m_hash; + new (this->key()) Key(std::move(*other.key())); + } + } + + Key *key() + { + return (Key *)m_buffer.ptr(); + } + + const Key *key() const + { + return (const Key *)m_buffer.ptr(); + } + + bool is_occupied() const + { + return m_state == Occupied; + } + + bool is_empty() const + { + return m_state == Empty; + } + + template uint32_t get_hash(const Hash &UNUSED(hash)) const + { + BLI_assert(this->is_occupied()); + return m_hash; + } + + void relocate_occupied_here(HashedSetSlot &other, uint32_t hash) + { + BLI_assert(!this->is_occupied()); + BLI_assert(other.is_occupied()); + m_state = Occupied; + m_hash = hash; + new (this->key()) Key(std::move(*other.key())); + other.key()->~Key(); + } + + template + bool contains(const ForwardKey &key, const IsEqual &is_equal, uint32_t hash) const + { + /* m_hash might be uninitialized here, but that is ok. 
*/ + if (m_hash == hash) { + if (m_state == Occupied) { + return is_equal(key, *this->key()); + } + } + return false; + } + + template void occupy(ForwardKey &&key, uint32_t hash) + { + BLI_assert(!this->is_occupied()); + m_state = Occupied; + m_hash = hash; + new (this->key()) Key(std::forward(key)); + } + + void remove() + { + BLI_assert(this->is_occupied()); + m_state = Removed; + this->key()->~Key(); + } +}; + +/** + * An IntrusiveSetSlot uses two special values of the key to indicate whether the slot is empty or + * removed. This saves some memory in all cases and is more efficient in many cases. The KeyInfo + * type indicates which specific values are used. An example for a KeyInfo implementation is + * PointerKeyInfo. + * + * The special key values are expected to be trivially destructible. + */ +template class IntrusiveSetSlot { + private: + Key m_key = KeyInfo::get_empty(); + + public: + IntrusiveSetSlot() = default; + ~IntrusiveSetSlot() = default; + IntrusiveSetSlot(const IntrusiveSetSlot &other) = default; + IntrusiveSetSlot(IntrusiveSetSlot &&other) noexcept = default; + + Key *key() + { + return &m_key; + } + + const Key *key() const + { + return &m_key; + } + + bool is_occupied() const + { + return KeyInfo::is_not_empty_or_removed(m_key); + } + + bool is_empty() const + { + return KeyInfo::is_empty(m_key); + } + + template uint32_t get_hash(const Hash &hash) const + { + BLI_assert(this->is_occupied()); + return hash(m_key); + } + + void relocate_occupied_here(IntrusiveSetSlot &other, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + BLI_assert(other.is_occupied()); + m_key = std::move(other.m_key); + other.m_key.~Key(); + } + + template + bool contains(const ForwardKey &key, const IsEqual &is_equal, uint32_t UNUSED(hash)) const + { + BLI_assert(KeyInfo::is_not_empty_or_removed(key)); + return is_equal(m_key, key); + } + + template void occupy(ForwardKey &&key, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + 
BLI_assert(KeyInfo::is_not_empty_or_removed(key)); + + m_key = std::forward(key); + } + + void remove() + { + BLI_assert(this->is_occupied()); + KeyInfo::remove(m_key); + } +}; + +/** + * This exists just to make it more convenient to define which special integer values can be used + * to indicate an empty and removed value. + */ +template +using IntegerSetSlot = IntrusiveSetSlot>; + +template struct DefaultSetSlot; + +/** + * Use SimpleSetSlot by default, because it is the smallest slot type that works for all key types. + */ +template struct DefaultSetSlot { + using type = SimpleSetSlot; +}; + +/** + * Store the hash of a string in the slot by default. Recomputing the hash or doing string + * comparisons can be relatively costly. + */ +template<> struct DefaultSetSlot { + using type = HashedSetSlot; +}; +template<> struct DefaultSetSlot { + using type = HashedSetSlot; +}; +template<> struct DefaultSetSlot { + using type = HashedSetSlot; +}; + +/** + * Use a special slot type for pointer keys, because we can store whether a slot is empty or + * removed with special pointer values. + */ +template struct DefaultSetSlot { + using type = IntrusiveSetSlot>; +}; + +} // namespace BLI + +#endif /* __BLI_SET_SLOTS_HH__ */ diff --git a/source/blender/blenlib/BLI_stack.hh b/source/blender/blenlib/BLI_stack.hh index 7f1f9f9cc10..4645535876d 100644 --- a/source/blender/blenlib/BLI_stack.hh +++ b/source/blender/blenlib/BLI_stack.hh @@ -20,48 +20,208 @@ /** \file * \ingroup bli * - * Basic stack implementation with support for small object optimization. + * A `BLI::Stack` is a dynamically growing FILO (first-in, last-out) data structure. It is + * designed to be a more convenient and efficient replacement for `std::stack`. + * + * The improved efficiency is mainly achieved by supporting small buffer optimization. As long as + * the number of elements added to the stack stays below InlineBufferCapacity, no heap allocation + * is done. 
Consequently, values stored in the stack have to be movable and they might be moved, + * when the stack is moved. + * + * A Vector can be used to emulate a stack. However, this stack implementation is more efficient + * when all you have to do is to push and pop elements. That is because a vector guarantees that + * all elements are in a contiguous array. Therefore, it has to copy all elements to a new buffer + * when it grows. This stack implementation does not have to copy all previously pushed elements + * when it grows. + * + * BLI::Stack is implemented using a double linked list of chunks. Each chunk contains an array of + * elements. The chunk size increases exponentially with every new chunk that is required. The + * lowest chunk, i.e. the one that is used for the first few pushed elements, is embedded into the + * stack. */ -#include "BLI_vector.hh" +#include "BLI_allocator.hh" +#include "BLI_array_ref.hh" +#include "BLI_memory_utils.hh" namespace BLI { -template +/** + * A StackChunk references a contiguous memory buffer. Multiple StackChunk instances are linked in + * a double linked list. + */ +template struct StackChunk { + /** The below chunk contains the elements that have been pushed on the stack before. */ + StackChunk *below; + /** The above chunk contains the elements that have been pushed on the stack afterwards. */ + StackChunk *above; + /** Pointer to the first element of the referenced buffer. */ + T *begin; + /** Pointer to one element past the end of the referenced buffer. */ + T *capacity_end; + + uint capacity() const + { + return capacity_end - begin; + } +}; + +template< + /** Type of the elements that are stored in the stack. */ + typename T, + /** + * The number of values that can be stored in this stack, without doing a heap allocation. + * Sometimes it can make sense to increase this value a lot. The memory in the inline buffer is + * not initialized when it is not needed. 
+ * + * When T is large, the small buffer optimization is disabled by default to avoid large + * unexpected allocations on the stack. It can still be enabled explicitely though. + */ + uint InlineBufferCapacity = (sizeof(T) < 100) ? 4 : 0, + /** + * The allocator used by this stack. Should rarely be changed, except when you don't want that + * MEM_* is used internally. + */ + typename Allocator = GuardedAllocator> class Stack { private: - Vector m_elements; + using Chunk = StackChunk; - public: - Stack() = default; + /** + * Points to one element after top-most value in the stack. + * + * Invariant: + * If m_size == 0 + * then: m_top == m_inline_chunk.begin + * else: &peek() == m_top - 1; + */ + T *m_top; + + /** Points to the chunk that references the memory pointed to by m_top. */ + Chunk *m_top_chunk; /** - * Construct a stack from an array ref. The elements will be pushed in the same order they are in - * the array. + * Number of elements in the entire stack. The sum of initialized element counts in the chunks. */ - Stack(ArrayRef values) : m_elements(values) - { - } + uint m_size; + + /** The buffer used to implement small object optimization. */ + AlignedBuffer m_inline_buffer; + + /** + * A chunk referencing the inline buffer. This is always the bottom-most chunk. + * So m_inline_chunk.below == nullptr. + */ + Chunk m_inline_chunk; - operator ArrayRef() + /** Used for allocations when the inline buffer is not large enough. */ + Allocator m_allocator; + + public: + /** + * Initialize an empty stack. No heap allocation is done. + */ + Stack(Allocator allocator = {}) : m_allocator(allocator) { - return m_elements; + T *inline_buffer = this->inline_buffer(); + + m_inline_chunk.below = nullptr; + m_inline_chunk.above = nullptr; + m_inline_chunk.begin = inline_buffer; + m_inline_chunk.capacity_end = inline_buffer + InlineBufferCapacity; + + m_top = inline_buffer; + m_top_chunk = &m_inline_chunk; + m_size = 0; } /** - * Return the number of elements in the stack. 
+ * Create a new stack that contains the given elements. The values are pushed to the stack in + * the order they are in the array. */ - uint size() const + Stack(ArrayRef values) : Stack() { - return m_elements.size(); + this->push_multiple(values); } /** - * Return true when the stack is empty, otherwise false. + * Create a new stack that contains the given elements. The values are pushed to the stack in the + * order they are in the array. + * + * Example: + * Stack stack = {4, 5, 6}; + * assert(stack.pop() == 6); + * assert(stack.pop() == 5); */ - bool is_empty() const + Stack(const std::initializer_list &values) : Stack(ArrayRef(values)) + { + } + + Stack(const Stack &other) : Stack(other.m_allocator) + { + for (const Chunk *chunk = &other.m_inline_chunk; chunk; chunk = chunk->above) { + const T *begin = chunk->begin; + const T *end = (chunk == other.m_top_chunk) ? other.m_top : chunk->capacity_end; + this->push_multiple(ArrayRef(begin, end - begin)); + } + } + + Stack(Stack &&other) noexcept : Stack(other.m_allocator) + { + uninitialized_relocate_n(other.inline_buffer(), + std::min(other.m_size, InlineBufferCapacity), + this->inline_buffer()); + + m_inline_chunk.above = other.m_inline_chunk.above; + m_size = other.m_size; + + if (m_size <= InlineBufferCapacity) { + m_top_chunk = &m_inline_chunk; + m_top = this->inline_buffer() + m_size; + } + else { + m_top_chunk = other.m_top_chunk; + m_top = other.m_top; + } + + other.m_size = 0; + other.m_inline_chunk.above = nullptr; + other.m_top_chunk = &other.m_inline_chunk; + other.m_top = other.m_top_chunk->begin; + } + + ~Stack() { - return this->size() == 0; + this->destruct_all_elements(); + Chunk *above_chunk; + for (Chunk *chunk = m_inline_chunk.above; chunk; chunk = above_chunk) { + above_chunk = chunk->above; + m_allocator.deallocate(chunk); + } + } + + Stack &operator=(const Stack &stack) + { + if (this == &stack) { + return *this; + } + + this->~Stack(); + new (this) Stack(stack); + + return *this; + } + + 
Stack &operator=(Stack &&stack) + { + if (this == &stack) { + return *this; + } + + this->~Stack(); + new (this) Stack(std::move(stack)); + + return *this; } /** @@ -69,80 +229,159 @@ class Stack { */ void push(const T &value) { - m_elements.append(value); + if (m_top == m_top_chunk->capacity_end) { + this->activate_next_chunk(1); + } + new (m_top) T(value); + m_top++; + m_size++; } - void push(T &&value) { - m_elements.append(std::move(value)); - } - - void push_multiple(ArrayRef values) - { - m_elements.extend(values); + if (m_top == m_top_chunk->capacity_end) { + this->activate_next_chunk(1); + } + new (m_top) T(std::move(value)); + m_top++; + m_size++; } /** - * Remove the element from the top of the stack and return it. - * This will assert when the stack is empty. + * Remove and return the top-most element from the stack. This invokes undefined behavior when + * the stack is empty. */ T pop() { - return m_elements.pop_last(); + BLI_assert(m_size > 0); + m_top--; + T value = std::move(*m_top); + m_top->~T(); + m_size--; + + if (m_top == m_top_chunk->begin) { + if (m_top_chunk->below != nullptr) { + m_top_chunk = m_top_chunk->below; + m_top = m_top_chunk->capacity_end; + } + } + return value; } /** - * Return a reference to the value a the top of the stack. - * This will assert when the stack is empty. + * Get a reference to the top-most element without removing it from the stack. This invokes + * undefined behavior when the stack is empty. */ T &peek() { - BLI_assert(!this->is_empty()); - return m_elements[this->size() - 1]; + BLI_assert(m_size > 0); + BLI_assert(m_top > m_top_chunk->begin); + return *(m_top - 1); } - - T *begin() + const T &peek() const { - return m_elements.begin(); + BLI_assert(m_size > 0); + BLI_assert(m_top > m_top_chunk->begin); + return *(m_top - 1); } - T *end() + /** + * Add multiple elements to the stack. The values are pushed in the order they are in the array. 
+ * This method is more efficient than pushing multiple elements individually and might cause less + * heap allocations. + */ + void push_multiple(ArrayRef values) { - return m_elements.end(); + ArrayRef remaining_values = values; + while (!remaining_values.is_empty()) { + if (m_top == m_top_chunk->capacity_end) { + this->activate_next_chunk(remaining_values.size()); + } + + uint remaining_capacity = m_top_chunk->capacity_end - m_top; + uint amount = std::min(remaining_values.size(), remaining_capacity); + uninitialized_copy_n(remaining_values.data(), amount, m_top); + m_top += amount; + + remaining_values = remaining_values.drop_front(amount); + } + + m_size += values.size(); } - const T *begin() const + /** + * Returns true when the size is zero. + */ + bool is_empty() const { - return m_elements.begin(); + return m_size == 0; } - const T *end() const + /** + * Returns the number of elements in the stack. + */ + uint size() const { - return m_elements.end(); + return m_size; } /** - * Remove all elements from the stack but keep the memory. + * Removes all elements from the stack. The memory is not freed, so it is more efficient to reuse + * the stack than to create a new one. */ void clear() { - m_elements.clear(); + this->destruct_all_elements(); + m_top_chunk = &m_inline_chunk; + m_top = m_top_chunk->begin; } - /** - * Remove all elements and free any allocated memory. - */ - void clear_and_make_small() + private: + T *inline_buffer() const { - m_elements.clear_and_make_small(); + return (T *)m_inline_buffer.ptr(); } /** - * Does a linear search to check if the value is in the stack. + * Changes m_top_chunk to point to a new chunk that is above the current one. The new chunk might + * be smaller than the given size_hint. This happens when a chunk that has been allocated before + * is reused. The size of the new chunk will be at least one. + * + * This invokes undefined behavior when the currently active chunk is not full. 
*/ - bool contains(const T &value) + void activate_next_chunk(uint size_hint) + { + BLI_assert(m_top == m_top_chunk->capacity_end); + if (m_top_chunk->above == nullptr) { + uint new_capacity = std::max(size_hint, m_top_chunk->capacity() * 2 + 10); + + /* Do a single memory allocation for the Chunk and the array it references. */ + void *buffer = m_allocator.allocate( + sizeof(Chunk) + sizeof(T) * new_capacity + alignof(T), alignof(Chunk), AT); + void *chunk_buffer = buffer; + void *data_buffer = (void *)(((uintptr_t)buffer + sizeof(Chunk) + alignof(T) - 1) & + ~(alignof(T) - 1)); + + Chunk *new_chunk = new (chunk_buffer) Chunk(); + new_chunk->begin = (T *)data_buffer; + new_chunk->capacity_end = new_chunk->begin + new_capacity; + new_chunk->above = nullptr; + new_chunk->below = m_top_chunk; + m_top_chunk->above = new_chunk; + } + m_top_chunk = m_top_chunk->above; + m_top = m_top_chunk->begin; + } + + void destruct_all_elements() { - return m_elements.contains(value); + for (T *value = m_top_chunk->begin; value != m_top; value++) { + value->~T(); + } + for (Chunk *chunk = m_top_chunk->below; chunk; chunk = chunk->below) { + for (T *value = chunk->begin; value != chunk->capacity_end; value++) { + value->~T(); + } + } } }; diff --git a/source/blender/blenlib/BLI_string_map.hh b/source/blender/blenlib/BLI_string_map.hh deleted file mode 100644 index caa7e16d1f3..00000000000 --- a/source/blender/blenlib/BLI_string_map.hh +++ /dev/null @@ -1,540 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef __BLI_STRING_MAP_HH__ -#define __BLI_STRING_MAP_HH__ - -/** \file - * \ingroup bli - * - * This tries to solve the issue that a normal map with std::string as key might do many - * allocations when the keys are longer than 16 bytes (the usual small string optimization size). - * - * For now this still uses std::string, but having this abstraction in place will make it easier to - * make it more efficient later on. Also, even if we will never implement this optimization, having - * a special map with string keys can be quite handy. */ - -#include "BLI_map.hh" -#include "BLI_optional.hh" -#include "BLI_string_ref.hh" -#include "BLI_vector.hh" - -namespace BLI { - -// clang-format off - -#define ITER_SLOTS_BEGIN(HASH, ARRAY, OPTIONAL_CONST, R_ITEM, R_OFFSET) \ - uint32_t hash_copy = HASH; \ - uint32_t perturb = HASH; \ - while (true) { \ - uint32_t item_index = (hash_copy & ARRAY.slot_mask()) >> OFFSET_SHIFT; \ - uint8_t R_OFFSET = hash_copy & OFFSET_MASK; \ - uint8_t initial_offset = R_OFFSET; \ - OPTIONAL_CONST Item &R_ITEM = ARRAY.item(item_index); \ - do { - -#define ITER_SLOTS_END(R_OFFSET) \ - R_OFFSET = (R_OFFSET + 1) & OFFSET_MASK; \ - } while (R_OFFSET != initial_offset); \ - perturb >>= 5; \ - hash_copy = hash_copy * 5 + 1 + perturb; \ - } ((void)0) - -// clang-format on - -template class StringMap { - private: - static constexpr uint32_t OFFSET_MASK = 3; - static constexpr uint32_t OFFSET_SHIFT = 2; - - class Item { - private: - static constexpr int32_t IS_EMPTY = -1; - - uint32_t m_hashes[4]; - int32_t m_indices[4]; - char m_values[sizeof(T) * 4]; - - public: - static constexpr uint slots_per_item = 4; - - Item() - { - for (uint offset = 0; offset < 4; offset++) { - m_indices[offset] = IS_EMPTY; - } - } - - ~Item() - { - 
for (uint offset = 0; offset < 4; offset++) { - if (this->is_set(offset)) { - destruct(this->value(offset)); - } - } - } - - Item(const Item &other) - { - for (uint offset = 0; offset < 4; offset++) { - m_indices[offset] = other.m_indices[offset]; - if (other.is_set(offset)) { - m_hashes[offset] = other.m_hashes[offset]; - new (this->value(offset)) T(*other.value(offset)); - } - } - } - - Item(Item &&other) noexcept - { - for (uint offset = 0; offset < 4; offset++) { - m_indices[offset] = other.m_indices[offset]; - if (other.is_set(offset)) { - m_hashes[offset] = other.m_hashes[offset]; - new (this->value(offset)) T(std::move(*other.value(offset))); - } - } - } - - uint32_t index(uint offset) const - { - return m_indices[offset]; - } - - uint32_t hash(uint offset) const - { - return m_hashes[offset]; - } - - T *value(uint offset) const - { - return (T *)POINTER_OFFSET(m_values, offset * sizeof(T)); - } - - bool is_set(uint offset) const - { - return m_indices[offset] >= 0; - } - - bool is_empty(uint offset) const - { - return m_indices[offset] == IS_EMPTY; - } - - bool has_hash(uint offset, uint32_t hash) const - { - BLI_assert(this->is_set(offset)); - return m_hashes[offset] == hash; - } - - bool has_exact_key(uint offset, StringRef key, const Vector &chars) const - { - return key == this->get_key(offset, chars); - } - - StringRefNull get_key(uint offset, const Vector &chars) const - { - const char *ptr = chars.begin() + m_indices[offset]; - uint length = *(uint *)ptr; - const char *start = ptr + sizeof(uint); - return StringRefNull(start, length); - } - - template - void store(uint offset, uint32_t hash, uint32_t index, ForwardT &&value) - { - this->store_without_value(offset, hash, index); - new (this->value(offset)) T(std::forward(value)); - } - - void store_without_value(uint offset, uint32_t hash, uint32_t index) - { - BLI_assert(!this->is_set(offset)); - m_hashes[offset] = hash; - m_indices[offset] = index; - } - }; - - using ArrayType = OpenAddressingArray; 
- ArrayType m_array; - Vector m_chars; - - public: - StringMap() = default; - - /** - * Get the number of key-value pairs in the map. - */ - uint size() const - { - return m_array.slots_set(); - } - - /** - * Add a new element to the map. It is assumed that the key did not exist before. - */ - void add_new(StringRef key, const T &value) - { - this->add_new__impl(key, value); - } - void add_new(StringRef key, T &&value) - { - this->add_new__impl(key, std::move(value)); - } - - /** - * Add a new element to the map if the key does not exist yet. - */ - void add(StringRef key, const T &value) - { - this->add__impl(key, value); - } - void add(StringRef key, T &&value) - { - this->add__impl(key, std::move(value)); - } - - /** - * First, checks if the key exists in the map. - * If it does exist, call the modify function with a pointer to the corresponding value. - * If it does not exist, call the create function with a pointer to where the value should be - * created. - * - * Returns whatever is returned from one of the callback functions. Both callbacks have to return - * the same type. - * - * CreateValueF: Takes a pointer to where the value should be created. - * ModifyValueF: Takes a pointer to the value that should be modified. 
- */ - template - auto add_or_modify(StringRef key, - const CreateValueF &create_value, - const ModifyValueF &modify_value) -> decltype(create_value(nullptr)) - { - using CreateReturnT = decltype(create_value(nullptr)); - using ModifyReturnT = decltype(modify_value(nullptr)); - BLI_STATIC_ASSERT((std::is_same::value), - "Both callbacks should return the same type."); - - this->ensure_can_add(); - uint32_t hash = this->compute_string_hash(key); - ITER_SLOTS_BEGIN (hash, m_array, , item, offset) { - if (item.is_empty(offset)) { - m_array.update__empty_to_set(); - uint32_t index = this->save_key_in_array(key); - item.store_without_value(offset, hash, index); - T *value_ptr = item.value(offset); - return create_value(value_ptr); - } - else if (item.has_hash(offset, hash) && item.has_exact_key(offset, key, m_chars)) { - T *value_ptr = item.value(offset); - return modify_value(value_ptr); - } - } - ITER_SLOTS_END(offset); - } - - /** - * Return true when the key exists in the map, otherwise false. - */ - bool contains(StringRef key) const - { - uint32_t hash = this->compute_string_hash(key); - ITER_SLOTS_BEGIN (hash, m_array, const, item, offset) { - if (item.is_empty(offset)) { - return false; - } - else if (item.has_hash(offset, hash) && item.has_exact_key(offset, key, m_chars)) { - return true; - } - } - ITER_SLOTS_END(offset); - } - - /** - * Get a reference to the value corresponding to a key. It is assumed that the key does exist. - */ - const T &lookup(StringRef key) const - { - BLI_assert(this->contains(key)); - T *found_value = nullptr; - uint32_t hash = this->compute_string_hash(key); - ITER_SLOTS_BEGIN (hash, m_array, const, item, offset) { - if (item.is_empty(offset)) { - return *found_value; - } - else if (item.has_hash(offset, hash)) { - if (found_value == nullptr) { - /* Common case: the first slot with the correct hash contains the key. 
- * However, still need to iterate until the next empty slot to make sure there is no - * other key with the exact same hash. */ - /* TODO: Check if we can guarantee that every hash only exists once in some cases. */ - found_value = item.value(offset); - } - else if (item.has_exact_key(offset, key, m_chars)) { - /* Found the hash more than once, now check for actual string equality. */ - return *item.value(offset); - } - } - } - ITER_SLOTS_END(offset); - } - - T &lookup(StringRef key) - { - return const_cast(const_cast(this)->lookup(key)); - } - - /** - * Get a pointer to the value corresponding to the key. Return nullptr, if the key does not - * exist. - */ - const T *lookup_ptr(StringRef key) const - { - uint32_t hash = this->compute_string_hash(key); - ITER_SLOTS_BEGIN (hash, m_array, const, item, offset) { - if (item.is_empty(offset)) { - return nullptr; - } - else if (item.has_hash(offset, hash) && item.has_exact_key(offset, key, m_chars)) { - return item.value(offset); - } - } - ITER_SLOTS_END(offset); - } - - T *lookup_ptr(StringRef key) - { - return const_cast(const_cast(this)->lookup_ptr(key)); - } - - Optional try_lookup(StringRef key) const - { - return Optional::FromPointer(this->lookup_ptr(key)); - } - - /** - * Get a copy of the value corresponding to the key. If the key does not exist, return the - * default value. - */ - T lookup_default(StringRef key, const T &default_value) const - { - const T *ptr = this->lookup_ptr(key); - if (ptr != nullptr) { - return *ptr; - } - else { - return default_value; - } - } - - /** - * Return the value that corresponds to the given key. - * If it does not exist yet, create and insert it first. - */ - template T &lookup_or_add(StringRef key, const CreateValueF &create_value) - { - return *this->add_or_modify( - key, - [&](T *value) { return new (value) T(create_value()); }, - [](T *value) { return value; }); - } - - /** - * Return the value that corresponds to the given key. 
- * If it does not exist yet, insert a new default constructed value and return that. - */ - T &lookup_or_add_default(StringRef key) - { - return this->lookup_or_add(key, []() { return T(); }); - } - - /** - * Do a linear search over all items to find a key for a value. - */ - StringRefNull find_key_for_value(const T &value) const - { - for (const Item &item : m_array) { - for (uint offset = 0; offset < 4; offset++) { - if (item.is_set(offset) && value == *item.value(offset)) { - return item.get_key(offset, m_chars); - } - } - } - BLI_assert(false); - return {}; - } - - /** - * Run a function for every value in the map. - */ - template void foreach_value(const FuncT &func) - { - for (Item &item : m_array) { - for (uint offset = 0; offset < 4; offset++) { - if (item.is_set(offset)) { - func(*item.value(offset)); - } - } - } - } - - /** - * Run a function for every key in the map. - */ - template void foreach_key(const FuncT &func) - { - for (Item &item : m_array) { - for (uint offset = 0; offset < 4; offset++) { - if (item.is_set(offset)) { - StringRefNull key = item.get_key(offset, m_chars); - func(key); - } - } - } - } - - /** - * Run a function for every key-value-pair in the map. - */ - template void foreach_item(const FuncT &func) - { - for (Item &item : m_array) { - for (uint offset = 0; offset < 4; offset++) { - if (item.is_set(offset)) { - StringRefNull key = item.get_key(offset, m_chars); - T &value = *item.value(offset); - func(key, value); - } - } - } - } - - template void foreach_item(const FuncT &func) const - { - for (const Item &item : m_array) { - for (uint offset = 0; offset < 4; offset++) { - if (item.is_set(offset)) { - StringRefNull key = item.get_key(offset, m_chars); - const T &value = *item.value(offset); - func(key, value); - } - } - } - } - - private: - uint32_t compute_string_hash(StringRef key) const - { - /* TODO: check if this can be optimized more because we know the key length already. 
*/ - uint32_t hash = 5381; - for (char c : key) { - hash = hash * 33 + c; - } - return hash; - } - - uint32_t save_key_in_array(StringRef key) - { - uint index = m_chars.size(); - uint string_size = key.size(); - m_chars.extend(ArrayRef((char *)&string_size, sizeof(uint))); - m_chars.extend(key); - m_chars.append('\0'); - return index; - } - - StringRefNull key_from_index(uint32_t index) const - { - const char *ptr = m_chars.begin() + index; - uint length = *(uint *)ptr; - const char *start = ptr + sizeof(uint); - return StringRefNull(start, length); - } - - void ensure_can_add() - { - if (UNLIKELY(m_array.should_grow())) { - this->grow(this->size() + 1); - } - } - - BLI_NOINLINE void grow(uint min_usable_slots) - { - ArrayType new_array = m_array.init_reserved(min_usable_slots); - for (Item &old_item : m_array) { - for (uint offset = 0; offset < 4; offset++) { - if (old_item.is_set(offset)) { - this->add_after_grow( - *old_item.value(offset), old_item.hash(offset), old_item.index(offset), new_array); - } - } - } - m_array = std::move(new_array); - } - - void add_after_grow(T &value, uint32_t hash, uint32_t index, ArrayType &new_array) - { - ITER_SLOTS_BEGIN (hash, new_array, , item, offset) { - if (item.is_empty(offset)) { - item.store(offset, hash, index, std::move(value)); - return; - } - } - ITER_SLOTS_END(offset); - } - - template bool add__impl(StringRef key, ForwardT &&value) - { - this->ensure_can_add(); - uint32_t hash = this->compute_string_hash(key); - ITER_SLOTS_BEGIN (hash, m_array, , item, offset) { - if (item.is_empty(offset)) { - uint32_t index = this->save_key_in_array(key); - item.store(offset, hash, index, std::forward(value)); - m_array.update__empty_to_set(); - return true; - } - else if (item.has_hash(offset, hash) && item.has_exact_key(offset, key, m_chars)) { - return false; - } - } - ITER_SLOTS_END(offset); - } - - template void add_new__impl(StringRef key, ForwardT &&value) - { - BLI_assert(!this->contains(key)); - this->ensure_can_add(); 
- uint32_t hash = this->compute_string_hash(key); - ITER_SLOTS_BEGIN (hash, m_array, , item, offset) { - if (item.is_empty(offset)) { - uint32_t index = this->save_key_in_array(key); - item.store(offset, hash, index, std::forward(value)); - m_array.update__empty_to_set(); - return; - } - } - ITER_SLOTS_END(offset); - } -}; - -#undef ITER_SLOTS_BEGIN -#undef ITER_SLOTS_END - -} // namespace BLI - -#endif /* __BLI_STRING_MAP_HH__ */ diff --git a/source/blender/blenlib/BLI_string_ref.hh b/source/blender/blenlib/BLI_string_ref.hh index eacc501375a..6a569bee1aa 100644 --- a/source/blender/blenlib/BLI_string_ref.hh +++ b/source/blender/blenlib/BLI_string_ref.hh @@ -20,12 +20,26 @@ /** \file * \ingroup bli * - * A StringRef is a pointer to a string somewhere in memory. It should not be used to transfer - * ownership of that string. When a function gets a StringRef as input, it cannot expect, that - * the string will still exist after the function ends. + * A `BLI::StringRef` references a const char array owned by someone else. It is just a pointer and + * a size. Since the memory is not owned, StringRef should not be used to transfer ownership of the + * string. The data referenced by a StringRef cannot be mutated through it. * - * There are two types of string references: One that guarantees null termination and one that does - * not. + * A StringRef is NOT null-terminated. This makes it much more powerful within C++, because we can + * also cut off parts of the end without creating a copy. When interfacing with C code that expects + * null-terminated strings, `BLI::StringRefNull` can be used. It is essentially the same as + * StringRef, but with the restriction that the string has to be null-terminated. + * + * Whenever possible, string parameters should be of type StringRef and the string return type + * should be StringRefNull. Don't forget that the StringRefNull does not own the string, so don't + * return it when the string exists only in the scope of the function. 
This convention makes + * functions usable in the most contexts. + * + * BLI::StringRef vs. std::string_view: + * Both types are certainly very similar. The main benefit of using StringRef in Blender is that + * this allows us to add convenience methods at any time. Especially, when doing a lot of string + * manipulation, this helps to keep the code clean. Furthermore, we need StringRefNull anyway, + * because there is a lot of C code that expects null-terminated strings. Once we use C++17, + * implicit conversions to and from string_view can be added. */ #include @@ -39,15 +53,16 @@ namespace BLI { class StringRef; +/** + * A common base class for StringRef and StringRefNull. This should never be used in other files. + * It only exists to avoid some code duplication. + */ class StringRefBase { - public: - using size_type = size_t; - protected: const char *m_data; - size_type m_size; + uint m_size; - StringRefBase(const char *data, size_type size) : m_data(data), m_size(size) + StringRefBase(const char *data, uint size) : m_data(data), m_size(size) { } @@ -55,7 +70,7 @@ class StringRefBase { /** * Return the (byte-)length of the referenced string, without any null-terminator. */ - size_type size() const + uint size() const { return m_size; } @@ -68,17 +83,15 @@ class StringRefBase { return m_data; } - char operator[](size_type index) const - { - BLI_assert(index <= m_size); - return m_data[index]; - } - operator ArrayRef() const { return ArrayRef(m_data, m_size); } + /** + * Implicitely convert to std::string. This is convenient in most cases, but you have to be a bit + * careful not to convert to std::string accidentally. + */ operator std::string() const { return std::string(m_data, m_size); @@ -94,12 +107,21 @@ class StringRefBase { return m_data + m_size; } + /** + * Copy the string into a buffer. The buffer has to be one byte larger than the size of the + * string, because the copied string will be null-terminated. 
Only use this when you are + * absolutely sure that the buffer is large enough. + */ void unsafe_copy(char *dst) const { memcpy(dst, m_data, m_size); dst[m_size] = '\0'; } + /** + * Copy the string into a buffer. The copied string will be null-terminated. This invokes + * undefined behavior when dst_size is too small. (Should we define the behavior?) + */ void copy(char *dst, uint dst_size) const { if (m_size < dst_size) { @@ -111,6 +133,10 @@ class StringRefBase { } } + /** + * Copy the string into a char array. The copied string will be null-terminated. This invokes + * undefined behavior when dst is too small. + */ template void copy(char (&dst)[N]) { this->copy(dst, N); @@ -130,7 +156,7 @@ class StringRefBase { }; /** - * References a null-terminated char array. + * References a null-terminated const char array. */ class StringRefNull : public StringRefBase { @@ -139,24 +165,45 @@ class StringRefNull : public StringRefBase { { } - StringRefNull(const char *str) : StringRefBase(str, strlen(str)) + /** + * Construct a StringRefNull from a null terminated c-string. The pointer must not point to NULL. + */ + StringRefNull(const char *str) : StringRefBase(str, (uint)strlen(str)) { BLI_assert(str != NULL); BLI_assert(m_data[m_size] == '\0'); } - StringRefNull(const char *str, size_type size) : StringRefBase(str, size) + /** + * Construct a StringRefNull from a null terminated c-string. This invokes undefined behavior + * when the given size is not the correct size of the string. + */ + StringRefNull(const char *str, uint size) : StringRefBase(str, size) { - BLI_assert(str[size] == '\0'); + BLI_assert((uint)strlen(str) == size); } + /** + * Reference a std::string. Remember that when the std::string is destructed, the StringRefNull + * will point to uninitialized memory. + */ StringRefNull(const std::string &str) : StringRefNull(str.data()) { } + + /** + * Get the char at the given index. 
+ */ + char operator[](uint index) const + { + /* Use '<=' instead of just '<', so that the null character can be accessed as well. */ + BLI_assert(index <= m_size); + return m_data[index]; + } }; /** - * References a char array. It might not be null terminated. + * References a const char array. It might not be null terminated. */ class StringRef : public StringRefBase { public: @@ -164,19 +211,29 @@ class StringRef : public StringRefBase { { } + /** + * StringRefNull can be converted into StringRef, but not the other way around. + */ StringRef(StringRefNull other) : StringRefBase(other.data(), other.size()) { } - StringRef(const char *str) : StringRefBase(str, str ? strlen(str) : 0) + /** + * Create a StringRef from a null-terminated c-string. + */ + StringRef(const char *str) : StringRefBase(str, str ? (uint)strlen(str) : 0) { } - StringRef(const char *str, size_type length) : StringRefBase(str, length) + StringRef(const char *str, uint length) : StringRefBase(str, length) { } - StringRef(const std::string &str) : StringRefBase(str.data(), str.size()) + /** + * Reference a std::string. Remember that when the std::string is destructed, the StringRef + * will point to uninitialized memory. + */ + StringRef(const std::string &str) : StringRefBase(str.data(), (uint)str.size()) { } @@ -198,6 +255,15 @@ class StringRef : public StringRefBase { BLI_assert(this->startswith(prefix)); return this->drop_prefix(prefix.size()); } + + /** + * Get the char at the given index. + */ + char operator[](uint index) const + { + BLI_assert(index < m_size); + return m_data[index]; + } }; /* More inline functions @@ -215,6 +281,10 @@ inline std::ostream &operator<<(std::ostream &stream, StringRefNull ref) return stream; } +/** + * Adding two StringRefs will allocate an std::string. This is not efficient, but convenient in + * most cases. 
+ */ inline std::string operator+(StringRef a, StringRef b) { return std::string(a) + std::string(b); @@ -233,6 +303,9 @@ inline bool operator!=(StringRef a, StringRef b) return !(a == b); } +/** + * Return true when the string starts with the given prefix. + */ inline bool StringRefBase::startswith(StringRef prefix) const { if (m_size < prefix.m_size) { @@ -246,6 +319,9 @@ inline bool StringRefBase::startswith(StringRef prefix) const return true; } +/** + * Return true when the string ends with the given suffix. + */ inline bool StringRefBase::endswith(StringRef suffix) const { if (m_size < suffix.m_size) { @@ -260,6 +336,9 @@ inline bool StringRefBase::endswith(StringRef suffix) const return true; } +/** + * Return a new StringRef containing only a substring of the original string. + */ inline StringRef StringRefBase::substr(uint start, uint size) const { BLI_assert(start + size <= m_size); diff --git a/source/blender/blenlib/BLI_utility_mixins.hh b/source/blender/blenlib/BLI_utility_mixins.hh index 441575f9111..31d28792865 100644 --- a/source/blender/blenlib/BLI_utility_mixins.hh +++ b/source/blender/blenlib/BLI_utility_mixins.hh @@ -23,6 +23,9 @@ namespace BLI { +/** + * A type that inherits from NonCopyable cannot be copied anymore. + */ class NonCopyable { public: /* Disable copy construction and assignment. */ @@ -35,6 +38,9 @@ class NonCopyable { NonCopyable &operator=(NonCopyable &&other) = default; }; +/** + * A type that inherits from NonMovable cannot be moved anymore. + */ class NonMovable { public: /* Disable move construction and assignment. */ diff --git a/source/blender/blenlib/BLI_vector.hh b/source/blender/blenlib/BLI_vector.hh index 49cf41c2005..052cee23028 100644 --- a/source/blender/blenlib/BLI_vector.hh +++ b/source/blender/blenlib/BLI_vector.hh @@ -20,9 +20,21 @@ /** \file * \ingroup bli * - * This vector wraps a dynamically sized array of a specific type. It supports small object - * optimization. 
That means, when the vector only contains a few elements, no memory allocation is - * performed. Instead, those elements are stored directly in the vector. + * A `BLI::Vector` is a dynamically growing contiguous array for values of type T. It is + * designed to be a more convenient and efficient replacement for `std::vector`. Note that the term + * "vector" has nothing to do with a vector from computer graphics here. + * + * A vector supports efficient insertion and removal at the end (O(1) amortized). Removal in other + * places takes O(n) time, because all elements afterwards have to be moved. If the order of + * elements is not important, `remove_and_reorder` can be used instead of `remove` for better + * performance. + * + * The improved efficiency is mainly achieved by supporting small buffer optimization. As long as + * the number of elements in the vector does not become larger than InlineBufferCapacity, no memory + * allocation is done. As a consequence, iterators are invalidated when a BLI::Vector is moved + * (iterators of std::vector remain valid when the vector is moved). + * + * `BLI::Vector` should be your default choice for a vector data structure in Blender. */ #include @@ -37,30 +49,69 @@ #include "BLI_listbase_wrapper.hh" #include "BLI_math_base.h" #include "BLI_memory_utils.hh" +#include "BLI_string.h" +#include "BLI_string_ref.hh" #include "BLI_utildefines.h" #include "MEM_guardedalloc.h" namespace BLI { -template +template< + /** + * Type of the values stored in this vector. It has to be movable. + */ + typename T, + /** + * The number of values that can be stored in this vector, without doing a heap allocation. + * Sometimes it makes sense to increase this value a lot. The memory in the inline buffer is + * not initialized when it is not needed. + * + * When T is large, the small buffer optimization is disabled by default to avoid large + * unexpected allocations on the stack. It can still be enabled explicitely though. 
+ */ + uint InlineBufferCapacity = (sizeof(T) < 100) ? 4 : 0, + /** + * The allocator used by this vector. Should rarely be changed, except when you don't want that + * MEM_* is used internally. + */ + typename Allocator = GuardedAllocator> class Vector { private: + /** + * Use pointers instead of storing the size explicitely. This reduces the number of instructions + * in `append`. + * + * The pointers might point to the memory in the inline buffer. + */ T *m_begin; T *m_end; T *m_capacity_end; + + /** Used for allocations when the inline buffer is too small. */ Allocator m_allocator; - AlignedBuffer<(uint)sizeof(T) * InlineBufferCapacity, (uint)alignof(T)> m_small_buffer; + /** A placeholder buffer that will remain uninitialized until it is used. */ + AlignedBuffer<(uint)sizeof(T) * InlineBufferCapacity, (uint)alignof(T)> m_inline_buffer; + + /** + * Store the size of the vector explicitely in debug builds. Otherwise you'd always have to call + * the `size` function or do the math to compute it from the pointers manually. This is rather + * annoying. Knowing the size of a vector is often quite essential when debugging some code. + */ #ifndef NDEBUG - /* Storing size in debug builds, because it makes debugging much easier sometimes. */ uint m_debug_size; # define UPDATE_VECTOR_SIZE(ptr) (ptr)->m_debug_size = (uint)((ptr)->m_end - (ptr)->m_begin) #else # define UPDATE_VECTOR_SIZE(ptr) ((void)0) #endif - template friend class Vector; + /** + * Be a friend with other vector instanciations. This is necessary to implement some memory + * management logic. + */ + template + friend class Vector; public: /** @@ -69,7 +120,7 @@ class Vector { */ Vector() { - m_begin = this->small_buffer(); + m_begin = this->inline_buffer(); m_end = m_begin; m_capacity_end = m_begin + InlineBufferCapacity; UPDATE_VECTOR_SIZE(this); @@ -77,15 +128,12 @@ class Vector { /** * Create a vector with a specific size. - * The elements will be default initialized. 
+ * The elements will be default constructed. + * If T is trivially constructible, the elements in the vector are not touched. */ explicit Vector(uint size) : Vector() { - this->reserve(size); - this->increase_size_unchecked(size); - for (T *current = m_begin; current != m_end; current++) { - new (current) T(); - } + this->resize(size); } /** @@ -94,25 +142,29 @@ class Vector { Vector(uint size, const T &value) : Vector() { this->reserve(size); - this->increase_size_unchecked(size); + this->increase_size_by_unchecked(size); BLI::uninitialized_fill_n(m_begin, size, value); } /** - * Create a vector from an initializer list. + * Create a vector that contains copys of the values in the initialized list. + * + * This allows you to write code like: + * Vector vec = {3, 4, 5}; */ - Vector(std::initializer_list values) : Vector(ArrayRef(values)) + Vector(const std::initializer_list &values) : Vector(ArrayRef(values)) { } /** - * Create a vector from an array ref. + * Create a vector from an array ref. The values in the vector are copy constructed. */ Vector(ArrayRef values) : Vector() { - this->reserve(values.size()); - this->increase_size_unchecked(values.size()); - BLI::uninitialized_copy_n(values.begin(), values.size(), this->begin()); + uint size = values.size(); + this->reserve(size); + this->increase_size_by_unchecked(size); + BLI::uninitialized_copy_n(values.data(), size, m_begin); } /** @@ -129,45 +181,53 @@ class Vector { } /** - * Create a vector from a ListBase. + * Create a vector from a ListBase. The caller has to make sure that the values in the linked + * list have the correct type. + * + * Example Usage: + * Vector modifiers(ob->modifiers); */ Vector(ListBase &values) : Vector() { - for (T value : ListBaseWrapper::type>(values)) { + LISTBASE_FOREACH (T, value, &values) { this->append(value); } } /** - * Create a copy of another vector. - * The other vector will not be changed. 
- * If the other vector has less than InlineBufferCapacity elements, no allocation will be made. + * Create a copy of another vector. The other vector will not be changed. If the other vector has + * less than InlineBufferCapacity elements, no allocation will be made. */ Vector(const Vector &other) : m_allocator(other.m_allocator) { this->init_copy_from_other_vector(other); } - template - Vector(const Vector &other) : m_allocator(other.m_allocator) + /** + * Create a copy of a vector with a different InlineBufferCapacity. This needs to be handled + * separately, so that the other one is a valid copy constructor. + */ + template + Vector(const Vector &other) + : m_allocator(other.m_allocator) { this->init_copy_from_other_vector(other); } /** - * Steal the elements from another vector. - * This does not do an allocation. - * The other vector will have zero elements afterwards. + * Steal the elements from another vector. This does not do an allocation. The other vector will + * have zero elements afterwards. */ - template - Vector(Vector &&other) noexcept : m_allocator(other.m_allocator) + template + Vector(Vector &&other) noexcept + : m_allocator(other.m_allocator) { uint size = other.size(); - if (other.is_small()) { + if (other.is_inline()) { if (size <= InlineBufferCapacity) { /* Copy between inline buffers. */ - m_begin = this->small_buffer(); + m_begin = this->inline_buffer(); m_end = m_begin + size; m_capacity_end = m_begin + InlineBufferCapacity; uninitialized_relocate_n(other.m_begin, size, m_begin); @@ -175,8 +235,7 @@ class Vector { else { /* Copy from inline buffer to newly allocated buffer. 
*/ uint capacity = size; - m_begin = (T *)m_allocator.allocate_aligned( - sizeof(T) * capacity, std::alignment_of::value, __func__); + m_begin = (T *)m_allocator.allocate(sizeof(T) * capacity, alignof(T), AT); m_end = m_begin + size; m_capacity_end = m_begin + capacity; uninitialized_relocate_n(other.m_begin, size, m_begin); @@ -189,9 +248,9 @@ class Vector { m_capacity_end = other.m_capacity_end; } - other.m_begin = other.small_buffer(); + other.m_begin = other.inline_buffer(); other.m_end = other.m_begin; - other.m_capacity_end = other.m_begin + OtherN; + other.m_capacity_end = other.m_begin + OtherInlineBufferCapacity; UPDATE_VECTOR_SIZE(this); UPDATE_VECTOR_SIZE(&other); } @@ -199,7 +258,7 @@ class Vector { ~Vector() { destruct_n(m_begin, this->size()); - if (!this->is_small()) { + if (!this->is_inline()) { m_allocator.deallocate(m_begin); } } @@ -242,8 +301,8 @@ class Vector { return *this; } - /* This can fail, when the vector is used to build a recursive data structure. - See https://youtu.be/7Qgd9B1KuMQ?t=840. */ + /* This can be incorrect, when the vector is used to build a recursive data structure. However, + we don't take care of it at this low level. See https://youtu.be/7Qgd9B1KuMQ?t=840. */ this->~Vector(); new (this) Vector(std::move(other)); @@ -251,13 +310,55 @@ class Vector { } /** - * Make sure that enough memory is allocated to hold size elements. - * This won't necessarily make an allocation when size is small. + * Make sure that enough memory is allocated to hold min_capacity elements. + * This won't necessarily make an allocation when min_capacity is small. * The actual size of the vector does not change. */ - void reserve(uint size) + void reserve(uint min_capacity) + { + if (min_capacity > this->capacity()) { + this->realloc_to_at_least(min_capacity); + } + } + + /** + * Change the size of the vector so that it contains new_size elements. + * If new_size is smaller than the old size, the elements at the end of the vector are + * destructed. 
If new_size is larger than the old size, the new elements at the end are default + * constructed. If T is trivially constructible, the memory is not touched by this function. + */ + void resize(uint new_size) { - this->grow(size); + uint old_size = this->size(); + if (new_size > old_size) { + this->reserve(new_size); + default_construct_n(m_begin + old_size, new_size - old_size); + } + else { + destruct_n(m_begin + new_size, old_size - new_size); + } + m_end = m_begin + new_size; + UPDATE_VECTOR_SIZE(this); + } + + /** + * Change the size of the vector so that it contains new_size elements. + * If new_size is smaller than the old size, the elements at the end of the vector are + * destructed. If new_size is larger than the old size, the new elements will be copy constructed + * from the given value. + */ + void resize(uint new_size, const T &value) + { + uint old_size = this->size(); + if (new_size > old_size) { + this->reserve(new_size); + uninitialized_fill_n(m_begin + old_size, new_size - old_size, value); + } + else { + destruct_n(m_begin + new_size, old_size - new_size); + } + m_end = m_begin + new_size; + UPDATE_VECTOR_SIZE(this); } /** @@ -275,14 +376,14 @@ class Vector { * Afterwards the vector has 0 elements and any allocated memory * will be freed. */ - void clear_and_make_small() + void clear_and_make_inline() { destruct_n(m_begin, this->size()); - if (!this->is_small()) { + if (!this->is_inline()) { m_allocator.deallocate(m_begin); } - m_begin = this->small_buffer(); + m_begin = this->inline_buffer(); m_end = m_begin; m_capacity_end = m_begin + InlineBufferCapacity; UPDATE_VECTOR_SIZE(this); @@ -291,19 +392,24 @@ class Vector { /** * Insert a new element at the end of the vector. * This might cause a reallocation with the capacity is exceeded. + * + * This is similar to std::vector::push_back. 
*/ void append(const T &value) { this->ensure_space_for_one(); this->append_unchecked(value); } - void append(T &&value) { this->ensure_space_for_one(); this->append_unchecked(std::move(value)); } + /** + * Append the value to the vector and return the index that can be used to access the newly + * added value. + */ uint append_and_get_index(const T &value) { uint index = this->size(); @@ -311,6 +417,11 @@ class Vector { return index; } + /** + * Append the value if it is not yet in the vector. This has to do a linear search to check if + * the value is in the vector. Therefore, this should only be called when it is known that the + * vector is small. + */ void append_non_duplicates(const T &value) { if (!this->contains(value)) { @@ -318,6 +429,11 @@ class Vector { } } + /** + * Append the value and assume that vector has enough memory reserved. This invokes undefined + * behavior when not enough capacity has been reserved beforehand. Only use this in performance + * critical code. + */ void append_unchecked(const T &value) { BLI_assert(m_end < m_capacity_end); @@ -325,7 +441,6 @@ class Vector { m_end++; UPDATE_VECTOR_SIZE(this); } - void append_unchecked(T &&value) { BLI_assert(m_end < m_capacity_end); @@ -342,10 +457,16 @@ class Vector { { this->reserve(this->size() + n); BLI::uninitialized_fill_n(m_end, n, value); - this->increase_size_unchecked(n); + this->increase_size_by_unchecked(n); } - void increase_size_unchecked(uint n) + /** + * Enlarges the size of the internal buffer that is considered to be initialized. This invokes + * undefined behavior when when the new size is larger than the capacity. The method can be + * useful when you want to call constructors in the vector yourself. This should only be done in + * very rare cases and has to be justified every time. 
+ */ + void increase_size_by_unchecked(uint n) { BLI_assert(m_end + n <= m_capacity_end); m_end += n; @@ -354,18 +475,24 @@ class Vector { /** * Copy the elements of another array to the end of this vector. + * + * This can be used to emulate parts of std::vector::insert. */ void extend(ArrayRef array) { - this->extend(array.begin(), array.size()); + this->extend(array.data(), array.size()); } - void extend(const T *start, uint amount) { this->reserve(this->size() + amount); this->extend_unchecked(start, amount); } + /** + * Adds all elements from the array that are not already in the vector. This is an expensive + * operation when the vector is large, but can be very cheap when it is known that the vector is + * small. + */ void extend_non_duplicates(ArrayRef array) { for (const T &value : array) { @@ -373,11 +500,14 @@ class Vector { } } + /** + * Extend the vector without bounds checking. It is assumed that enough memory has been reserved + * beforehand. Only use this in performance critical code. + */ void extend_unchecked(ArrayRef array) { - this->extend_unchecked(array.begin(), array.size()); + this->extend_unchecked(array.data(), array.size()); } - void extend_unchecked(const T *start, uint amount) { BLI_assert(m_begin + amount <= m_capacity_end); @@ -395,7 +525,6 @@ class Vector { BLI_assert(this->size() > 0); return *(m_end - 1); } - T &last() { BLI_assert(this->size() > 0); @@ -407,9 +536,12 @@ class Vector { */ void fill(const T &value) { - std::fill(m_begin, m_end, value); + initialized_fill_n(m_begin, this->size(), value); } + /** + * Copy the value to all positions specified by the indices array. + */ void fill_indices(ArrayRef indices, const T &value) { MutableArrayRef(*this).fill_indices(indices, value); @@ -426,6 +558,8 @@ class Vector { /** * Returns true when the vector contains no elements, otherwise false. + * + * This is the same as std::vector::empty. 
*/ bool is_empty() const { @@ -433,33 +567,36 @@ class Vector { } /** - * Deconstructs the last element and decreases the size by one. - * This will assert when the vector is empty. + * Destructs the last element and decreases the size by one. This invokes undefined behavior when + * the vector is empty. */ void remove_last() { BLI_assert(!this->is_empty()); m_end--; - destruct(m_end); + m_end->~T(); UPDATE_VECTOR_SIZE(this); } /** - * Remove the last element from the vector and return it. + * Remove the last element from the vector and return it. This invokes undefined behavior when + * the vector is empty. + * + * This is similar to std::vector::pop_back. */ T pop_last() { BLI_assert(!this->is_empty()); m_end--; T value = std::move(*m_end); - destruct(m_end); + m_end->~T(); UPDATE_VECTOR_SIZE(this); return value; } /** - * Delete any element in the vector. - * The empty space will be filled by the previously last element. + * Delete any element in the vector. The empty space will be filled by the previously last + * element. This takes O(1) time. */ void remove_and_reorder(uint index) { @@ -469,37 +606,60 @@ class Vector { if (element_to_remove < m_end) { *element_to_remove = std::move(*m_end); } - destruct(m_end); + m_end->~T(); UPDATE_VECTOR_SIZE(this); } + /** + * Finds the first occurence of the value, removes it and copies the last element to the hole in + * the vector. This takes O(n) time. + */ void remove_first_occurrence_and_reorder(const T &value) { - uint index = this->index(value); + uint index = this->first_index_of(value); this->remove_and_reorder((uint)index); } + /** + * Remove the element at the given index and move all values coming after it one towards the + * front. This takes O(n) time. If the order is not important, remove_and_reorder should be used + * instead. + * + * This is similar to std::vector::erase. 
+ */ + void remove(uint index) + { + BLI_assert(index < this->size()); + uint last_index = this->size() - 1; + for (uint i = index; i < last_index; i++) { + m_begin[i] = std::move(m_begin[i + 1]); + } + m_begin[last_index].~T(); + m_end--; + UPDATE_VECTOR_SIZE(this); + } + /** * Do a linear search to find the value in the vector. * When found, return the first index, otherwise return -1. */ - int index_try(const T &value) const + int first_index_of_try(const T &value) const { for (T *current = m_begin; current != m_end; current++) { if (*current == value) { - return current - m_begin; + return (int)(current - m_begin); } } return -1; } /** - * Do a linear search to find the value in the vector. - * When found, return the first index, otherwise fail. + * Do a linear search to find the value in the vector and return the found index. This invokes + * undefined behavior when the value is not in the vector. */ - uint index(const T &value) const + uint first_index_of(const T &value) const { - int index = this->index_try(value); + int index = this->first_index_of_try(value); BLI_assert(index >= 0); return (uint)index; } @@ -510,27 +670,13 @@ class Vector { */ bool contains(const T &value) const { - return this->index_try(value) != -1; + return this->first_index_of_try(value) != -1; } /** - * Compare vectors element-wise. - * Return true when they have the same length and all elements - * compare equal, otherwise false. + * Get the value at the given index. This invokes undefined behavior when the index is out of + * bounds. */ - static bool all_equal(const Vector &a, const Vector &b) - { - if (a.size() != b.size()) { - return false; - } - for (uint i = 0; i < a.size(); i++) { - if (a[i] != b[i]) { - return false; - } - } - return true; - } - const T &operator[](uint index) const { BLI_assert(index < this->size()); @@ -543,6 +689,22 @@ class Vector { return m_begin[index]; } + /** + * Get access to the underlying array. 
+ */ + T *data() + { + return m_begin; + } + + /** + * Get access to the underlying array. + */ + const T *data() const + { + return m_begin; + } + T *begin() { return m_begin; @@ -562,74 +724,94 @@ class Vector { } /** - * Get the current capacity of the vector. + * Get the current capacity of the vector, i.e. the maximum number of elements the vector can + * hold, before it has to reallocate. */ uint capacity() const { return (uint)(m_capacity_end - m_begin); } + /** + * Get an index range that makes looping over all indices more convenient and less error prone. + * Obviously, this should only be used when you actually need the index in the loop. + * + * Example: + * for (uint i : myvector.index_range()) { + * do_something(i, my_vector[i]); + * } + */ IndexRange index_range() const { return IndexRange(this->size()); } - void print_stats() const + /** + * Print some debug information about the vector. + */ + void print_stats(StringRef name = "") const { - std::cout << "Small Vector at " << (void *)this << ":" << std::endl; - std::cout << " Elements: " << this->size() << std::endl; - std::cout << " Capacity: " << (m_capacity_end - m_begin) << std::endl; - std::cout << " Small Elements: " << InlineBufferCapacity - << " Size on Stack: " << sizeof(*this) << std::endl; + std::cout << "Vector Stats: " << name << "\n"; + std::cout << " Address: " << this << "\n"; + std::cout << " Elements: " << this->size() << "\n"; + std::cout << " Capacity: " << (m_capacity_end - m_begin) << "\n"; + std::cout << " Inline Capacity: " << InlineBufferCapacity << "\n"; + + char memory_size_str[15]; + BLI_str_format_byte_unit(memory_size_str, sizeof(*this), true); + std::cout << " Size on Stack: " << memory_size_str << "\n"; } private: - T *small_buffer() const + T *inline_buffer() const { - return (T *)m_small_buffer.ptr(); + return (T *)m_inline_buffer.ptr(); } - bool is_small() const + bool is_inline() const { - return m_begin == this->small_buffer(); + return m_begin == 
this->inline_buffer(); } void ensure_space_for_one() { if (UNLIKELY(m_end >= m_capacity_end)) { - this->grow(std::max(this->size() * 2, (uint)1)); + this->realloc_to_at_least(this->size() + 1); } + std::vector a; + a.push_back(4); } - BLI_NOINLINE void grow(uint min_capacity) + BLI_NOINLINE void realloc_to_at_least(uint min_capacity) { if (this->capacity() >= min_capacity) { return; } - /* Round up to the next power of two. Otherwise consecutive calls to grow can cause a - * reallocation every time even though the min_capacity only increments. */ - min_capacity = power_of_2_max_u(min_capacity); + /* At least double the size of the previous allocation. Otherwise consecutive calls to grow can + * cause a reallocation every time even though min_capacity only increments. */ + uint min_new_capacity = this->capacity() * 2; + uint new_capacity = std::max(min_capacity, min_new_capacity); uint size = this->size(); - T *new_array = (T *)m_allocator.allocate_aligned( - min_capacity * (uint)sizeof(T), std::alignment_of::value, "grow BLI::Vector"); + T *new_array = (T *)m_allocator.allocate(new_capacity * (uint)sizeof(T), alignof(T), AT); uninitialized_relocate_n(m_begin, size, new_array); - if (!this->is_small()) { + if (!this->is_inline()) { m_allocator.deallocate(m_begin); } m_begin = new_array; m_end = m_begin + size; - m_capacity_end = m_begin + min_capacity; + m_capacity_end = m_begin + new_capacity; } /** * Initialize all properties, except for m_allocator, which has to be initialized beforehand. 
*/ - template void init_copy_from_other_vector(const Vector &other) + template + void init_copy_from_other_vector(const Vector &other) { m_allocator = other.m_allocator; @@ -637,19 +819,18 @@ class Vector { uint capacity = size; if (size <= InlineBufferCapacity) { - m_begin = this->small_buffer(); + m_begin = this->inline_buffer(); capacity = InlineBufferCapacity; } else { - m_begin = (T *)m_allocator.allocate_aligned( - sizeof(T) * size, std::alignment_of::value, __func__); + m_begin = (T *)m_allocator.allocate(sizeof(T) * size, alignof(T), AT); capacity = size; } m_end = m_begin + size; m_capacity_end = m_begin + capacity; - uninitialized_copy(other.begin(), other.end(), m_begin); + uninitialized_copy_n(other.data(), size, m_begin); UPDATE_VECTOR_SIZE(this); } }; diff --git a/source/blender/blenlib/BLI_vector_set.hh b/source/blender/blenlib/BLI_vector_set.hh index f402f47c357..7e6a7de6ee6 100644 --- a/source/blender/blenlib/BLI_vector_set.hh +++ b/source/blender/blenlib/BLI_vector_set.hh @@ -20,138 +20,195 @@ /** \file * \ingroup bli * - * A VectorSet is a set built on top of a vector. The elements are stored in a continuous array, - * but every element exists at most once. The insertion order is maintained, as long as there are - * no deletes. The expected time to check if a value is in the VectorSet is O(1). + * A `BLI::VectorSet` is an ordered container for elements of type `Key`. It has the same + * interface as `BLI::Set` with the following extensions: + * - The insertion order of keys is maintained as long as no elements are removed. + * - The keys are stored in a contiguous array. + * + * All core operations (add, remove and contains) can be done in O(1) amortized expected time. + * + * Using a VectorSet instead of a normal Set can be benefitial in any of the following + * circumstances: + * - The insertion order is important. + * - Iteration over all keys has to be fast. 
+ * - The keys in the set are supposed to be passed to a function that does not have to know that + * the keys are stored in a set. With a VectorSet, one can get an ArrayRef containing all keys + * without additional copies. + * + * BLI::VectorSet is implemented using open addressing in a slot array with a power-of-two size. + * Other than in BLI::Set, a slot does not contain the key though. Instead it only contains an + * index into an array of keys that is stored separately. + * + * Some noteworthy information: + * - Key must be a movable type. + * - Pointers to keys might be invalidated, when the vector set is changed or moved. + * - The hash function can be customized. See BLI_hash.hh for details. + * - The probing strategy can be customized. See BLI_probing_strategies.hh for details. + * - The slot type can be customized. See BLI_vector_set_slots.hh for details. + * - The methods `add_new` and `remove_contained` should be used instead of `add` and `remove` + * whenever appropriate. Assumptions and intention are described better this way. + * - Using a range-for loop over a vector set, is as efficient as iterating over an array (because + * it is the same thing). + * - Lookups can be performed using types other than Key without conversion. For that use the + * methods ending with `_as`. The template parameters Hash and IsEqual have to support the other + * key type. This can greatly improve performance when the strings are used as keys. + * - The default constructor is cheap. + * - The `print_stats` method can be used to get information about the distribution of keys and + * memory usage. + * + * Possible Improvements: + * - Small buffer optimization for the keys. 
*/ +#include "BLI_array.hh" #include "BLI_hash.hh" -#include "BLI_open_addressing.hh" -#include "BLI_vector.hh" +#include "BLI_hash_tables.hh" +#include "BLI_probing_strategies.hh" +#include "BLI_vector_set_slots.hh" namespace BLI { -// clang-format off - -#define ITER_SLOTS_BEGIN(VALUE, ARRAY, OPTIONAL_CONST, R_SLOT) \ - uint32_t hash = DefaultHash{}(VALUE); \ - uint32_t perturb = hash; \ - while (true) { \ - for (uint i = 0; i < 4; i++) {\ - uint32_t slot_index = (hash + i) & ARRAY.slot_mask(); \ - OPTIONAL_CONST Slot &R_SLOT = ARRAY.item(slot_index); - -#define ITER_SLOTS_END \ - } \ - perturb >>= 5; \ - hash = hash * 5 + 1 + perturb; \ - } ((void)0) - -// clang-format on - -template class VectorSet { +template< + /** + * Type of the elements that are stored in this set. It has to be movable. Furthermore, the + * hash and is-equal functions have to support it. + */ + typename Key, + /** + * The strategy used to deal with collisions. They are defined in BLI_probing_strategies.hh. + */ + typename ProbingStrategy = DefaultProbingStrategy, + /** + * The hash function used to hash the keys. There is a default for many types. See BLI_hash.hh + * for examples on how to define a custom hash function. + */ + typename Hash = DefaultHash, + /** + * The equality operator used to compare keys. By default it will simply compare keys using the + * `==` operator. + */ + typename IsEqual = DefaultEquality, + /** + * This is what will actually be stored in the hash table array. At a minimum a slot has to be + * able to hold an array index and information about whether the slot is empty, occupied or + * removed. Using a non-standard slot type can improve performance for some types. + * Also see BLI_vector_set_slots.hh. + */ + typename Slot = typename DefaultVectorSetSlot::type, + /** + * The allocator used by this set. Should rarely be changed, except when you don't want that + * MEM_* etc. is used internally. 
+ */ + typename Allocator = GuardedAllocator> +class VectorSet { private: - static constexpr int32_t IS_EMPTY = -1; - static constexpr int32_t IS_DUMMY = -2; - - class Slot { - private: - int32_t m_value = IS_EMPTY; - - public: - static constexpr uint slots_per_item = 1; - - bool is_set() const - { - return m_value >= 0; - } - - bool is_empty() const - { - return m_value == IS_EMPTY; - } - - bool is_dummy() const - { - return m_value == IS_DUMMY; - } + /** + * Slots are either empty, occupied or removed. The number of occupied slots can be computed by + * subtracting the removed slots from the occupied-and-removed slots. + */ + uint32_t m_removed_slots; + uint32_t m_occupied_and_removed_slots; - bool has_value(const T &value, const T *elements) const - { - return this->is_set() && elements[this->index()] == value; - } + /** + * The maximum number of slots that can be used (either occupied or removed) until the set has to + * grow. This is the total number of slots times the max load factor. + */ + uint32_t m_usable_slots; - bool has_index(uint index) const - { - return m_value == (int32_t)index; - } + /** + * The number of slots minus one. This is a bit mask that can be used to turn any integer into a + * valid slot index efficiently. + */ + uint32_t m_slot_mask; - uint index() const - { - BLI_assert(this->is_set()); - return (uint)m_value; - } + /** This is called to hash incoming keys. */ + Hash m_hash; - int32_t &index_ref() - { - return m_value; - } + /** This is called to check equality of two keys. */ + IsEqual m_is_equal; - void set_index(uint index) - { - BLI_assert(!this->is_set()); - m_value = (int32_t)index; - } + /** The max load factor is 1/2 = 50% by default. */ +#define LOAD_FACTOR 1, 2 + LoadFactor m_max_load_factor = LoadFactor(LOAD_FACTOR); + using SlotArray = Array; +#undef LOAD_FACTOR - void set_dummy() - { - BLI_assert(this->is_set()); - m_value = IS_DUMMY; - } - }; + /** + * This is the array that contains the actual slots. 
There is always at least one empty slot and + * the size of the array is a power of two. + */ + SlotArray m_slots; - using ArrayType = OpenAddressingArray; - ArrayType m_array; + /** + * Pointer to an array that contains all keys. The keys are sorted by insertion order as long as + * no keys are removed. The first set->size() elements in this array are initialized. The + * capacity of the array is m_usable_slots. + */ + Key *m_keys; - /* The capacity of the array should always be at least m_array.slots_usable(). */ - T *m_elements = nullptr; + /** Iterate over a slot index sequence for a given hash. */ +#define VECTOR_SET_SLOT_PROBING_BEGIN(HASH, R_SLOT) \ + SLOT_PROBING_BEGIN (ProbingStrategy, HASH, m_slot_mask, SLOT_INDEX) \ + auto &R_SLOT = m_slots[SLOT_INDEX]; +#define VECTOR_SET_SLOT_PROBING_END() SLOT_PROBING_END() public: + /** + * Initialize an empty vector set. This is a cheap operation and won't do an allocation. This is + * necessary to avoid a high cost when no elements are added at all. An optimized grow operation + * is performed on the first insertion. + */ VectorSet() + : m_removed_slots(0), + m_occupied_and_removed_slots(0), + m_usable_slots(0), + m_slot_mask(0), + m_slots(1), + m_keys(nullptr) { - m_elements = this->allocate_elements_array(m_array.slots_usable()); } - VectorSet(ArrayRef values) : VectorSet() - { - this->add_multiple(values); - } - - VectorSet(const std::initializer_list &values) : VectorSet() - { - this->add_multiple(values); - } - - VectorSet(const Vector &values) : VectorSet() + /** + * Construct a vector set that contains the given keys. Duplicates will be removed automatically. 
+ */ + VectorSet(const std::initializer_list &keys) : VectorSet() { - this->add_multiple(values); + this->add_multiple(keys); } - VectorSet(const VectorSet &other) : m_array(other.m_array) + ~VectorSet() { - m_elements = this->allocate_elements_array(m_array.slots_usable()); - uninitialized_copy_n(other.m_elements, m_array.slots_set(), m_elements); + destruct_n(m_keys, this->size()); + if (m_keys != nullptr) { + this->deallocate_keys_array(m_keys); + } } - VectorSet(VectorSet &&other) : m_array(std::move(other.m_array)), m_elements(other.m_elements) + VectorSet(const VectorSet &other) + : m_removed_slots(other.m_removed_slots), + m_occupied_and_removed_slots(other.m_occupied_and_removed_slots), + m_usable_slots(other.m_usable_slots), + m_slot_mask(other.m_slot_mask), + m_slots(other.m_slots) { - other.m_elements = other.allocate_elements_array(other.m_array.slots_usable()); + m_keys = this->allocate_keys_array(m_usable_slots); + uninitialized_copy_n(other.m_keys, other.size(), m_keys); } - ~VectorSet() + VectorSet(VectorSet &&other) noexcept + : m_removed_slots(other.m_removed_slots), + m_occupied_and_removed_slots(other.m_occupied_and_removed_slots), + m_usable_slots(other.m_usable_slots), + m_slot_mask(other.m_slot_mask), + m_slots(std::move(other.m_slots)), + m_keys(other.m_keys) { - destruct_n(m_elements, this->size()); - this->deallocate_elements_array(m_elements); + other.m_removed_slots = 0; + other.m_occupied_and_removed_slots = 0; + other.m_usable_slots = 0; + other.m_slot_mask = 0; + other.m_slots = SlotArray(1); + other.m_keys = nullptr; } VectorSet &operator=(const VectorSet &other) @@ -159,8 +216,10 @@ template class VectorSet { if (this == &other) { return *this; } + this->~VectorSet(); new (this) VectorSet(other); + return *this; } @@ -169,318 +228,529 @@ template class VectorSet { if (this == &other) { return *this; } + this->~VectorSet(); new (this) VectorSet(std::move(other)); + return *this; } /** - * Allocate memory such that at least 
min_usable_slots can be added without having to grow again. + * Add a new key to the vector set. This invokes undefined behavior when the key is in the set + * already. When you know for certain that a key is not in the set yet, use this method for + * better performance. This also expresses the intent better. */ - void reserve(uint min_usable_slots) + void add_new(const Key &key) { - if (m_array.slots_usable() < min_usable_slots) { - this->grow(min_usable_slots); - } + this->add_new__impl(key, m_hash(key)); + } + void add_new(Key &&key) + { + this->add_new__impl(std::move(key), m_hash(key)); } /** - * Add a new element. The method assumes that the value did not exist before. + * Add a key to the vector set. If the key exists in the set already, nothing is done. The return + * value is true if the key was newly added. + * + * This is similar to std::unordered_set::insert. */ - void add_new(const T &value) + bool add(const Key &key) { - this->add_new__impl(value); + return this->add_as(key); } - void add_new(T &&value) + bool add(Key &&key) { - this->add_new__impl(std::move(value)); + return this->add_as(std::move(key)); } /** - * Add a new element if it does not exist yet. Does not add the value again if it exists already. + * Same as `add`, but accepts other key types that are supported by the hash function. */ - bool add(const T &value) + template bool add_as(ForwardKey &&key) { - return this->add__impl(value); + return this->add__impl(std::forward(key), m_hash(key)); } - bool add(T &&value) + + /** + * Convenience function to add many keys to the vector set at once. Duplicates are removed + * automatically. + * + * We might be able to make this faster than sequentially adding all keys, but that is not + * implemented yet. + */ + void add_multiple(ArrayRef keys) { - return this->add__impl(std::move(value)); + for (const Key &key : keys) { + this->add(key); + } } /** - * Add multiple values. Duplicates will not be inserted. 
+ * Returns true if the key is in the vector set. + * + * This is similar to std::unordered_set::find() != std::unordered_set::end(). */ - void add_multiple(ArrayRef values) + bool contains(const Key &key) const { - for (const T &value : values) { - this->add(value); - } + return this->contains_as(key); } /** - * Returns true when the value is in the set-vector, otherwise false. + * Same as `contains`, but accepts other key types that are supported by the hash function. */ - bool contains(const T &value) const + template bool contains_as(const ForwardKey &key) const { - ITER_SLOTS_BEGIN (value, m_array, const, slot) { - if (slot.is_empty()) { - return false; - } - else if (slot.has_value(value, m_elements)) { - return true; - } - } - ITER_SLOTS_END; + return this->contains__impl(key, m_hash(key)); } /** - * Remove a value from the set-vector. The method assumes that the value exists. + * Deletes the key from the set. Returns true when the key existed in the set and is now removed. + * This might change the order of elements in the vector. + * + * This is similar to std::unordered_set::erase. */ - void remove(const T &value) + bool remove(const Key &key) { - BLI_assert(this->contains(value)); - ITER_SLOTS_BEGIN (value, m_array, , slot) { - if (slot.has_value(value, m_elements)) { - uint old_index = this->size() - 1; - uint new_index = slot.index(); + return this->remove_as(key); + } - if (new_index < old_index) { - m_elements[new_index] = std::move(m_elements[old_index]); - this->update_slot_index(m_elements[new_index], old_index, new_index); - } + /** + * Same as `remove`, but accepts other key types that are supported by the hash function. + */ + template bool remove_as(const ForwardKey &key) + { + return this->remove__impl(key, m_hash(key)); + } - destruct(m_elements + old_index); - slot.set_dummy(); - m_array.update__set_to_dummy(); - return; - } - } - ITER_SLOTS_END; + /** + * Deletes the key from the set. 
This invokes undefined behavior when the key is not in the set.
+   * It might change the order of elements in the vector.
+   */
+  void remove_contained(const Key &key)
+  {
+    this->remove_contained_as(key);
   }
 
   /**
-   * Get and remove the last element of the vector.
+   * Same as `remove_contained`, but accepts other key types that are supported by the hash
+   * function.
    */
-  T pop()
+  template void remove_contained_as(const ForwardKey &key)
   {
-    BLI_assert(this->size() > 0);
-    uint index_to_pop = this->size() - 1;
-    T value = std::move(m_elements[index_to_pop]);
-    destruct(m_elements + index_to_pop);
+    this->remove_contained__impl(key, m_hash(key));
+  }
 
-    ITER_SLOTS_BEGIN (value, m_array, , slot) {
-      if (slot.has_index(index_to_pop)) {
-        slot.set_dummy();
-        m_array.update__set_to_dummy();
-        return value;
-      }
-    }
-    ITER_SLOTS_END;
+  /**
+   * Delete and return a key from the set. This will remove the last element in the vector. The
+   * order of the remaining elements in the set is not changed.
+   */
+  Key pop()
+  {
+    return this->pop__impl();
   }
 
   /**
-   * Get the index of the value in the vector. It is assumed that the value is in the vector.
+   * Return the location of the key in the vector. It is assumed that the key is in the vector
+   * set. If this is not necessarily the case, use `index_of_try`.
    */
-  uint index(const T &value) const
+  uint32_t index_of(const Key &key) const
   {
-    BLI_assert(this->contains(value));
-    ITER_SLOTS_BEGIN (value, m_array, const, slot) {
-      if (slot.has_value(value, m_elements)) {
-        return slot.index();
-      }
-    }
-    ITER_SLOTS_END;
+    return this->index_of_as(key);
   }
 
   /**
-   * Get the index of the value in the vector. If it does not exist return -1.
+   * Same as `index_of`, but accepts other key types that are supported by the hash function.
*/ - int index_try(const T &value) const + template uint32_t index_of_as(const ForwardKey &key) const { - ITER_SLOTS_BEGIN (value, m_array, const, slot) { - if (slot.has_value(value, m_elements)) { - return slot.index(); - } - else if (slot.is_empty()) { - return -1; - } - } - ITER_SLOTS_END; + return this->index_of__impl(key, m_hash(key)); } /** - * Get the number of elements in the set-vector. + * Return the location of the key in the vector. If the key is not in the set, -1 is returned. + * If you know for sure that the key is in the set, it is better to use `index_of` instead. */ - uint size() const + int32_t index_of_try(const Key &key) const { - return m_array.slots_set(); + return (int32_t)this->index_of_try_as(key); } - bool is_empty() const + /** + * Same as `index_of_try`, but accepts other key types that are supported by the hash function. + */ + template int32_t index_of_try_as(const ForwardKey &key) const { - return this->size() == 0; + return this->index_of_try__impl(key, m_hash(key)); } - const T *begin() const + /** + * Get a pointer to the beginning of the array containing all keys. + */ + const Key *data() const { - return m_elements; + return m_keys; } - const T *end() const + const Key *begin() const { - return m_elements + this->size(); + return m_keys; } - const T &operator[](uint index) const + const Key *end() const + { + return m_keys + this->size(); + } + + /** + * Get the key stored at the given position in the vector. + */ + const Key &operator[](uint32_t index) const { BLI_assert(index <= this->size()); - return m_elements[index]; + return m_keys[index]; } - ArrayRef as_ref() const + operator ArrayRef() const + { + return ArrayRef(m_keys, this->size()); + } + + /** + * Get an ArrayRef referencing the keys vector. The referenced memory buffer is only valid as + * long as the vector set is not changed. + * + * The keys must not be changed, because this would change their hash value. 
+ */ + ArrayRef as_ref() const { return *this; } - operator ArrayRef() const + /** + * Print common statistics like size and collision count. This is useful for debugging purposes. + */ + void print_stats(StringRef name = "") const { - return ArrayRef(m_elements, this->size()); + HashTableStats stats(*this, this->as_ref()); + stats.print(); } - void print_stats() const + /** + * Returns the number of keys stored in the vector set. + */ + uint32_t size() const { - std::cout << "VectorSet at " << (void *)this << ":\n"; - std::cout << " Size: " << this->size() << "\n"; - std::cout << " Usable Slots: " << m_array.slots_usable() << "\n"; - std::cout << " Total Slots: " << m_array.slots_total() << "\n"; - std::cout << " Average Collisions: " << this->compute_average_collisions() << "\n"; + return m_occupied_and_removed_slots - m_removed_slots; } - private: - void update_slot_index(T &value, uint old_index, uint new_index) + /** + * Returns true if no keys are stored. + */ + bool is_empty() const { - ITER_SLOTS_BEGIN (value, m_array, , slot) { - int32_t &stored_index = slot.index_ref(); - if (stored_index == old_index) { - stored_index = new_index; - return; - } - } - ITER_SLOTS_END; + return m_occupied_and_removed_slots == m_removed_slots; } - template void add_new_in_slot(Slot &slot, ForwardT &&value) + /** + * Returns the number of available slots. This is mostly for debugging purposes. + */ + uint32_t capacity() const { - uint index = this->size(); - slot.set_index(index); - new (m_elements + index) T(std::forward(value)); - m_array.update__empty_to_set(); + return m_slots.size(); } - void ensure_can_add() + /** + * Returns the amount of removed slots in the set. This is mostly for debugging purposes. + */ + uint32_t removed_amount() const { - if (UNLIKELY(m_array.should_grow())) { - this->grow(this->size() + 1); + return m_removed_slots; + } + + /** + * Returns the bytes required per element. This is mostly for debugging purposes. 
+ */ + uint32_t size_per_element() const + { + return sizeof(Slot) + sizeof(Key); + } + + /** + * Returns the approximate memory requirements of the set in bytes. This is more correct for + * larger sets. + */ + uint32_t size_in_bytes() const + { + return sizeof(Slot) * m_slots.size() + sizeof(Key) * m_usable_slots; + } + + /** + * Potentially resize the vector set such that it can hold n elements without doing another grow. + */ + void reserve(uint32_t n) + { + if (m_usable_slots < n) { + this->realloc_and_reinsert(n); } } - BLI_NOINLINE void grow(uint min_usable_slots) + /** + * Get the number of collisions that the probing strategy has to go through to find the key or + * determine that it is not in the set. + */ + uint32_t count_collisions(const Key &key) const { - uint size = this->size(); + return this->count_collisions__impl(key, m_hash(key)); + } + + private: + BLI_NOINLINE void realloc_and_reinsert(uint32_t min_usable_slots) + { + uint32_t total_slots, usable_slots; + m_max_load_factor.compute_total_and_usable_slots( + SlotArray::inline_buffer_capacity(), min_usable_slots, &total_slots, &usable_slots); + uint32_t new_slot_mask = total_slots - 1; + + /* Optimize the case when the set was empty beforehand. We can avoid some copies here. 
*/ + if (this->size() == 0) { + m_slots.~Array(); + new (&m_slots) SlotArray(total_slots); + m_removed_slots = 0; + m_occupied_and_removed_slots = 0; + m_usable_slots = usable_slots; + m_slot_mask = new_slot_mask; + m_keys = this->allocate_keys_array(usable_slots); + return; + } - ArrayType new_array = m_array.init_reserved(min_usable_slots); - T *new_elements = this->allocate_elements_array(new_array.slots_usable()); + SlotArray new_slots(total_slots); - for (uint i : IndexRange(size)) { - this->add_after_grow(i, new_array); + for (Slot &slot : m_slots) { + if (slot.is_occupied()) { + this->add_after_grow_and_destruct_old(slot, new_slots, new_slot_mask); + } } - uninitialized_relocate_n(m_elements, size, new_elements); - this->deallocate_elements_array(m_elements); + Key *new_keys = this->allocate_keys_array(usable_slots); + uninitialized_relocate_n(m_keys, this->size(), new_keys); + this->deallocate_keys_array(m_keys); - m_array = std::move(new_array); - m_elements = new_elements; + /* All occupied slots have been destructed already and empty/removed slots are assumed to be + * trivially destructible. 
*/ + m_slots.clear_without_destruct(); + m_slots = std::move(new_slots); + m_keys = new_keys; + m_occupied_and_removed_slots -= m_removed_slots; + m_usable_slots = usable_slots; + m_removed_slots = 0; + m_slot_mask = new_slot_mask; } - void add_after_grow(uint index, ArrayType &new_array) + void add_after_grow_and_destruct_old(Slot &old_slot, + SlotArray &new_slots, + uint32_t new_slot_mask) { - const T &value = m_elements[index]; - ITER_SLOTS_BEGIN (value, new_array, , slot) { + const Key &key = m_keys[old_slot.index()]; + uint32_t hash = old_slot.get_hash(key, Hash()); + + SLOT_PROBING_BEGIN (ProbingStrategy, hash, new_slot_mask, slot_index) { + Slot &slot = new_slots[slot_index]; if (slot.is_empty()) { - slot.set_index(index); + slot.relocate_occupied_here(old_slot, hash); return; } } - ITER_SLOTS_END; + SLOT_PROBING_END(); } - float compute_average_collisions() const + template bool contains__impl(const ForwardKey &key, uint32_t hash) const { - if (this->size() == 0) { - return 0.0f; - } - - uint collisions_sum = 0; - for (const T &value : this->as_ref()) { - collisions_sum += this->count_collisions(value); + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + return false; + } + if (slot.contains(key, m_is_equal, hash, m_keys)) { + return true; + } } - return (float)collisions_sum / (float)this->size(); + VECTOR_SET_SLOT_PROBING_END(); } - uint count_collisions(const T &value) const + template void add_new__impl(ForwardKey &&key, uint32_t hash) { - uint collisions = 0; - ITER_SLOTS_BEGIN (value, m_array, const, slot) { - if (slot.is_empty() || slot.has_value(value, m_elements)) { - return collisions; + BLI_assert(!this->contains_as(key)); + + this->ensure_can_add(); + + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.is_empty()) { + uint32_t index = this->size(); + new (m_keys + index) Key(std::forward(key)); + slot.occupy(index, hash); + m_occupied_and_removed_slots++; + return; } - collisions++; } - ITER_SLOTS_END; + 
VECTOR_SET_SLOT_PROBING_END(); } - template void add_new__impl(ForwardT &&value) + template bool add__impl(ForwardKey &&key, uint32_t hash) { - BLI_assert(!this->contains(value)); this->ensure_can_add(); - ITER_SLOTS_BEGIN (value, m_array, , slot) { + + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { if (slot.is_empty()) { - this->add_new_in_slot(slot, std::forward(value)); - return; + uint32_t index = this->size(); + new (m_keys + index) Key(std::forward(key)); + m_occupied_and_removed_slots++; + slot.occupy(index, hash); + return true; + } + if (slot.contains(key, m_is_equal, hash, m_keys)) { + return false; } } - ITER_SLOTS_END; + VECTOR_SET_SLOT_PROBING_END(); } - template bool add__impl(ForwardT &&value) + template uint32_t index_of__impl(const ForwardKey &key, uint32_t hash) const { - this->ensure_can_add(); - ITER_SLOTS_BEGIN (value, m_array, , slot) { + BLI_assert(this->contains_as(key)); + + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash, m_keys)) { + return slot.index(); + } + } + VECTOR_SET_SLOT_PROBING_END(); + } + + template + int32_t index_of_try__impl(const ForwardKey &key, uint32_t hash) const + { + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash, m_keys)) { + return (int32_t)slot.index(); + } if (slot.is_empty()) { - this->add_new_in_slot(slot, std::forward(value)); + return -1; + } + } + VECTOR_SET_SLOT_PROBING_END(); + } + + Key pop__impl() + { + BLI_assert(this->size() > 0); + + uint32_t index_to_pop = this->size() - 1; + Key key = std::move(m_keys[index_to_pop]); + m_keys[index_to_pop].~Key(); + uint32_t hash = m_hash(key); + + m_removed_slots++; + + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.has_index(index_to_pop)) { + slot.remove(); + return key; + } + } + VECTOR_SET_SLOT_PROBING_END(); + } + + template bool remove__impl(const ForwardKey &key, uint32_t hash) + { + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash, 
m_keys)) { + this->remove_key_internal(slot); return true; } - else if (slot.has_value(value, m_elements)) { + if (slot.is_empty()) { return false; } } - ITER_SLOTS_END; + VECTOR_SET_SLOT_PROBING_END(); } - T *allocate_elements_array(uint size) + template void remove_contained__impl(const ForwardKey &key, uint32_t hash) { - return (T *)m_array.allocator().allocate_aligned((uint)sizeof(T) * size, alignof(T), __func__); + BLI_assert(this->contains_as(key)); + + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash, m_keys)) { + this->remove_key_internal(slot); + return; + } + } + VECTOR_SET_SLOT_PROBING_END(); } - void deallocate_elements_array(T *elements) + void remove_key_internal(Slot &slot) { - m_array.allocator().deallocate(elements); + uint32_t index_to_remove = slot.index(); + uint32_t size = this->size(); + uint32_t last_element_index = size - 1; + + if (index_to_remove < last_element_index) { + m_keys[index_to_remove] = std::move(m_keys[last_element_index]); + this->update_slot_index(m_keys[index_to_remove], last_element_index, index_to_remove); + } + + m_keys[last_element_index].~Key(); + slot.remove(); + m_removed_slots++; + return; } -}; -#undef ITER_SLOTS_BEGIN -#undef ITER_SLOTS_END + void update_slot_index(const Key &key, uint32_t old_index, uint32_t new_index) + { + uint32_t hash = m_hash(key); + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.has_index(old_index)) { + slot.update_index(new_index); + return; + } + } + VECTOR_SET_SLOT_PROBING_END(); + } + + template + uint32_t count_collisions__impl(const ForwardKey &key, uint32_t hash) const + { + uint32_t collisions = 0; + + VECTOR_SET_SLOT_PROBING_BEGIN (hash, slot) { + if (slot.contains(key, m_is_equal, hash, m_keys)) { + return collisions; + } + if (slot.is_empty()) { + return collisions; + } + collisions++; + } + VECTOR_SET_SLOT_PROBING_END(); + } + + void ensure_can_add() + { + if (m_occupied_and_removed_slots >= m_usable_slots) { + 
this->realloc_and_reinsert(this->size() + 1); + BLI_assert(m_occupied_and_removed_slots < m_usable_slots); + } + } + + Key *allocate_keys_array(uint32_t size) + { + return (Key *)m_slots.allocator().allocate((uint32_t)sizeof(Key) * size, alignof(Key), AT); + } + + void deallocate_keys_array(Key *keys) + { + m_slots.allocator().deallocate(keys); + } +}; } // namespace BLI diff --git a/source/blender/blenlib/BLI_vector_set_slots.hh b/source/blender/blenlib/BLI_vector_set_slots.hh new file mode 100644 index 00000000000..13d6acb05c5 --- /dev/null +++ b/source/blender/blenlib/BLI_vector_set_slots.hh @@ -0,0 +1,171 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __BLI_VECTOR_SET_SLOTS_HH__ +#define __BLI_VECTOR_SET_SLOTS_HH__ + +/** \file + * \ingroup bli + * + * This file contains slot types that are supposed to be used with BLI::VectorSet. + * + * Every slot type has to be able to hold an integer index and state information. + * A vector set slot has three possible states: empty, occupied and removed. + * + * A vector slot type has to implement a couple of methods that are explained in + * SimpleVectorSetSlot. + * A vector slot type is assumed to be trivially destructible, when it is in empty or removed + * state. 
+ *
+ * Possible Improvements:
+ * - Implement a slot type that stores the hash.
+ * - Implement a slot type that stores the key. That means that the key would be stored in two
+ *   places: the key vector and the slot itself. Maybe storing the key in the slot as well, can
+ *   result in better performance, due to better cache utilization.
+ */
+
+#include "BLI_sys_types.h"
+
+namespace BLI {
+
+/**
+ * The simplest possible vector set slot. It stores the index and state in a signed integer. If the
+ * value is negative, it represents empty or removed state. Otherwise it represents the index.
+ */
+template class SimpleVectorSetSlot {
+ private:
+#define s_is_empty -1
+#define s_is_removed -2
+
+  /**
+   * After the default constructor has run, the slot has to be in the empty state.
+   */
+  int32_t m_state = s_is_empty;
+
+ public:
+  /**
+   * Return true if this slot contains an index to a key.
+   */
+  bool is_occupied() const
+  {
+    return m_state >= 0;
+  }
+
+  /**
+   * Return true if the slot is empty, i.e. it does not contain an index.
+   */
+  bool is_empty() const
+  {
+    return m_state == s_is_empty;
+  }
+
+  /**
+   * Return the stored index. It is assumed that the slot is occupied.
+   */
+  uint32_t index() const
+  {
+    BLI_assert(this->is_occupied());
+    return (uint32_t)m_state;
+  }
+
+  /**
+   * Return true if the slot contains the given key, i.e. its index points to a key that compares
+   * equal to it. The hash can be used by other implementations to determine inequality faster.
+   */
+  template
+  bool contains(const ForwardKey &key,
+                const IsEqual &is_equal,
+                uint32_t UNUSED(hash),
+                const Key *keys) const
+  {
+    if (m_state >= 0) {
+      return is_equal(key, keys[m_state]);
+    }
+    return false;
+  }
+
+  /**
+   * Move the other slot into this slot and destruct it. We do destruction here, because this way
+   * we can avoid a comparison with the state, since we know the slot is occupied. For this
+   * specific slot implementation, this does not make a difference.
+ */ + void relocate_occupied_here(SimpleVectorSetSlot &other, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + BLI_assert(other.is_occupied()); + m_state = other.m_state; + } + + /** + * Change the state of this slot from empty/removed to occupied. The hash can be used by other + * slot implementations. + */ + void occupy(uint32_t index, uint32_t UNUSED(hash)) + { + BLI_assert(!this->is_occupied()); + m_state = (int32_t)index; + } + + /** + * The key has changed its position in the vector, so the index has to be updated. This method + * can assume that the slot is currently occupied. + */ + void update_index(uint32_t index) + { + BLI_assert(this->is_occupied()); + m_state = (int32_t)index; + } + + /** + * Change the state of this slot from occupied to removed. + */ + void remove() + { + BLI_assert(this->is_occupied()); + m_state = s_is_removed; + } + + /** + * Return true if this slot is currently occupied and its corresponding key has the given index. + */ + bool has_index(uint32_t index) const + { + return (uint32_t)m_state == index; + } + + /** + * Return the hash of the currently stored key. In this simple set slot implementation, we just + * compute the hash here. Other implementations might store the hash in the slot instead. 
+ */ + template uint32_t get_hash(const Key &key, const Hash &hash) const + { + BLI_assert(this->is_occupied()); + return hash(key); + } + +#undef s_is_empty +#undef s_is_removed +}; + +template struct DefaultVectorSetSlot; + +template struct DefaultVectorSetSlot { + using type = SimpleVectorSetSlot; +}; + +} // namespace BLI + +#endif /* __BLI_VECTOR_SET_SLOTS_HH__ */ diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt index 73b35a17ec2..79a65bbb98d 100644 --- a/source/blender/blenlib/CMakeLists.txt +++ b/source/blender/blenlib/CMakeLists.txt @@ -187,6 +187,7 @@ set(SRC BLI_hash_md5.h BLI_hash_mm2a.h BLI_hash_mm3.h + BLI_hash_tables.hh BLI_heap.h BLI_heap_simple.h BLI_index_mask.hh @@ -204,6 +205,7 @@ set(SRC BLI_listbase.h BLI_listbase_wrapper.hh BLI_map.hh + BLI_map_slots.hh BLI_math.h BLI_math_base.h BLI_math_bits.h @@ -224,16 +226,17 @@ set(SRC BLI_memory_utils.hh BLI_mempool.h BLI_noise.h - BLI_open_addressing.hh BLI_optional.hh BLI_path_util.h BLI_polyfill_2d.h BLI_polyfill_2d_beautify.h + BLI_probing_strategies.hh BLI_quadric.h BLI_rand.h BLI_rect.h BLI_scanfill.h BLI_set.hh + BLI_set_slots.hh BLI_smallhash.h BLI_sort.h BLI_sort_utils.h @@ -242,7 +245,6 @@ set(SRC BLI_strict_flags.h BLI_string.h BLI_string_cursor_utf8.h - BLI_string_map.hh BLI_string_ref.hh BLI_string_utf8.h BLI_string_utils.h @@ -261,6 +263,7 @@ set(SRC BLI_uvproject.h BLI_vector.hh BLI_vector_set.hh + BLI_vector_set_slots.hh BLI_vfontdata.h BLI_voronoi_2d.h BLI_voxel.h diff --git a/source/blender/blenlib/intern/BLI_index_range.cc b/source/blender/blenlib/intern/BLI_index_range.cc index fefb6e6598e..0fa87cf854d 100644 --- a/source/blender/blenlib/intern/BLI_index_range.cc +++ b/source/blender/blenlib/intern/BLI_index_range.cc @@ -50,7 +50,7 @@ ArrayRef IndexRange::as_array_ref() const } arrays.append(std::move(new_array)); - current_array = arrays.last().begin(); + current_array = arrays.last().data(); 
std::atomic_thread_fence(std::memory_order_seq_cst); current_array_size = new_size; diff --git a/source/blender/blenlib/intern/math_bits_inline.c b/source/blender/blenlib/intern/math_bits_inline.c index a6883c2aaba..8f8f257f1e7 100644 --- a/source/blender/blenlib/intern/math_bits_inline.c +++ b/source/blender/blenlib/intern/math_bits_inline.c @@ -77,8 +77,7 @@ MINLINE int bitscan_reverse_i(int a) MINLINE unsigned int bitscan_reverse_clear_uint(unsigned int *a) { unsigned int i = bitscan_reverse_uint(*a); - /* TODO(sergey): This could probably be optimized. */ - *a &= ~(1 << (sizeof(unsigned int) * 8 - i - 1)); + *a &= ~(0x80000000 >> i); return i; } @@ -97,10 +96,10 @@ MINLINE unsigned int highest_order_bit_uint(unsigned int n) MINLINE unsigned short highest_order_bit_s(unsigned short n) { - n |= (n >> 1); - n |= (n >> 2); - n |= (n >> 4); - n |= (n >> 8); + n |= (unsigned short)(n >> 1); + n |= (unsigned short)(n >> 2); + n |= (unsigned short)(n >> 4); + n |= (unsigned short)(n >> 8); return (unsigned short)(n - (n >> 1)); } diff --git a/source/blender/depsgraph/intern/node/deg_node_id.cc b/source/blender/depsgraph/intern/node/deg_node_id.cc index 4a7e5c4568b..984873fbcac 100644 --- a/source/blender/depsgraph/intern/node/deg_node_id.cc +++ b/source/blender/depsgraph/intern/node/deg_node_id.cc @@ -66,6 +66,13 @@ bool IDNode::ComponentIDKey::operator==(const ComponentIDKey &other) const return type == other.type && STREQ(name, other.name); } +uint32_t IDNode::ComponentIDKey::hash() const +{ + const int type_as_int = static_cast(type); + return BLI_ghashutil_combine_hash(BLI_ghashutil_uinthash(type_as_int), + BLI_ghashutil_strhash_p(name)); +} + /* Initialize 'id' node - from pointer data given. 
*/ void IDNode::init(const ID *id, const char *UNUSED(subdata)) { diff --git a/source/blender/depsgraph/intern/node/deg_node_id.h b/source/blender/depsgraph/intern/node/deg_node_id.h index c7663b50c6f..1e315195c1a 100644 --- a/source/blender/depsgraph/intern/node/deg_node_id.h +++ b/source/blender/depsgraph/intern/node/deg_node_id.h @@ -50,6 +50,7 @@ const char *linkedStateAsString(eDepsNode_LinkedState_Type linked_state); struct IDNode : public Node { struct ComponentIDKey { ComponentIDKey(NodeType type, const char *name = ""); + uint32_t hash() const; bool operator==(const ComponentIDKey &other) const; NodeType type; @@ -115,16 +116,3 @@ struct IDNode : public Node { }; } // namespace DEG - -namespace BLI { - -template<> struct DefaultHash { - uint32_t operator()(const DEG::IDNode::ComponentIDKey &key) const - { - const int type_as_int = static_cast(key.type); - return BLI_ghashutil_combine_hash(BLI_ghashutil_uinthash(type_as_int), - BLI_ghashutil_strhash_p(key.name)); - } -}; - -} // namespace BLI diff --git a/source/blender/functions/FN_cpp_type.hh b/source/blender/functions/FN_cpp_type.hh index 10e95c0341e..df6da7a3f47 100644 --- a/source/blender/functions/FN_cpp_type.hh +++ b/source/blender/functions/FN_cpp_type.hh @@ -534,22 +534,20 @@ namespace CPPTypeUtil { template void construct_default_cb(void *ptr) { - BLI::construct_default((T *)ptr); + new (ptr) T; } template void construct_default_n_cb(void *ptr, uint n) { - for (uint i = 0; i < n; i++) { - BLI::construct_default((T *)ptr + i); - } + BLI::default_construct_n((T *)ptr, n); } template void construct_default_indices_cb(void *ptr, IndexMask index_mask) { - index_mask.foreach_index([&](uint i) { BLI::construct_default((T *)ptr + i); }); + index_mask.foreach_index([&](uint i) { new ((T *)ptr + i) T; }); } template void destruct_cb(void *ptr) { - BLI::destruct((T *)ptr); + ((T *)ptr)->~T(); } template void destruct_n_cb(void *ptr, uint n) { @@ -557,7 +555,8 @@ template void destruct_n_cb(void *ptr, uint n) 
} template void destruct_indices_cb(void *ptr, IndexMask index_mask) { - index_mask.foreach_index([&](uint i) { BLI::destruct((T *)ptr + i); }); + T *ptr_ = (T *)ptr; + index_mask.foreach_index([&](uint i) { ptr_[i].~T(); }); } template void copy_to_initialized_cb(const void *src, void *dst) @@ -601,11 +600,15 @@ void copy_to_uninitialized_indices_cb(const void *src, void *dst, IndexMask inde template void relocate_to_initialized_cb(void *src, void *dst) { - BLI::relocate((T *)src, (T *)dst); + T *src_ = (T *)src; + T *dst_ = (T *)dst; + + *dst_ = std::move(*src_); + src_->~T(); } template void relocate_to_initialized_n_cb(void *src, void *dst, uint n) { - BLI::relocate_n((T *)src, n, (T *)dst); + BLI::initialized_relocate_n((T *)src, n, (T *)dst); } template void relocate_to_initialized_indices_cb(void *src, void *dst, IndexMask index_mask) @@ -621,7 +624,11 @@ void relocate_to_initialized_indices_cb(void *src, void *dst, IndexMask index_ma template void relocate_to_uninitialized_cb(void *src, void *dst) { - BLI::uninitialized_relocate((T *)src, (T *)dst); + T *src_ = (T *)src; + T *dst_ = (T *)dst; + + new (dst_) T(std::move(*src_)); + src_->~T(); } template void relocate_to_uninitialized_n_cb(void *src, void *dst, uint n) { -- cgit v1.2.3