diff options
Diffstat (limited to 'source')
-rw-r--r-- | source/blender/blenlib/BLI_hash_cxx.h | 100 | ||||
-rw-r--r-- | source/blender/blenlib/BLI_map.h | 596 | ||||
-rw-r--r-- | source/blender/blenlib/BLI_math_base.h | 2 | ||||
-rw-r--r-- | source/blender/blenlib/BLI_open_addressing.h | 302 | ||||
-rw-r--r-- | source/blender/blenlib/BLI_set.h | 470 | ||||
-rw-r--r-- | source/blender/blenlib/BLI_set_vector.h | 366 | ||||
-rw-r--r-- | source/blender/blenlib/BLI_string_map.h | 410 | ||||
-rw-r--r-- | source/blender/blenlib/CMakeLists.txt | 6 | ||||
-rw-r--r-- | source/blender/blenlib/intern/math_base_inline.c | 15 |
9 files changed, 2267 insertions, 0 deletions
diff --git a/source/blender/blenlib/BLI_hash_cxx.h b/source/blender/blenlib/BLI_hash_cxx.h new file mode 100644 index 00000000000..b9a53f29a04 --- /dev/null +++ b/source/blender/blenlib/BLI_hash_cxx.h @@ -0,0 +1,100 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/** \file + * \ingroup bli + * + * This file provides default hash functions for some primitive types. The hash functions can be + * used by containers such as Map and Set. + */ + +#pragma once + +#include <functional> +#include <string> +#include <utility> + +#include "BLI_utildefines.h" +#include "BLI_math_base.h" + +namespace BLI { + +template<typename T> struct DefaultHash { +}; + +#define TRIVIAL_DEFAULT_INT_HASH(TYPE) \ + template<> struct DefaultHash<TYPE> { \ + uint32_t operator()(TYPE value) const \ + { \ + return (uint32_t)value; \ + } \ + } + +/** + * Cannot make any assumptions about the distribution of keys, so use a trivial hash function by + * default. The hash table implementations are designed to take all bits of the hash into account + * to avoid really bad behavior when the lower bits are all zero. Special hash functions can be + * implemented when more knowledge about a specific key distribution is available. + */ +TRIVIAL_DEFAULT_INT_HASH(int8_t); +TRIVIAL_DEFAULT_INT_HASH(uint8_t); +TRIVIAL_DEFAULT_INT_HASH(int16_t); +TRIVIAL_DEFAULT_INT_HASH(uint16_t); +TRIVIAL_DEFAULT_INT_HASH(int32_t); +TRIVIAL_DEFAULT_INT_HASH(uint32_t); +TRIVIAL_DEFAULT_INT_HASH(int64_t); + +template<> struct DefaultHash<float> { + uint32_t operator()(float value) const + { + return *(uint32_t *)&value; + } +}; + +template<> struct DefaultHash<std::string> { + uint32_t operator()(const std::string &value) const + { + uint32_t hash = 5381; + for (char c : value) { + hash = hash * 33 + c; + } + return hash; + } +}; + +/** + * While we cannot guarantee that the lower 3 bits or a pointer are zero, it is safe to assume + * this in the general case. MEM_malloc only returns 8 byte aligned addresses on 64-bit systems. + */ +template<typename T> struct DefaultHash<T *> { + uint32_t operator()(const T *value) const + { + uintptr_t ptr = POINTER_AS_UINT(value); + uint32_t hash = (uint32_t)(ptr >> 3); + return hash; + } +}; + +template<typename T1, typename T2> struct DefaultHash<std::pair<T1, T2>> { + uint32_t operator()(const std::pair<T1, T2> &value) const + { + uint32_t hash1 = DefaultHash<T1>{}(value.first); + uint32_t hash2 = DefaultHash<T2>{}(value.second); + return hash1 ^ (hash2 * 33); + } +}; + +} // namespace BLI diff --git a/source/blender/blenlib/BLI_map.h b/source/blender/blenlib/BLI_map.h new file mode 100644 index 00000000000..5328dac1106 --- /dev/null +++ b/source/blender/blenlib/BLI_map.h @@ -0,0 +1,596 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/** \file + * \ingroup bli + * + * This file provides a map implementation that uses open addressing with probing. + * + * The key and value objects are stored directly in the hash table to avoid indirect memory + * lookups. Keys and values are stored in groups of four to avoid wasting memory due to padding. + */ + +#pragma once + +#include "BLI_hash_cxx.h" +#include "BLI_array_ref.h" +#include "BLI_open_addressing.h" + +namespace BLI { + +// clang-format off + +#define ITER_SLOTS_BEGIN(KEY, ARRAY, OPTIONAL_CONST, R_ITEM, R_OFFSET) \ + uint32_t hash = DefaultHash<KeyT>{}(KEY); \ + uint32_t perturb = hash; \ + while (true) { \ + uint32_t item_index = (hash & ARRAY.slot_mask()) >> OFFSET_SHIFT; \ + uint8_t R_OFFSET = hash & OFFSET_MASK; \ + uint8_t initial_offset = R_OFFSET; \ + OPTIONAL_CONST Item &R_ITEM = ARRAY.item(item_index); \ + do { + +#define ITER_SLOTS_END(R_OFFSET) \ + R_OFFSET = (R_OFFSET + 1) & OFFSET_MASK; \ + } while (R_OFFSET != initial_offset); \ + perturb >>= 5; \ + hash = hash * 5 + 1 + perturb; \ + } ((void)0) + +// clang-format on + +template<typename KeyT, typename ValueT, typename Allocator = GuardedAllocator> class Map { + private: + static constexpr uint OFFSET_MASK = 3; + static constexpr uint OFFSET_SHIFT = 2; + + class Item { + private: + static constexpr uint8_t IS_EMPTY = 0; + static constexpr uint8_t IS_SET = 1; + static constexpr uint8_t IS_DUMMY = 2; + + uint8_t m_status[4]; + char m_keys[4 * sizeof(KeyT)]; + char m_values[4 * sizeof(ValueT)]; + + public: + static constexpr uint slots_per_item = 4; + + Item() + { + for (uint offset = 0; offset < 4; offset++) { + m_status[offset] = IS_EMPTY; + } + } + + ~Item() + { + for (uint offset = 0; offset < 4; offset++) { + if (m_status[offset] == IS_SET) { + this->key(offset)->~KeyT(); + this->value(offset)->~ValueT(); + } + } + } + + Item(const Item &other) + { + for (uint offset = 0; offset < 4; offset++) { + uint8_t status = other.m_status[offset]; + m_status[offset] = status; + if (status == IS_SET) { + new (this->key(offset)) KeyT(*other.key(offset)); + new (this->value(offset)) ValueT(*other.value(offset)); + } + } + } + + Item(Item &&other) noexcept + { + for (uint offset = 0; offset < 4; offset++) { + uint8_t status = other.m_status[offset]; + m_status[offset] = status; + if (status == IS_SET) { + new (this->key(offset)) KeyT(std::move(*other.key(offset))); + new (this->value(offset)) ValueT(std::move(*other.value(offset))); + } + } + } + + bool has_key(uint offset, const KeyT &key) const + { + return m_status[offset] == IS_SET && key == *this->key(offset); + } + + bool is_set(uint offset) const + { + return m_status[offset] == IS_SET; + } + + bool is_empty(uint offset) const + { + return m_status[offset] == IS_EMPTY; + } + + KeyT *key(uint offset) const + { + return (KeyT *)(m_keys + offset * sizeof(KeyT)); + } + + ValueT *value(uint offset) const + { + return (ValueT *)(m_values + offset * sizeof(ValueT)); + } + + void copy_in(uint offset, const KeyT &key, const ValueT &value) + { + BLI_assert(m_status[offset] != IS_SET); + m_status[offset] = IS_SET; + new (this->key(offset)) KeyT(key); + new (this->value(offset)) ValueT(value); + } + + void move_in(uint offset, KeyT &key, ValueT &value) + { + BLI_assert(m_status[offset] != IS_SET); + m_status[offset] = IS_SET; + new (this->key(offset)) KeyT(std::move(key)); + new (this->value(offset)) ValueT(std::move(value)); + } + + void set_dummy(uint offset) + { + BLI_assert(m_status[offset] == IS_SET); + m_status[offset] = IS_DUMMY; + destruct(this->key(offset)); + destruct(this->value(offset)); + } + }; + + using ArrayType = OpenAddressingArray<Item, 1, Allocator>; + ArrayType m_array; + + public: + Map() = default; + + /** + * Insert a new key-value-pair in the map. + * Asserts when the key existed before. + */ + void add_new(const KeyT &key, const ValueT &value) + { + BLI_assert(!this->contains(key)); + this->ensure_can_add(); + + ITER_SLOTS_BEGIN (key, m_array, , item, offset) { + if (item.is_empty(offset)) { + item.copy_in(offset, key, value); + m_array.update__empty_to_set(); + return; + } + } + ITER_SLOTS_END(offset); + } + + /** + * Insert a new key-value-pair in the map if the key does not exist yet. + * Returns true when the pair was newly inserted, otherwise false. + */ + bool add(const KeyT &key, const ValueT &value) + { + this->ensure_can_add(); + + ITER_SLOTS_BEGIN (key, m_array, , item, offset) { + if (item.is_empty(offset)) { + item.copy_in(offset, key, value); + m_array.update__empty_to_set(); + return true; + } + else if (item.has_key(offset, key)) { + return false; + } + } + ITER_SLOTS_END(offset); + } + + /** + * Remove the key from the map. + * Asserts when the key does not exist in the map. + */ + void remove(const KeyT &key) + { + BLI_assert(this->contains(key)); + ITER_SLOTS_BEGIN (key, m_array, , item, offset) { + if (item.has_key(offset, key)) { + item.set_dummy(offset); + m_array.update__set_to_dummy(); + return; + } + } + ITER_SLOTS_END(offset); + } + + /** + * Get the value for the given key and remove it from the map. + * Asserts when the key does not exist in the map. + */ + ValueT pop(const KeyT &key) + { + BLI_assert(this->contains(key)); + ITER_SLOTS_BEGIN (key, m_array, , item, offset) { + if (item.has_key(offset, key)) { + ValueT value = std::move(*item.value(offset)); + item.set_dummy(offset); + m_array.update__set_to_dummy(); + return value; + } + } + ITER_SLOTS_END(offset); + } + + /** + * Returns true when the key exists in the map, otherwise false. + */ + bool contains(const KeyT &key) const + { + ITER_SLOTS_BEGIN (key, m_array, const, item, offset) { + if (item.is_empty(offset)) { + return false; + } + else if (item.has_key(offset, key)) { + return true; + } + } + ITER_SLOTS_END(offset); + } + + /** + * Check if the key exists in the map. + * If it does exist, call the modify function with a reference to the corresponding value. + * If it does not exist, call the create function and insert a new key-value-pair. + * Returns true when a new pair was inserted, otherwise false. + */ + template<typename CreateValueF, typename ModifyValueF> + bool add_or_modify(const KeyT &key, + const CreateValueF &create_value, + const ModifyValueF &modify_value) + { + this->ensure_can_add(); + + ITER_SLOTS_BEGIN (key, m_array, , item, offset) { + if (item.is_empty(offset)) { + item.copy_in(offset, key, create_value()); + m_array.update__empty_to_set(); + return true; + } + else if (item.has_key(offset, key)) { + modify_value(*item.value(offset)); + return false; + } + } + ITER_SLOTS_END(offset); + } + + /** + * Similar to add, but overrides the value for the key when it exists already. + */ + bool add_override(const KeyT &key, const ValueT &value) + { + return this->add_or_modify( + key, [&value]() { return value; }, [&value](ValueT &old_value) { old_value = value; }); + } + + /** + * Check if the key exists in the map. + * Return a pointer to the value, when it exists. + * Otherwise return nullptr. + */ + const ValueT *lookup_ptr(const KeyT &key) const + { + ITER_SLOTS_BEGIN (key, m_array, const, item, offset) { + if (item.is_empty(offset)) { + return nullptr; + } + else if (item.has_key(offset, key)) { + return item.value(offset); + } + } + ITER_SLOTS_END(offset); + } + + /** + * Lookup the value that corresponds to the key. + * Asserts when the key does not exist. + */ + const ValueT &lookup(const KeyT &key) const + { + const ValueT *ptr = this->lookup_ptr(key); + BLI_assert(ptr != nullptr); + return *ptr; + } + + ValueT *lookup_ptr(const KeyT &key) + { + const Map *const_this = this; + return const_cast<ValueT *>(const_this->lookup_ptr(key)); + } + + ValueT &lookup(const KeyT &key) + { + const Map *const_this = this; + return const_cast<ValueT &>(const_this->lookup(key)); + } + + /** + * Check if the key exists in the map. + * If it does, return a copy of the value. + * Otherwise, return the default value. + */ + ValueT lookup_default(const KeyT &key, ValueT default_value) const + { + const ValueT *ptr = this->lookup_ptr(key); + if (ptr != nullptr) { + return *ptr; + } + else { + return default_value; + } + } + + /** + * Return the value that corresponds to the given key. + * If it does not exist yet, create and insert it first. + */ + template<typename CreateValueF> + ValueT &lookup_or_add(const KeyT &key, const CreateValueF &create_value) + { + this->ensure_can_add(); + + ITER_SLOTS_BEGIN (key, m_array, , item, offset) { + if (item.is_empty(offset)) { + item.copy_in(offset, key, create_value()); + m_array.update__empty_to_set(); + return *item.value(offset); + } + else if (item.has_key(offset, key)) { + return *item.value(offset); + } + } + ITER_SLOTS_END(offset); + } + + /** + * Get the number of elements in the map. + */ + uint32_t size() const + { + return m_array.slots_set(); + } + + void print_table() const + { + std::cout << "Hash Table:\n"; + std::cout << " Size: " << m_array.slots_set() << '\n'; + std::cout << " Capacity: " << m_array.slots_total() << '\n'; + uint32_t item_index = 0; + for (const Item &item : m_array) { + std::cout << " Item: " << item_index++ << '\n'; + for (uint offset = 0; offset < 4; offset++) { + std::cout << " " << offset << " \t"; + if (item.is_empty(offset)) { + std::cout << " <empty>\n"; + } + else if (item.is_set(offset)) { + const KeyT &key = *item.key(offset); + const ValueT &value = *item.value(offset); + uint32_t collisions = this->count_collisions(value); + std::cout << " " << key << " -> " << value << " \t Collisions: " << collisions + << '\n'; + } + else if (item.is_dummy(offset)) { + std::cout << " <dummy>\n"; + } + } + } + } + + template<typename SubIterator> class BaseIterator { + protected: + const Map *m_map; + uint32_t m_slot; + + public: + BaseIterator(const Map *map, uint32_t slot) : m_map(map), m_slot(slot) + { + } + + BaseIterator &operator++() + { + m_slot = m_map->next_slot(m_slot + 1); + return *this; + } + + friend bool operator==(const BaseIterator &a, const BaseIterator &b) + { + BLI_assert(a.m_map == b.m_map); + return a.m_slot == b.m_slot; + } + + friend bool operator!=(const BaseIterator &a, const BaseIterator &b) + { + return !(a == b); + } + + SubIterator begin() const + { + return SubIterator(m_map, m_map->next_slot(0)); + } + + SubIterator end() const + { + return SubIterator(m_map, m_map->m_array.slots_total()); + } + }; + + class KeyIterator final : public BaseIterator<KeyIterator> { + public: + KeyIterator(const Map *map, uint32_t slot) : BaseIterator<KeyIterator>(map, slot) + { + } + + const KeyT &operator*() const + { + uint32_t item_index = this->m_slot >> OFFSET_SHIFT; + uint offset = this->m_slot & OFFSET_MASK; + const Item &item = this->m_map->m_array.item(item_index); + BLI_assert(item.is_set(offset)); + return *item.key(offset); + } + }; + + class ValueIterator final : public BaseIterator<ValueIterator> { + public: + ValueIterator(const Map *map, uint32_t slot) : BaseIterator<ValueIterator>(map, slot) + { + } + + ValueT &operator*() const + { + uint32_t item_index = this->m_slot >> OFFSET_SHIFT; + uint offset = this->m_slot & OFFSET_MASK; + const Item &item = this->m_map->m_array.item(item_index); + BLI_assert(item.is_set(offset)); + return *item.value(offset); + } + }; + + class ItemIterator final : public BaseIterator<ItemIterator> { + public: + ItemIterator(const Map *map, uint32_t slot) : BaseIterator<ItemIterator>(map, slot) + { + } + + struct UserItem { + const KeyT &key; + ValueT &value; + + friend std::ostream &operator<<(std::ostream &stream, const Item &item) + { + stream << item.key << " -> " << item.value; + return stream; + } + }; + + UserItem operator*() const + { + uint32_t item_index = this->m_slot >> OFFSET_SHIFT; + uint offset = this->m_slot & OFFSET_MASK; + const Item &item = this->m_map->m_array.item(item_index); + BLI_assert(item.is_set(offset)); + return {*item.key(offset), *item.value(offset)}; + } + }; + + template<typename SubIterator> friend class BaseIterator; + + /** + * Iterate over all keys in the map. + */ + KeyIterator keys() const + { + return KeyIterator(this, 0); + } + + /** + * Iterate over all values in the map. + */ + ValueIterator values() const + { + return ValueIterator(this, 0); + } + + /** + * Iterate over all key-value-pairs in the map. + * They can be accessed with item.key and item.value. + */ + ItemIterator items() const + { + return ItemIterator(this, 0); + } + + private: + uint32_t next_slot(uint32_t slot) const + { + for (; slot < m_array.slots_total(); slot++) { + uint32_t item_index = slot >> OFFSET_SHIFT; + uint offset = slot & OFFSET_MASK; + const Item &item = m_array.item(item_index); + if (item.is_set(offset)) { + return slot; + } + } + return slot; + } + + uint32_t count_collisions(const KeyT &key) const + { + uint32_t collisions = 0; + ITER_SLOTS_BEGIN (key, m_array, const, item, offset) { + if (item.is_empty(offset) || item.has_key(offset, key)) { + return collisions; + } + collisions++; + } + ITER_SLOTS_END(offset); + } + + void ensure_can_add() + { + if (UNLIKELY(m_array.should_grow())) { + this->grow(this->size() + 1); + } + } + + BLI_NOINLINE void grow(uint32_t min_usable_slots) + { + ArrayType new_array = m_array.init_reserved(min_usable_slots); + for (Item &old_item : m_array) { + for (uint offset = 0; offset < 4; offset++) { + if (old_item.is_set(offset)) { + this->add_after_grow(*old_item.key(offset), *old_item.value(offset), new_array); + } + } + } + m_array = std::move(new_array); + } + + void add_after_grow(KeyT &key, ValueT &value, ArrayType &new_array) + { + ITER_SLOTS_BEGIN (key, new_array, , item, offset) { + if (item.is_empty(offset)) { + item.move_in(offset, key, value); + return; + } + } + ITER_SLOTS_END(offset); + } +}; + +#undef ITER_SLOTS_BEGIN +#undef ITER_SLOTS_END + +} // namespace BLI diff --git a/source/blender/blenlib/BLI_math_base.h b/source/blender/blenlib/BLI_math_base.h index 7b770f2dd3e..4f841f75d3a 100644 --- a/source/blender/blenlib/BLI_math_base.h +++ b/source/blender/blenlib/BLI_math_base.h @@ -158,6 +158,8 @@ MINLINE int power_of_2_min_i(int n); MINLINE unsigned int power_of_2_max_u(unsigned int x); MINLINE unsigned int power_of_2_min_u(unsigned int x); +MINLINE unsigned int log2_floor_u(unsigned int x); +MINLINE unsigned int log2_ceil_u(unsigned int x); MINLINE int divide_round_i(int a, int b); MINLINE int mod_i(int i, int n); diff --git a/source/blender/blenlib/BLI_open_addressing.h b/source/blender/blenlib/BLI_open_addressing.h new file mode 100644 index 00000000000..3bdacae18d1 --- /dev/null +++ b/source/blender/blenlib/BLI_open_addressing.h @@ -0,0 +1,302 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/** \file + * \ingroup bli + * + * This class offers a useful abstraction for other containers that implement hash tables using + * open addressing. It handles the following aspects: + * - Allocation and deallocation of the open addressing array. + * - Optional small object optimization. + * - Keeps track of how many elements and dummies are in the table. + * + * The nice thing about this abstraction is that it does not get in the way of any performance + * optimizations. The data that is actually stored in the table is still fully defined by the + * actual hash table implementation. + */ + +#pragma once + +#include <cmath> + +#include "BLI_utildefines.h" +#include "BLI_memory_utils_cxx.h" +#include "BLI_math_base.h" +#include "BLI_allocator.h" + +namespace BLI { + +template<typename Item, uint32_t ItemsInSmallStorage = 1, typename Allocator = GuardedAllocator> +class OpenAddressingArray { + private: + static constexpr auto slots_per_item = Item::slots_per_item; + static constexpr float max_load_factor = 0.5f; + + /* Invariants: + * 2^m_item_exponent = m_item_amount + * m_item_amount * slots_per_item = m_slots_total + * m_slot_mask = m_slots_total - 1 + * m_slots_set_or_dummy < m_slots_total + */ + + /* Array containing the actual hash table. Might be a pointer to the inlined storage. */ + Item *m_items; + /* Number of items in the hash table. Must be a power of two. */ + uint32_t m_item_amount; + /* Exponent of the current item amount. */ + uint8_t m_item_exponent; + /* Number of elements that could be stored in the table when the load factor is 1. */ + uint32_t m_slots_total; + /* Number of elements that are not empty. */ + uint32_t m_slots_set_or_dummy; + /* Number of dummy entries. */ + uint32_t m_slots_dummy; + /* Max number of slots that can be non-empty according to the load factor. */ + uint32_t m_slots_usable; + /* Can be used to map a hash value into the range of valid slot indices. */ + uint32_t m_slot_mask; + Allocator m_allocator; + char m_local_storage[sizeof(Item) * ItemsInSmallStorage]; + + public: + explicit OpenAddressingArray(uint8_t item_exponent = 0) + { + m_slots_total = (1 << item_exponent) * slots_per_item; + m_slots_set_or_dummy = 0; + m_slots_dummy = 0; + m_slots_usable = m_slots_total * max_load_factor; + m_slot_mask = m_slots_total - 1; + m_item_amount = m_slots_total / slots_per_item; + m_item_exponent = item_exponent; + + if (m_item_amount <= ItemsInSmallStorage) { + m_items = this->small_storage(); + } + else { + m_items = (Item *)m_allocator.allocate_aligned( + sizeof(Item) * m_item_amount, std::alignment_of<Item>::value, __func__); + } + + for (uint32_t i = 0; i < m_item_amount; i++) { + new (m_items + i) Item(); + } + } + + ~OpenAddressingArray() + { + if (m_items != nullptr) { + for (uint32_t i = 0; i < m_item_amount; i++) { + m_items[i].~Item(); + } + if (!this->is_in_small_storage()) { + m_allocator.deallocate((void *)m_items); + } + } + } + + OpenAddressingArray(const OpenAddressingArray &other) + { + m_slots_total = other.m_slots_total; + m_slots_set_or_dummy = other.m_slots_set_or_dummy; + m_slots_dummy = other.m_slots_dummy; + m_slots_usable = other.m_slots_usable; + m_slot_mask = other.m_slot_mask; + m_item_amount = other.m_item_amount; + m_item_exponent = other.m_item_exponent; + + if (m_item_amount <= ItemsInSmallStorage) { + m_items = this->small_storage(); + } + else { + m_items = (Item *)m_allocator.allocate_aligned( + sizeof(Item) * m_item_amount, std::alignment_of<Item>::value, __func__); + } + + uninitialized_copy_n(other.m_items, m_item_amount, m_items); + } + + OpenAddressingArray(OpenAddressingArray &&other) noexcept + { + m_slots_total = other.m_slots_total; + m_slots_set_or_dummy = other.m_slots_set_or_dummy; + m_slots_dummy = other.m_slots_dummy; + m_slots_usable = other.m_slots_usable; + m_slot_mask = other.m_slot_mask; + m_item_amount = other.m_item_amount; + m_item_exponent = other.m_item_exponent; + if (other.is_in_small_storage()) { + m_items = this->small_storage(); + uninitialized_relocate_n(other.m_items, m_item_amount, m_items); + } + else { + m_items = other.m_items; + } + + other.m_items = nullptr; + other.~OpenAddressingArray(); + new (&other) OpenAddressingArray(); + } + + OpenAddressingArray &operator=(const OpenAddressingArray &other) + { + if (this == &other) { + return *this; + } + this->~OpenAddressingArray(); + new (this) OpenAddressingArray(other); + return *this; + } + + OpenAddressingArray &operator=(OpenAddressingArray &&other) + { + if (this == &other) { + return *this; + } + this->~OpenAddressingArray(); + new (this) OpenAddressingArray(std::move(other)); + return *this; + } + + /* Prepare a new array that can hold a minimum of min_usable_slots elements. All entries are + * empty. */ + OpenAddressingArray init_reserved(uint32_t min_usable_slots) const + { + float min_total_slots = (float)min_usable_slots / max_load_factor; + uint32_t min_total_items = (uint32_t)std::ceil(min_total_slots / (float)slots_per_item); + uint8_t item_exponent = log2_ceil_u(min_total_items); + OpenAddressingArray grown(item_exponent); + grown.m_slots_set_or_dummy = this->slots_set(); + return grown; + } + + /** + * Amount of items in the array times the number of slots per item. + */ + uint32_t slots_total() const + { + return m_slots_total; + } + + /** + * Amount of slots that are initialized with some value that is not empty or dummy. + */ + uint32_t slots_set() const + { + return m_slots_set_or_dummy - m_slots_dummy; + } + + /** + * Amount of slots that can be used before the array should grow. + */ + uint32_t slots_usable() const + { + return m_slots_usable; + } + + /** + * Update the counters after one empty element is used for a newly added element. + */ + void update__empty_to_set() + { + m_slots_set_or_dummy++; + } + + /** + * Update the counters after one previously dummy element becomes set. + */ + void update__dummy_to_set() + { + m_slots_dummy--; + } + + /** + * Update the counters after one previously set element becomes a dummy. + */ + void update__set_to_dummy() + { + m_slots_dummy++; + } + + /** + * Access the current slot mask for this array. + */ + uint32_t slot_mask() const + { + return m_slot_mask; + } + + /** + * Access the item for a specific item index. + * Note: The item index is not necessarily the slot index. + */ + const Item &item(uint32_t item_index) const + { + return m_items[item_index]; + } + + Item &item(uint32_t item_index) + { + return m_items[item_index]; + } + + uint8_t item_exponent() const + { + return m_item_exponent; + } + + uint32_t item_amount() const + { + return m_item_amount; + } + + bool should_grow() const + { + return m_slots_set_or_dummy >= m_slots_usable; + } + + Item *begin() + { + return m_items; + } + + Item *end() + { + return m_items + m_item_amount; + } + + const Item *begin() const + { + return m_items; + } + + const Item *end() const + { + return m_items + m_item_amount; + } + + private: + Item *small_storage() const + { + return reinterpret_cast<Item *>((char *)m_local_storage); + } + + bool is_in_small_storage() const + { + return m_items == this->small_storage(); + } +}; + +} // namespace BLI diff --git a/source/blender/blenlib/BLI_set.h b/source/blender/blenlib/BLI_set.h new file mode 100644 index 00000000000..33b02badf48 --- /dev/null +++ b/source/blender/blenlib/BLI_set.h @@ -0,0 +1,470 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/** \file + * \ingroup bli + * + * This file provides a set implementation that uses open addressing with probing. + */ + +#pragma once + +#include "BLI_hash_cxx.h" +#include "BLI_open_addressing.h" +#include "BLI_vector.h" + +namespace BLI { + +// clang-format off + +#define ITER_SLOTS_BEGIN(VALUE, ARRAY, OPTIONAL_CONST, R_ITEM, R_OFFSET) \ + uint32_t hash = DefaultHash<T>{}(VALUE); \ + uint32_t perturb = hash; \ + while (true) { \ + uint32_t item_index = (hash & ARRAY.slot_mask()) >> OFFSET_SHIFT; \ + uint8_t R_OFFSET = hash & OFFSET_MASK; \ + uint8_t initial_offset = R_OFFSET; \ + OPTIONAL_CONST Item &R_ITEM = ARRAY.item(item_index); \ + do { + +#define ITER_SLOTS_END(R_OFFSET) \ + R_OFFSET = (R_OFFSET + 1) & OFFSET_MASK; \ + } while (R_OFFSET != initial_offset); \ + perturb >>= 5; \ + hash = hash * 5 + 1 + perturb; \ + } ((void)0) + +// clang-format on + +template<typename T, typename Allocator = GuardedAllocator> class Set { + private: + static constexpr uint OFFSET_MASK = 3; + static constexpr uint OFFSET_SHIFT = 2; + + class Item { + private: + static constexpr uint8_t IS_EMPTY = 0; + static constexpr uint8_t IS_SET = 1; + static constexpr uint8_t IS_DUMMY = 2; + + uint8_t m_status[4]; + char m_values[4 * sizeof(T)]; + + public: + static constexpr uint slots_per_item = 4; + + Item() + { + for (uint offset = 0; offset < 4; offset++) { + m_status[offset] = IS_EMPTY; + } + } + + ~Item() + { + for (uint offset = 0; offset < 4; offset++) { + if (m_status[offset] == IS_SET) { + destruct(this->value(offset)); + } + } + } + + Item(const Item &other) + { + for (uint offset = 0; offset < 4; offset++) { + uint8_t status = other.m_status[offset]; + m_status[offset] = status; + if (status == IS_SET) { + T *src = other.value(offset); + T *dst = this->value(offset); + new (dst) T(*src); + } + } + } + + Item(Item &&other) noexcept + { + for (uint offset = 0; offset < 4; offset++) { + uint8_t status = other.m_status[offset]; + m_status[offset] = status; + if (status == IS_SET) { + T *src = other.value(offset); + T *dst = this->value(offset); + new (dst) T(std::move(*src)); + } + } + } + + Item &operator=(const Item &other) = delete; + Item &operator=(Item &&other) = delete; + + T *value(uint offset) const + { + return (T *)(m_values + offset * sizeof(T)); + } + + void copy_in(uint offset, const T &value) + { + BLI_assert(m_status[offset] != IS_SET); + m_status[offset] = IS_SET; + T *dst = this->value(offset); + new (dst) T(value); + } + + void move_in(uint offset, T &value) + { + BLI_assert(m_status[offset] != IS_SET); + m_status[offset] = IS_SET; + T *dst = this->value(offset); + new (dst) T(std::move(value)); + } + + void set_dummy(uint offset) + { + BLI_assert(m_status[offset] == IS_SET); + m_status[offset] = IS_DUMMY; + destruct(this->value(offset)); + } + + bool is_empty(uint offset) const + { + return m_status[offset] == IS_EMPTY; + } + + bool is_set(uint offset) const + { + return m_status[offset] == IS_SET; + } + + bool is_dummy(uint offset) const + { + return m_status[offset] == IS_DUMMY; + } + + bool has_value(uint offset, const T &value) const + { + return m_status[offset] == IS_SET && *this->value(offset) == value; + } + }; + + using ArrayType = OpenAddressingArray<Item, 1, Allocator>; + ArrayType m_array = OpenAddressingArray<Item>(); + + public: + Set() = default; + + /** + * Create a new set that contains the given elements. + */ + Set(ArrayRef<T> values) + { + this->reserve(values.size()); + for (const T &value : values) { + this->add(value); + } + } + + /** + * Create a new set from an initializer list. + */ + Set(std::initializer_list<T> values) : Set(ArrayRef<T>(values)) + { + } + + /** + * Make the set large enough to hold the given amount of elements. + */ + void reserve(uint32_t min_usable_slots) + { + if (m_array.slots_usable() < min_usable_slots) { + this->grow(min_usable_slots); + } + } + + /** + * Add a new element to the set. + * Asserts that the element did not exist in the set before. + */ + void add_new(const T &value) + { + BLI_assert(!this->contains(value)); + this->ensure_can_add(); + + ITER_SLOTS_BEGIN (value, m_array, , item, offset) { + if (item.is_empty(offset)) { + item.copy_in(offset, value); + m_array.update__empty_to_set(); + return; + } + } + ITER_SLOTS_END(offset); + } + + /** + * Add a new value to the set if it does not exist yet. + */ + bool add(const T &value) + { + this->ensure_can_add(); + + ITER_SLOTS_BEGIN (value, m_array, , item, offset) { + if (item.is_empty(offset)) { + item.copy_in(offset, value); + m_array.update__empty_to_set(); + return true; + } + else if (item.has_value(offset, value)) { + return false; + } + } + ITER_SLOTS_END(offset); + } + + /** + * Add multiple elements to the set. + */ + void add_multiple(ArrayRef<T> values) + { + for (const T &value : values) { + this->add(value); + } + } + + /** + * Add multiple new elements to the set. + * Asserts that none of the elements existed in the set before. + */ + void add_multiple_new(ArrayRef<T> values) + { + for (const T &value : values) { + this->add_new(value); + } + } + + /** + * Returns true when the value is in the set, otherwise false. + */ + bool contains(const T &value) const + { + ITER_SLOTS_BEGIN (value, m_array, const, item, offset) { + if (item.is_empty(offset)) { + return false; + } + else if (item.has_value(offset, value)) { + return true; + } + } + ITER_SLOTS_END(offset); + } + + /** + * Remove the value from the set. + * Asserts that the value exists in the set currently. + */ + void remove(const T &value) + { + BLI_assert(this->contains(value)); + ITER_SLOTS_BEGIN (value, m_array, , item, offset) { + if (item.has_value(offset, value)) { + item.set_dummy(offset); + m_array.update__set_to_dummy(); + return; + } + } + ITER_SLOTS_END(offset); + } + + Vector<T> to_small_vector() const + { + Vector<T> vector; + vector.reserve(this->size()); + for (const T &value : *this) { + vector.append(value); + } + return vector; + } + + uint32_t size() const + { + return m_array.slots_set(); + } + + /** + * Returns true when there is at least one element that is in both sets. + * Otherwise false. + */ + static bool Intersects(const Set &a, const Set &b) + { + /* Make sure we iterate over the shorter set. */ + if (a.size() > b.size()) { + return Intersects(b, a); + } + + for (const T &value : a) { + if (b.contains(value)) { + return true; + } + } + return false; + } + + /** + * Returns true when there is no value that is in both sets. + * Otherwise false. + */ + static bool Disjoint(const Set &a, const Set &b) + { + return !Intersects(a, b); + } + + void print_table() const + { + std::cout << "Hash Table:\n"; + std::cout << " Size: " << m_array.slots_set() << '\n'; + std::cout << " Capacity: " << m_array.slots_total() << '\n'; + uint32_t item_index = 0; + for (const Item &item : m_array) { + std::cout << " Item: " << item_index++ << '\n'; + for (uint offset = 0; offset < 4; offset++) { + std::cout << " " << offset << " \t"; + if (item.is_empty(offset)) { + std::cout << " <empty>\n"; + } + else if (item.is_set(offset)) { + const T &value = *item.value(offset); + uint32_t collisions = this->count_collisions(value); + std::cout << " " << value << " \t Collisions: " << collisions << '\n'; + } + else if (item.is_dummy(offset)) { + std::cout << " <dummy>\n"; + } + } + } + } + + class Iterator { + private: + const Set *m_set; + uint32_t m_slot; + + public: + Iterator(const Set *set, uint32_t slot) : m_set(set), m_slot(slot) + { + } + + Iterator &operator++() + { + m_slot = m_set->next_slot(m_slot + 1); + return *this; + } + + const T &operator*() const + { + uint32_t item_index = m_slot >> OFFSET_SHIFT; + uint offset = m_slot & OFFSET_MASK; + const Item &item = m_set->m_array.item(item_index); + BLI_assert(item.is_set(offset)); + return *item.value(offset); + } + + friend bool operator==(const Iterator &a, const Iterator &b) + { + BLI_assert(a.m_set == b.m_set); + return a.m_slot == b.m_slot; + } + + friend bool operator!=(const Iterator &a, const Iterator &b) + { + return !(a == b); + } + }; + + friend Iterator; + + Iterator begin() const + { + return Iterator(this, this->next_slot(0)); + } + + Iterator end() const + { + return Iterator(this, m_array.slots_total()); + } + + private: + uint32_t next_slot(uint32_t slot) const + { + for (; slot < m_array.slots_total(); slot++) { + uint32_t item_index = slot >> OFFSET_SHIFT; + uint offset = slot & OFFSET_MASK; + const Item &item = m_array.item(item_index); + if (item.is_set(offset)) { + return slot; + } + } + return slot; + } + + void ensure_can_add() + { + if (UNLIKELY(m_array.should_grow())) { + this->grow(this->size() + 1); + } + } + + BLI_NOINLINE void grow(uint32_t min_usable_slots) + { + ArrayType new_array = m_array.init_reserved(min_usable_slots); + + for (Item &old_item : m_array) { + for (uint8_t offset = 0; offset < 4; offset++) { + if (old_item.is_set(offset)) { + this->add_after_grow(*old_item.value(offset), new_array); + } + } + } + + m_array = std::move(new_array); + } + + void add_after_grow(T &old_value, ArrayType &new_array) + { + ITER_SLOTS_BEGIN (old_value, new_array, , item, offset) { + if (item.is_empty(offset)) { + item.move_in(offset, old_value); + return; + } + } + ITER_SLOTS_END(offset); + } + + uint32_t count_collisions(const T &value) const + { + uint32_t collisions = 0; + ITER_SLOTS_BEGIN (value, m_array, const, item, offset) { + if (item.is_empty(offset) || item.has_value(offset, value)) { + return collisions; + } + collisions++; + } + ITER_SLOTS_END(offset); + } +}; + +#undef ITER_SLOTS_BEGIN +#undef ITER_SLOTS_END + +} // namespace BLI diff --git a/source/blender/blenlib/BLI_set_vector.h b/source/blender/blenlib/BLI_set_vector.h new file mode 100644 index 00000000000..083e72f5a9b --- /dev/null +++ b/source/blender/blenlib/BLI_set_vector.h @@ -0,0 +1,366 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/** \file + * \ingroup bli + * + * A SetVector is a combination of a set and a vector. The elements are stored in a continuous + * array, but every element exists at most once. The insertion order is maintained, as long as + * there are no deletes. The expected time to check if a value is in the SetVector is O(1). + */ + +#pragma once + +#include "BLI_hash_cxx.h" +#include "BLI_open_addressing.h" +#include "BLI_vector.h" + +namespace BLI { + +// clang-format off + +#define ITER_SLOTS_BEGIN(VALUE, ARRAY, OPTIONAL_CONST, R_SLOT) \ + uint32_t hash = DefaultHash<T>{}(VALUE); \ + uint32_t perturb = hash; \ + while (true) { \ + for (uint i = 0; i < 4; i++) {\ + uint32_t slot_index = (hash + i) & ARRAY.slot_mask(); \ + OPTIONAL_CONST Slot &R_SLOT = ARRAY.item(slot_index); + +#define ITER_SLOTS_END \ + } \ + perturb >>= 5; \ + hash = hash * 5 + 1 + perturb; \ + } ((void)0) + +// clang-format on + +template<typename T, typename Allocator = GuardedAllocator> class SetVector { + private: + static constexpr int32_t IS_EMPTY = -1; + static constexpr int32_t IS_DUMMY = -2; + + class Slot { + private: + int32_t m_value = IS_EMPTY; + + public: + static constexpr uint slots_per_item = 1; + + bool is_set() const + { + return m_value >= 0; + } + + bool is_empty() const + { + return m_value == IS_EMPTY; + } + + bool is_dummy() const + { + return m_value == IS_DUMMY; + } + + bool has_value(const T &value, const Vector<T> &elements) const + { + return this->is_set() && elements[this->index()] == value; + } + + bool has_index(uint index) const + { + return m_value == (int32_t)index; + } + + uint index() const + { + BLI_assert(this->is_set()); + return m_value; + } + + int32_t &index_ref() + { + return m_value; + } + + void set_index(uint index) + { + BLI_assert(!this->is_set()); + m_value = index; + } + + void set_dummy() + { + BLI_assert(this->is_set()); + m_value = IS_DUMMY; + } + }; + + using ArrayType = OpenAddressingArray<Slot, 4, Allocator>; + ArrayType m_array; + Vector<T, 4, Allocator> m_elements; + + public: + SetVector() = default; + + SetVector(ArrayRef<T> values) + { + this->add_multiple(values); + } + + SetVector(const std::initializer_list<T> &values) + { + this->add_multiple(values); + } + + SetVector(const Vector<T> &values) + { + this->add_multiple(values); + } + + /** + * Add a new element. The method assumes that the value did not exist before. + */ + void add_new(const T &value) + { + BLI_assert(!this->contains(value)); + this->ensure_can_add(); + ITER_SLOTS_BEGIN (value, m_array, , slot) { + if (slot.is_empty()) { + this->add_new_in_slot(slot, value); + return; + } + } + ITER_SLOTS_END; + } + + /** + * Add a new element if it does not exist yet. Does not add the value again if it exists already. + */ + bool add(const T &value) + { + this->ensure_can_add(); + ITER_SLOTS_BEGIN (value, m_array, , slot) { + if (slot.is_empty()) { + this->add_new_in_slot(slot, value); + return true; + } + else if (slot.has_value(value, m_elements)) { + return false; + } + } + ITER_SLOTS_END; + } + + /** + * Add multiple values. Duplicates will not be inserted. + */ + void add_multiple(ArrayRef<T> values) + { + for (const T &value : values) { + this->add(value); + } + } + + /** + * Returns true when the value is in the set-vector, otherwise false. + */ + bool contains(const T &value) const + { + ITER_SLOTS_BEGIN (value, m_array, const, slot) { + if (slot.is_empty()) { + return false; + } + else if (slot.has_value(value, m_elements)) { + return true; + } + } + ITER_SLOTS_END; + } + + /** + * Remove a value from the set-vector. The method assumes that the value exists. + */ + void remove(const T &value) + { + BLI_assert(this->contains(value)); + ITER_SLOTS_BEGIN (value, m_array, , slot) { + if (slot.has_value(value, m_elements)) { + uint old_index = m_elements.size() - 1; + uint new_index = slot.index(); + + m_elements.remove_and_reorder(new_index); + slot.set_dummy(); + m_array.update__set_to_dummy(); + + if (old_index != new_index) { + T &moved_value = m_elements[new_index]; + this->update_slot_index(moved_value, old_index, new_index); + } + return; + } + } + ITER_SLOTS_END; + } + + /** + * Get and remove the last element of the vector. + */ + T pop() + { + BLI_assert(this->size() > 0); + T value = m_elements.pop_last(); + uint old_index = m_elements.size(); + + ITER_SLOTS_BEGIN (value, m_array, , slot) { + if (slot.has_index(old_index)) { + slot.set_dummy(); + m_array.update__set_to_dummy(); + return value; + } + } + ITER_SLOTS_END; + } + + /** + * Get the index of the value in the vector. It is assumed that the value is in the vector. + */ + uint index(const T &value) const + { + BLI_assert(this->contains(value)); + ITER_SLOTS_BEGIN (value, m_array, const, slot) { + if (slot.has_value(value, m_elements)) { + return slot.index(); + } + } + ITER_SLOTS_END; + } + + /** + * Get the index of the value in the vector. If it does not exist return -1. + */ + int index_try(const T &value) const + { + ITER_SLOTS_BEGIN (value, m_array, const, slot) { + if (slot.has_value(value, m_elements)) { + return slot.index(); + } + else if (slot.is_empty()) { + return -1; + } + } + ITER_SLOTS_END; + } + + /** + * Get the number of elements in the set-vector. + */ + uint size() const + { + return m_array.slots_set(); + } + + T *begin() + { + return m_elements.begin(); + } + + T *end() + { + return m_elements.end(); + } + + const T *begin() const + { + return m_elements.begin(); + } + + const T *end() const + { + return m_elements.end(); + } + + const T &operator[](uint index) const + { + return m_elements[index]; + } + + operator ArrayRef<T>() const + { + return m_elements; + } + + operator MutableArrayRef<T>() + { + return m_elements; + } + + private: + void update_slot_index(T &value, uint old_index, uint new_index) + { + ITER_SLOTS_BEGIN (value, m_array, , slot) { + int32_t &stored_index = slot.index_ref(); + if (stored_index == old_index) { + stored_index = new_index; + return; + } + } + ITER_SLOTS_END; + } + + void add_new_in_slot(Slot &slot, const T &value) + { + uint index = m_elements.size(); + slot.set_index(index); + m_elements.append(value); + m_array.update__empty_to_set(); + } + + void ensure_can_add() + { + if (UNLIKELY(m_array.should_grow())) { + this->grow(this->size() + 1); + } + } + + BLI_NOINLINE void grow(uint min_usable_slots) + { + ArrayType new_array = m_array.init_reserved(min_usable_slots); + + for (uint i = 0; i < m_elements.size(); i++) { + this->add_after_grow(i, new_array); + } + + m_array = std::move(new_array); + } + + void add_after_grow(uint index, ArrayType &new_array) + { + const T &value = m_elements[index]; + ITER_SLOTS_BEGIN (value, new_array, , slot) { + if (slot.is_empty()) { + slot.set_index(index); + return; + } + } + ITER_SLOTS_END; + } +}; + +#undef ITER_SLOTS_BEGIN +#undef ITER_SLOTS_END + +} // namespace BLI diff --git a/source/blender/blenlib/BLI_string_map.h b/source/blender/blenlib/BLI_string_map.h new file mode 100644 index 00000000000..0cf0bb40e4d --- /dev/null +++ b/source/blender/blenlib/BLI_string_map.h @@ -0,0 +1,410 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/** \file + * \ingroup bli + * + * This tries to solve the issue that a normal map with std::string as key might do many + * allocations when the keys are longer than 16 bytes (the usual small string optimization size). + * + * For now this still uses std::string, but having this abstraction in place will make it easier to + * make it more efficient later on. Also, even if we will never implement this optimization, having + * a special map with string keys can be quite handy. */ + +#pragma once + +#include "BLI_map.h" +#include "BLI_string_ref.h" +#include "BLI_vector.h" + +namespace BLI { + +// clang-format off + +#define ITER_SLOTS_BEGIN(HASH, ARRAY, OPTIONAL_CONST, R_ITEM, R_OFFSET) \ + uint32_t hash_copy = HASH; \ + uint32_t perturb = HASH; \ + while (true) { \ + uint32_t item_index = (hash_copy & ARRAY.slot_mask()) >> OFFSET_SHIFT; \ + uint8_t R_OFFSET = hash_copy & OFFSET_MASK; \ + uint8_t initial_offset = R_OFFSET; \ + OPTIONAL_CONST Item &R_ITEM = ARRAY.item(item_index); \ + do { + +#define ITER_SLOTS_END(R_OFFSET) \ + R_OFFSET = (R_OFFSET + 1) & OFFSET_MASK; \ + } while (R_OFFSET != initial_offset); \ + perturb >>= 5; \ + hash_copy = hash_copy * 5 + 1 + perturb; \ + } ((void)0) + +// clang-format on + +template<typename T, typename Allocator = GuardedAllocator> class StringMap { + private: + static constexpr uint32_t OFFSET_MASK = 3; + static constexpr uint32_t OFFSET_SHIFT = 2; + + class Item { + private: + static constexpr int32_t IS_EMPTY = -1; + + uint32_t m_hashes[4]; + int32_t m_indices[4]; + char m_values[sizeof(T) * 4]; + + public: + static constexpr uint slots_per_item = 4; + + Item() + { + for (uint offset = 0; offset < 4; offset++) { + m_indices[offset] = IS_EMPTY; + } + } + + ~Item() + { + for (uint offset = 0; offset < 4; offset++) { + if (this->is_set(offset)) { + destruct(this->value(offset)); + } + } + } + + Item(const Item &other) + { + for (uint offset = 0; offset < 4; offset++) { + m_indices[offset] = other.m_indices[offset]; + if (other.is_set(offset)) { + m_hashes[offset] = other.m_hashes[offset]; + new (this->value(offset)) T(*other.value(offset)); + } + } + } + + Item(Item &&other) noexcept + { + for (uint offset = 0; offset < 4; offset++) { + m_indices[offset] = other.m_indices[offset]; + if (other.is_set(offset)) { + m_hashes[offset] = other.m_hashes[offset]; + new (this->value(offset)) T(std::move(*other.value(offset))); + } + } + } + + uint32_t index(uint offset) const + { + return m_indices[offset]; + } + + uint32_t hash(uint offset) const + { + return m_hashes[offset]; + } + + T *value(uint offset) const + { + return (T *)POINTER_OFFSET(m_values, offset * sizeof(T)); + } + + bool is_set(uint offset) const + { + return m_indices[offset] >= 0; + } + + bool is_empty(uint offset) const + { + return m_indices[offset] == IS_EMPTY; + } + + bool has_hash(uint offset, uint32_t hash) const + { + BLI_assert(this->is_set(offset)); + return m_hashes[offset] == hash; + } + + bool has_exact_key(uint offset, StringRef key, const Vector<char> &chars) const + { + return key == this->get_key(offset, chars); + } + + StringRefNull get_key(uint offset, const Vector<char> &chars) const + { + const char *ptr = chars.begin() + m_indices[offset]; + uint length = *(uint *)ptr; + const char *start = ptr + sizeof(uint); + return StringRefNull(start, length); + } + + void move_in(uint offset, uint32_t hash, uint32_t index, T &value) + { + BLI_assert(!this->is_set(offset)); + m_hashes[offset] = hash; + m_indices[offset] = index; + new (this->value(offset)) T(std::move(value)); + } + + void copy_in(uint offset, uint32_t hash, uint32_t index, const T &value) + { + BLI_assert(!this->is_set(offset)); + m_hashes[offset] = hash; + m_indices[offset] = index; + new (this->value(offset)) T(value); + } + }; + + using ArrayType = OpenAddressingArray<Item, 1, Allocator>; + ArrayType m_array; + Vector<char> m_chars; + + public: + StringMap() = default; + + /** + * Get the number of key-value pairs in the map. + */ + uint size() const + { + return m_array.slots_set(); + } + + /** + * Add a new element to the map. It is assumed that the key did not exist before. + */ + void add_new(StringRef key, const T &value) + { + BLI_assert(!this->contains(key)); + this->ensure_can_add(); + uint32_t hash = this->compute_string_hash(key); + ITER_SLOTS_BEGIN (hash, m_array, , item, offset) { + if (item.is_empty(offset)) { + uint32_t index = this->save_key_in_array(key); + item.copy_in(offset, hash, index, value); + m_array.update__empty_to_set(); + return; + } + } + ITER_SLOTS_END(offset); + } + + /** + * Return true when the key exists in the map, otherwise false. + */ + bool contains(StringRef key) const + { + uint32_t hash = this->compute_string_hash(key); + ITER_SLOTS_BEGIN (hash, m_array, const, item, offset) { + if (item.is_empty(offset)) { + return false; + } + else if (item.has_hash(offset, hash) && item.has_exact_key(offset, key, m_chars)) { + return true; + } + } + ITER_SLOTS_END(offset); + } + + /** + * Get a reference to the value corresponding to a key. It is assumed that the key does exist. + */ + const T &lookup(StringRef key) const + { + BLI_assert(this->contains(key)); + T *found_value = nullptr; + uint32_t hash = this->compute_string_hash(key); + ITER_SLOTS_BEGIN (hash, m_array, const, item, offset) { + if (item.is_empty(offset)) { + return *found_value; + } + else if (item.has_hash(offset, hash)) { + if (found_value == nullptr) { + /* Common case: the first slot with the correct hash contains the key. + * However, still need to iterate until the next empty slot to make sure there is no + * other key with the exact same hash. */ + /* TODO: Check if we can guarantee that every hash only exists once in some cases. */ + found_value = item.value(offset); + } + else if (item.has_exact_key(offset, key, m_chars)) { + /* Found the hash more than once, now check for actual string equality. */ + return *item.value(offset); + } + } + } + ITER_SLOTS_END(offset); + } + + /** + * Get a pointer to the value corresponding to the key. Return nullptr, if the key does not + * exist. + */ + const T *lookup_ptr(StringRef key) const + { + uint32_t hash = this->compute_string_hash(key); + ITER_SLOTS_BEGIN (hash, m_array, const, item, offset) { + if (item.is_empty(offset)) { + return nullptr; + } + else if (item.has_hash(offset, hash) && item.has_exact_key(offset, key, m_chars)) { + return item.value(offset); + } + } + ITER_SLOTS_END(offset); + } + + /** + * Get a copy of the value corresponding to the key. If the key does not exist, return the + * default value. + */ + T lookup_default(StringRef key, const T &default_value) const + { + const T *ptr = this->lookup_ptr(key); + if (ptr != nullptr) { + return *ptr; + } + else { + return default_value; + } + } + + /** + * Do a linear search over all items to find a key for a value. + */ + StringRefNull find_key_for_value(const T &value) const + { + for (const Item &item : m_array) { + for (uint offset = 0; offset < 4; offset++) { + if (item.is_set(offset) && value == *item.value(offset)) { + return item.get_key(offset, m_chars); + } + } + } + BLI_assert(false); + return {}; + } + + /** + * Run a function for every value in the map. + */ + template<typename FuncT> void foreach_value(const FuncT &func) + { + for (Item &item : m_array) { + for (uint offset = 0; offset < 4; offset++) { + if (item.is_set(offset)) { + func(*item.value(offset)); + } + } + } + } + + /** + * Run a function for every key in the map. + */ + template<typename FuncT> void foreach_key(const FuncT &func) + { + for (Item &item : m_array) { + for (uint offset = 0; offset < 4; offset++) { + if (item.is_set(offset)) { + StringRefNull key = item.get_key(offset, m_chars); + func(key); + } + } + } + } + + /** + * Run a function for every key-value-pair in the map. + */ + template<typename FuncT> void foreach_key_value_pair(const FuncT &func) + { + for (Item &item : m_array) { + for (uint offset = 0; offset < 4; offset++) { + if (item.is_set(offset)) { + StringRefNull key = item.get_key(offset, m_chars); + T &value = *item.value(offset); + func(key, value); + } + } + } + } + + private: + uint32_t compute_string_hash(StringRef key) const + { + /* TODO: check if this can be optimized more because we know the key length already. */ + uint32_t hash = 5381; + for (char c : key) { + hash = hash * 33 + c; + } + return hash; + } + + uint32_t save_key_in_array(StringRef key) + { + uint index = m_chars.size(); + uint string_size = key.size(); + m_chars.extend(ArrayRef<char>((char *)&string_size, sizeof(uint))); + m_chars.extend(key); + m_chars.append('\0'); + return index; + } + + StringRefNull key_from_index(uint32_t index) const + { + const char *ptr = m_chars.begin() + index; + uint length = *(uint *)ptr; + const char *start = ptr + sizeof(uint); + return StringRefNull(start, length); + } + + void ensure_can_add() + { + if (UNLIKELY(m_array.should_grow())) { + this->grow(this->size() + 1); + } + } + + BLI_NOINLINE void grow(uint min_usable_slots) + { + ArrayType new_array = m_array.init_reserved(min_usable_slots); + for (Item &old_item : m_array) { + for (uint offset = 0; offset < 4; offset++) { + if (old_item.is_set(offset)) { + this->add_after_grow( + *old_item.value(offset), old_item.hash(offset), old_item.index(offset), new_array); + } + } + } + m_array = std::move(new_array); + } + + void add_after_grow(T &value, uint32_t hash, uint32_t index, ArrayType &new_array) + { + ITER_SLOTS_BEGIN (hash, new_array, , item, offset) { + if (item.is_empty(offset)) { + item.move_in(offset, hash, index, value); + return; + } + } + ITER_SLOTS_END(offset); + } +}; + +#undef ITER_SLOTS_BEGIN +#undef ITER_SLOTS_END + +} // namespace BLI diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt index abef583913c..a5950051f90 100644 --- a/source/blender/blenlib/CMakeLists.txt +++ b/source/blender/blenlib/CMakeLists.txt @@ -172,6 +172,7 @@ set(SRC BLI_ghash.h BLI_gsqueue.h BLI_hash.h + BLI_hash_cxx.h BLI_hash_md5.h BLI_hash_mm2a.h BLI_hash_mm3.h @@ -190,6 +191,7 @@ set(SRC BLI_linklist_stack.h BLI_listbase.h BLI_listbase_wrapper.h + BLI_map.h BLI_math.h BLI_math_base.h BLI_math_bits.h @@ -210,6 +212,7 @@ set(SRC BLI_memory_utils_cxx.h BLI_mempool.h BLI_noise.h + BLI_open_addressing.h BLI_path_util.h BLI_polyfill_2d.h BLI_polyfill_2d_beautify.h @@ -217,6 +220,8 @@ set(SRC BLI_rand.h BLI_rect.h BLI_scanfill.h + BLI_set.h + BLI_set_vector.h BLI_smallhash.h BLI_sort.h BLI_sort_utils.h @@ -225,6 +230,7 @@ set(SRC BLI_strict_flags.h BLI_string.h BLI_string_cursor_utf8.h + BLI_string_map.h BLI_string_ref.h BLI_string_utf8.h BLI_string_utils.h diff --git a/source/blender/blenlib/intern/math_base_inline.c b/source/blender/blenlib/intern/math_base_inline.c index 47327a878d4..0309876d8fb 100644 --- a/source/blender/blenlib/intern/math_base_inline.c +++ b/source/blender/blenlib/intern/math_base_inline.c @@ -230,6 +230,21 @@ MINLINE unsigned power_of_2_min_u(unsigned x) return x - (x >> 1); } +MINLINE unsigned int log2_floor_u(unsigned int x) +{ + return x <= 1 ? 0 : 1 + log2_floor_u(x >> 1); +} + +MINLINE unsigned int log2_ceil_u(unsigned int x) +{ + if (is_power_of_2_i((int)x)) { + return log2_floor_u(x); + } + else { + return log2_floor_u(x) + 1; + } +} + /* rounding and clamping */ #define _round_clamp_fl_impl(arg, ty, min, max) \ |