Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJacques Lucke <mail@jlucke.com>2019-09-13 11:06:02 +0300
committerJacques Lucke <mail@jlucke.com>2019-09-13 11:06:02 +0300
commit1c44d08a69eb3e66c7f942d748f549d6b8ca138f (patch)
treeb74784ffdc6f6a66a92d02a985865247101a6093 /source/blender/blenlib/BLI_map.h
parent8d12c2a83658cb6b1311197c292e5906394c4321 (diff)
BLI: new C++ hash table data structures
This commit adds some new hashing based data structures to blenlib. All of them use open addressing with probing currently. Furthermore, they support small object optimization, but it is not customizable yet. I'll add support for this when necessary. The following main data structures are included: **Set** A collection of values, where every value must exist at most once. This is similar to a Python `set`. **SetVector** A combination of a Set and a Vector. It supports fast search for elements and maintains insertion order when there are no deletes. All elements are stored in a continuous array. So they can be iterated over using a normal `ArrayRef`. **Map** A set of key-value-pairs, where every key must exist at most once. This is similar to a Python `dict`. **StringMap** A special map for the case when the keys are strings. This case is fairly common and allows for some optimizations. Most importantly, many unnecessary allocations can be avoided by storing strings in a single buffer. Furthermore, the interface of this class uses `StringRef` to avoid unnecessary conversions. This commit is a continuation of rB369d5e8ad2bb7.
Diffstat (limited to 'source/blender/blenlib/BLI_map.h')
-rw-r--r--source/blender/blenlib/BLI_map.h596
1 files changed, 596 insertions, 0 deletions
diff --git a/source/blender/blenlib/BLI_map.h b/source/blender/blenlib/BLI_map.h
new file mode 100644
index 00000000000..5328dac1106
--- /dev/null
+++ b/source/blender/blenlib/BLI_map.h
@@ -0,0 +1,596 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/** \file
+ * \ingroup bli
+ *
+ * This file provides a map implementation that uses open addressing with probing.
+ *
+ * The key and value objects are stored directly in the hash table to avoid indirect memory
+ * lookups. Keys and values are stored in groups of four to avoid wasting memory due to padding.
+ */
+
+#pragma once
+
+#include "BLI_hash_cxx.h"
+#include "BLI_array_ref.h"
+#include "BLI_open_addressing.h"
+
+namespace BLI {
+
+// clang-format off
+
+#define ITER_SLOTS_BEGIN(KEY, ARRAY, OPTIONAL_CONST, R_ITEM, R_OFFSET) \
+ uint32_t hash = DefaultHash<KeyT>{}(KEY); \
+ uint32_t perturb = hash; \
+ while (true) { \
+ uint32_t item_index = (hash & ARRAY.slot_mask()) >> OFFSET_SHIFT; \
+ uint8_t R_OFFSET = hash & OFFSET_MASK; \
+ uint8_t initial_offset = R_OFFSET; \
+ OPTIONAL_CONST Item &R_ITEM = ARRAY.item(item_index); \
+ do {
+
+#define ITER_SLOTS_END(R_OFFSET) \
+ R_OFFSET = (R_OFFSET + 1) & OFFSET_MASK; \
+ } while (R_OFFSET != initial_offset); \
+ perturb >>= 5; \
+ hash = hash * 5 + 1 + perturb; \
+ } ((void)0)
+
+// clang-format on
+
+template<typename KeyT, typename ValueT, typename Allocator = GuardedAllocator> class Map {
+ private:
+ static constexpr uint OFFSET_MASK = 3;
+ static constexpr uint OFFSET_SHIFT = 2;
+
+ class Item {
+ private:
+ static constexpr uint8_t IS_EMPTY = 0;
+ static constexpr uint8_t IS_SET = 1;
+ static constexpr uint8_t IS_DUMMY = 2;
+
+ uint8_t m_status[4];
+ char m_keys[4 * sizeof(KeyT)];
+ char m_values[4 * sizeof(ValueT)];
+
+ public:
+ static constexpr uint slots_per_item = 4;
+
+ Item()
+ {
+ for (uint offset = 0; offset < 4; offset++) {
+ m_status[offset] = IS_EMPTY;
+ }
+ }
+
+ ~Item()
+ {
+ for (uint offset = 0; offset < 4; offset++) {
+ if (m_status[offset] == IS_SET) {
+ this->key(offset)->~KeyT();
+ this->value(offset)->~ValueT();
+ }
+ }
+ }
+
+ Item(const Item &other)
+ {
+ for (uint offset = 0; offset < 4; offset++) {
+ uint8_t status = other.m_status[offset];
+ m_status[offset] = status;
+ if (status == IS_SET) {
+ new (this->key(offset)) KeyT(*other.key(offset));
+ new (this->value(offset)) ValueT(*other.value(offset));
+ }
+ }
+ }
+
+ Item(Item &&other) noexcept
+ {
+ for (uint offset = 0; offset < 4; offset++) {
+ uint8_t status = other.m_status[offset];
+ m_status[offset] = status;
+ if (status == IS_SET) {
+ new (this->key(offset)) KeyT(std::move(*other.key(offset)));
+ new (this->value(offset)) ValueT(std::move(*other.value(offset)));
+ }
+ }
+ }
+
+ bool has_key(uint offset, const KeyT &key) const
+ {
+ return m_status[offset] == IS_SET && key == *this->key(offset);
+ }
+
+ bool is_set(uint offset) const
+ {
+ return m_status[offset] == IS_SET;
+ }
+
+ bool is_empty(uint offset) const
+ {
+ return m_status[offset] == IS_EMPTY;
+ }
+
+ KeyT *key(uint offset) const
+ {
+ return (KeyT *)(m_keys + offset * sizeof(KeyT));
+ }
+
+ ValueT *value(uint offset) const
+ {
+ return (ValueT *)(m_values + offset * sizeof(ValueT));
+ }
+
+ void copy_in(uint offset, const KeyT &key, const ValueT &value)
+ {
+ BLI_assert(m_status[offset] != IS_SET);
+ m_status[offset] = IS_SET;
+ new (this->key(offset)) KeyT(key);
+ new (this->value(offset)) ValueT(value);
+ }
+
+ void move_in(uint offset, KeyT &key, ValueT &value)
+ {
+ BLI_assert(m_status[offset] != IS_SET);
+ m_status[offset] = IS_SET;
+ new (this->key(offset)) KeyT(std::move(key));
+ new (this->value(offset)) ValueT(std::move(value));
+ }
+
+ void set_dummy(uint offset)
+ {
+ BLI_assert(m_status[offset] == IS_SET);
+ m_status[offset] = IS_DUMMY;
+ destruct(this->key(offset));
+ destruct(this->value(offset));
+ }
+ };
+
+ using ArrayType = OpenAddressingArray<Item, 1, Allocator>;
+ ArrayType m_array;
+
+ public:
+ Map() = default;
+
+ /**
+ * Insert a new key-value-pair in the map.
+ * Asserts when the key existed before.
+ */
+ void add_new(const KeyT &key, const ValueT &value)
+ {
+ BLI_assert(!this->contains(key));
+ this->ensure_can_add();
+
+ ITER_SLOTS_BEGIN (key, m_array, , item, offset) {
+ if (item.is_empty(offset)) {
+ item.copy_in(offset, key, value);
+ m_array.update__empty_to_set();
+ return;
+ }
+ }
+ ITER_SLOTS_END(offset);
+ }
+
+ /**
+ * Insert a new key-value-pair in the map if the key does not exist yet.
+ * Returns true when the pair was newly inserted, otherwise false.
+ */
+ bool add(const KeyT &key, const ValueT &value)
+ {
+ this->ensure_can_add();
+
+ ITER_SLOTS_BEGIN (key, m_array, , item, offset) {
+ if (item.is_empty(offset)) {
+ item.copy_in(offset, key, value);
+ m_array.update__empty_to_set();
+ return true;
+ }
+ else if (item.has_key(offset, key)) {
+ return false;
+ }
+ }
+ ITER_SLOTS_END(offset);
+ }
+
+ /**
+ * Remove the key from the map.
+ * Asserts when the key does not exist in the map.
+ */
+ void remove(const KeyT &key)
+ {
+ BLI_assert(this->contains(key));
+ ITER_SLOTS_BEGIN (key, m_array, , item, offset) {
+ if (item.has_key(offset, key)) {
+ item.set_dummy(offset);
+ m_array.update__set_to_dummy();
+ return;
+ }
+ }
+ ITER_SLOTS_END(offset);
+ }
+
+ /**
+ * Get the value for the given key and remove it from the map.
+ * Asserts when the key does not exist in the map.
+ */
+ ValueT pop(const KeyT &key)
+ {
+ BLI_assert(this->contains(key));
+ ITER_SLOTS_BEGIN (key, m_array, , item, offset) {
+ if (item.has_key(offset, key)) {
+ ValueT value = std::move(*item.value(offset));
+ item.set_dummy(offset);
+ m_array.update__set_to_dummy();
+ return value;
+ }
+ }
+ ITER_SLOTS_END(offset);
+ }
+
+ /**
+ * Returns true when the key exists in the map, otherwise false.
+ */
+ bool contains(const KeyT &key) const
+ {
+ ITER_SLOTS_BEGIN (key, m_array, const, item, offset) {
+ if (item.is_empty(offset)) {
+ return false;
+ }
+ else if (item.has_key(offset, key)) {
+ return true;
+ }
+ }
+ ITER_SLOTS_END(offset);
+ }
+
+ /**
+ * Check if the key exists in the map.
+ * If it does exist, call the modify function with a reference to the corresponding value.
+ * If it does not exist, call the create function and insert a new key-value-pair.
+ * Returns true when a new pair was inserted, otherwise false.
+ */
+ template<typename CreateValueF, typename ModifyValueF>
+ bool add_or_modify(const KeyT &key,
+ const CreateValueF &create_value,
+ const ModifyValueF &modify_value)
+ {
+ this->ensure_can_add();
+
+ ITER_SLOTS_BEGIN (key, m_array, , item, offset) {
+ if (item.is_empty(offset)) {
+ item.copy_in(offset, key, create_value());
+ m_array.update__empty_to_set();
+ return true;
+ }
+ else if (item.has_key(offset, key)) {
+ modify_value(*item.value(offset));
+ return false;
+ }
+ }
+ ITER_SLOTS_END(offset);
+ }
+
+ /**
+ * Similar to add, but overrides the value for the key when it exists already.
+ */
+ bool add_override(const KeyT &key, const ValueT &value)
+ {
+ return this->add_or_modify(
+ key, [&value]() { return value; }, [&value](ValueT &old_value) { old_value = value; });
+ }
+
+ /**
+ * Check if the key exists in the map.
+ * Return a pointer to the value, when it exists.
+ * Otherwise return nullptr.
+ */
+ const ValueT *lookup_ptr(const KeyT &key) const
+ {
+ ITER_SLOTS_BEGIN (key, m_array, const, item, offset) {
+ if (item.is_empty(offset)) {
+ return nullptr;
+ }
+ else if (item.has_key(offset, key)) {
+ return item.value(offset);
+ }
+ }
+ ITER_SLOTS_END(offset);
+ }
+
+ /**
+ * Lookup the value that corresponds to the key.
+ * Asserts when the key does not exist.
+ */
+ const ValueT &lookup(const KeyT &key) const
+ {
+ const ValueT *ptr = this->lookup_ptr(key);
+ BLI_assert(ptr != nullptr);
+ return *ptr;
+ }
+
+ ValueT *lookup_ptr(const KeyT &key)
+ {
+ const Map *const_this = this;
+ return const_cast<ValueT *>(const_this->lookup_ptr(key));
+ }
+
+ ValueT &lookup(const KeyT &key)
+ {
+ const Map *const_this = this;
+ return const_cast<ValueT &>(const_this->lookup(key));
+ }
+
+ /**
+ * Check if the key exists in the map.
+ * If it does, return a copy of the value.
+ * Otherwise, return the default value.
+ */
+ ValueT lookup_default(const KeyT &key, ValueT default_value) const
+ {
+ const ValueT *ptr = this->lookup_ptr(key);
+ if (ptr != nullptr) {
+ return *ptr;
+ }
+ else {
+ return default_value;
+ }
+ }
+
+ /**
+ * Return the value that corresponds to the given key.
+ * If it does not exist yet, create and insert it first.
+ */
+ template<typename CreateValueF>
+ ValueT &lookup_or_add(const KeyT &key, const CreateValueF &create_value)
+ {
+ this->ensure_can_add();
+
+ ITER_SLOTS_BEGIN (key, m_array, , item, offset) {
+ if (item.is_empty(offset)) {
+ item.copy_in(offset, key, create_value());
+ m_array.update__empty_to_set();
+ return *item.value(offset);
+ }
+ else if (item.has_key(offset, key)) {
+ return *item.value(offset);
+ }
+ }
+ ITER_SLOTS_END(offset);
+ }
+
+ /**
+ * Get the number of elements in the map.
+ */
+ uint32_t size() const
+ {
+ return m_array.slots_set();
+ }
+
+ void print_table() const
+ {
+ std::cout << "Hash Table:\n";
+ std::cout << " Size: " << m_array.slots_set() << '\n';
+ std::cout << " Capacity: " << m_array.slots_total() << '\n';
+ uint32_t item_index = 0;
+ for (const Item &item : m_array) {
+ std::cout << " Item: " << item_index++ << '\n';
+ for (uint offset = 0; offset < 4; offset++) {
+ std::cout << " " << offset << " \t";
+ if (item.is_empty(offset)) {
+ std::cout << " <empty>\n";
+ }
+ else if (item.is_set(offset)) {
+ const KeyT &key = *item.key(offset);
+ const ValueT &value = *item.value(offset);
+ uint32_t collisions = this->count_collisions(value);
+ std::cout << " " << key << " -> " << value << " \t Collisions: " << collisions
+ << '\n';
+ }
+ else if (item.is_dummy(offset)) {
+ std::cout << " <dummy>\n";
+ }
+ }
+ }
+ }
+
+ template<typename SubIterator> class BaseIterator {
+ protected:
+ const Map *m_map;
+ uint32_t m_slot;
+
+ public:
+ BaseIterator(const Map *map, uint32_t slot) : m_map(map), m_slot(slot)
+ {
+ }
+
+ BaseIterator &operator++()
+ {
+ m_slot = m_map->next_slot(m_slot + 1);
+ return *this;
+ }
+
+ friend bool operator==(const BaseIterator &a, const BaseIterator &b)
+ {
+ BLI_assert(a.m_map == b.m_map);
+ return a.m_slot == b.m_slot;
+ }
+
+ friend bool operator!=(const BaseIterator &a, const BaseIterator &b)
+ {
+ return !(a == b);
+ }
+
+ SubIterator begin() const
+ {
+ return SubIterator(m_map, m_map->next_slot(0));
+ }
+
+ SubIterator end() const
+ {
+ return SubIterator(m_map, m_map->m_array.slots_total());
+ }
+ };
+
+ class KeyIterator final : public BaseIterator<KeyIterator> {
+ public:
+ KeyIterator(const Map *map, uint32_t slot) : BaseIterator<KeyIterator>(map, slot)
+ {
+ }
+
+ const KeyT &operator*() const
+ {
+ uint32_t item_index = this->m_slot >> OFFSET_SHIFT;
+ uint offset = this->m_slot & OFFSET_MASK;
+ const Item &item = this->m_map->m_array.item(item_index);
+ BLI_assert(item.is_set(offset));
+ return *item.key(offset);
+ }
+ };
+
+ class ValueIterator final : public BaseIterator<ValueIterator> {
+ public:
+ ValueIterator(const Map *map, uint32_t slot) : BaseIterator<ValueIterator>(map, slot)
+ {
+ }
+
+ ValueT &operator*() const
+ {
+ uint32_t item_index = this->m_slot >> OFFSET_SHIFT;
+ uint offset = this->m_slot & OFFSET_MASK;
+ const Item &item = this->m_map->m_array.item(item_index);
+ BLI_assert(item.is_set(offset));
+ return *item.value(offset);
+ }
+ };
+
+ class ItemIterator final : public BaseIterator<ItemIterator> {
+ public:
+ ItemIterator(const Map *map, uint32_t slot) : BaseIterator<ItemIterator>(map, slot)
+ {
+ }
+
+ struct UserItem {
+ const KeyT &key;
+ ValueT &value;
+
+ friend std::ostream &operator<<(std::ostream &stream, const Item &item)
+ {
+ stream << item.key << " -> " << item.value;
+ return stream;
+ }
+ };
+
+ UserItem operator*() const
+ {
+ uint32_t item_index = this->m_slot >> OFFSET_SHIFT;
+ uint offset = this->m_slot & OFFSET_MASK;
+ const Item &item = this->m_map->m_array.item(item_index);
+ BLI_assert(item.is_set(offset));
+ return {*item.key(offset), *item.value(offset)};
+ }
+ };
+
+ template<typename SubIterator> friend class BaseIterator;
+
+ /**
+ * Iterate over all keys in the map.
+ */
+ KeyIterator keys() const
+ {
+ return KeyIterator(this, 0);
+ }
+
+ /**
+ * Iterate over all values in the map.
+ */
+ ValueIterator values() const
+ {
+ return ValueIterator(this, 0);
+ }
+
+ /**
+ * Iterate over all key-value-pairs in the map.
+ * They can be accessed with item.key and item.value.
+ */
+ ItemIterator items() const
+ {
+ return ItemIterator(this, 0);
+ }
+
+ private:
+ uint32_t next_slot(uint32_t slot) const
+ {
+ for (; slot < m_array.slots_total(); slot++) {
+ uint32_t item_index = slot >> OFFSET_SHIFT;
+ uint offset = slot & OFFSET_MASK;
+ const Item &item = m_array.item(item_index);
+ if (item.is_set(offset)) {
+ return slot;
+ }
+ }
+ return slot;
+ }
+
+ uint32_t count_collisions(const KeyT &key) const
+ {
+ uint32_t collisions = 0;
+ ITER_SLOTS_BEGIN (key, m_array, const, item, offset) {
+ if (item.is_empty(offset) || item.has_key(offset, key)) {
+ return collisions;
+ }
+ collisions++;
+ }
+ ITER_SLOTS_END(offset);
+ }
+
+ void ensure_can_add()
+ {
+ if (UNLIKELY(m_array.should_grow())) {
+ this->grow(this->size() + 1);
+ }
+ }
+
+ BLI_NOINLINE void grow(uint32_t min_usable_slots)
+ {
+ ArrayType new_array = m_array.init_reserved(min_usable_slots);
+ for (Item &old_item : m_array) {
+ for (uint offset = 0; offset < 4; offset++) {
+ if (old_item.is_set(offset)) {
+ this->add_after_grow(*old_item.key(offset), *old_item.value(offset), new_array);
+ }
+ }
+ }
+ m_array = std::move(new_array);
+ }
+
+ void add_after_grow(KeyT &key, ValueT &value, ArrayType &new_array)
+ {
+ ITER_SLOTS_BEGIN (key, new_array, , item, offset) {
+ if (item.is_empty(offset)) {
+ item.move_in(offset, key, value);
+ return;
+ }
+ }
+ ITER_SLOTS_END(offset);
+ }
+};
+
+#undef ITER_SLOTS_BEGIN
+#undef ITER_SLOTS_END
+
+} // namespace BLI