Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJacques Lucke <mail@jlucke.com>2019-09-13 11:06:02 +0300
committerJacques Lucke <mail@jlucke.com>2019-09-13 11:06:02 +0300
commit1c44d08a69eb3e66c7f942d748f549d6b8ca138f (patch)
treeb74784ffdc6f6a66a92d02a985865247101a6093 /source/blender/blenlib/BLI_string_map.h
parent8d12c2a83658cb6b1311197c292e5906394c4321 (diff)
BLI: new C++ hash table data structures
This commit adds some new hashing based data structures to blenlib. All of them use open addressing with probing currently. Furthermore, they support small object optimization, but it is not customizable yet. I'll add support for this when necessary. The following main data structures are included: **Set** A collection of values, where every value must exist at most once. This is similar to a Python `set`. **SetVector** A combination of a Set and a Vector. It supports fast search for elements and maintains insertion order when there are no deletes. All elements are stored in a continuous array. So they can be iterated over using a normal `ArrayRef`. **Map** A set of key-value-pairs, where every key must exist at most once. This is similar to a Python `dict`. **StringMap** A special map for the case when the keys are strings. This case is fairly common and allows for some optimizations. Most importantly, many unnecessary allocations can be avoided by storing strings in a single buffer. Furthermore, the interface of this class uses `StringRef` to avoid unnecessary conversions. This commit is a continuation of rB369d5e8ad2bb7.
Diffstat (limited to 'source/blender/blenlib/BLI_string_map.h')
-rw-r--r--source/blender/blenlib/BLI_string_map.h410
1 files changed, 410 insertions, 0 deletions
diff --git a/source/blender/blenlib/BLI_string_map.h b/source/blender/blenlib/BLI_string_map.h
new file mode 100644
index 00000000000..0cf0bb40e4d
--- /dev/null
+++ b/source/blender/blenlib/BLI_string_map.h
@@ -0,0 +1,410 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/** \file
+ * \ingroup bli
+ *
+ * This tries to solve the issue that a normal map with std::string as key might do many
+ * allocations when the keys are longer than 16 bytes (the usual small string optimization size).
+ *
+ * For now this still uses std::string, but having this abstraction in place will make it easier to
+ * make it more efficient later on. Also, even if we will never implement this optimization, having
+ * a special map with string keys can be quite handy. */
+
+#pragma once
+
+#include "BLI_map.h"
+#include "BLI_string_ref.h"
+#include "BLI_vector.h"
+
+namespace BLI {
+
+// clang-format off
+
+#define ITER_SLOTS_BEGIN(HASH, ARRAY, OPTIONAL_CONST, R_ITEM, R_OFFSET) \
+ uint32_t hash_copy = HASH; \
+ uint32_t perturb = HASH; \
+ while (true) { \
+ uint32_t item_index = (hash_copy & ARRAY.slot_mask()) >> OFFSET_SHIFT; \
+ uint8_t R_OFFSET = hash_copy & OFFSET_MASK; \
+ uint8_t initial_offset = R_OFFSET; \
+ OPTIONAL_CONST Item &R_ITEM = ARRAY.item(item_index); \
+ do {
+
+#define ITER_SLOTS_END(R_OFFSET) \
+ R_OFFSET = (R_OFFSET + 1) & OFFSET_MASK; \
+ } while (R_OFFSET != initial_offset); \
+ perturb >>= 5; \
+ hash_copy = hash_copy * 5 + 1 + perturb; \
+ } ((void)0)
+
+// clang-format on
+
+template<typename T, typename Allocator = GuardedAllocator> class StringMap {
+ private:
+ static constexpr uint32_t OFFSET_MASK = 3;
+ static constexpr uint32_t OFFSET_SHIFT = 2;
+
+ class Item {
+ private:
+ static constexpr int32_t IS_EMPTY = -1;
+
+ uint32_t m_hashes[4];
+ int32_t m_indices[4];
+ char m_values[sizeof(T) * 4];
+
+ public:
+ static constexpr uint slots_per_item = 4;
+
+ Item()
+ {
+ for (uint offset = 0; offset < 4; offset++) {
+ m_indices[offset] = IS_EMPTY;
+ }
+ }
+
+ ~Item()
+ {
+ for (uint offset = 0; offset < 4; offset++) {
+ if (this->is_set(offset)) {
+ destruct(this->value(offset));
+ }
+ }
+ }
+
+ Item(const Item &other)
+ {
+ for (uint offset = 0; offset < 4; offset++) {
+ m_indices[offset] = other.m_indices[offset];
+ if (other.is_set(offset)) {
+ m_hashes[offset] = other.m_hashes[offset];
+ new (this->value(offset)) T(*other.value(offset));
+ }
+ }
+ }
+
+ Item(Item &&other) noexcept
+ {
+ for (uint offset = 0; offset < 4; offset++) {
+ m_indices[offset] = other.m_indices[offset];
+ if (other.is_set(offset)) {
+ m_hashes[offset] = other.m_hashes[offset];
+ new (this->value(offset)) T(std::move(*other.value(offset)));
+ }
+ }
+ }
+
+ uint32_t index(uint offset) const
+ {
+ return m_indices[offset];
+ }
+
+ uint32_t hash(uint offset) const
+ {
+ return m_hashes[offset];
+ }
+
+ T *value(uint offset) const
+ {
+ return (T *)POINTER_OFFSET(m_values, offset * sizeof(T));
+ }
+
+ bool is_set(uint offset) const
+ {
+ return m_indices[offset] >= 0;
+ }
+
+ bool is_empty(uint offset) const
+ {
+ return m_indices[offset] == IS_EMPTY;
+ }
+
+ bool has_hash(uint offset, uint32_t hash) const
+ {
+ BLI_assert(this->is_set(offset));
+ return m_hashes[offset] == hash;
+ }
+
+ bool has_exact_key(uint offset, StringRef key, const Vector<char> &chars) const
+ {
+ return key == this->get_key(offset, chars);
+ }
+
+ StringRefNull get_key(uint offset, const Vector<char> &chars) const
+ {
+ const char *ptr = chars.begin() + m_indices[offset];
+ uint length = *(uint *)ptr;
+ const char *start = ptr + sizeof(uint);
+ return StringRefNull(start, length);
+ }
+
+ void move_in(uint offset, uint32_t hash, uint32_t index, T &value)
+ {
+ BLI_assert(!this->is_set(offset));
+ m_hashes[offset] = hash;
+ m_indices[offset] = index;
+ new (this->value(offset)) T(std::move(value));
+ }
+
+ void copy_in(uint offset, uint32_t hash, uint32_t index, const T &value)
+ {
+ BLI_assert(!this->is_set(offset));
+ m_hashes[offset] = hash;
+ m_indices[offset] = index;
+ new (this->value(offset)) T(value);
+ }
+ };
+
+ using ArrayType = OpenAddressingArray<Item, 1, Allocator>;
+ ArrayType m_array;
+ Vector<char> m_chars;
+
+ public:
+ StringMap() = default;
+
+ /**
+ * Get the number of key-value pairs in the map.
+ */
+ uint size() const
+ {
+ return m_array.slots_set();
+ }
+
+ /**
+ * Add a new element to the map. It is assumed that the key did not exist before.
+ */
+ void add_new(StringRef key, const T &value)
+ {
+ BLI_assert(!this->contains(key));
+ this->ensure_can_add();
+ uint32_t hash = this->compute_string_hash(key);
+ ITER_SLOTS_BEGIN (hash, m_array, , item, offset) {
+ if (item.is_empty(offset)) {
+ uint32_t index = this->save_key_in_array(key);
+ item.copy_in(offset, hash, index, value);
+ m_array.update__empty_to_set();
+ return;
+ }
+ }
+ ITER_SLOTS_END(offset);
+ }
+
+ /**
+ * Return true when the key exists in the map, otherwise false.
+ */
+ bool contains(StringRef key) const
+ {
+ uint32_t hash = this->compute_string_hash(key);
+ ITER_SLOTS_BEGIN (hash, m_array, const, item, offset) {
+ if (item.is_empty(offset)) {
+ return false;
+ }
+ else if (item.has_hash(offset, hash) && item.has_exact_key(offset, key, m_chars)) {
+ return true;
+ }
+ }
+ ITER_SLOTS_END(offset);
+ }
+
+ /**
+ * Get a reference to the value corresponding to a key. It is assumed that the key does exist.
+ */
+ const T &lookup(StringRef key) const
+ {
+ BLI_assert(this->contains(key));
+ T *found_value = nullptr;
+ uint32_t hash = this->compute_string_hash(key);
+ ITER_SLOTS_BEGIN (hash, m_array, const, item, offset) {
+ if (item.is_empty(offset)) {
+ return *found_value;
+ }
+ else if (item.has_hash(offset, hash)) {
+ if (found_value == nullptr) {
+ /* Common case: the first slot with the correct hash contains the key.
+ * However, still need to iterate until the next empty slot to make sure there is no
+ * other key with the exact same hash. */
+ /* TODO: Check if we can guarantee that every hash only exists once in some cases. */
+ found_value = item.value(offset);
+ }
+ else if (item.has_exact_key(offset, key, m_chars)) {
+ /* Found the hash more than once, now check for actual string equality. */
+ return *item.value(offset);
+ }
+ }
+ }
+ ITER_SLOTS_END(offset);
+ }
+
+ /**
+ * Get a pointer to the value corresponding to the key. Return nullptr, if the key does not
+ * exist.
+ */
+ const T *lookup_ptr(StringRef key) const
+ {
+ uint32_t hash = this->compute_string_hash(key);
+ ITER_SLOTS_BEGIN (hash, m_array, const, item, offset) {
+ if (item.is_empty(offset)) {
+ return nullptr;
+ }
+ else if (item.has_hash(offset, hash) && item.has_exact_key(offset, key, m_chars)) {
+ return item.value(offset);
+ }
+ }
+ ITER_SLOTS_END(offset);
+ }
+
+ /**
+ * Get a copy of the value corresponding to the key. If the key does not exist, return the
+ * default value.
+ */
+ T lookup_default(StringRef key, const T &default_value) const
+ {
+ const T *ptr = this->lookup_ptr(key);
+ if (ptr != nullptr) {
+ return *ptr;
+ }
+ else {
+ return default_value;
+ }
+ }
+
+ /**
+ * Do a linear search over all items to find a key for a value.
+ */
+ StringRefNull find_key_for_value(const T &value) const
+ {
+ for (const Item &item : m_array) {
+ for (uint offset = 0; offset < 4; offset++) {
+ if (item.is_set(offset) && value == *item.value(offset)) {
+ return item.get_key(offset, m_chars);
+ }
+ }
+ }
+ BLI_assert(false);
+ return {};
+ }
+
+ /**
+ * Run a function for every value in the map.
+ */
+ template<typename FuncT> void foreach_value(const FuncT &func)
+ {
+ for (Item &item : m_array) {
+ for (uint offset = 0; offset < 4; offset++) {
+ if (item.is_set(offset)) {
+ func(*item.value(offset));
+ }
+ }
+ }
+ }
+
+ /**
+ * Run a function for every key in the map.
+ */
+ template<typename FuncT> void foreach_key(const FuncT &func)
+ {
+ for (Item &item : m_array) {
+ for (uint offset = 0; offset < 4; offset++) {
+ if (item.is_set(offset)) {
+ StringRefNull key = item.get_key(offset, m_chars);
+ func(key);
+ }
+ }
+ }
+ }
+
+ /**
+ * Run a function for every key-value-pair in the map.
+ */
+ template<typename FuncT> void foreach_key_value_pair(const FuncT &func)
+ {
+ for (Item &item : m_array) {
+ for (uint offset = 0; offset < 4; offset++) {
+ if (item.is_set(offset)) {
+ StringRefNull key = item.get_key(offset, m_chars);
+ T &value = *item.value(offset);
+ func(key, value);
+ }
+ }
+ }
+ }
+
+ private:
+ uint32_t compute_string_hash(StringRef key) const
+ {
+ /* TODO: check if this can be optimized more because we know the key length already. */
+ uint32_t hash = 5381;
+ for (char c : key) {
+ hash = hash * 33 + c;
+ }
+ return hash;
+ }
+
+ uint32_t save_key_in_array(StringRef key)
+ {
+ uint index = m_chars.size();
+ uint string_size = key.size();
+ m_chars.extend(ArrayRef<char>((char *)&string_size, sizeof(uint)));
+ m_chars.extend(key);
+ m_chars.append('\0');
+ return index;
+ }
+
+ StringRefNull key_from_index(uint32_t index) const
+ {
+ const char *ptr = m_chars.begin() + index;
+ uint length = *(uint *)ptr;
+ const char *start = ptr + sizeof(uint);
+ return StringRefNull(start, length);
+ }
+
+ void ensure_can_add()
+ {
+ if (UNLIKELY(m_array.should_grow())) {
+ this->grow(this->size() + 1);
+ }
+ }
+
+ BLI_NOINLINE void grow(uint min_usable_slots)
+ {
+ ArrayType new_array = m_array.init_reserved(min_usable_slots);
+ for (Item &old_item : m_array) {
+ for (uint offset = 0; offset < 4; offset++) {
+ if (old_item.is_set(offset)) {
+ this->add_after_grow(
+ *old_item.value(offset), old_item.hash(offset), old_item.index(offset), new_array);
+ }
+ }
+ }
+ m_array = std::move(new_array);
+ }
+
+ void add_after_grow(T &value, uint32_t hash, uint32_t index, ArrayType &new_array)
+ {
+ ITER_SLOTS_BEGIN (hash, new_array, , item, offset) {
+ if (item.is_empty(offset)) {
+ item.move_in(offset, hash, index, value);
+ return;
+ }
+ }
+ ITER_SLOTS_END(offset);
+ }
+};
+
+#undef ITER_SLOTS_BEGIN
+#undef ITER_SLOTS_END
+
+} // namespace BLI