diff options
author | Jacques Lucke <jacques@blender.org> | 2020-06-09 11:10:56 +0300 |
---|---|---|
committer | Jacques Lucke <jacques@blender.org> | 2020-06-09 11:15:43 +0300 |
commit | d8678e02ecec9375bec1dcf1388c6fc8b4ce3ad2 (patch) | |
tree | 6e7d2a7452091877f73d413d830e6cb12e86745f /source/blender/blenlib/BLI_probing_strategies.hh | |
parent | 50258d55e7c1360274d40e303386cf70b16c8b2f (diff) |
BLI: generally improve C++ data structures
The main focus here was to improve the docs significantly. Furthermore,
I reimplemented `Set`, `Map` and `VectorSet`. They are now (usually)
faster, simpler and more customizable. I also rewrote `Stack` to make
it more efficient by avoiding unnecessary copies.
Thanks to everyone who helped with constructive feedback.
Approved by brecht and sybren.
Differential Revision: https://developer.blender.org/D7931
Diffstat (limited to 'source/blender/blenlib/BLI_probing_strategies.hh')
-rw-r--r-- | source/blender/blenlib/BLI_probing_strategies.hh | 250 |
1 files changed, 250 insertions, 0 deletions
diff --git a/source/blender/blenlib/BLI_probing_strategies.hh b/source/blender/blenlib/BLI_probing_strategies.hh new file mode 100644 index 00000000000..9fa402d4244 --- /dev/null +++ b/source/blender/blenlib/BLI_probing_strategies.hh @@ -0,0 +1,250 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __BLI_PROBING_STRATEGIES_HH__ +#define __BLI_PROBING_STRATEGIES_HH__ + +/** \file + * \ingroup bli + * + * This file implements different probing strategies. Those can be used by different hash table + * implementations like BLI::Set and BLI::Map. A probing strategy produces a sequence of values + * based on an initial hash value. + * + * A probing strategy has to implement the following methods: + * - Constructor(uint32_t hash): Start a new probing sequence based on the given hash. + * - get() const -> uint32_t: Get the current value in the sequence. + * - next() -> void: Update the internal state, so that the next value can be accessed with get(). + * - linear_steps() -> uint32_t: Returns number of linear probing steps that should be done. + * + * Using linear probing steps between larger jumps can result in better performance, due to + * improved cache usage. It's a way of getting the benefits or linear probing without the + * clustering issues. However, more linear steps can also make things slower when the initial hash + * produces many collisions. + * + * Every probing strategy has to guarantee, that every possible uint32_t is returned eventually. + * This is necessary for correctness. If this is not the case, empty slots might not be found. + * + * The SLOT_PROBING_BEGIN and SLOT_PROBING_END macros can be used to implement a loop that iterates + * over a probing sequence. + * + * Probing strategies can be evaluated with many different criteria. Different use cases often + * have different optimal strategies. Examples: + * - If the hash function generates a well distributed initial hash value, the constructor should + * be as short as possible. This is because the hash value can be used as slot index almost + * immediately, without too many collisions. This is also a perfect use case for linear steps. + * - If the hash function is bad, it can help if the probing strategy remixes the hash value, + * before the first slot is accessed. + * - Different next() methods can remix the hash value in different ways. Depending on which bits + * of the hash value contain the most information, different rehashing strategies work best. + * - When the hash table is very small, having a trivial hash function and then doing linear + * probing might work best. + */ + +#include "BLI_sys_types.h" + +namespace BLI { + +/** + * The simplest probing strategy. It's bad in most cases, because it produces clusters in the hash + * table, which result in many collisions. However, if the hash function is very good or the hash + * table is small, this strategy might even work best. + */ +class LinearProbingStrategy { + private: + uint32_t m_hash; + + public: + LinearProbingStrategy(uint32_t hash) : m_hash(hash) + { + } + + void next() + { + m_hash++; + } + + uint32_t get() const + { + return m_hash; + } + + uint32_t linear_steps() const + { + return UINT32_MAX; + } +}; + +/** + * A slightly adapted quadratic probing strategy. The distance to the original slot increases + * quadratically. This method also leads to clustering. Another disadvantage is that not all bits + * of the original hash are used. + * + * The distance i * i is not used, because it does not guarantee, that every slot is hit. + * Instead (i * i + i) / 2 is used, which has this desired property. + * + * In the first few steps, this strategy can have good cache performance. It largely depends on how + * many keys fit into a cache line in the hash table. + */ +class QuadraticProbingStrategy { + private: + uint32_t m_original_hash; + uint32_t m_current_hash; + uint32_t m_iteration; + + public: + QuadraticProbingStrategy(uint32_t hash) + : m_original_hash(hash), m_current_hash(hash), m_iteration(1) + { + } + + void next() + { + m_current_hash = m_original_hash + ((m_iteration * m_iteration + m_iteration) >> 1); + m_iteration++; + } + + uint32_t get() const + { + return m_current_hash; + } + + uint32_t linear_steps() const + { + return 1; + } +}; + +/** + * This is the probing strategy used by CPython (in 2020). + * + * It is very fast when the original hash value is good. If there are collisions, more bits of the + * hash value are taken into account. + * + * LinearSteps: Can be set to something larger than 1 for improved cache performance in some cases. + * PreShuffle: When true, the initial call to next() will be done to the constructor. This can help + * when the hash function has put little information into the lower bits. + */ +template<uint32_t LinearSteps = 1, bool PreShuffle = false> class PythonProbingStrategy { + private: + uint32_t m_hash; + uint32_t m_perturb; + + public: + PythonProbingStrategy(uint32_t hash) : m_hash(hash), m_perturb(hash) + { + if (PreShuffle) { + this->next(); + } + } + + void next() + { + m_perturb >>= 5; + m_hash = 5 * m_hash + 1 + m_perturb; + } + + uint32_t get() const + { + return m_hash; + } + + uint32_t linear_steps() const + { + return LinearSteps; + } +}; + +/** + * Similar to the Python probing strategy. However, it does a bit more shuffling in the next() + * method. This way more bits are taken into account earlier. After a couple of collisions (that + * should happen rarely), it will fallback to a sequence that hits every slot. + */ +template<uint32_t LinearSteps = 2, bool PreShuffle = false> class ShuffleProbingStrategy { + private: + uint32_t m_hash; + uint32_t m_perturb; + + public: + ShuffleProbingStrategy(uint32_t hash) : m_hash(hash), m_perturb(hash) + { + if (PreShuffle) { + this->next(); + } + } + + void next() + { + if (m_perturb != 0) { + m_perturb >>= 10; + m_hash = ((m_hash >> 16) ^ m_hash) * 0x45d9f3b + m_perturb; + } + else { + m_hash = 5 * m_hash + 1; + } + } + + uint32_t get() const + { + return m_hash; + } + + uint32_t linear_steps() const + { + return LinearSteps; + } +}; + +/** + * Having a specified default is convenient. + */ +using DefaultProbingStrategy = PythonProbingStrategy<>; + +/* Turning off clang format here, because otherwise it will mess up the alignment between the + * macros. */ +// clang-format off + +/** + * Both macros together form a loop that iterates over slot indices in a hash table with a + * power-of-two size. + * + * You must not `break` out of this loop. Only `return` is permitted. If you don't return + * out of the loop, it will be an infinite loop. These loops should not be nested within the + * same function. + * + * PROBING_STRATEGY: Class describing the probing strategy. + * HASH: The initial hash as produced by a hash function. + * MASK: A bit mask such that (hash & MASK) is a valid slot index. + * R_SLOT_INDEX: Name of the variable that will contain the slot index. + */ +#define SLOT_PROBING_BEGIN(PROBING_STRATEGY, HASH, MASK, R_SLOT_INDEX) \ + PROBING_STRATEGY probing_strategy(HASH); \ + do { \ + uint32_t linear_offset = 0; \ + uint32_t current_hash = probing_strategy.get(); \ + do { \ + uint32_t R_SLOT_INDEX = (current_hash + linear_offset) & MASK; + +#define SLOT_PROBING_END() \ + } while (++linear_offset < probing_strategy.linear_steps()); \ + probing_strategy.next(); \ + } while (true) + +// clang-format on + +} // namespace BLI + +#endif /* __BLI_PROBING_STRATEGIES_HH__ */ |