From d8678e02ecec9375bec1dcf1388c6fc8b4ce3ad2 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Tue, 9 Jun 2020 10:10:56 +0200 Subject: BLI: generally improve C++ data structures The main focus here was to improve the docs significantly. Furthermore, I reimplemented `Set`, `Map` and `VectorSet`. They are now (usually) faster, simpler and more customizable. I also rewrote `Stack` to make it more efficient by avoiding unnecessary copies. Thanks to everyone who helped with constructive feedback. Approved by brecht and sybren. Differential Revision: https://developer.blender.org/D7931 --- source/blender/blenlib/BLI_hash.hh | 107 ++++++++++++++++++++++++++++++++----- 1 file changed, 93 insertions(+), 14 deletions(-) (limited to 'source/blender/blenlib/BLI_hash.hh') diff --git a/source/blender/blenlib/BLI_hash.hh b/source/blender/blenlib/BLI_hash.hh index 3b3448f66b1..029fa32553a 100644 --- a/source/blender/blenlib/BLI_hash.hh +++ b/source/blender/blenlib/BLI_hash.hh @@ -20,8 +20,58 @@ /** \file * \ingroup bli * - * This file provides default hash functions for some primitive types. The hash functions can be - * used by containers such as Map and Set. + * A specialization of `BLI::DefaultHash` provides a hash function for values of type T. This + * hash function is used by default in hash table implementations in blenlib. + * + * The actual hash function is in the `operator()` method of DefaultHash. The following code + * computes the hash of some value using DefaultHash. + * + * T value = ...; + * DefaultHash hash_function; + * uint32_t hash = hash_function(value); + * + * Hash table implementations like BLI::Set support heterogeneous key lookups. That means that one + * can do a lookup with a key of type A in a hash table that stores keys of type B. This is + * commonly done when B is std::string, because the conversion from e.g. a StringRef to std::string + * can be costly and is unnecessary. To make this work, values of type A and B that compare equal + * have to have the same hash value. This is achieved by defining potentially multiple `operator()` + * in a specialization of DefaultHash. All those methods have to compute the same hash for values + * that compare equal. + * + * The computed hash is an unsigned 32 bit integer. Ideally, the hash function would generate + * uniformly random hash values for a set of keys. However, in many cases trivial hash functions + * are faster and produce a good enough distribution. In general it is better when more information + * is in the lower bits of the hash. By choosing a good probing strategy, the effects of a bad hash + * function are less noticable though. In this context a good probing strategy is one that takes + * all bits of the hash into account eventually. One has to check on a case by case basis to see if + * a better but more expensive or trivial hash function works better. + * + * There are three main ways to provide a hash table implementation with a custom hash function. + * + * - When you want to provide a default hash function for your own custom type: Add a `hash` + * member function to it. The function should return `uint32_t` and take no arguments. This + * method will be called by the default implementation of DefaultHash. It will automatically be + * used by hash table implementations. + * + * - When you want to provide a default hash function for a type that you cannot modify: Add a new + * specialization to the DefaultHash struct. This can be done by writing code like below in + * either global or BLI namespace. + * + * template<> struct BLI::DefaultHash { + * uint32_t operator()(const TheType &value) const { + * return ...; + * } + * }; + * + * - When you want to provide a different hash function for a type that already has a default hash + * function: Implement a struct like the one below and pass it as template parameter to the hash + * table explicitely. + * + * struct MyCustomHash { + * uint32_t operator()(const TheType &value) const { + * return ...; + * } + * }; */ #include @@ -35,7 +85,16 @@ namespace BLI { +/** + * If there is no other specialization of DefaultHash for a given type, try to call `hash()` on the + * value. If there is no such method, this will result in a compiler error. Usually that means that + * you have to implement a hash function using one of three strategies listed above. + */ template struct DefaultHash { + uint32_t operator()(const T &value) const + { + return value.hash(); + } }; #define TRIVIAL_DEFAULT_INT_HASH(TYPE) \ @@ -47,9 +106,9 @@ template struct DefaultHash { } /** - * Cannot make any assumptions about the distribution of keys, so use a trivial hash function by - * default. The hash table implementations are designed to take all bits of the hash into account - * to avoid really bad behavior when the lower bits are all zero. Special hash functions can be + * We cannot make any assumptions about the distribution of keys, so use a trivial hash function by + * default. The default probing strategy is designed to take all bits of the hash into account + * to avoid worst case behavior when the lower bits are all zero. Special hash functions can be * implemented when more knowledge about a specific key distribution is available. */ TRIVIAL_DEFAULT_INT_HASH(int8_t); @@ -58,9 +117,26 @@ TRIVIAL_DEFAULT_INT_HASH(int16_t); TRIVIAL_DEFAULT_INT_HASH(uint16_t); TRIVIAL_DEFAULT_INT_HASH(int32_t); TRIVIAL_DEFAULT_INT_HASH(uint32_t); -TRIVIAL_DEFAULT_INT_HASH(int64_t); -TRIVIAL_DEFAULT_INT_HASH(uint64_t); +template<> struct DefaultHash { + uint32_t operator()(uint64_t value) const + { + uint32_t low = (uint32_t)value; + uint32_t high = (uint32_t)(value >> 32); + return low ^ (high * 0x45d9f3b); + } +}; + +template<> struct DefaultHash { + uint32_t operator()(uint64_t value) const + { + return DefaultHash{}((uint64_t)value); + } +}; + +/** + * One should try to avoid using floats as keys in hash tables, but sometimes it is convenient. + */ template<> struct DefaultHash { uint32_t operator()(float value) const { @@ -78,35 +154,38 @@ inline uint32_t hash_string(StringRef str) } template<> struct DefaultHash { - uint32_t operator()(const std::string &value) const + /** + * Take a StringRef as parameter to support heterogeneous lookups in hash table implementations + * when std::string is used as key. + */ + uint32_t operator()(StringRef value) const { return hash_string(value); } }; template<> struct DefaultHash { - uint32_t operator()(const StringRef &value) const + uint32_t operator()(StringRef value) const { return hash_string(value); } }; template<> struct DefaultHash { - uint32_t operator()(const StringRefNull &value) const + uint32_t operator()(StringRef value) const { return hash_string(value); } }; /** - * While we cannot guarantee that the lower 3 bits or a pointer are zero, it is safe to assume - * this in the general case. MEM_malloc only returns 8 byte aligned addresses on 64-bit systems. + * While we cannot guarantee that the lower 4 bits of a pointer are zero, it is often the case. */ template struct DefaultHash { uint32_t operator()(const T *value) const { - uintptr_t ptr = POINTER_AS_UINT(value); - uint32_t hash = (uint32_t)(ptr >> 3); + uintptr_t ptr = (uintptr_t)value; + uint32_t hash = (uint32_t)(ptr >> 4); return hash; } }; -- cgit v1.2.3