diff options
Diffstat (limited to 'source/blender/blenlib/intern/BLI_ghash.c')
-rw-r--r-- | source/blender/blenlib/intern/BLI_ghash.c | 878 |
1 files changed, 669 insertions, 209 deletions
diff --git a/source/blender/blenlib/intern/BLI_ghash.c b/source/blender/blenlib/intern/BLI_ghash.c index 6747e5c4e7e..7e6dabdffef 100644 --- a/source/blender/blenlib/intern/BLI_ghash.c +++ b/source/blender/blenlib/intern/BLI_ghash.c @@ -28,24 +28,31 @@ /** \file blender/blenlib/intern/BLI_ghash.c * \ingroup bli * - * A general (pointer -> pointer) hash table ADT + * A general (pointer -> pointer) chaining hash table + * for 'Abstract Data Types' (known as an ADT Hash Table). * * \note edgehash.c is based on this, make sure they stay in sync. */ #include <string.h> #include <stdlib.h> +#include <stdarg.h> #include <limits.h> #include "MEM_guardedalloc.h" #include "BLI_sys_types.h" /* for intptr_t support */ #include "BLI_utildefines.h" +#include "BLI_hash_mm2a.h" #include "BLI_mempool.h" + +#define GHASH_INTERNAL_API #include "BLI_ghash.h" #include "BLI_strict_flags.h" +#define GHASH_USE_MODULO_BUCKETS +/* Also used by smallhash! */ const unsigned int hashsizes[] = { 5, 11, 17, 37, 67, 131, 257, 521, 1031, 2053, 4099, 8209, 16411, 32771, 65537, 131101, 262147, 524309, 1048583, 2097169, @@ -53,24 +60,43 @@ const unsigned int hashsizes[] = { 268435459 }; -/* internal flag to ensure sets values aren't used */ -#ifndef NDEBUG -# define GHASH_FLAG_IS_SET (1 << 8) -# define IS_GHASH_ASSERT(gh) BLI_assert((gh->flag & GHASH_FLAG_IS_SET) == 0) -// # define IS_GSET_ASSERT(gs) BLI_assert((gs->flag & GHASH_FLAG_IS_SET) != 0) +#ifdef GHASH_USE_MODULO_BUCKETS +# define GHASH_MAX_SIZE 27 #else -# define IS_GHASH_ASSERT(gh) -// # define IS_GSET_ASSERT(eh) +# define GHASH_BUCKET_BIT_MIN 2 +# define GHASH_BUCKET_BIT_MAX 28 /* About 268M of buckets... */ #endif +/** + * \note Max load #GHASH_LIMIT_GROW used to be 3. (pre 2.74). + * Python uses 0.6666, tommyhaslib even goes down to 0.5. + * Reducing our from 3 to 0.75 gives huge speedup (about twice quicker pure GHash insertions/lookup, + * about 25% - 30% quicker 'dynamic-topology' stroke drawing e.g.). + * Min load #GHASH_LIMIT_SHRINK is a quarter of max load, to avoid resizing to quickly. + */ +#define GHASH_LIMIT_GROW(_nbkt) (((_nbkt) * 3) / 4) +#define GHASH_LIMIT_SHRINK(_nbkt) (((_nbkt) * 3) / 16) + /***/ +/* WARNING! Keep in sync with ugly _gh_Entry in header!!! */ typedef struct Entry { struct Entry *next; - void *key, *val; + void *key; } Entry; +typedef struct GHashEntry { + Entry e; + + void *val; +} GHashEntry; + +typedef Entry GSetEntry; + +#define GHASH_ENTRY_SIZE(_is_gset) \ + ((_is_gset) ? sizeof(GSetEntry) : sizeof(GHashEntry)) + struct GHash { GHashHashFP hashfp; GHashCmpFP cmpfp; @@ -78,11 +104,34 @@ struct GHash { Entry **buckets; struct BLI_mempool *entrypool; unsigned int nbuckets; + unsigned int limit_grow, limit_shrink; +#ifdef GHASH_USE_MODULO_BUCKETS + unsigned int cursize, size_min; +#else + unsigned int bucket_mask, bucket_bit, bucket_bit_min; +#endif + unsigned int nentries; - unsigned int cursize, flag; + unsigned int flag; }; +BLI_INLINE void ghash_entry_copy( + GHash *gh_dst, Entry *dst, GHash *gh_src, Entry *src, + GHashKeyCopyFP keycopyfp, GHashValCopyFP valcopyfp) +{ + dst->key = (keycopyfp) ? keycopyfp(src->key) : src->key; + + if ((gh_dst->flag & GHASH_FLAG_IS_GSET) == 0) { + if ((gh_src->flag & GHASH_FLAG_IS_GSET) == 0) { + ((GHashEntry *)dst)->val = (valcopyfp) ? valcopyfp(((GHashEntry *)src)->val) : ((GHashEntry *)src)->val; + } + else { + ((GHashEntry *)dst)->val = NULL; + } + } +} + /* -------------------------------------------------------------------- */ /* GHash API */ @@ -90,25 +139,37 @@ struct GHash { * \{ */ /** - * Get the hash for a key. + * Get the full hash for a key. */ BLI_INLINE unsigned int ghash_keyhash(GHash *gh, const void *key) { - return gh->hashfp(key) % gh->nbuckets; + return gh->hashfp(key); } /** - * Check if the number of items in the GHash is large enough to require more buckets. + * Get the full hash for an entry. */ -BLI_INLINE bool ghash_test_expand_buckets(const unsigned int nentries, const unsigned int nbuckets) +BLI_INLINE unsigned int ghash_entryhash(GHash *gh, const Entry *e) { - return (nentries > nbuckets * 3); + return gh->hashfp(e->key); } /** - * Expand buckets to the next size up. + * Get the bucket-hash for an already-computed full hash. */ -BLI_INLINE void ghash_resize_buckets(GHash *gh, const unsigned int nbuckets) +BLI_INLINE unsigned int ghash_bucket_index(GHash *gh, const unsigned int hash) +{ +#ifdef GHASH_USE_MODULO_BUCKETS + return hash % gh->nbuckets; +#else + return hash & gh->bucket_mask; +#endif +} + +/** + * Expand buckets to the next size up or down. + */ +static void ghash_buckets_resize(GHash *gh, const unsigned int nbuckets) { Entry **buckets_old = gh->buckets; Entry **buckets_new; @@ -116,49 +177,223 @@ BLI_INLINE void ghash_resize_buckets(GHash *gh, const unsigned int nbuckets) unsigned int i; Entry *e; - BLI_assert(gh->nbuckets != nbuckets); + BLI_assert((gh->nbuckets != nbuckets) || !gh->buckets); +// printf("%s: %d -> %d\n", __func__, nbuckets_old, nbuckets); gh->nbuckets = nbuckets; - buckets_new = (Entry **)MEM_callocN(gh->nbuckets * sizeof(*gh->buckets), "buckets"); - - for (i = 0; i < nbuckets_old; i++) { - Entry *e_next; - for (e = buckets_old[i]; e; e = e_next) { - const unsigned hash = ghash_keyhash(gh, e->key); - e_next = e->next; - e->next = buckets_new[hash]; - buckets_new[hash] = e; +#ifdef GHASH_USE_MODULO_BUCKETS +#else + gh->bucket_mask = nbuckets - 1; +#endif + + buckets_new = (Entry **)MEM_callocN(sizeof(*gh->buckets) * gh->nbuckets, __func__); + + if (buckets_old) { + if (nbuckets > nbuckets_old) { + for (i = 0; i < nbuckets_old; i++) { + Entry *e_next; + for (e = buckets_old[i]; e; e = e_next) { + const unsigned hash = ghash_entryhash(gh, e); + const unsigned bucket_index = ghash_bucket_index(gh, hash); + e_next = e->next; + e->next = buckets_new[bucket_index]; + buckets_new[bucket_index] = e; + } + } + } + else { + for (i = 0; i < nbuckets_old; i++) { +#ifdef GHASH_USE_MODULO_BUCKETS + Entry *e_next; + for (e = buckets_old[i]; e; e = e_next) { + const unsigned hash = ghash_entryhash(gh, e); + const unsigned bucket_index = ghash_bucket_index(gh, hash); + e_next = e->next; + e->next = buckets_new[bucket_index]; + buckets_new[bucket_index] = e; + } +#else + /* No need to recompute hashes in this case, since our mask is just smaller, all items in old bucket i + * will go in same new bucket (i & new_mask)! */ + const unsigned bucket_index = ghash_bucket_index(gh, i); + BLI_assert(!buckets_old[i] || (bucket_index == ghash_bucket_index(gh, ghash_entryhash(gh, buckets_old[i])))); + for (e = buckets_old[i]; e && e->next; e = e->next); + if (e) { + e->next = buckets_new[bucket_index]; + buckets_new[bucket_index] = buckets_old[i]; + } +#endif + } } } gh->buckets = buckets_new; - MEM_freeN(buckets_old); + if (buckets_old) { + MEM_freeN(buckets_old); + } } /** - * Increase initial bucket size to match a reserved amount. + * Check if the number of items in the GHash is large enough to require more buckets, + * or small enough to require less buckets, and resize \a gh accordingly. */ -BLI_INLINE void ghash_buckets_reserve(GHash *gh, const unsigned int nentries_reserve) +static void ghash_buckets_expand( + GHash *gh, const unsigned int nentries, const bool user_defined) { - while (ghash_test_expand_buckets(nentries_reserve, gh->nbuckets)) { - gh->nbuckets = hashsizes[++gh->cursize]; + unsigned int new_nbuckets; + + if (LIKELY(gh->buckets && (nentries < gh->limit_grow))) { + return; } + + new_nbuckets = gh->nbuckets; + +#ifdef GHASH_USE_MODULO_BUCKETS + while ((nentries > gh->limit_grow) && + (gh->cursize < GHASH_MAX_SIZE - 1)) + { + new_nbuckets = hashsizes[++gh->cursize]; + gh->limit_grow = GHASH_LIMIT_GROW(new_nbuckets); + } +#else + while ((nentries > gh->limit_grow) && + (gh->bucket_bit < GHASH_BUCKET_BIT_MAX)) + { + new_nbuckets = 1u << ++gh->bucket_bit; + gh->limit_grow = GHASH_LIMIT_GROW(new_nbuckets); + } +#endif + + if (user_defined) { +#ifdef GHASH_USE_MODULO_BUCKETS + gh->size_min = gh->cursize; +#else + gh->bucket_bit_min = gh->bucket_bit; +#endif + } + + if ((new_nbuckets == gh->nbuckets) && gh->buckets) { + return; + } + + gh->limit_grow = GHASH_LIMIT_GROW(new_nbuckets); + gh->limit_shrink = GHASH_LIMIT_SHRINK(new_nbuckets); + ghash_buckets_resize(gh, new_nbuckets); +} + +static void ghash_buckets_contract( + GHash *gh, const unsigned int nentries, const bool user_defined, const bool force_shrink) +{ + unsigned int new_nbuckets; + + if (!(force_shrink || (gh->flag & GHASH_FLAG_ALLOW_SHRINK))) { + return; + } + + if (LIKELY(gh->buckets && (nentries > gh->limit_shrink))) { + return; + } + + new_nbuckets = gh->nbuckets; + +#ifdef GHASH_USE_MODULO_BUCKETS + while ((nentries < gh->limit_shrink) && + (gh->cursize > gh->size_min)) + { + new_nbuckets = hashsizes[--gh->cursize]; + gh->limit_shrink = GHASH_LIMIT_SHRINK(new_nbuckets); + } +#else + while ((nentries < gh->limit_shrink) && + (gh->bucket_bit > gh->bucket_bit_min)) + { + new_nbuckets = 1u << --gh->bucket_bit; + gh->limit_shrink = GHASH_LIMIT_SHRINK(new_nbuckets); + } +#endif + + if (user_defined) { +#ifdef GHASH_USE_MODULO_BUCKETS + gh->size_min = gh->cursize; +#else + gh->bucket_bit_min = gh->bucket_bit; +#endif + } + + if ((new_nbuckets == gh->nbuckets) && gh->buckets) { + return; + } + + gh->limit_grow = GHASH_LIMIT_GROW(new_nbuckets); + gh->limit_shrink = GHASH_LIMIT_SHRINK(new_nbuckets); + ghash_buckets_resize(gh, new_nbuckets); +} + +/** + * Clear and reset \a gh buckets, reserve again buckets for given number of entries. + */ +BLI_INLINE void ghash_buckets_reset(GHash *gh, const unsigned int nentries) +{ + MEM_SAFE_FREE(gh->buckets); + +#ifdef GHASH_USE_MODULO_BUCKETS + gh->cursize = 0; + gh->size_min = 0; + gh->nbuckets = hashsizes[gh->cursize]; +#else + gh->bucket_bit = GHASH_BUCKET_BIT_MIN; + gh->bucket_bit_min = GHASH_BUCKET_BIT_MIN; + gh->nbuckets = 1u << gh->bucket_bit; + gh->bucket_mask = gh->nbuckets - 1; +#endif + + gh->limit_grow = GHASH_LIMIT_GROW(gh->nbuckets); + gh->limit_shrink = GHASH_LIMIT_SHRINK(gh->nbuckets); + + gh->nentries = 0; + + ghash_buckets_expand(gh, nentries, (nentries != 0)); } /** * Internal lookup function. - * Takes a hash argument to avoid calling #ghash_keyhash multiple times. + * Takes hash and bucket_index arguments to avoid calling #ghash_keyhash and #ghash_bucket_index multiple times. */ -BLI_INLINE Entry *ghash_lookup_entry_ex(GHash *gh, const void *key, - const unsigned int hash) +BLI_INLINE Entry *ghash_lookup_entry_ex( + GHash *gh, const void *key, const unsigned int bucket_index) { Entry *e; + /* If we do not store GHash, not worth computing it for each entry here! + * Typically, comparison function will be quicker, and since it's needed in the end anyway... */ + for (e = gh->buckets[bucket_index]; e; e = e->next) { + if (UNLIKELY(gh->cmpfp(key, e->key) == false)) { + return e; + } + } - for (e = gh->buckets[hash]; e; e = e->next) { + return NULL; +} + +/** + * Internal lookup function, returns previous entry of target one too. + * Takes hash and bucket_index arguments to avoid calling #ghash_keyhash and #ghash_bucket_index multiple times. + * Useful when modifying buckets somehow (like removing an entry...). + */ +BLI_INLINE Entry *ghash_lookup_entry_prev_ex( + GHash *gh, const void *key, Entry **r_e_prev, const unsigned int bucket_index) +{ + Entry *e, *e_prev = NULL; + + /* If we do not store GHash, not worth computing it for each entry here! + * Typically, comparison function will be quicker, and since it's needed in the end anyway... */ + for (e = gh->buckets[bucket_index]; e; e_prev = e, e = e->next) { if (UNLIKELY(gh->cmpfp(key, e->key) == false)) { + *r_e_prev = e_prev; return e; } } + + *r_e_prev = NULL; return NULL; } @@ -168,105 +403,157 @@ BLI_INLINE Entry *ghash_lookup_entry_ex(GHash *gh, const void *key, BLI_INLINE Entry *ghash_lookup_entry(GHash *gh, const void *key) { const unsigned int hash = ghash_keyhash(gh, key); - return ghash_lookup_entry_ex(gh, key, hash); + const unsigned int bucket_index = ghash_bucket_index(gh, hash); + return ghash_lookup_entry_ex(gh, key, bucket_index); } static GHash *ghash_new(GHashHashFP hashfp, GHashCmpFP cmpfp, const char *info, - const unsigned int nentries_reserve, - const unsigned int entry_size) + const unsigned int nentries_reserve, const unsigned int flag) { GHash *gh = MEM_mallocN(sizeof(*gh), info); gh->hashfp = hashfp; gh->cmpfp = cmpfp; - gh->nbuckets = hashsizes[0]; /* gh->cursize */ - gh->nentries = 0; - gh->cursize = 0; - gh->flag = 0; + gh->buckets = NULL; + gh->flag = flag; - /* if we have reserved the number of elements that this hash will contain */ - if (nentries_reserve) { - ghash_buckets_reserve(gh, nentries_reserve); - } - - gh->buckets = MEM_callocN(gh->nbuckets * sizeof(*gh->buckets), "buckets"); - gh->entrypool = BLI_mempool_create(entry_size, 64, 64, BLI_MEMPOOL_NOP); + ghash_buckets_reset(gh, nentries_reserve); + gh->entrypool = BLI_mempool_create(GHASH_ENTRY_SIZE(flag & GHASH_FLAG_IS_GSET), 64, 64, BLI_MEMPOOL_NOP); return gh; } /** * Internal insert function. - * Takes a hash argument to avoid calling #ghash_keyhash multiple times. + * Takes hash and bucket_index arguments to avoid calling #ghash_keyhash and #ghash_bucket_index multiple times. */ -BLI_INLINE void ghash_insert_ex(GHash *gh, void *key, void *val, - unsigned int hash) +BLI_INLINE void ghash_insert_ex( + GHash *gh, void *key, void *val, const unsigned int bucket_index) { - Entry *e = (Entry *)BLI_mempool_alloc(gh->entrypool); + GHashEntry *e = BLI_mempool_alloc(gh->entrypool); + BLI_assert((gh->flag & GHASH_FLAG_ALLOW_DUPES) || (BLI_ghash_haskey(gh, key) == 0)); - IS_GHASH_ASSERT(gh); + BLI_assert(!(gh->flag & GHASH_FLAG_IS_GSET)); - e->next = gh->buckets[hash]; - e->key = key; + e->e.next = gh->buckets[bucket_index]; + e->e.key = key; e->val = val; - gh->buckets[hash] = e; + gh->buckets[bucket_index] = (Entry *)e; - if (UNLIKELY(ghash_test_expand_buckets(++gh->nentries, gh->nbuckets))) { - ghash_resize_buckets(gh, hashsizes[++gh->cursize]); - } + ghash_buckets_expand(gh, ++gh->nentries, false); +} + +/** + * Insert function that takes a pre-allocated entry. + */ +BLI_INLINE void ghash_insert_ex_keyonly_entry( + GHash *gh, void *key, const unsigned int bucket_index, + Entry *e) +{ + BLI_assert((gh->flag & GHASH_FLAG_ALLOW_DUPES) || (BLI_ghash_haskey(gh, key) == 0)); + + e->next = gh->buckets[bucket_index]; + e->key = key; + gh->buckets[bucket_index] = e; + + ghash_buckets_expand(gh, ++gh->nentries, false); } /** * Insert function that doesn't set the value (use for GSet) */ -BLI_INLINE void ghash_insert_ex_keyonly(GHash *gh, void *key, - unsigned int hash) +BLI_INLINE void ghash_insert_ex_keyonly( + GHash *gh, void *key, const unsigned int bucket_index) { - Entry *e = (Entry *)BLI_mempool_alloc(gh->entrypool); + Entry *e = BLI_mempool_alloc(gh->entrypool); + BLI_assert((gh->flag & GHASH_FLAG_ALLOW_DUPES) || (BLI_ghash_haskey(gh, key) == 0)); - e->next = gh->buckets[hash]; + BLI_assert((gh->flag & GHASH_FLAG_IS_GSET) != 0); + + e->next = gh->buckets[bucket_index]; e->key = key; - /* intentionally leave value unset */ - gh->buckets[hash] = e; + gh->buckets[bucket_index] = e; - if (UNLIKELY(ghash_test_expand_buckets(++gh->nentries, gh->nbuckets))) { - ghash_resize_buckets(gh, hashsizes[++gh->cursize]); - } + ghash_buckets_expand(gh, ++gh->nentries, false); } BLI_INLINE void ghash_insert(GHash *gh, void *key, void *val) { const unsigned int hash = ghash_keyhash(gh, key); - ghash_insert_ex(gh, key, val, hash); + const unsigned int bucket_index = ghash_bucket_index(gh, hash); + + ghash_insert_ex(gh, key, val, bucket_index); +} + +BLI_INLINE bool ghash_insert_safe( + GHash *gh, void *key, void *val, const bool override, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp) +{ + const unsigned int hash = ghash_keyhash(gh, key); + const unsigned int bucket_index = ghash_bucket_index(gh, hash); + GHashEntry *e = (GHashEntry *)ghash_lookup_entry_ex(gh, key, bucket_index); + + BLI_assert(!(gh->flag & GHASH_FLAG_IS_GSET)); + + if (e) { + if (override) { + if (keyfreefp) keyfreefp(e->e.key); + if (valfreefp) valfreefp(e->val); + e->e.key = key; + e->val = val; + } + return false; + } + else { + ghash_insert_ex(gh, key, val, bucket_index); + return true; + } +} + +BLI_INLINE bool ghash_insert_safe_keyonly(GHash *gh, void *key, const bool override, GHashKeyFreeFP keyfreefp) +{ + const unsigned int hash = ghash_keyhash(gh, key); + const unsigned int bucket_index = ghash_bucket_index(gh, hash); + Entry *e = ghash_lookup_entry_ex(gh, key, bucket_index); + + BLI_assert((gh->flag & GHASH_FLAG_IS_GSET) != 0); + + if (e) { + if (override) { + if (keyfreefp) keyfreefp(e->key); + e->key = key; + } + return false; + } + else { + ghash_insert_ex_keyonly(gh, key, bucket_index); + return true; + } } /** * Remove the entry and return it, caller must free from gh->entrypool. */ -static Entry *ghash_remove_ex(GHash *gh, void *key, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp, - unsigned int hash) +static Entry *ghash_remove_ex( + GHash *gh, const void *key, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp, + const unsigned int bucket_index) { - Entry *e; - Entry *e_prev = NULL; + Entry *e_prev; + Entry *e = ghash_lookup_entry_prev_ex(gh, key, &e_prev, bucket_index); - for (e = gh->buckets[hash]; e; e = e->next) { - if (UNLIKELY(gh->cmpfp(key, e->key) == false)) { - Entry *e_next = e->next; + BLI_assert(!valfreefp || !(gh->flag & GHASH_FLAG_IS_GSET)); - if (keyfreefp) keyfreefp(e->key); - if (valfreefp) valfreefp(e->val); + if (e) { + if (keyfreefp) keyfreefp(e->key); + if (valfreefp) valfreefp(((GHashEntry *)e)->val); - if (e_prev) e_prev->next = e_next; - else gh->buckets[hash] = e_next; + if (e_prev) e_prev->next = e->next; + else gh->buckets[bucket_index] = e->next; - gh->nentries--; - return e; - } - e_prev = e; + ghash_buckets_contract(gh, --gh->nentries, false, false); } - return NULL; + return e; } /** @@ -276,21 +563,57 @@ static void ghash_free_cb(GHash *gh, GHashKeyFreeFP keyfreefp, GHashValFreeFP va { unsigned int i; - BLI_assert(keyfreefp || valfreefp); + BLI_assert(keyfreefp || valfreefp); + BLI_assert(!valfreefp || !(gh->flag & GHASH_FLAG_IS_GSET)); for (i = 0; i < gh->nbuckets; i++) { Entry *e; - for (e = gh->buckets[i]; e; ) { - Entry *e_next = e->next; - + for (e = gh->buckets[i]; e; e = e->next) { if (keyfreefp) keyfreefp(e->key); - if (valfreefp) valfreefp(e->val); + if (valfreefp) valfreefp(((GHashEntry *)e)->val); + } + } +} + +/** + * Copy the GHash. + */ +static GHash *ghash_copy(GHash *gh, GHashKeyCopyFP keycopyfp, GHashValCopyFP valcopyfp) +{ + GHash *gh_new; + unsigned int i; + /* This allows us to be sure to get the same number of buckets in gh_new as in ghash. */ + const unsigned int reserve_nentries_new = MAX2(GHASH_LIMIT_GROW(gh->nbuckets) - 1, gh->nentries); + + BLI_assert(!valcopyfp || !(gh->flag & GHASH_FLAG_IS_GSET)); + + gh_new = ghash_new(gh->hashfp, gh->cmpfp, __func__, 0, gh->flag); + ghash_buckets_expand(gh_new, reserve_nentries_new, false); + + BLI_assert(gh_new->nbuckets == gh->nbuckets); + + for (i = 0; i < gh->nbuckets; i++) { + Entry *e; + + for (e = gh->buckets[i]; e; e = e->next) { + Entry *e_new = BLI_mempool_alloc(gh_new->entrypool); + ghash_entry_copy(gh_new, e_new, gh, e, keycopyfp, valcopyfp); + + /* Warning! + * This means entries in buckets in new copy will be in reversed order! + * This shall not be an issue though, since order should never be assumed in ghash. */ - e = e_next; + /* Note: We can use 'i' here, since we are sure that 'gh' and 'gh_new' have the same number of buckets! */ + e_new->next = gh_new->buckets[i]; + gh_new->buckets[i] = e_new; } } + gh_new->nentries = gh->nentries; + + return gh_new; } + /** \} */ @@ -310,9 +633,7 @@ static void ghash_free_cb(GHash *gh, GHashKeyFreeFP keyfreefp, GHashValFreeFP va GHash *BLI_ghash_new_ex(GHashHashFP hashfp, GHashCmpFP cmpfp, const char *info, const unsigned int nentries_reserve) { - return ghash_new(hashfp, cmpfp, info, - nentries_reserve, - (unsigned int)sizeof(Entry)); + return ghash_new(hashfp, cmpfp, info, nentries_reserve, 0); } /** @@ -324,11 +645,28 @@ GHash *BLI_ghash_new(GHashHashFP hashfp, GHashCmpFP cmpfp, const char *info) } /** + * Copy given GHash. Keys and values are also copied if relevant callback is provided, else pointers remain the same. + */ +GHash *BLI_ghash_copy(GHash *gh, GHashKeyCopyFP keycopyfp, GHashValCopyFP valcopyfp) +{ + return ghash_copy(gh, keycopyfp, valcopyfp); +} + +/** + * Reserve given amount of entries (resize \a gh accordingly if needed). + */ +void BLI_ghash_reserve(GHash *gh, const unsigned int nentries_reserve) +{ + ghash_buckets_expand(gh, nentries_reserve, true); + ghash_buckets_contract(gh, nentries_reserve, true, false); +} + +/** * \return size of the GHash. */ -int BLI_ghash_size(GHash *gh) +unsigned int BLI_ghash_size(GHash *gh) { - return (int)gh->nentries; + return gh->nentries; } /** @@ -352,19 +690,7 @@ void BLI_ghash_insert(GHash *gh, void *key, void *val) */ bool BLI_ghash_reinsert(GHash *gh, void *key, void *val, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp) { - const unsigned int hash = ghash_keyhash(gh, key); - Entry *e = ghash_lookup_entry_ex(gh, key, hash); - if (e) { - if (keyfreefp) keyfreefp(e->key); - if (valfreefp) valfreefp(e->val); - e->key = key; - e->val = val; - return false; - } - else { - ghash_insert_ex(gh, key, val, hash); - return true; - } + return ghash_insert_safe(gh, key, val, true, keyfreefp, valfreefp); } /** @@ -378,8 +704,8 @@ bool BLI_ghash_reinsert(GHash *gh, void *key, void *val, GHashKeyFreeFP keyfreef */ void *BLI_ghash_lookup(GHash *gh, const void *key) { - Entry *e = ghash_lookup_entry(gh, key); - IS_GHASH_ASSERT(gh); + GHashEntry *e = (GHashEntry *)ghash_lookup_entry(gh, key); + BLI_assert(!(gh->flag & GHASH_FLAG_IS_GSET)); return e ? e->val : NULL; } @@ -388,8 +714,8 @@ void *BLI_ghash_lookup(GHash *gh, const void *key) */ void *BLI_ghash_lookup_default(GHash *gh, const void *key, void *val_default) { - Entry *e = ghash_lookup_entry(gh, key); - IS_GHASH_ASSERT(gh); + GHashEntry *e = (GHashEntry *)ghash_lookup_entry(gh, key); + BLI_assert(!(gh->flag & GHASH_FLAG_IS_GSET)); return e ? e->val : val_default; } @@ -405,12 +731,64 @@ void *BLI_ghash_lookup_default(GHash *gh, const void *key, void *val_default) */ void **BLI_ghash_lookup_p(GHash *gh, const void *key) { - Entry *e = ghash_lookup_entry(gh, key); - IS_GHASH_ASSERT(gh); + GHashEntry *e = (GHashEntry *)ghash_lookup_entry(gh, key); + BLI_assert(!(gh->flag & GHASH_FLAG_IS_GSET)); return e ? &e->val : NULL; } /** + * Ensure \a key is exists in \a gh. + * + * This handles the common situation where the caller needs ensure a key is added to \a gh, + * constructing a new value in the case the key isn't found. + * Otherwise use the existing value. + * + * Such situations typically incur multiple lookups, however this function + * avoids them by ensuring the key is added, + * returning a pointer to the value so it can be used or initialized by the caller. + * + * \returns true when the value didn't need to be added. + * (when false, the caller _must_ initialize the value). + */ +bool BLI_ghash_ensure_p(GHash *gh, void *key, void ***r_val) +{ + const unsigned int hash = ghash_keyhash(gh, key); + const unsigned int bucket_index = ghash_bucket_index(gh, hash); + GHashEntry *e = (GHashEntry *)ghash_lookup_entry_ex(gh, key, bucket_index); + const bool haskey = (e != NULL); + + if (!haskey) { + e = BLI_mempool_alloc(gh->entrypool); + ghash_insert_ex_keyonly_entry(gh, key, bucket_index, (Entry *)e); + } + + *r_val = &e->val; + return haskey; +} + +/** + * A version of #BLI_ghash_ensure_p copies the key on insertion. + */ +bool BLI_ghash_ensure_p_ex( + GHash *gh, const void *key, void ***r_val, + GHashKeyCopyFP keycopyfp) +{ + const unsigned int hash = ghash_keyhash(gh, key); + const unsigned int bucket_index = ghash_bucket_index(gh, hash); + GHashEntry *e = (GHashEntry *)ghash_lookup_entry_ex(gh, key, bucket_index); + const bool haskey = (e != NULL); + + if (!haskey) { + /* keycopyfp(key) is the only difference to BLI_ghash_ensure_p */ + e = BLI_mempool_alloc(gh->entrypool); + ghash_insert_ex_keyonly_entry(gh, keycopyfp(key), bucket_index, (Entry *)e); + } + + *r_val = &e->val; + return haskey; +} + +/** * Remove \a key from \a gh, or return false if the key wasn't found. * * \param key The key to remove. @@ -418,10 +796,11 @@ void **BLI_ghash_lookup_p(GHash *gh, const void *key) * \param valfreefp Optional callback to free the value. * \return true if \a key was removed from \a gh. */ -bool BLI_ghash_remove(GHash *gh, void *key, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp) +bool BLI_ghash_remove(GHash *gh, const void *key, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp) { const unsigned int hash = ghash_keyhash(gh, key); - Entry *e = ghash_remove_ex(gh, key, keyfreefp, valfreefp, hash); + const unsigned int bucket_index = ghash_bucket_index(gh, hash); + Entry *e = ghash_remove_ex(gh, key, keyfreefp, valfreefp, bucket_index); if (e) { BLI_mempool_free(gh->entrypool, e); return true; @@ -440,11 +819,12 @@ bool BLI_ghash_remove(GHash *gh, void *key, GHashKeyFreeFP keyfreefp, GHashValFr * \param keyfreefp Optional callback to free the key. * \return the value of \a key int \a gh or NULL. */ -void *BLI_ghash_popkey(GHash *gh, void *key, GHashKeyFreeFP keyfreefp) +void *BLI_ghash_popkey(GHash *gh, const void *key, GHashKeyFreeFP keyfreefp) { const unsigned int hash = ghash_keyhash(gh, key); - Entry *e = ghash_remove_ex(gh, key, keyfreefp, NULL, hash); - IS_GHASH_ASSERT(gh); + const unsigned int bucket_index = ghash_bucket_index(gh, hash); + GHashEntry *e = (GHashEntry *)ghash_remove_ex(gh, key, keyfreefp, NULL, bucket_index); + BLI_assert(!(gh->flag & GHASH_FLAG_IS_GSET)); if (e) { void *val = e->val; BLI_mempool_free(gh->entrypool, e); @@ -476,17 +856,7 @@ void BLI_ghash_clear_ex(GHash *gh, GHashKeyFreeFP keyfreefp, GHashValFreeFP valf if (keyfreefp || valfreefp) ghash_free_cb(gh, keyfreefp, valfreefp); - gh->nbuckets = hashsizes[0]; /* gh->cursize */ - gh->nentries = 0; - gh->cursize = 0; - - if (nentries_reserve) { - ghash_buckets_reserve(gh, nentries_reserve); - } - - MEM_freeN(gh->buckets); - gh->buckets = MEM_callocN(gh->nbuckets * sizeof(*gh->buckets), "buckets"); - + ghash_buckets_reset(gh, nentries_reserve); BLI_mempool_clear_ex(gh->entrypool, nentries_reserve ? (int)nentries_reserve : -1); } @@ -700,6 +1070,10 @@ unsigned int BLI_ghashutil_uinthash_v4(const unsigned int key[4]) hash += key[3]; return hash; } +unsigned int BLI_ghashutil_uinthash_v4_murmur(const unsigned int key[4]) +{ + return BLI_hash_mm2((const unsigned char *)key, sizeof(int) * 4 /* sizeof(key) */, 0); +} bool BLI_ghashutil_uinthash_v4_cmp(const void *a, const void *b) { @@ -732,6 +1106,18 @@ unsigned int BLI_ghashutil_inthash_p(const void *ptr) return (unsigned int)(key & 0xffffffff); } +unsigned int BLI_ghashutil_inthash_p_murmur(const void *ptr) +{ + uintptr_t key = (uintptr_t)ptr; + + return BLI_hash_mm2((const unsigned char *)&key, sizeof(key), 0); +} + +unsigned int BLI_ghashutil_inthash_p_simple(const void *ptr) +{ + return GET_UINT_FROM_POINTER(ptr); +} + bool BLI_ghashutil_intcmp(const void *a, const void *b) { return (a != b); @@ -768,9 +1154,15 @@ unsigned int BLI_ghashutil_strhash_p(const void *ptr) return h; } +unsigned int BLI_ghashutil_strhash_p_murmur(const void *ptr) +{ + const unsigned char *key = ptr; + + return BLI_hash_mm2(key, strlen((const char *)key) + 1, 0); +} bool BLI_ghashutil_strcmp(const void *a, const void *b) { - return (strcmp(a, b) != 0); + return (a == b) ? false : !STREQ(a, b); } GHashPair *BLI_ghashutil_pairalloc(const void *first, const void *second) @@ -808,44 +1200,36 @@ void BLI_ghashutil_pairfree(void *ptr) /** \name Convenience GHash Creation Functions * \{ */ -GHash *BLI_ghash_ptr_new_ex(const char *info, - const unsigned int nentries_reserve) +GHash *BLI_ghash_ptr_new_ex(const char *info, const unsigned int nentries_reserve) { - return BLI_ghash_new_ex(BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, info, - nentries_reserve); + return BLI_ghash_new_ex(BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, info, nentries_reserve); } GHash *BLI_ghash_ptr_new(const char *info) { return BLI_ghash_ptr_new_ex(info, 0); } -GHash *BLI_ghash_str_new_ex(const char *info, - const unsigned int nentries_reserve) +GHash *BLI_ghash_str_new_ex(const char *info, const unsigned int nentries_reserve) { - return BLI_ghash_new_ex(BLI_ghashutil_strhash_p, BLI_ghashutil_strcmp, info, - nentries_reserve); + return BLI_ghash_new_ex(BLI_ghashutil_strhash_p, BLI_ghashutil_strcmp, info, nentries_reserve); } GHash *BLI_ghash_str_new(const char *info) { return BLI_ghash_str_new_ex(info, 0); } -GHash *BLI_ghash_int_new_ex(const char *info, - const unsigned int nentries_reserve) +GHash *BLI_ghash_int_new_ex(const char *info, const unsigned int nentries_reserve) { - return BLI_ghash_new_ex(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, info, - nentries_reserve); + return BLI_ghash_new_ex(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, info, nentries_reserve); } GHash *BLI_ghash_int_new(const char *info) { return BLI_ghash_int_new_ex(info, 0); } -GHash *BLI_ghash_pair_new_ex(const char *info, - const unsigned int nentries_reserve) +GHash *BLI_ghash_pair_new_ex(const char *info, const unsigned int nentries_reserve) { - return BLI_ghash_new_ex(BLI_ghashutil_pairhash, BLI_ghashutil_paircmp, info, - nentries_reserve); + return BLI_ghash_new_ex(BLI_ghashutil_pairhash, BLI_ghashutil_paircmp, info, nentries_reserve); } GHash *BLI_ghash_pair_new(const char *info) { @@ -860,21 +1244,12 @@ GHash *BLI_ghash_pair_new(const char *info) /* Use ghash API to give 'set' functionality */ -/* TODO: typical set functions - * isdisjoint/issubset/issuperset/union/intersection/difference etc */ - /** \name GSet Functions * \{ */ GSet *BLI_gset_new_ex(GSetHashFP hashfp, GSetCmpFP cmpfp, const char *info, const unsigned int nentries_reserve) { - GSet *gs = (GSet *)ghash_new(hashfp, cmpfp, info, - nentries_reserve, - sizeof(Entry) - sizeof(void *)); -#ifndef NDEBUG - ((GHash *)gs)->flag |= GHASH_FLAG_IS_SET; -#endif - return gs; + return (GSet *)ghash_new(hashfp, cmpfp, info, nentries_reserve, GHASH_FLAG_IS_GSET); } GSet *BLI_gset_new(GSetHashFP hashfp, GSetCmpFP cmpfp, const char *info) @@ -882,9 +1257,17 @@ GSet *BLI_gset_new(GSetHashFP hashfp, GSetCmpFP cmpfp, const char *info) return BLI_gset_new_ex(hashfp, cmpfp, info, 0); } -int BLI_gset_size(GSet *gs) +/** + * Copy given GSet. Keys are also copied if callback is provided, else pointers remain the same. + */ +GSet *BLI_gset_copy(GSet *gs, GHashKeyCopyFP keycopyfp) +{ + return (GSet *)ghash_copy((GHash *)gs, keycopyfp, NULL); +} + +unsigned int BLI_gset_size(GSet *gs) { - return (int)((GHash *)gs)->nentries; + return ((GHash *)gs)->nentries; } /** @@ -894,7 +1277,8 @@ int BLI_gset_size(GSet *gs) void BLI_gset_insert(GSet *gs, void *key) { const unsigned int hash = ghash_keyhash((GHash *)gs, key); - ghash_insert_ex_keyonly((GHash *)gs, key, hash); + const unsigned int bucket_index = ghash_bucket_index((GHash *)gs, hash); + ghash_insert_ex_keyonly((GHash *)gs, key, bucket_index); } /** @@ -905,15 +1289,7 @@ void BLI_gset_insert(GSet *gs, void *key) */ bool BLI_gset_add(GSet *gs, void *key) { - const unsigned int hash = ghash_keyhash((GHash *)gs, key); - Entry *e = ghash_lookup_entry_ex((GHash *)gs, key, hash); - if (e) { - return false; - } - else { - ghash_insert_ex_keyonly((GHash *)gs, key, hash); - return true; - } + return ghash_insert_safe_keyonly((GHash *)gs, key, false, NULL); } /** @@ -924,20 +1300,10 @@ bool BLI_gset_add(GSet *gs, void *key) */ bool BLI_gset_reinsert(GSet *gs, void *key, GSetKeyFreeFP keyfreefp) { - const unsigned int hash = ghash_keyhash((GHash *)gs, key); - Entry *e = ghash_lookup_entry_ex((GHash *)gs, key, hash); - if (e) { - if (keyfreefp) keyfreefp(e->key); - e->key = key; - return false; - } - else { - ghash_insert_ex_keyonly((GHash *)gs, key, hash); - return true; - } + return ghash_insert_safe_keyonly((GHash *)gs, key, true, keyfreefp); } -bool BLI_gset_remove(GSet *gs, void *key, GSetKeyFreeFP keyfreefp) +bool BLI_gset_remove(GSet *gs, const void *key, GSetKeyFreeFP keyfreefp) { return BLI_ghash_remove((GHash *)gs, key, keyfreefp, NULL); } @@ -981,22 +1347,27 @@ void BLI_gset_flag_clear(GSet *gs, unsigned int flag) /** \name Convenience GSet Creation Functions * \{ */ -GSet *BLI_gset_ptr_new_ex(const char *info, - const unsigned int nentries_reserve) +GSet *BLI_gset_ptr_new_ex(const char *info, const unsigned int nentries_reserve) { - return BLI_gset_new_ex(BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, info, - nentries_reserve); + return BLI_gset_new_ex(BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, info, nentries_reserve); } GSet *BLI_gset_ptr_new(const char *info) { return BLI_gset_ptr_new_ex(info, 0); } -GSet *BLI_gset_pair_new_ex(const char *info, - const unsigned int nentries_reserve) +GSet *BLI_gset_str_new_ex(const char *info, const unsigned int nentries_reserve) +{ + return BLI_gset_new_ex(BLI_ghashutil_strhash_p, BLI_ghashutil_strcmp, info, nentries_reserve); +} +GSet *BLI_gset_str_new(const char *info) +{ + return BLI_gset_str_new_ex(info, 0); +} + +GSet *BLI_gset_pair_new_ex(const char *info, const unsigned int nentries_reserve) { - return BLI_gset_new_ex(BLI_ghashutil_pairhash, BLI_ghashutil_paircmp, info, - nentries_reserve); + return BLI_gset_new_ex(BLI_ghashutil_pairhash, BLI_ghashutil_paircmp, info, nentries_reserve); } GSet *BLI_gset_pair_new(const char *info) { @@ -1008,37 +1379,126 @@ GSet *BLI_gset_pair_new(const char *info) /** \name Debugging & Introspection * \{ */ -#ifdef DEBUG + +#include "BLI_math.h" + +/** + * \return number of buckets in the GHash. + */ +int BLI_ghash_buckets_size(GHash *gh) +{ + return (int)gh->nbuckets; +} +int BLI_gset_buckets_size(GSet *gs) +{ + return BLI_ghash_buckets_size((GHash *)gs); +} /** - * Measure how well the hash function performs - * (1.0 is approx as good as random distribution). + * Measure how well the hash function performs (1.0 is approx as good as random distribution), + * and return a few other stats like load, variance of the distribution of the entries in the buckets, etc. * * Smaller is better! */ -double BLI_ghash_calc_quality(GHash *gh) +double BLI_ghash_calc_quality_ex( + GHash *gh, double *r_load, double *r_variance, + double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int *r_biggest_bucket) { - uint64_t sum = 0; + double mean; unsigned int i; - if (gh->nentries == 0) - return -1.0; + if (gh->nentries == 0) { + if (r_load) { + *r_load = 0.0; + } + if (r_variance) { + *r_variance = 0.0; + } + if (r_prop_empty_buckets) { + *r_prop_empty_buckets = 1.0; + } + if (r_prop_overloaded_buckets) { + *r_prop_overloaded_buckets = 0.0; + } + if (r_biggest_bucket) { + *r_biggest_bucket = 0; + } - for (i = 0; i < gh->nbuckets; i++) { - uint64_t count = 0; - Entry *e; - for (e = gh->buckets[i]; e; e = e->next) { - count += 1; + return 0.0; + } + + mean = (double)gh->nentries / (double)gh->nbuckets; + if (r_load) { + *r_load = mean; + } + if (r_biggest_bucket) { + *r_biggest_bucket = 0; + } + + if (r_variance) { + /* We already know our mean (i.e. load factor), easy to compute variance. + * See http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm + */ + double sum = 0.0; + for (i = 0; i < gh->nbuckets; i++) { + int count = 0; + Entry *e; + for (e = gh->buckets[i]; e; e = e->next) { + count++; + } + sum += ((double)count - mean) * ((double)count - mean); + } + *r_variance = sum / (double)(gh->nbuckets - 1); + } + + { + uint64_t sum = 0; + uint64_t overloaded_buckets_threshold = (uint64_t)max_ii(GHASH_LIMIT_GROW(1), 1); + uint64_t sum_overloaded = 0; + uint64_t sum_empty = 0; + + for (i = 0; i < gh->nbuckets; i++) { + uint64_t count = 0; + Entry *e; + for (e = gh->buckets[i]; e; e = e->next) { + count++; + } + if (r_biggest_bucket) { + *r_biggest_bucket = max_ii(*r_biggest_bucket, (int)count); + } + if (r_prop_overloaded_buckets && (count > overloaded_buckets_threshold)) { + sum_overloaded++; + } + if (r_prop_empty_buckets && !count) { + sum_empty++; + } + sum += count * (count + 1); } - sum += count * (count + 1); + if (r_prop_overloaded_buckets) { + *r_prop_overloaded_buckets = (double)sum_overloaded / (double)gh->nbuckets; + } + if (r_prop_empty_buckets) { + *r_prop_empty_buckets = (double)sum_empty / (double)gh->nbuckets; + } + return ((double)sum * (double)gh->nbuckets / + ((double)gh->nentries * (gh->nentries + 2 * gh->nbuckets - 1))); } - return ((double)sum * (double)gh->nbuckets / - ((double)gh->nentries * (gh->nentries + 2 * gh->nbuckets - 1))); +} +double BLI_gset_calc_quality_ex( + GSet *gs, double *r_load, double *r_variance, + double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int *r_biggest_bucket) +{ + return BLI_ghash_calc_quality_ex((GHash *)gs, r_load, r_variance, + r_prop_empty_buckets, r_prop_overloaded_buckets, r_biggest_bucket); +} + +double BLI_ghash_calc_quality(GHash *gh) +{ + return BLI_ghash_calc_quality_ex(gh, NULL, NULL, NULL, NULL, NULL); } double BLI_gset_calc_quality(GSet *gs) { - return BLI_ghash_calc_quality((GHash *)gs); + return BLI_ghash_calc_quality_ex((GHash *)gs, NULL, NULL, NULL, NULL, NULL); } -#endif /** \} */ |