git.blender.org/blender.git
author     Bastien Montagne <montagne29@wanadoo.fr>   2015-03-19 19:36:59 +0300
committer  Bastien Montagne <montagne29@wanadoo.fr>   2015-03-19 19:37:54 +0300
commit     cfdd27381c6730dc0d8d4bb51c132b29337b8af5 (patch)
tree       27d5b8c646fc31dc9fd623f3e2995b1c3f391305
parent     881e05fc54058bc04b217817808315b2f9691c2a (diff)
GHash - code reorganization, performance enhancements, add a few missing utils to API.
This patch is the root of the GHash rework; all other diffs will be based on it.

Reduce average load from 3.0 to 0.75
----------------------------------
This is the big performance booster part, e.g. it makes tracing a dyntopo stroke between 25% and 30% faster. Not much to say about it, aside from the fact that it obviously increases memory footprint (by about 25% - 30% too).

Add optional shrinking
----------------------------------
I.e. ghashes/gsets can now shrink their buckets array when you remove enough entries. This remains optional and OFF by default.

Add code to use masking instead of modulo
----------------------------------
Bucket indices are obtained from hashes by "reducing" the hash value into the valid bucket range. This can be done either by bit-masking or by a modulo operation. The former is quicker, but requires real hashes, while the latter is slower (average 10% impact on ghash operations) but can also be used as a 'fake' hash on raw values, e.g. indices. In Blender, not all ghash usages currently hash their keys, so we stick to modulo for now (masking is ifdef'ed out); we may however investigate the benefits of switching to masking with systematic very basic hashing later...

Add various missing API helpers
----------------------------------
I.e. a way to deep-copy a ghash/gset, and a way to (re-)reserve entries (i.e. manually grow or shrink the ghash after its creation).

Various code refactoring
----------------------------------
* Get rid of the 'hack' regarding ghash size when used as a gset (it's simpler and safer to have two structs defined here, and cast pointers as needed).
* Various re-shuffling and factorization in low-level internal code.
* Some work on hashing helpers, introducing some murmur2a-based hashing too.

Thanks a bunch to Campbell for the extensive review work. :)

Reviewers: sergey, campbellbarton

Subscribers: psy-fi, lukastoenne

Projects: #bf_blender

Maniphest Tasks: T43766

Differential Revision: https://developer.blender.org/D1178
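For readers new to the masking-vs-modulo trade-off described above, here is a minimal sketch contrasting the two ways of reducing a full hash value to a bucket index; the helper names are illustrative only, not code from this patch:

    #include <assert.h>

    /* Modulo: works with any bucket count; with a prime bucket count it also
     * scatters poorly-mixed "hashes" (e.g. raw indices) across the buckets. */
    static unsigned int bucket_index_modulo(unsigned int hash, unsigned int nbuckets)
    {
        return hash % nbuckets;
    }

    /* Masking: quicker, but nbuckets must be a power of two and only the low
     * bits of the hash are used - so keys must be really hashed, otherwise
     * consecutive raw values pile up in a handful of buckets. */
    static unsigned int bucket_index_mask(unsigned int hash, unsigned int nbuckets)
    {
        assert((nbuckets & (nbuckets - 1)) == 0);  /* power of two */
        return hash & (nbuckets - 1);
    }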
-rw-r--r--  source/blender/blenlib/BLI_ghash.h             |  53
-rw-r--r--  source/blender/blenlib/BLI_hash_mm2a.h         |   2
-rw-r--r--  source/blender/blenlib/intern/BLI_ghash.c      | 777
-rw-r--r--  source/blender/blenlib/intern/hash_mm2a.c      |  44
-rw-r--r--  source/blender/makesdna/intern/CMakeLists.txt  |   1
5 files changed, 667 insertions(+), 210 deletions(-)
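A hedged usage sketch of the API additions (BLI_ghash_reserve, BLI_ghash_copy, GHASH_FLAG_ALLOW_SHRINK); the string keys, the BLI_strdup() key copier, the int-in-pointer values and the existing BLI_ghash_flag_set() helper are assumptions for illustration, not part of this patch:

    #include "BLI_ghash.h"
    #include "BLI_string.h"
    #include "BLI_utildefines.h"
    #include "MEM_guardedalloc.h"

    static void *copy_key(void *key)
    {
        return BLI_strdup(key);  /* deep-copy the string key */
    }

    static void example(void)
    {
        GHash *gh = BLI_ghash_str_new("example");
        GHash *gh_copy;

        /* Reserve up-front when the final entry count is known,
         * avoiding intermediate grow steps during insertion. */
        BLI_ghash_reserve(gh, 1000);

        /* Opt in to shrinking (OFF by default): buckets are resized
         * down again once enough entries are removed. */
        BLI_ghash_flag_set(gh, GHASH_FLAG_ALLOW_SHRINK);

        BLI_ghash_insert(gh, BLI_strdup("answer"), SET_INT_IN_POINTER(42));

        /* Deep-copy keys; a NULL value callback shares value pointers. */
        gh_copy = BLI_ghash_copy(gh, copy_key, NULL);

        BLI_ghash_free(gh, MEM_freeN, NULL);
        BLI_ghash_free(gh_copy, MEM_freeN, NULL);
    }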
diff --git a/source/blender/blenlib/BLI_ghash.h b/source/blender/blenlib/BLI_ghash.h
index bf2b4126453..16d18ef1315 100644
--- a/source/blender/blenlib/BLI_ghash.h
+++ b/source/blender/blenlib/BLI_ghash.h
@@ -44,6 +44,8 @@ typedef unsigned int (*GHashHashFP) (const void *key);
typedef bool (*GHashCmpFP) (const void *a, const void *b);
typedef void (*GHashKeyFreeFP) (void *key);
typedef void (*GHashValFreeFP) (void *val);
+typedef void *(*GHashKeyCopyFP) (void *key);
+typedef void *(*GHashValCopyFP) (void *val);
typedef struct GHash GHash;
@@ -54,7 +56,13 @@ typedef struct GHashIterator {
} GHashIterator;
enum {
- GHASH_FLAG_ALLOW_DUPES = (1 << 0), /* only checked for in debug mode */
+ GHASH_FLAG_ALLOW_DUPES = (1 << 0), /* Only checked for in debug mode */
+ GHASH_FLAG_ALLOW_SHRINK = (1 << 1), /* Allow to shrink buckets' size. */
+
+#ifdef GHASH_INTERNAL_API
+ /* Internal usage only */
+ GHASH_FLAG_IS_GSET = (1 << 16), /* Whether the GHash is actually used as GSet (no value storage). */
+#endif
};
/* *** */
@@ -62,7 +70,10 @@ enum {
GHash *BLI_ghash_new_ex(GHashHashFP hashfp, GHashCmpFP cmpfp, const char *info,
const unsigned int nentries_reserve) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
GHash *BLI_ghash_new(GHashHashFP hashfp, GHashCmpFP cmpfp, const char *info) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
+GHash *BLI_ghash_copy(GHash *gh, GHashKeyCopyFP keycopyfp,
+ GHashValCopyFP valcopyfp) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
void BLI_ghash_free(GHash *gh, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp);
+void BLI_ghash_reserve(GHash *gh, const unsigned int nentries_reserve);
void BLI_ghash_insert(GHash *gh, void *key, void *val);
bool BLI_ghash_reinsert(GHash *gh, void *key, void *val, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp);
void *BLI_ghash_lookup(GHash *gh, const void *key) ATTR_WARN_UNUSED_RESULT;
@@ -127,23 +138,37 @@ unsigned int BLI_ghashutil_strhash_n(const char *key, size_t n);
CHECK_TYPE_ANY(key, char *, const char *, const char * const), \
BLI_ghashutil_strhash_p(key))
unsigned int BLI_ghashutil_strhash_p(const void *key);
+unsigned int BLI_ghashutil_strhash_p_murmur(const void *key);
bool BLI_ghashutil_strcmp(const void *a, const void *b);
#define BLI_ghashutil_inthash(key) ( \
CHECK_TYPE_ANY(&(key), int *, const int *), \
BLI_ghashutil_uinthash((unsigned int)key))
unsigned int BLI_ghashutil_uinthash(unsigned int key);
+unsigned int BLI_ghashutil_inthash_p(const void *ptr);
+unsigned int BLI_ghashutil_inthash_p_murmur(const void *ptr);
+bool BLI_ghashutil_intcmp(const void *a, const void *b);
+
+
+unsigned int BLI_ghashutil_uinthash_v4(const unsigned int key[4]);
#define BLI_ghashutil_inthash_v4(key) ( \
CHECK_TYPE_ANY(key, int *, const int *), \
BLI_ghashutil_uinthash_v4((const unsigned int *)key))
-unsigned int BLI_ghashutil_uinthash_v4(const unsigned int key[4]);
#define BLI_ghashutil_inthash_v4_p \
((GSetHashFP)BLI_ghashutil_uinthash_v4)
+#define BLI_ghashutil_uinthash_v4_p \
+ ((GSetHashFP)BLI_ghashutil_uinthash_v4)
+unsigned int BLI_ghashutil_uinthash_v4_murmur(const unsigned int key[4]);
+#define BLI_ghashutil_inthash_v4_murmur(key) ( \
+ CHECK_TYPE_ANY(key, int *, const int *), \
+ BLI_ghashutil_uinthash_v4_murmur((const unsigned int *)key))
+#define BLI_ghashutil_inthash_v4_p_murmur \
+ ((GSetHashFP)BLI_ghashutil_uinthash_v4_murmur)
+#define BLI_ghashutil_uinthash_v4_p_murmur \
+ ((GSetHashFP)BLI_ghashutil_uinthash_v4_murmur)
bool BLI_ghashutil_uinthash_v4_cmp(const void *a, const void *b);
#define BLI_ghashutil_inthash_v4_cmp \
BLI_ghashutil_uinthash_v4_cmp
-unsigned int BLI_ghashutil_inthash_p(const void *ptr);
-bool BLI_ghashutil_intcmp(const void *a, const void *b);
/** \} */
@@ -178,6 +203,7 @@ typedef struct GSet GSet;
typedef GHashHashFP GSetHashFP;
typedef GHashCmpFP GSetCmpFP;
typedef GHashKeyFreeFP GSetKeyFreeFP;
+typedef GHashKeyCopyFP GSetKeyCopyFP;
/* so we can cast but compiler sees as different */
typedef struct GSetIterator {
@@ -191,6 +217,7 @@ typedef struct GSetIterator {
GSet *BLI_gset_new_ex(GSetHashFP hashfp, GSetCmpFP cmpfp, const char *info,
const unsigned int nentries_reserve) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
GSet *BLI_gset_new(GSetHashFP hashfp, GSetCmpFP cmpfp, const char *info) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
+GSet *BLI_gset_copy(GSet *gs, GSetKeyCopyFP keycopyfp) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
int BLI_gset_size(GSet *gs) ATTR_WARN_UNUSED_RESULT;
void BLI_gset_flag_set(GSet *gs, unsigned int flag);
void BLI_gset_flag_clear(GSet *gs, unsigned int flag);
@@ -202,7 +229,7 @@ bool BLI_gset_haskey(GSet *gs, const void *key) ATTR_WARN_UNUSED_RESULT;
bool BLI_gset_remove(GSet *gs, void *key, GSetKeyFreeFP keyfreefp);
void BLI_gset_clear_ex(GSet *gs, GSetKeyFreeFP keyfreefp,
const unsigned int nentries_reserve);
-void BLI_gset_clear(GSet *gs, GSetKeyFreeFP keyfreefp);
+void BLI_gset_clear(GSet *gs, GSetKeyFreeFP keyfreefp);
GSet *BLI_gset_ptr_new_ex(const char *info,
const unsigned int nentries_reserve) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT;
@@ -229,10 +256,22 @@ BLI_INLINE bool BLI_gsetIterator_done(GSetIterator *gsi) { return BLI_ghashItera
BLI_gsetIterator_done(&gs_iter_) == false; \
BLI_gsetIterator_step(&gs_iter_), i_++)
-#ifdef DEBUG
+
+/* For testing, debugging only */
+#ifdef GHASH_INTERNAL_API
+int BLI_ghash_buckets_size(GHash *gh);
+int BLI_gset_buckets_size(GSet *gs);
+
+double BLI_ghash_calc_quality_ex(
+ GHash *gh, double *r_load, double *r_variance,
+ double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int *r_biggest_bucket);
+double BLI_gset_calc_quality_ex(
+ GSet *gs, double *r_load, double *r_variance,
+ double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int *r_biggest_bucket);
double BLI_ghash_calc_quality(GHash *gh);
double BLI_gset_calc_quality(GSet *gs);
-#endif
+#endif /* GHASH_INTERNAL_API */
+
#ifdef __cplusplus
}
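The murmur-based helper variants declared above slot into the existing creation API unchanged; a minimal sketch (not from the patch):

    #include "BLI_ghash.h"

    /* A string-keyed GHash using the new murmur2a-based string hash. */
    static GHash *str_ghash_murmur_new(const char *info)
    {
        return BLI_ghash_new(BLI_ghashutil_strhash_p_murmur, BLI_ghashutil_strcmp, info);
    }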
diff --git a/source/blender/blenlib/BLI_hash_mm2a.h b/source/blender/blenlib/BLI_hash_mm2a.h
index 007dec4f4d6..6beaf50ae8f 100644
--- a/source/blender/blenlib/BLI_hash_mm2a.h
+++ b/source/blender/blenlib/BLI_hash_mm2a.h
@@ -42,4 +42,6 @@ void BLI_hash_mm2a_add_int(BLI_HashMurmur2A *mm2, int data);
uint32_t BLI_hash_mm2a_end(BLI_HashMurmur2A *mm2);
+uint32_t BLI_hash_mm2(const unsigned char *data, size_t len, uint32_t seed);
+
#endif /* __BLI_HASH_MM2A_H__ */
diff --git a/source/blender/blenlib/intern/BLI_ghash.c b/source/blender/blenlib/intern/BLI_ghash.c
index 5360ea744a1..a2233c38270 100644
--- a/source/blender/blenlib/intern/BLI_ghash.c
+++ b/source/blender/blenlib/intern/BLI_ghash.c
@@ -36,17 +36,23 @@
#include <string.h>
#include <stdlib.h>
+#include <stdarg.h>
#include <limits.h>
#include "MEM_guardedalloc.h"
#include "BLI_sys_types.h" /* for intptr_t support */
#include "BLI_utildefines.h"
+#include "BLI_hash_mm2a.h"
#include "BLI_mempool.h"
+
+#define GHASH_INTERNAL_API
#include "BLI_ghash.h"
#include "BLI_strict_flags.h"
+#define GHASH_USE_MODULO_BUCKETS
+/* Also used by smallhash! */
const unsigned int hashsizes[] = {
5, 11, 17, 37, 67, 131, 257, 521, 1031, 2053, 4099, 8209,
16411, 32771, 65537, 131101, 262147, 524309, 1048583, 2097169,
@@ -54,24 +60,43 @@ const unsigned int hashsizes[] = {
268435459
};
-/* internal flag to ensure sets values aren't used */
-#ifndef NDEBUG
-# define GHASH_FLAG_IS_SET (1 << 8)
-# define IS_GHASH_ASSERT(gh) BLI_assert((gh->flag & GHASH_FLAG_IS_SET) == 0)
-// # define IS_GSET_ASSERT(gs) BLI_assert((gs->flag & GHASH_FLAG_IS_SET) != 0)
+#ifdef GHASH_USE_MODULO_BUCKETS
+# define GHASH_MAX_SIZE 27
#else
-# define IS_GHASH_ASSERT(gh)
-// # define IS_GSET_ASSERT(eh)
+# define GHASH_BUCKET_BIT_MIN 2
+# define GHASH_BUCKET_BIT_MAX 28 /* About 268M of buckets... */
#endif
+/**
+ * \note Max load #GHASH_LIMIT_GROW used to be 3 (pre-2.74).
+ * Python uses 0.6666, tommyhaslib even goes down to 0.5.
+ * Reducing ours from 3 to 0.75 gives a huge speedup (pure GHash insertions/lookups are about
+ * twice as quick, and e.g. 'dynamic-topology' stroke drawing is about 25% - 30% quicker).
+ * Min load #GHASH_LIMIT_SHRINK is a quarter of max load, to avoid resizing too quickly.
+ */
+#define GHASH_LIMIT_GROW(_nbkt) (((_nbkt) * 3) / 4)
+#define GHASH_LIMIT_SHRINK(_nbkt) (((_nbkt) * 3) / 16)
+
/***/
+/* WARNING! Keep in sync with ugly _gh_Entry in header!!! */
typedef struct Entry {
struct Entry *next;
- void *key, *val;
+ void *key;
} Entry;
+typedef struct GHashEntry {
+ Entry e;
+
+ void *val;
+} GHashEntry;
+
+typedef Entry GSetEntry;
+
+#define GHASH_ENTRY_SIZE(_is_gset) \
+ ((_is_gset) ? sizeof(GSetEntry) : sizeof(GHashEntry))
+
struct GHash {
GHashHashFP hashfp;
GHashCmpFP cmpfp;
@@ -79,11 +104,34 @@ struct GHash {
Entry **buckets;
struct BLI_mempool *entrypool;
unsigned int nbuckets;
+ unsigned int limit_grow, limit_shrink;
+#ifdef GHASH_USE_MODULO_BUCKETS
+ unsigned int cursize, size_min;
+#else
+ unsigned int bucket_mask, bucket_bit, bucket_bit_min;
+#endif
+
unsigned int nentries;
- unsigned int cursize, flag;
+ unsigned int flag;
};
+BLI_INLINE void ghash_entry_copy(
+ GHash *gh_dst, Entry *dst, GHash *gh_src, Entry *src,
+ GHashKeyCopyFP keycopyfp, GHashValCopyFP valcopyfp)
+{
+ dst->key = (keycopyfp) ? keycopyfp(src->key) : src->key;
+
+ if ((gh_dst->flag & GHASH_FLAG_IS_GSET) == 0) {
+ if ((gh_src->flag & GHASH_FLAG_IS_GSET) == 0) {
+ ((GHashEntry *)dst)->val = (valcopyfp) ? valcopyfp(((GHashEntry *)src)->val) : ((GHashEntry *)src)->val;
+ }
+ else {
+ ((GHashEntry *)dst)->val = NULL;
+ }
+ }
+}
+
/* -------------------------------------------------------------------- */
/* GHash API */
@@ -91,25 +139,37 @@ struct GHash {
* \{ */
/**
- * Get the hash for a key.
+ * Get the full hash for a key.
*/
BLI_INLINE unsigned int ghash_keyhash(GHash *gh, const void *key)
{
- return gh->hashfp(key) % gh->nbuckets;
+ return gh->hashfp(key);
}
/**
- * Check if the number of items in the GHash is large enough to require more buckets.
+ * Get the full hash for an entry.
*/
-BLI_INLINE bool ghash_test_expand_buckets(const unsigned int nentries, const unsigned int nbuckets)
+BLI_INLINE unsigned int ghash_entryhash(GHash *gh, const Entry *e)
{
- return (nentries > nbuckets * 3);
+ return gh->hashfp(e->key);
}
/**
- * Expand buckets to the next size up.
+ * Get the bucket-hash for an already-computed full hash.
*/
-BLI_INLINE void ghash_resize_buckets(GHash *gh, const unsigned int nbuckets)
+BLI_INLINE unsigned int ghash_bucket_index(GHash *gh, const unsigned int hash)
+{
+#ifdef GHASH_USE_MODULO_BUCKETS
+ return hash % gh->nbuckets;
+#else
+ return hash & gh->bucket_mask;
+#endif
+}
+
+/**
+ * Expand buckets to the next size up or down.
+ */
+static void ghash_buckets_resize(GHash *gh, const unsigned int nbuckets)
{
Entry **buckets_old = gh->buckets;
Entry **buckets_new;
@@ -117,49 +177,223 @@ BLI_INLINE void ghash_resize_buckets(GHash *gh, const unsigned int nbuckets)
unsigned int i;
Entry *e;
- BLI_assert(gh->nbuckets != nbuckets);
+ BLI_assert((gh->nbuckets != nbuckets) || !gh->buckets);
+// printf("%s: %d -> %d\n", __func__, nbuckets_old, nbuckets);
gh->nbuckets = nbuckets;
- buckets_new = (Entry **)MEM_callocN(gh->nbuckets * sizeof(*gh->buckets), "buckets");
-
- for (i = 0; i < nbuckets_old; i++) {
- Entry *e_next;
- for (e = buckets_old[i]; e; e = e_next) {
- const unsigned hash = ghash_keyhash(gh, e->key);
- e_next = e->next;
- e->next = buckets_new[hash];
- buckets_new[hash] = e;
+#ifdef GHASH_USE_MODULO_BUCKETS
+#else
+ gh->bucket_mask = nbuckets - 1;
+#endif
+
+ buckets_new = (Entry **)MEM_callocN(sizeof(*gh->buckets) * gh->nbuckets, __func__);
+
+ if (buckets_old) {
+ if (nbuckets > nbuckets_old) {
+ for (i = 0; i < nbuckets_old; i++) {
+ Entry *e_next;
+ for (e = buckets_old[i]; e; e = e_next) {
+ const unsigned hash = ghash_entryhash(gh, e);
+ const unsigned bucket_index = ghash_bucket_index(gh, hash);
+ e_next = e->next;
+ e->next = buckets_new[bucket_index];
+ buckets_new[bucket_index] = e;
+ }
+ }
+ }
+ else {
+ for (i = 0; i < nbuckets_old; i++) {
+#ifdef GHASH_USE_MODULO_BUCKETS
+ Entry *e_next;
+ for (e = buckets_old[i]; e; e = e_next) {
+ const unsigned hash = ghash_entryhash(gh, e);
+ const unsigned bucket_index = ghash_bucket_index(gh, hash);
+ e_next = e->next;
+ e->next = buckets_new[bucket_index];
+ buckets_new[bucket_index] = e;
+ }
+#else
+ /* No need to recompute hashes in this case, since our mask is just smaller, all items in old bucket i
+ * will go in same new bucket (i & new_mask)! */
+ const unsigned bucket_index = ghash_bucket_index(gh, i);
+ BLI_assert(!buckets_old[i] || (bucket_index == ghash_bucket_index(gh, ghash_entryhash(gh, buckets_old[i]))));
+ for (e = buckets_old[i]; e && e->next; e = e->next);
+ if (e) {
+ e->next = buckets_new[bucket_index];
+ buckets_new[bucket_index] = buckets_old[i];
+ }
+#endif
+ }
}
}
gh->buckets = buckets_new;
- MEM_freeN(buckets_old);
+ if (buckets_old) {
+ MEM_freeN(buckets_old);
+ }
}
/**
- * Increase initial bucket size to match a reserved amount.
+ * Check if the number of items in the GHash is large enough to require more buckets,
+ * or small enough to require less buckets, and resize \a gh accordingly.
*/
-BLI_INLINE void ghash_buckets_reserve(GHash *gh, const unsigned int nentries_reserve)
+static void ghash_buckets_expand(
+ GHash *gh, const unsigned int nentries, const bool user_defined)
{
- while (ghash_test_expand_buckets(nentries_reserve, gh->nbuckets)) {
- gh->nbuckets = hashsizes[++gh->cursize];
+ unsigned int new_nbuckets;
+
+ if (LIKELY(gh->buckets && (nentries < gh->limit_grow))) {
+ return;
+ }
+
+ new_nbuckets = gh->nbuckets;
+
+#ifdef GHASH_USE_MODULO_BUCKETS
+ while ((nentries > gh->limit_grow) &&
+ (gh->cursize < GHASH_MAX_SIZE - 1))
+ {
+ new_nbuckets = hashsizes[++gh->cursize];
+ gh->limit_grow = GHASH_LIMIT_GROW(new_nbuckets);
+ }
+#else
+ while ((nentries > gh->limit_grow) &&
+ (gh->bucket_bit < GHASH_BUCKET_BIT_MAX))
+ {
+ new_nbuckets = 1u << ++gh->bucket_bit;
+ gh->limit_grow = GHASH_LIMIT_GROW(new_nbuckets);
+ }
+#endif
+
+ if (user_defined) {
+#ifdef GHASH_USE_MODULO_BUCKETS
+ gh->size_min = gh->cursize;
+#else
+ gh->bucket_bit_min = gh->bucket_bit;
+#endif
}
+
+ if ((new_nbuckets == gh->nbuckets) && gh->buckets) {
+ return;
+ }
+
+ gh->limit_grow = GHASH_LIMIT_GROW(new_nbuckets);
+ gh->limit_shrink = GHASH_LIMIT_SHRINK(new_nbuckets);
+ ghash_buckets_resize(gh, new_nbuckets);
+}
+
+static void ghash_buckets_contract(
+ GHash *gh, const unsigned int nentries, const bool user_defined, const bool force_shrink)
+{
+ unsigned int new_nbuckets;
+
+ if (!(force_shrink || (gh->flag & GHASH_FLAG_ALLOW_SHRINK))) {
+ return;
+ }
+
+ if (LIKELY(gh->buckets && (nentries > gh->limit_shrink))) {
+ return;
+ }
+
+ new_nbuckets = gh->nbuckets;
+
+#ifdef GHASH_USE_MODULO_BUCKETS
+ while ((nentries < gh->limit_shrink) &&
+ (gh->cursize > gh->size_min))
+ {
+ new_nbuckets = hashsizes[--gh->cursize];
+ gh->limit_shrink = GHASH_LIMIT_SHRINK(new_nbuckets);
+ }
+#else
+ while ((nentries < gh->limit_shrink) &&
+ (gh->bucket_bit > gh->bucket_bit_min))
+ {
+ new_nbuckets = 1u << --gh->bucket_bit;
+ gh->limit_shrink = GHASH_LIMIT_SHRINK(new_nbuckets);
+ }
+#endif
+
+ if (user_defined) {
+#ifdef GHASH_USE_MODULO_BUCKETS
+ gh->size_min = gh->cursize;
+#else
+ gh->bucket_bit_min = gh->bucket_bit;
+#endif
+ }
+
+ if ((new_nbuckets == gh->nbuckets) && gh->buckets) {
+ return;
+ }
+
+ gh->limit_grow = GHASH_LIMIT_GROW(new_nbuckets);
+ gh->limit_shrink = GHASH_LIMIT_SHRINK(new_nbuckets);
+ ghash_buckets_resize(gh, new_nbuckets);
+}
+
+/**
+ * Clear and reset \a gh's buckets, and reserve buckets again for the given number of entries.
+ */
+BLI_INLINE void ghash_buckets_reset(GHash *gh, const unsigned int nentries)
+{
+ MEM_SAFE_FREE(gh->buckets);
+
+#ifdef GHASH_USE_MODULO_BUCKETS
+ gh->cursize = 0;
+ gh->size_min = 0;
+ gh->nbuckets = hashsizes[gh->cursize];
+#else
+ gh->bucket_bit = GHASH_BUCKET_BIT_MIN;
+ gh->bucket_bit_min = GHASH_BUCKET_BIT_MIN;
+ gh->nbuckets = 1u << gh->bucket_bit;
+ gh->bucket_mask = gh->nbuckets - 1;
+#endif
+
+ gh->limit_grow = GHASH_LIMIT_GROW(gh->nbuckets);
+ gh->limit_shrink = GHASH_LIMIT_SHRINK(gh->nbuckets);
+
+ gh->nentries = 0;
+
+ ghash_buckets_expand(gh, nentries, (nentries != 0));
}
/**
* Internal lookup function.
- * Takes a hash argument to avoid calling #ghash_keyhash multiple times.
+ * Takes a bucket_index argument to avoid calling #ghash_keyhash and #ghash_bucket_index multiple times.
*/
-BLI_INLINE Entry *ghash_lookup_entry_ex(GHash *gh, const void *key,
- const unsigned int hash)
+BLI_INLINE Entry *ghash_lookup_entry_ex(
+ GHash *gh, const void *key, const unsigned int bucket_index)
{
Entry *e;
+ /* Since we do not store the hash in each entry, it is not worth computing it here!
+ * Typically, the comparison function will be quicker, and it is needed in the end anyway... */
+ for (e = gh->buckets[bucket_index]; e; e = e->next) {
+ if (UNLIKELY(gh->cmpfp(key, e->key) == false)) {
+ return e;
+ }
+ }
+
+ return NULL;
+}
- for (e = gh->buckets[hash]; e; e = e->next) {
+/**
+ * Internal lookup function, returns previous entry of target one too.
+ * Takes a bucket_index argument to avoid calling #ghash_keyhash and #ghash_bucket_index multiple times.
+ * Useful when modifying buckets somehow (like removing an entry...).
+ */
+BLI_INLINE Entry *ghash_lookup_entry_prev_ex(
+ GHash *gh, const void *key, Entry **r_e_prev, const unsigned int bucket_index)
+{
+ Entry *e, *e_prev = NULL;
+
+ /* Since we do not store the hash in each entry, it is not worth computing it here!
+ * Typically, the comparison function will be quicker, and it is needed in the end anyway... */
+ for (e = gh->buckets[bucket_index]; e; e_prev = e, e = e->next) {
if (UNLIKELY(gh->cmpfp(key, e->key) == false)) {
+ *r_e_prev = e_prev;
return e;
}
}
+
+ *r_e_prev = NULL;
return NULL;
}
@@ -169,105 +403,141 @@ BLI_INLINE Entry *ghash_lookup_entry_ex(GHash *gh, const void *key,
BLI_INLINE Entry *ghash_lookup_entry(GHash *gh, const void *key)
{
const unsigned int hash = ghash_keyhash(gh, key);
- return ghash_lookup_entry_ex(gh, key, hash);
+ const unsigned int bucket_index = ghash_bucket_index(gh, hash);
+ return ghash_lookup_entry_ex(gh, key, bucket_index);
}
static GHash *ghash_new(GHashHashFP hashfp, GHashCmpFP cmpfp, const char *info,
- const unsigned int nentries_reserve,
- const unsigned int entry_size)
+ const unsigned int nentries_reserve, const unsigned int flag)
{
GHash *gh = MEM_mallocN(sizeof(*gh), info);
gh->hashfp = hashfp;
gh->cmpfp = cmpfp;
- gh->nbuckets = hashsizes[0]; /* gh->cursize */
- gh->nentries = 0;
- gh->cursize = 0;
- gh->flag = 0;
+ gh->buckets = NULL;
+ gh->flag = flag;
- /* if we have reserved the number of elements that this hash will contain */
- if (nentries_reserve) {
- ghash_buckets_reserve(gh, nentries_reserve);
- }
-
- gh->buckets = MEM_callocN(gh->nbuckets * sizeof(*gh->buckets), "buckets");
- gh->entrypool = BLI_mempool_create(entry_size, 64, 64, BLI_MEMPOOL_NOP);
+ ghash_buckets_reset(gh, nentries_reserve);
+ gh->entrypool = BLI_mempool_create(GHASH_ENTRY_SIZE(flag & GHASH_FLAG_IS_GSET), 64, 64, BLI_MEMPOOL_NOP);
return gh;
}
/**
* Internal insert function.
- * Takes a hash argument to avoid calling #ghash_keyhash multiple times.
+ * Takes a bucket_index argument to avoid calling #ghash_keyhash and #ghash_bucket_index multiple times.
*/
-BLI_INLINE void ghash_insert_ex(GHash *gh, void *key, void *val,
- unsigned int hash)
+BLI_INLINE void ghash_insert_ex(
+ GHash *gh, void *key, void *val, const unsigned int bucket_index)
{
- Entry *e = (Entry *)BLI_mempool_alloc(gh->entrypool);
+ GHashEntry *e = BLI_mempool_alloc(gh->entrypool);
+
BLI_assert((gh->flag & GHASH_FLAG_ALLOW_DUPES) || (BLI_ghash_haskey(gh, key) == 0));
- IS_GHASH_ASSERT(gh);
+ BLI_assert(!(gh->flag & GHASH_FLAG_IS_GSET));
- e->next = gh->buckets[hash];
- e->key = key;
+ e->e.next = gh->buckets[bucket_index];
+ e->e.key = key;
e->val = val;
- gh->buckets[hash] = e;
+ gh->buckets[bucket_index] = (Entry *)e;
- if (UNLIKELY(ghash_test_expand_buckets(++gh->nentries, gh->nbuckets))) {
- ghash_resize_buckets(gh, hashsizes[++gh->cursize]);
- }
+ ghash_buckets_expand(gh, ++gh->nentries, false);
}
/**
* Insert function that doesn't set the value (use for GSet)
*/
-BLI_INLINE void ghash_insert_ex_keyonly(GHash *gh, void *key,
- unsigned int hash)
+BLI_INLINE void ghash_insert_ex_keyonly(
+ GHash *gh, void *key, const unsigned int bucket_index)
{
- Entry *e = (Entry *)BLI_mempool_alloc(gh->entrypool);
+ GSetEntry *e = BLI_mempool_alloc(gh->entrypool);
+
BLI_assert((gh->flag & GHASH_FLAG_ALLOW_DUPES) || (BLI_ghash_haskey(gh, key) == 0));
- e->next = gh->buckets[hash];
+ BLI_assert((gh->flag & GHASH_FLAG_IS_GSET) != 0);
+
+ e->next = gh->buckets[bucket_index];
e->key = key;
- /* intentionally leave value unset */
- gh->buckets[hash] = e;
+ gh->buckets[bucket_index] = (Entry *)e;
- if (UNLIKELY(ghash_test_expand_buckets(++gh->nentries, gh->nbuckets))) {
- ghash_resize_buckets(gh, hashsizes[++gh->cursize]);
- }
+ ghash_buckets_expand(gh, ++gh->nentries, false);
}
BLI_INLINE void ghash_insert(GHash *gh, void *key, void *val)
{
const unsigned int hash = ghash_keyhash(gh, key);
- ghash_insert_ex(gh, key, val, hash);
+ const unsigned int bucket_index = ghash_bucket_index(gh, hash);
+
+ ghash_insert_ex(gh, key, val, bucket_index);
+}
+
+BLI_INLINE bool ghash_insert_safe(
+ GHash *gh, void *key, void *val, const bool override, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp)
+{
+ const unsigned int hash = ghash_keyhash(gh, key);
+ const unsigned int bucket_index = ghash_bucket_index(gh, hash);
+ GHashEntry *e = (GHashEntry *)ghash_lookup_entry_ex(gh, key, bucket_index);
+
+ BLI_assert(!(gh->flag & GHASH_FLAG_IS_GSET));
+
+ if (e) {
+ if (override) {
+ if (keyfreefp) keyfreefp(e->e.key);
+ if (valfreefp) valfreefp(e->val);
+ e->e.key = key;
+ e->val = val;
+ }
+ return false;
+ }
+ else {
+ ghash_insert_ex(gh, key, val, bucket_index);
+ return true;
+ }
+}
+
+BLI_INLINE bool ghash_insert_safe_keyonly(GHash *gh, void *key, const bool override, GHashKeyFreeFP keyfreefp)
+{
+ const unsigned int hash = ghash_keyhash(gh, key);
+ const unsigned int bucket_index = ghash_bucket_index(gh, hash);
+ GSetEntry *e = ghash_lookup_entry_ex(gh, key, bucket_index);
+
+ BLI_assert((gh->flag & GHASH_FLAG_IS_GSET) != 0);
+
+ if (e) {
+ if (override) {
+ if (keyfreefp) keyfreefp(e->key);
+ e->key = key;
+ }
+ return false;
+ }
+ else {
+ ghash_insert_ex_keyonly(gh, key, bucket_index);
+ return true;
+ }
}
/**
* Remove the entry and return it, caller must free from gh->entrypool.
*/
-static Entry *ghash_remove_ex(GHash *gh, void *key, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp,
- unsigned int hash)
+static Entry *ghash_remove_ex(
+ GHash *gh, void *key, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp,
+ const unsigned int bucket_index)
{
- Entry *e;
- Entry *e_prev = NULL;
+ Entry *e_prev;
+ Entry *e = ghash_lookup_entry_prev_ex(gh, key, &e_prev, bucket_index);
- for (e = gh->buckets[hash]; e; e = e->next) {
- if (UNLIKELY(gh->cmpfp(key, e->key) == false)) {
- Entry *e_next = e->next;
+ BLI_assert(!valfreefp || !(gh->flag & GHASH_FLAG_IS_GSET));
- if (keyfreefp) keyfreefp(e->key);
- if (valfreefp) valfreefp(e->val);
+ if (e) {
+ if (keyfreefp) keyfreefp(e->key);
+ if (valfreefp) valfreefp(((GHashEntry *)e)->val);
- if (e_prev) e_prev->next = e_next;
- else gh->buckets[hash] = e_next;
+ if (e_prev) e_prev->next = e->next;
+ else gh->buckets[bucket_index] = e->next;
- gh->nentries--;
- return e;
- }
- e_prev = e;
+ ghash_buckets_contract(gh, --gh->nentries, false, false);
}
- return NULL;
+ return e;
}
/**
@@ -278,20 +548,56 @@ static void ghash_free_cb(GHash *gh, GHashKeyFreeFP keyfreefp, GHashValFreeFP va
unsigned int i;
BLI_assert(keyfreefp || valfreefp);
+ BLI_assert(!valfreefp || !(gh->flag & GHASH_FLAG_IS_GSET));
for (i = 0; i < gh->nbuckets; i++) {
Entry *e;
- for (e = gh->buckets[i]; e; ) {
- Entry *e_next = e->next;
-
+ for (e = gh->buckets[i]; e; e = e->next) {
if (keyfreefp) keyfreefp(e->key);
- if (valfreefp) valfreefp(e->val);
+ if (valfreefp) valfreefp(((GHashEntry *)e)->val);
+ }
+ }
+}
+
+/**
+ * Copy the GHash.
+ */
+static GHash *ghash_copy(GHash *gh, GHashKeyCopyFP keycopyfp, GHashValCopyFP valcopyfp)
+{
+ GHash *gh_new;
+ unsigned int i;
+ /* This allows us to be sure to get the same number of buckets in gh_new as in ghash. */
+ const unsigned int reserve_nentries_new = MAX2(GHASH_LIMIT_GROW(gh->nbuckets) - 1, gh->nentries);
+
+ BLI_assert(!valcopyfp || !(gh->flag & GHASH_FLAG_IS_GSET));
+
+ gh_new = ghash_new(gh->hashfp, gh->cmpfp, __func__, 0, gh->flag);
+ ghash_buckets_expand(gh_new, reserve_nentries_new, false);
+
+ BLI_assert(gh_new->nbuckets == gh->nbuckets);
+
+ for (i = 0; i < gh->nbuckets; i++) {
+ Entry *e;
+
+ for (e = gh->buckets[i]; e; e = e->next) {
+ Entry *e_new = BLI_mempool_alloc(gh_new->entrypool);
+ ghash_entry_copy(gh_new, e_new, gh, e, keycopyfp, valcopyfp);
- e = e_next;
+ /* Warning!
+ * This means entries in the buckets of the new copy will be in reversed order!
+ * This shall not be an issue though, since order should never be assumed in a ghash. */
+
+ /* Note: We can use 'i' here, since we are sure that 'gh' and 'gh_new' have the same number of buckets! */
+ e_new->next = gh_new->buckets[i];
+ gh_new->buckets[i] = e_new;
}
}
+ gh_new->nentries = gh->nentries;
+
+ return gh_new;
}
+
/** \} */
@@ -311,9 +617,7 @@ static void ghash_free_cb(GHash *gh, GHashKeyFreeFP keyfreefp, GHashValFreeFP va
GHash *BLI_ghash_new_ex(GHashHashFP hashfp, GHashCmpFP cmpfp, const char *info,
const unsigned int nentries_reserve)
{
- return ghash_new(hashfp, cmpfp, info,
- nentries_reserve,
- (unsigned int)sizeof(Entry));
+ return ghash_new(hashfp, cmpfp, info, nentries_reserve, 0);
}
/**
@@ -325,6 +629,23 @@ GHash *BLI_ghash_new(GHashHashFP hashfp, GHashCmpFP cmpfp, const char *info)
}
/**
+ * Copy the given GHash. Keys and values are also copied if the relevant callback is provided; otherwise pointers remain the same.
+ */
+GHash *BLI_ghash_copy(GHash *gh, GHashKeyCopyFP keycopyfp, GHashValCopyFP valcopyfp)
+{
+ return ghash_copy(gh, keycopyfp, valcopyfp);
+}
+
+/**
+ * Reserve the given amount of entries (resize \a gh accordingly if needed).
+ */
+void BLI_ghash_reserve(GHash *gh, const unsigned int nentries_reserve)
+{
+ ghash_buckets_expand(gh, nentries_reserve, true);
+ ghash_buckets_contract(gh, nentries_reserve, true, false);
+}
+
+/**
* \return size of the GHash.
*/
int BLI_ghash_size(GHash *gh)
@@ -353,19 +674,7 @@ void BLI_ghash_insert(GHash *gh, void *key, void *val)
*/
bool BLI_ghash_reinsert(GHash *gh, void *key, void *val, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp)
{
- const unsigned int hash = ghash_keyhash(gh, key);
- Entry *e = ghash_lookup_entry_ex(gh, key, hash);
- if (e) {
- if (keyfreefp) keyfreefp(e->key);
- if (valfreefp) valfreefp(e->val);
- e->key = key;
- e->val = val;
- return false;
- }
- else {
- ghash_insert_ex(gh, key, val, hash);
- return true;
- }
+ return ghash_insert_safe(gh, key, val, true, keyfreefp, valfreefp);
}
/**
@@ -379,8 +688,8 @@ bool BLI_ghash_reinsert(GHash *gh, void *key, void *val, GHashKeyFreeFP keyfreef
*/
void *BLI_ghash_lookup(GHash *gh, const void *key)
{
- Entry *e = ghash_lookup_entry(gh, key);
- IS_GHASH_ASSERT(gh);
+ GHashEntry *e = (GHashEntry *)ghash_lookup_entry(gh, key);
+ BLI_assert(!(gh->flag & GHASH_FLAG_IS_GSET));
return e ? e->val : NULL;
}
@@ -389,8 +698,8 @@ void *BLI_ghash_lookup(GHash *gh, const void *key)
*/
void *BLI_ghash_lookup_default(GHash *gh, const void *key, void *val_default)
{
- Entry *e = ghash_lookup_entry(gh, key);
- IS_GHASH_ASSERT(gh);
+ GHashEntry *e = (GHashEntry *)ghash_lookup_entry(gh, key);
+ BLI_assert(!(gh->flag & GHASH_FLAG_IS_GSET));
return e ? e->val : val_default;
}
@@ -406,8 +715,8 @@ void *BLI_ghash_lookup_default(GHash *gh, const void *key, void *val_default)
*/
void **BLI_ghash_lookup_p(GHash *gh, const void *key)
{
- Entry *e = ghash_lookup_entry(gh, key);
- IS_GHASH_ASSERT(gh);
+ GHashEntry *e = (GHashEntry *)ghash_lookup_entry(gh, key);
+ BLI_assert(!(gh->flag & GHASH_FLAG_IS_GSET));
return e ? &e->val : NULL;
}
@@ -422,7 +731,8 @@ void **BLI_ghash_lookup_p(GHash *gh, const void *key)
bool BLI_ghash_remove(GHash *gh, void *key, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp)
{
const unsigned int hash = ghash_keyhash(gh, key);
- Entry *e = ghash_remove_ex(gh, key, keyfreefp, valfreefp, hash);
+ const unsigned int bucket_index = ghash_bucket_index(gh, hash);
+ Entry *e = ghash_remove_ex(gh, key, keyfreefp, valfreefp, bucket_index);
if (e) {
BLI_mempool_free(gh->entrypool, e);
return true;
@@ -444,8 +754,9 @@ bool BLI_ghash_remove(GHash *gh, void *key, GHashKeyFreeFP keyfreefp, GHashValFr
void *BLI_ghash_popkey(GHash *gh, void *key, GHashKeyFreeFP keyfreefp)
{
const unsigned int hash = ghash_keyhash(gh, key);
- Entry *e = ghash_remove_ex(gh, key, keyfreefp, NULL, hash);
- IS_GHASH_ASSERT(gh);
+ const unsigned int bucket_index = ghash_bucket_index(gh, hash);
+ GHashEntry *e = (GHashEntry *)ghash_remove_ex(gh, key, keyfreefp, NULL, bucket_index);
+ BLI_assert(!(gh->flag & GHASH_FLAG_IS_GSET));
if (e) {
void *val = e->val;
BLI_mempool_free(gh->entrypool, e);
@@ -477,17 +788,7 @@ void BLI_ghash_clear_ex(GHash *gh, GHashKeyFreeFP keyfreefp, GHashValFreeFP valf
if (keyfreefp || valfreefp)
ghash_free_cb(gh, keyfreefp, valfreefp);
- gh->nbuckets = hashsizes[0]; /* gh->cursize */
- gh->nentries = 0;
- gh->cursize = 0;
-
- if (nentries_reserve) {
- ghash_buckets_reserve(gh, nentries_reserve);
- }
-
- MEM_freeN(gh->buckets);
- gh->buckets = MEM_callocN(gh->nbuckets * sizeof(*gh->buckets), "buckets");
-
+ ghash_buckets_reset(gh, nentries_reserve);
BLI_mempool_clear_ex(gh->entrypool, nentries_reserve ? (int)nentries_reserve : -1);
}
@@ -701,6 +1002,10 @@ unsigned int BLI_ghashutil_uinthash_v4(const unsigned int key[4])
hash += key[3];
return hash;
}
+unsigned int BLI_ghashutil_uinthash_v4_murmur(const unsigned int key[4])
+{
+ return BLI_hash_mm2((const unsigned char *)key, sizeof(unsigned int[4]), 0);  /* Not sizeof(key): the array parameter decays to a pointer. */
+}
bool BLI_ghashutil_uinthash_v4_cmp(const void *a, const void *b)
{
@@ -733,6 +1038,13 @@ unsigned int BLI_ghashutil_inthash_p(const void *ptr)
return (unsigned int)(key & 0xffffffff);
}
+unsigned int BLI_ghashutil_inthash_p_murmur(const void *ptr)
+{
+ uintptr_t key = (uintptr_t)ptr;
+
+ return BLI_hash_mm2((const unsigned char *)&key, sizeof(key), 0);
+}
+
bool BLI_ghashutil_intcmp(const void *a, const void *b)
{
return (a != b);
@@ -769,9 +1081,15 @@ unsigned int BLI_ghashutil_strhash_p(const void *ptr)
return h;
}
+unsigned int BLI_ghashutil_strhash_p_murmur(const void *ptr)
+{
+ const unsigned char *key = ptr;
+
+ return BLI_hash_mm2(key, strlen((const char *)key) + 1, 0);
+}
bool BLI_ghashutil_strcmp(const void *a, const void *b)
{
- return (!STREQ(a, b));
+ return (a == b) ? false : !STREQ(a, b);
}
GHashPair *BLI_ghashutil_pairalloc(const void *first, const void *second)
@@ -809,44 +1127,36 @@ void BLI_ghashutil_pairfree(void *ptr)
/** \name Convenience GHash Creation Functions
* \{ */
-GHash *BLI_ghash_ptr_new_ex(const char *info,
- const unsigned int nentries_reserve)
+GHash *BLI_ghash_ptr_new_ex(const char *info, const unsigned int nentries_reserve)
{
- return BLI_ghash_new_ex(BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, info,
- nentries_reserve);
+ return BLI_ghash_new_ex(BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, info, nentries_reserve);
}
GHash *BLI_ghash_ptr_new(const char *info)
{
return BLI_ghash_ptr_new_ex(info, 0);
}
-GHash *BLI_ghash_str_new_ex(const char *info,
- const unsigned int nentries_reserve)
+GHash *BLI_ghash_str_new_ex(const char *info, const unsigned int nentries_reserve)
{
- return BLI_ghash_new_ex(BLI_ghashutil_strhash_p, BLI_ghashutil_strcmp, info,
- nentries_reserve);
+ return BLI_ghash_new_ex(BLI_ghashutil_strhash_p, BLI_ghashutil_strcmp, info, nentries_reserve);
}
GHash *BLI_ghash_str_new(const char *info)
{
return BLI_ghash_str_new_ex(info, 0);
}
-GHash *BLI_ghash_int_new_ex(const char *info,
- const unsigned int nentries_reserve)
+GHash *BLI_ghash_int_new_ex(const char *info, const unsigned int nentries_reserve)
{
- return BLI_ghash_new_ex(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, info,
- nentries_reserve);
+ return BLI_ghash_new_ex(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, info, nentries_reserve);
}
GHash *BLI_ghash_int_new(const char *info)
{
return BLI_ghash_int_new_ex(info, 0);
}
-GHash *BLI_ghash_pair_new_ex(const char *info,
- const unsigned int nentries_reserve)
+GHash *BLI_ghash_pair_new_ex(const char *info, const unsigned int nentries_reserve)
{
- return BLI_ghash_new_ex(BLI_ghashutil_pairhash, BLI_ghashutil_paircmp, info,
- nentries_reserve);
+ return BLI_ghash_new_ex(BLI_ghashutil_pairhash, BLI_ghashutil_paircmp, info, nentries_reserve);
}
GHash *BLI_ghash_pair_new(const char *info)
{
@@ -861,21 +1171,12 @@ GHash *BLI_ghash_pair_new(const char *info)
/* Use ghash API to give 'set' functionality */
-/* TODO: typical set functions
- * isdisjoint/issubset/issuperset/union/intersection/difference etc */
-
/** \name GSet Functions
* \{ */
GSet *BLI_gset_new_ex(GSetHashFP hashfp, GSetCmpFP cmpfp, const char *info,
const unsigned int nentries_reserve)
{
- GSet *gs = (GSet *)ghash_new(hashfp, cmpfp, info,
- nentries_reserve,
- sizeof(Entry) - sizeof(void *));
-#ifndef NDEBUG
- ((GHash *)gs)->flag |= GHASH_FLAG_IS_SET;
-#endif
- return gs;
+ return (GSet *)ghash_new(hashfp, cmpfp, info, nentries_reserve, GHASH_FLAG_IS_GSET);
}
GSet *BLI_gset_new(GSetHashFP hashfp, GSetCmpFP cmpfp, const char *info)
@@ -883,6 +1184,14 @@ GSet *BLI_gset_new(GSetHashFP hashfp, GSetCmpFP cmpfp, const char *info)
return BLI_gset_new_ex(hashfp, cmpfp, info, 0);
}
+/**
+ * Copy given GSet. Keys are also copied if callback is provided, else pointers remain the same.
+ */
+GSet *BLI_gset_copy(GSet *gs, GSetKeyCopyFP keycopyfp)
+{
+ return (GSet *)ghash_copy((GHash *)gs, keycopyfp, NULL);
+}
+
int BLI_gset_size(GSet *gs)
{
return (int)((GHash *)gs)->nentries;
@@ -895,7 +1204,8 @@ int BLI_gset_size(GSet *gs)
void BLI_gset_insert(GSet *gs, void *key)
{
const unsigned int hash = ghash_keyhash((GHash *)gs, key);
- ghash_insert_ex_keyonly((GHash *)gs, key, hash);
+ const unsigned int bucket_index = ghash_bucket_index((GHash *)gs, hash);
+ ghash_insert_ex_keyonly((GHash *)gs, key, bucket_index);
}
/**
@@ -906,15 +1216,7 @@ void BLI_gset_insert(GSet *gs, void *key)
*/
bool BLI_gset_add(GSet *gs, void *key)
{
- const unsigned int hash = ghash_keyhash((GHash *)gs, key);
- Entry *e = ghash_lookup_entry_ex((GHash *)gs, key, hash);
- if (e) {
- return false;
- }
- else {
- ghash_insert_ex_keyonly((GHash *)gs, key, hash);
- return true;
- }
+ return ghash_insert_safe_keyonly((GHash *)gs, key, false, NULL);
}
/**
@@ -925,17 +1227,7 @@ bool BLI_gset_add(GSet *gs, void *key)
*/
bool BLI_gset_reinsert(GSet *gs, void *key, GSetKeyFreeFP keyfreefp)
{
- const unsigned int hash = ghash_keyhash((GHash *)gs, key);
- Entry *e = ghash_lookup_entry_ex((GHash *)gs, key, hash);
- if (e) {
- if (keyfreefp) keyfreefp(e->key);
- e->key = key;
- return false;
- }
- else {
- ghash_insert_ex_keyonly((GHash *)gs, key, hash);
- return true;
- }
+ return ghash_insert_safe_keyonly((GHash *)gs, key, true, keyfreefp);
}
bool BLI_gset_remove(GSet *gs, void *key, GSetKeyFreeFP keyfreefp)
@@ -982,22 +1274,18 @@ void BLI_gset_flag_clear(GSet *gs, unsigned int flag)
/** \name Convenience GSet Creation Functions
* \{ */
-GSet *BLI_gset_ptr_new_ex(const char *info,
- const unsigned int nentries_reserve)
+GSet *BLI_gset_ptr_new_ex(const char *info, const unsigned int nentries_reserve)
{
- return BLI_gset_new_ex(BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, info,
- nentries_reserve);
+ return BLI_gset_new_ex(BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, info, nentries_reserve);
}
GSet *BLI_gset_ptr_new(const char *info)
{
return BLI_gset_ptr_new_ex(info, 0);
}
-GSet *BLI_gset_pair_new_ex(const char *info,
- const unsigned int nentries_reserve)
+GSet *BLI_gset_pair_new_ex(const char *info, const unsigned int nentries_reserve)
{
- return BLI_gset_new_ex(BLI_ghashutil_pairhash, BLI_ghashutil_paircmp, info,
- nentries_reserve);
+ return BLI_gset_new_ex(BLI_ghashutil_pairhash, BLI_ghashutil_paircmp, info, nentries_reserve);
}
GSet *BLI_gset_pair_new(const char *info)
{
@@ -1009,37 +1297,126 @@ GSet *BLI_gset_pair_new(const char *info)
/** \name Debugging & Introspection
* \{ */
-#ifdef DEBUG
+
+#include "BLI_math.h"
/**
- * Measure how well the hash function performs
- * (1.0 is approx as good as random distribution).
+ * \return number of buckets in the GHash.
+ */
+int BLI_ghash_buckets_size(GHash *gh)
+{
+ return (int)gh->nbuckets;
+}
+int BLI_gset_buckets_size(GSet *gs)
+{
+ return BLI_ghash_buckets_size((GHash *)gs);
+}
+
+/**
+ * Measure how well the hash function performs (1.0 is approx as good as random distribution),
+ * and return a few other stats like load, variance of the distribution of the entries in the buckets, etc.
*
* Smaller is better!
*/
-double BLI_ghash_calc_quality(GHash *gh)
+double BLI_ghash_calc_quality_ex(
+ GHash *gh, double *r_load, double *r_variance,
+ double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int *r_biggest_bucket)
{
- uint64_t sum = 0;
+ double mean;
unsigned int i;
- if (gh->nentries == 0)
- return -1.0;
+ if (gh->nentries == 0) {
+ if (r_load) {
+ *r_load = 0.0;
+ }
+ if (r_variance) {
+ *r_variance = 0.0;
+ }
+ if (r_prop_empty_buckets) {
+ *r_prop_empty_buckets = 1.0;
+ }
+ if (r_prop_overloaded_buckets) {
+ *r_prop_overloaded_buckets = 0.0;
+ }
+ if (r_biggest_bucket) {
+ *r_biggest_bucket = 0;
+ }
+
+ return 0.0;
+ }
- for (i = 0; i < gh->nbuckets; i++) {
- uint64_t count = 0;
- Entry *e;
- for (e = gh->buckets[i]; e; e = e->next) {
- count += 1;
+ mean = (double)gh->nentries / (double)gh->nbuckets;
+ if (r_load) {
+ *r_load = mean;
+ }
+ if (r_biggest_bucket) {
+ *r_biggest_bucket = 0;
+ }
+
+ if (r_variance) {
+ /* We already know our mean (i.e. load factor), easy to compute variance.
+ * See http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
+ */
+ double sum = 0.0;
+ for (i = 0; i < gh->nbuckets; i++) {
+ int count = 0;
+ Entry *e;
+ for (e = gh->buckets[i]; e; e = e->next) {
+ count++;
+ }
+ sum += ((double)count - mean) * ((double)count - mean);
}
- sum += count * (count + 1);
+ *r_variance = sum / (double)(gh->nbuckets - 1);
}
- return ((double)sum * (double)gh->nbuckets /
- ((double)gh->nentries * (gh->nentries + 2 * gh->nbuckets - 1)));
+
+ {
+ uint64_t sum = 0;
+ uint64_t overloaded_buckets_threshold = (uint64_t)max_ii(GHASH_LIMIT_GROW(1), 1);
+ uint64_t sum_overloaded = 0;
+ uint64_t sum_empty = 0;
+
+ for (i = 0; i < gh->nbuckets; i++) {
+ uint64_t count = 0;
+ Entry *e;
+ for (e = gh->buckets[i]; e; e = e->next) {
+ count++;
+ }
+ if (r_biggest_bucket) {
+ *r_biggest_bucket = max_ii(*r_biggest_bucket, (int)count);
+ }
+ if (r_prop_overloaded_buckets && (count > overloaded_buckets_threshold)) {
+ sum_overloaded++;
+ }
+ if (r_prop_empty_buckets && !count) {
+ sum_empty++;
+ }
+ sum += count * (count + 1);
+ }
+ if (r_prop_overloaded_buckets) {
+ *r_prop_overloaded_buckets = (double)sum_overloaded / (double)gh->nbuckets;
+ }
+ if (r_prop_empty_buckets) {
+ *r_prop_empty_buckets = (double)sum_empty / (double)gh->nbuckets;
+ }
+ return ((double)sum * (double)gh->nbuckets /
+ ((double)gh->nentries * (gh->nentries + 2 * gh->nbuckets - 1)));
+ }
+}
+double BLI_gset_calc_quality_ex(
+ GSet *gs, double *r_load, double *r_variance,
+ double *r_prop_empty_buckets, double *r_prop_overloaded_buckets, int *r_biggest_bucket)
+{
+ return BLI_ghash_calc_quality_ex((GHash *)gs, r_load, r_variance,
+ r_prop_empty_buckets, r_prop_overloaded_buckets, r_biggest_bucket);
+}
+
+double BLI_ghash_calc_quality(GHash *gh)
+{
+ return BLI_ghash_calc_quality_ex(gh, NULL, NULL, NULL, NULL, NULL);
}
double BLI_gset_calc_quality(GSet *gs)
{
- return BLI_ghash_calc_quality((GHash *)gs);
+ return BLI_ghash_calc_quality_ex((GHash *)gs, NULL, NULL, NULL, NULL, NULL);
}
-#endif
/** \} */
diff --git a/source/blender/blenlib/intern/hash_mm2a.c b/source/blender/blenlib/intern/hash_mm2a.c
index bae098ae96b..87ba542e147 100644
--- a/source/blender/blenlib/intern/hash_mm2a.c
+++ b/source/blender/blenlib/intern/hash_mm2a.c
@@ -49,6 +49,13 @@
(h) = ((h) * MM2A_M) ^ (k); \
} (void)0
+#define MM2A_MIX_FINALIZE(h) \
+{ \
+ (h) ^= (h) >> 13; \
+ (h) *= MM2A_M; \
+ (h) ^= (h) >> 15; \
+} (void)0
+
static void mm2a_mix_tail(BLI_HashMurmur2A *mm2, const unsigned char **data, size_t *len)
{
while (*len && ((*len < 4) || mm2->count)) {
@@ -99,9 +106,40 @@ uint32_t BLI_hash_mm2a_end(BLI_HashMurmur2A *mm2)
MM2A_MIX(mm2->hash, mm2->tail);
MM2A_MIX(mm2->hash, mm2->size);
- mm2->hash ^= mm2->hash >> 13;
- mm2->hash *= MM2A_M;
- mm2->hash ^= mm2->hash >> 15;
+ MM2A_MIX_FINALIZE(mm2->hash);
return mm2->hash;
}
+
+/* Non-incremental version, quicker for small keys. */
+uint32_t BLI_hash_mm2(const unsigned char *data, size_t len, uint32_t seed)
+{
+ /* Initialize the hash to a 'random' value */
+ uint32_t h = seed ^ len;
+
+ /* Mix 4 bytes at a time into the hash */
+ for (; len >= 4; data += 4, len -= 4) {
+ uint32_t k = *(uint32_t *)data;
+
+ MM2A_MIX(h, k);
+ }
+
+ /* Handle the last few bytes of the input array */
+ switch (len) {
+ case 3:
+ h ^= data[2] << 16;
+ /* fall through */
+ case 2:
+ h ^= data[1] << 8;
+ /* fall through */
+ case 1:
+ h ^= data[0];
+ h *= MM2A_M;
+ }
+
+ /* Do a few final mixes of the hash to ensure the last few bytes are well-incorporated. */
+ MM2A_MIX_FINALIZE(h);
+
+ return h;
+}
+
diff --git a/source/blender/makesdna/intern/CMakeLists.txt b/source/blender/makesdna/intern/CMakeLists.txt
index 317141b14e3..08d2f93866a 100644
--- a/source/blender/makesdna/intern/CMakeLists.txt
+++ b/source/blender/makesdna/intern/CMakeLists.txt
@@ -97,6 +97,7 @@ set(SRC
../../blenlib/intern/BLI_mempool.c
../../blenlib/intern/listbase.c
../../blenlib/intern/BLI_ghash.c
+ ../../blenlib/intern/hash_mm2a.c
)
blender_add_lib(bf_dna_blenlib "${SRC}" "${INC}" "${INC_SYS}")
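The new one-shot BLI_hash_mm2() in hash_mm2a.c complements the existing incremental BLI_hash_mm2a_*() API; a brief sketch of both paths, assuming the usual init/add/end incremental signatures from BLI_hash_mm2a.h:

    #include <string.h>
    #include "BLI_hash_mm2a.h"

    /* One-shot: quicker for small keys, as noted in the patch. */
    static uint32_t hash_string_oneshot(const char *s)
    {
        return BLI_hash_mm2((const unsigned char *)s, strlen(s) + 1, 0);
    }

    /* Incremental: feed data in pieces, then finalize. */
    static uint32_t hash_string_incremental(const char *s)
    {
        BLI_HashMurmur2A mm2;
        BLI_hash_mm2a_init(&mm2, 0);
        BLI_hash_mm2a_add(&mm2, (const unsigned char *)s, strlen(s) + 1);
        return BLI_hash_mm2a_end(&mm2);
    }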