From 1f64371ec036c2f6ab450e37ad5a9c1120f1c54f Mon Sep 17 00:00:00 2001
From: Campbell Barton <ideasman42@gmail.com>
Date: Thu, 30 Jan 2014 20:51:44 +1100
Subject: Smallhash: refactor and fixes

- BLI_smallhash_remove didnt decrement total entries.
- rename vars to match closer to ghash.
- smallhash_lookup returns NULL when no entry found.
- using a zero value key wasn't supported.
- no need to memset or calloc bucket arrays
- add asserts for unsupported conditions.
- added BLI_smallhash_lookup_p
---
 source/blender/blenlib/intern/smallhash.c | 226 ++++++++++++++++++------------
 1 file changed, 133 insertions(+), 93 deletions(-)

(limited to 'source/blender/blenlib/intern/smallhash.c')

diff --git a/source/blender/blenlib/intern/smallhash.c b/source/blender/blenlib/intern/smallhash.c
index 3812d2955c9..7f9acab1f37 100644
--- a/source/blender/blenlib/intern/smallhash.c
+++ b/source/blender/blenlib/intern/smallhash.c
@@ -35,6 +35,7 @@
  */
 
 #include <string.h>
+#include <stdlib.h>
 
 #include "MEM_guardedalloc.h"
 #include "BLI_utildefines.h"
@@ -53,6 +54,7 @@
  */
 #define SMHASH_CELL_UNUSED  ((void *)0x7FFFFFFF)
 #define SMHASH_CELL_FREE    ((void *)0x7FFFFFFD)
+#define SMHASH_KEY_UNUSED ((uintptr_t)-1)
 
 /* typically this re-assigns 'h' */
 #define SMHASH_NEXT(h, hoff)  ( \
@@ -63,152 +65,190 @@
 
 extern const unsigned int hashsizes[];
 
-void BLI_smallhash_init(SmallHash *hash)
+/**
+ * Check if the number of items in the smallhash is large enough to require more buckets.
+ */
+BLI_INLINE bool smallhash_test_expand_buckets(const unsigned int nentries, const unsigned int nbuckets)
 {
-	unsigned int i;
-
-	memset(hash, 0, sizeof(*hash));
-
-	hash->table = hash->_stacktable;
-	hash->curhash = 2;
-	hash->size = hashsizes[hash->curhash];
+	return nentries * 3 > nbuckets;
+}
 
-	hash->copytable = hash->_copytable;
-	hash->stacktable = hash->_stacktable;
+BLI_INLINE void smallhash_init_empty(SmallHash *sh)
+{
+	unsigned int i;
 
-	for (i = 0; i < hash->size; i++) {
-		hash->table[i].val = SMHASH_CELL_FREE;
+	for (i = 0; i < sh->nbuckets; i++) {
+		sh->buckets[i].key = SMHASH_KEY_UNUSED;
+		sh->buckets[i].val = SMHASH_CELL_FREE;
 	}
 }
 
-/*NOTE: does *not* free *hash itself!  only the direct data!*/
-void BLI_smallhash_release(SmallHash *hash)
+BLI_INLINE SmallHashEntry *smallhash_lookup(SmallHash *sh, const uintptr_t key)
 {
-	if (hash->table != hash->stacktable) {
-		MEM_freeN(hash->table);
+	SmallHashEntry *e;
+	unsigned int h = (unsigned int)key;
+	unsigned int hoff = 1;
+
+	BLI_assert(key != SMHASH_KEY_UNUSED);
+
+	/* note: there are always more buckets then entries,
+	 * so we know there will always be a free bucket if the key isn't found. */
+	for (e = &sh->buckets[h % sh->nbuckets];
+	     e->val != SMHASH_CELL_FREE;
+	     h = SMHASH_NEXT(h, hoff), e = &sh->buckets[h % sh->nbuckets])
+	{
+		if (e->key == key) {
+			/* should never happen because unused keys are zero'd */
+			BLI_assert(e->val != SMHASH_CELL_UNUSED);
+			return e;
+		}
 	}
+
+	return NULL;
 }
 
-BLI_INLINE SmallHashEntry *smallhash_lookup_first_free(SmallHash *hash, uintptr_t key)
+BLI_INLINE SmallHashEntry *smallhash_lookup_first_free(SmallHash *sh, const uintptr_t key)
 {
-	SmallHashEntry *entry;
+	SmallHashEntry *e;
 	unsigned int h = (unsigned int)key;
 	unsigned int hoff = 1;
 
-	for (entry = &hash->table[h % hash->size];
-	     !ELEM(entry->val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE);
-	     h = SMHASH_NEXT(h, hoff), entry = &hash->table[h % hash->size])
+	for (e = &sh->buckets[h % sh->nbuckets];
+	     !ELEM(e->val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE);
+	     h = SMHASH_NEXT(h, hoff), e = &sh->buckets[h % sh->nbuckets])
 	{
-		/* Nothing else to do! */
+		/* pass */
 	}
 
-	return entry;
+	return e;
 }
 
-void BLI_smallhash_insert(SmallHash *hash, uintptr_t key, void *item)
+BLI_INLINE void smallhash_resize_buckets(SmallHash *sh, const unsigned int nbuckets)
 {
-	SmallHashEntry *entry;
+	SmallHashEntry *buckets_old = sh->buckets;
+	const unsigned int nbuckets_old = sh->nbuckets;
+	unsigned int i = 0;
 
-	if (hash->size < hash->used * 3) {
-		unsigned int newsize = hashsizes[++hash->curhash];
-		SmallHashEntry *tmp;
-		unsigned int i = 0;
+	BLI_assert(sh->nbuckets != nbuckets);
 
-		if (hash->table != hash->stacktable || newsize > SMSTACKSIZE) {
-			tmp = MEM_callocN(sizeof(*hash->table) * newsize, __func__);
-		}
-		else {
-			SWAP(SmallHashEntry *, hash->stacktable, hash->copytable);
-			tmp = hash->stacktable;
-		}
+	if (buckets_old == sh->buckets_stack && nbuckets <= SMSTACKSIZE) {
+		SWAP(SmallHashEntry *, sh->buckets_stack, sh->buckets_copy);
+		sh->buckets = sh->buckets_stack;
+	}
+	else {
+		sh->buckets = MEM_mallocN(sizeof(*sh->buckets) * nbuckets, __func__);
+	}
 
-		SWAP(SmallHashEntry *, tmp, hash->table);
+	sh->nbuckets = nbuckets;
 
-		hash->size = newsize;
+	smallhash_init_empty(sh);
 
-		for (i = 0; i < hash->size; i++) {
-			hash->table[i].val = SMHASH_CELL_FREE;
+	for (i = 0; i < nbuckets_old; i++) {
+		if (!ELEM(buckets_old[i].val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE)) {
+			SmallHashEntry *e = smallhash_lookup_first_free(sh, buckets_old[i].key);
+			e->key = buckets_old[i].key;
+			e->val = buckets_old[i].val;
 		}
+	}
 
-		for (i = 0; i < hashsizes[hash->curhash - 1]; i++) {
-			if (ELEM(tmp[i].val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE)) {
-				continue;
-			}
+	if (buckets_old != sh->buckets_stack && buckets_old != sh->buckets_copy) {
+		MEM_freeN(buckets_old);
+	}
+}
 
-			entry = smallhash_lookup_first_free(hash, tmp[i].key);
-			entry->key = tmp[i].key;
-			entry->val = tmp[i].val;
-		}
+void BLI_smallhash_init(SmallHash *sh)
+{
+	/* assume 'sh' is uninitialized */
 
-		if (tmp != hash->stacktable && tmp != hash->copytable) {
-			MEM_freeN(tmp);
-		}
-	}
+	sh->nentries = 0;
+	sh->cursize = 2;
+	sh->nbuckets = hashsizes[sh->cursize];
 
-	entry = smallhash_lookup_first_free(hash, key);
-	entry->key = key;
-	entry->val = item;
+	sh->buckets         = sh->_buckets_stack;
+	sh->buckets_copy    = sh->_buckets_copy;
+	sh->buckets_stack   = sh->_buckets_stack;
 
-	hash->used++;
+	smallhash_init_empty(sh);
 }
 
-BLI_INLINE SmallHashEntry *smallhash_lookup(SmallHash *hash, uintptr_t key)
+/*NOTE: does *not* free *sh itself!  only the direct data!*/
+void BLI_smallhash_release(SmallHash *sh)
 {
-	SmallHashEntry *entry;
-	unsigned int h = (unsigned int)key;
-	unsigned int hoff = 1;
+	if (sh->buckets != sh->buckets_stack) {
+		MEM_freeN(sh->buckets);
+	}
+}
 
-	for (entry = &hash->table[h % hash->size];
-	     ((entry->key != key) || (entry->val == SMHASH_CELL_UNUSED)) && (entry->val != SMHASH_CELL_FREE);
-	     h = SMHASH_NEXT(h, hoff), entry = &hash->table[h % hash->size])
-	{
-		/* Nothing else to do! */
+void BLI_smallhash_insert(SmallHash *sh, uintptr_t key, void *val)
+{
+	SmallHashEntry *e;
+
+	BLI_assert(key != SMHASH_KEY_UNUSED);
+	BLI_assert(!ELEM(val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE));
+	BLI_assert(BLI_smallhash_haskey(sh, key) == false);
+
+	if (UNLIKELY(smallhash_test_expand_buckets(++sh->nentries, sh->nbuckets))) {
+		smallhash_resize_buckets(sh, hashsizes[++sh->cursize]);
 	}
 
-	return entry;
+	e = smallhash_lookup_first_free(sh, key);
+	e->key = key;
+	e->val = val;
 }
 
-void BLI_smallhash_remove(SmallHash *hash, uintptr_t key)
+bool BLI_smallhash_remove(SmallHash *sh, uintptr_t key)
 {
-	SmallHashEntry *entry = smallhash_lookup(hash, key);
+	SmallHashEntry *e = smallhash_lookup(sh, key);
 
-	if (entry->val != SMHASH_CELL_FREE) {
-		entry->key = 0;
-		entry->val = SMHASH_CELL_UNUSED;
+	if (e) {
+		e->key = SMHASH_KEY_UNUSED;
+		e->val = SMHASH_CELL_UNUSED;
+		sh->nentries--;
+
+		return true;
+	}
+	else {
+		return false;
 	}
 }
 
-void *BLI_smallhash_lookup(SmallHash *hash, uintptr_t key)
+void *BLI_smallhash_lookup(SmallHash *sh, uintptr_t key)
 {
-	SmallHashEntry *entry = smallhash_lookup(hash, key);
+	SmallHashEntry *e = smallhash_lookup(sh, key);
 
-	return ELEM(entry->val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE) ? NULL : entry->val;
+	return e ? e->val : NULL;
 }
 
+void **BLI_smallhash_lookup_p(SmallHash *sh, uintptr_t key)
+{
+	SmallHashEntry *e = smallhash_lookup(sh, key);
+
+	return e ? &e->val : NULL;
+}
 
-bool BLI_smallhash_haskey(SmallHash *hash, uintptr_t key)
+bool BLI_smallhash_haskey(SmallHash *sh, uintptr_t key)
 {
-	SmallHashEntry *entry = smallhash_lookup(hash, key);
+	SmallHashEntry *e = smallhash_lookup(sh, key);
 
-	return !ELEM(entry->val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE);
+	return (e != NULL);
 }
 
-int BLI_smallhash_count(SmallHash *hash)
+int BLI_smallhash_count(SmallHash *sh)
 {
-	return (int)hash->used;
+	return (int)sh->nentries;
 }
 
 void *BLI_smallhash_iternext(SmallHashIter *iter, uintptr_t *key)
 {
-	while (iter->i < iter->hash->size) {
-		if ((iter->hash->table[iter->i].val != SMHASH_CELL_UNUSED) &&
-		    (iter->hash->table[iter->i].val != SMHASH_CELL_FREE))
+	while (iter->i < iter->sh->nbuckets) {
+		if ((iter->sh->buckets[iter->i].val != SMHASH_CELL_UNUSED) &&
+		    (iter->sh->buckets[iter->i].val != SMHASH_CELL_FREE))
 		{
 			if (key) {
-				*key = iter->hash->table[iter->i].key;
+				*key = iter->sh->buckets[iter->i].key;
 			}
 
-			return iter->hash->table[iter->i++].val;
+			return iter->sh->buckets[iter->i++].val;
 		}
 
 		iter->i++;
@@ -217,9 +257,9 @@ void *BLI_smallhash_iternext(SmallHashIter *iter, uintptr_t *key)
 	return NULL;
 }
 
-void *BLI_smallhash_iternew(SmallHash *hash, SmallHashIter *iter, uintptr_t *key)
+void *BLI_smallhash_iternew(SmallHash *sh, SmallHashIter *iter, uintptr_t *key)
 {
-	iter->hash = hash;
+	iter->sh = sh;
 	iter->i = 0;
 
 	return BLI_smallhash_iternext(iter, key);
@@ -228,23 +268,23 @@ void *BLI_smallhash_iternew(SmallHash *hash, SmallHashIter *iter, uintptr_t *key
 /* note, this was called _print_smhash in knifetool.c
  * it may not be intended for general use - campbell */
 #if 0
-void BLI_smallhash_print(SmallHash *hash)
+void BLI_smallhash_print(SmallHash *sh)
 {
-	int i, linecol = 79, c = 0;
+	unsigned int i, linecol = 79, c = 0;
 
 	printf("{");
-	for (i = 0; i < hash->size; i++) {
-		if (hash->table[i].val == SMHASH_CELL_UNUSED) {
+	for (i = 0; i < sh->nbuckets; i++) {
+		if (sh->buckets[i].val == SMHASH_CELL_UNUSED) {
 			printf("--u-");
 		}
-		else if (hash->table[i].val == SMHASH_CELL_FREE) {
+		else if (sh->buckets[i].val == SMHASH_CELL_FREE) {
 			printf("--f-");
 		}
 		else {
-			printf("%2x", (unsigned int)hash->table[i].key);
+			printf("%2x", (unsigned int)sh->buckets[i].key);
 		}
 
-		if (i != hash->size - 1)
+		if (i != sh->nbuckets - 1)
 			printf(", ");
 
 		c += 6;
-- 
cgit v1.2.3