Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/libgit2.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorVicent Marti <tanoku@gmail.com>2011-02-22 22:59:36 +0300
committerVicent Marti <tanoku@gmail.com>2011-02-22 22:59:36 +0300
commitfc658755bf980170cf5a497870155a9fc97151cb (patch)
tree1e04116b1c9ca8234c4aacbba90d182c470081b7 /src
parent4378e8d470abae5e9e8f32f98869516c8b86b191 (diff)
Rewrite git_hashtable internals
The old hash table with chained buckets has been replaced by a new one using Cuckoo hashing, which offers guaranteed constant lookup times. This should improve speeds on most use cases, since hash tables in libgit2 are usually used as caches where the objects are stored once and queried several times. The Cuckoo hash implementation is based off the one in the Basekit library [1] for the IO language, but rewritten to support an arbritrary number of hashes. We currently use 3 to maximize the usage of the nodes pool. [1]: https://github.com/stevedekorte/basekit/blob/master/source/CHash.c Signed-off-by: Vicent Marti <tanoku@gmail.com>
Diffstat (limited to 'src')
-rw-r--r--src/hashtable.c287
-rw-r--r--src/hashtable.h49
-rw-r--r--src/refs.c34
-rw-r--r--src/repository.c28
-rw-r--r--src/revwalk.c28
5 files changed, 199 insertions, 227 deletions
diff --git a/src/hashtable.c b/src/hashtable.c
index 67fd49a46..e226c8191 100644
--- a/src/hashtable.c
+++ b/src/hashtable.c
@@ -27,46 +27,113 @@
#include "repository.h"
#include "commit.h"
+#define MAX_LOOPS 5
static const double max_load_factor = 0.65;
-static void hashtable_resize(git_hashtable *table)
+static int resize_to(git_hashtable *self, size_t new_size);
+static int set_size(git_hashtable *self, size_t new_size);
+static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key, int hash_id);
+static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other);
+static int node_insert(git_hashtable *self, git_hashtable_node *new_node);
+static int insert_nodes(git_hashtable *self, git_hashtable_node *old_nodes, size_t old_size);
+
+static int resize_to(git_hashtable *self, size_t new_size)
{
- git_hashtable_node **new_nodes;
- unsigned int new_size, i;
+ git_hashtable_node *old_nodes = self->nodes;
+ size_t old_size = self->size;
+
+ self->is_resizing = 1;
- assert(table);
+ do {
+ self->size = new_size;
+ self->size_mask = new_size - 1;
+ self->key_count = 0;
+ self->nodes = git__calloc(1, sizeof(git_hashtable_node) * self->size);
- new_size = (table->size_mask + 1) * 2;
+ if (self->nodes == NULL)
+ return GIT_ENOMEM;
- new_nodes = git__malloc(new_size * sizeof(git_hashtable_node *));
- if (new_nodes == NULL)
- return;
+ if (insert_nodes(self, old_nodes, old_size) == 0)
+ self->is_resizing = 0;
+ else {
+ new_size *= 2;
+ free(self->nodes);
+ }
+ } while(self->is_resizing);
+
+ free(old_nodes);
+ return GIT_SUCCESS;
+}
- memset(new_nodes, 0x0, new_size * sizeof(git_hashtable_node *));
+static int set_size(git_hashtable *self, size_t new_size)
+{
+ self->nodes = realloc(self->nodes, new_size * sizeof(git_hashtable_node));
+ if (self->nodes == NULL)
+ return GIT_ENOMEM;
- for (i = 0; i <= table->size_mask; ++i) {
- git_hashtable_node *n;
- unsigned int index;
+ if (new_size > self->size) {
+ memset(&self->nodes[self->size], 0x0, (new_size - self->size) * sizeof(git_hashtable_node));
+ }
- while ((n = table->nodes[i]) != NULL) {
- table->nodes[i] = n->next;
- index = n->hash & (new_size - 1);
- n->next = new_nodes[index];
- new_nodes[index] = n;
+ self->size = new_size;
+ self->size_mask = new_size - 1;
+ return GIT_SUCCESS;
+}
+
+static git_hashtable_node *node_with_hash(git_hashtable *self, const void *key, int hash_id)
+{
+ size_t pos = self->hash(key, hash_id) & self->size_mask;
+ return git_hashtable_node_at(self->nodes, pos);
+}
+
+static void node_swap_with(git_hashtable_node *self, git_hashtable_node *other)
+{
+ git_hashtable_node tmp = *self;
+ *self = *other;
+ *other = tmp;
+}
+
+static int node_insert(git_hashtable *self, git_hashtable_node *new_node)
+{
+ int iteration, hash_id;
+
+ for (iteration = 0; iteration < MAX_LOOPS; iteration++) {
+ for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) {
+ git_hashtable_node *node;
+ node = node_with_hash(self, new_node->key, hash_id);
+ node_swap_with(new_node, node);
+ if(new_node->key == 0x0){
+ self->key_count++;
+ return GIT_SUCCESS;
+ }
}
}
- free(table->nodes);
- table->nodes = new_nodes;
- table->size_mask = (new_size - 1);
- table->max_count = (unsigned int)(new_size * max_load_factor);
+ if (self->is_resizing)
+ return GIT_EBUSY;
+
+ resize_to(self, self->size * 2);
+ git_hashtable_insert(self, new_node->key, new_node->value);
+ return GIT_SUCCESS;
+}
+
+static int insert_nodes(git_hashtable *self, git_hashtable_node *old_nodes, size_t old_size)
+{
+ size_t i;
+
+ for (i = 0; i < old_size; ++i) {
+ git_hashtable_node *node = git_hashtable_node_at(old_nodes, i);
+ if (node->key && git_hashtable_insert(self, node->key, node->value) < GIT_SUCCESS)
+ return GIT_ENOMEM;
+ }
+
+ return GIT_SUCCESS;
}
-git_hashtable *git_hashtable_alloc(unsigned int min_size,
+git_hashtable *git_hashtable_alloc(size_t min_size,
git_hash_ptr hash,
git_hash_keyeq_ptr key_eq)
{
- unsigned int i;
git_hashtable *table;
assert(hash && key_eq);
@@ -74,6 +141,11 @@ git_hashtable *git_hashtable_alloc(unsigned int min_size,
if ((table = git__malloc(sizeof(git_hashtable))) == NULL)
return NULL;
+ memset(table, 0x0, sizeof(git_hashtable));
+
+ if (min_size < 8)
+ min_size = 8;
+
/* round up size to closest power of 2 */
min_size--;
min_size |= min_size >> 1;
@@ -82,167 +154,90 @@ git_hashtable *git_hashtable_alloc(unsigned int min_size,
min_size |= min_size >> 8;
min_size |= min_size >> 16;
- table->size_mask = min_size;
- table->count = 0;
- table->max_count = (unsigned int)((min_size + 1) * max_load_factor);
-
table->hash = hash;
table->key_equal = key_eq;
- table->nodes = git__malloc((min_size + 1) * sizeof(git_hashtable_node *));
-
- if (table->nodes == NULL) {
- free(table);
- return NULL;
- }
-
- for (i = 0; i <= min_size; ++i)
- table->nodes[i] = NULL;
+ set_size(table, min_size + 1);
return table;
}
-void git_hashtable_clear(git_hashtable *table)
+void git_hashtable_clear(git_hashtable *self)
{
- unsigned int index;
-
- assert(table);
-
- for (index = 0; index <= table->size_mask; ++index) {
- git_hashtable_node *node, *next_node;
-
- node = table->nodes[index];
- while (node != NULL) {
- next_node = node->next;
- free(node);
- node = next_node;
- }
+ assert(self);
- table->nodes[index] = NULL;
- }
-
- table->count = 0;
+ memset(self->nodes, 0x0, sizeof(git_hashtable_node) * self->size);
+ self->key_count = 0;
}
-void git_hashtable_free(git_hashtable *table)
+void git_hashtable_free(git_hashtable *self)
{
- assert(table);
+ assert(self);
- git_hashtable_clear(table);
- free(table->nodes);
- free(table);
+ free(self->nodes);
+ free(self);
}
-int git_hashtable_insert(git_hashtable *table, const void *key, void *value)
+int git_hashtable_insert(git_hashtable *self, const void *key, void *value)
{
+ int hash_id;
git_hashtable_node *node;
- uint32_t index, hash;
-
- assert(table);
-
- if (table->count + 1 > table->max_count)
- hashtable_resize(table);
-
- node = git__malloc(sizeof(git_hashtable_node));
- if (node == NULL)
- return GIT_ENOMEM;
- hash = table->hash(key);
- index = (hash & table->size_mask);
+ for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) {
+ node = node_with_hash(self, key, hash_id);
- node->object = value;
- node->hash = hash;
- node->next = table->nodes[index];
+ if (!node->key) {
+ node->key = key;
+ node->value = value;
+ self->key_count++;
+ return GIT_SUCCESS;
+ }
- table->nodes[index] = node;
- table->count++;
+ if (key == node->key || self->key_equal(key, node->key) == 0) {
+ node->value = value;
+ return GIT_SUCCESS;
+ }
+ }
- return GIT_SUCCESS;
+ /* no space in table; must do cuckoo dance */
+ {
+ git_hashtable_node x;
+ x.key = key;
+ x.value = value;
+ return node_insert(self, &x);
+ }
}
-void *git_hashtable_lookup(git_hashtable *table, const void *key)
+void *git_hashtable_lookup(git_hashtable *self, const void *key)
{
+ int hash_id;
git_hashtable_node *node;
- uint32_t index, hash;
-
- assert(table);
-
- hash = table->hash(key);
- index = (hash & table->size_mask);
- node = table->nodes[index];
- while (node != NULL) {
- if (node->hash == hash && table->key_equal(node->object, key))
- return node->object;
-
- node = node->next;
+ for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) {
+ node = node_with_hash(self, key, hash_id);
+ if (node->key && self->key_equal(key, node->key) == 0)
+ return node->value;
}
return NULL;
}
-int git_hashtable_remove(git_hashtable *table, const void *key)
+int git_hashtable_remove(git_hashtable *self, const void *key)
{
- git_hashtable_node *node, *prev_node;
- uint32_t index, hash;
-
- assert(table);
-
- hash = table->hash(key);
- index = (hash & table->size_mask);
- node = table->nodes[index];
-
- prev_node = NULL;
-
- while (node != NULL) {
- if (node->hash == hash && table->key_equal(node->object, key)) {
- if (prev_node == NULL)
- table->nodes[index] = node->next;
- else
- prev_node->next = node->next;
+ int hash_id;
+ git_hashtable_node *node;
- free(node);
+ for (hash_id = 0; hash_id < GIT_HASHTABLE_HASHES; ++hash_id) {
+ node = node_with_hash(self, key, hash_id);
+ if (node->key && self->key_equal(key, node->key) == 0) {
+ node->key = NULL;
+ node->value = NULL;
+ self->key_count--;
return GIT_SUCCESS;
}
-
- prev_node = node;
- node = node->next;
}
return GIT_ENOTFOUND;
}
-
-
-void git_hashtable_iterator_init(git_hashtable *table, git_hashtable_iterator *it)
-{
- assert(table && it);
-
- memset(it, 0x0, sizeof(git_hashtable_iterator));
-
- it->nodes = table->nodes;
- it->current_node = NULL;
- it->current_pos = 0;
- it->size = table->size_mask + 1;
-}
-
-void *git_hashtable_iterator_next(git_hashtable_iterator *it)
-{
- git_hashtable_node *next = NULL;
-
- assert(it);
-
- while (it->current_node == NULL) {
- if (it->current_pos >= it->size)
- return NULL;
-
- it->current_node = it->nodes[it->current_pos++];
- }
-
- next = it->current_node;
- it->current_node = it->current_node->next;
-
- return next->object;
-}
-
diff --git a/src/hashtable.h b/src/hashtable.h
index 69535040d..74da580ef 100644
--- a/src/hashtable.h
+++ b/src/hashtable.h
@@ -5,39 +5,33 @@
#include "git2/oid.h"
#include "git2/odb.h"
+#define GIT_HASHTABLE_HASHES 3
-typedef uint32_t (*git_hash_ptr)(const void *);
-typedef int (*git_hash_keyeq_ptr)(void *obj, const void *obj_key);
+typedef uint32_t (*git_hash_ptr)(const void *, int hash_id);
+typedef int (*git_hash_keyeq_ptr)(const void *key_a, const void *key_b);
struct git_hashtable_node {
- void *object;
- uint32_t hash;
- struct git_hashtable_node *next;
+ const void *key;
+ void *value;
};
struct git_hashtable {
- struct git_hashtable_node **nodes;
+ struct git_hashtable_node *nodes;
- unsigned int size_mask;
- unsigned int count;
- unsigned int max_count;
+ size_t size_mask;
+ size_t size;
+ size_t key_count;
+
+ int is_resizing;
git_hash_ptr hash;
git_hash_keyeq_ptr key_equal;
};
-struct git_hashtable_iterator {
- struct git_hashtable_node **nodes;
- struct git_hashtable_node *current_node;
- unsigned int current_pos;
- unsigned int size;
-};
-
typedef struct git_hashtable_node git_hashtable_node;
typedef struct git_hashtable git_hashtable;
-typedef struct git_hashtable_iterator git_hashtable_iterator;
-git_hashtable *git_hashtable_alloc(unsigned int min_size,
+git_hashtable *git_hashtable_alloc(size_t min_size,
git_hash_ptr hash,
git_hash_keyeq_ptr key_eq);
int git_hashtable_insert(git_hashtable *h, const void *key, void *value);
@@ -46,7 +40,22 @@ int git_hashtable_remove(git_hashtable *table, const void *key);
void git_hashtable_free(git_hashtable *h);
void git_hashtable_clear(git_hashtable *h);
-void *git_hashtable_iterator_next(git_hashtable_iterator *it);
-void git_hashtable_iterator_init(git_hashtable *h, git_hashtable_iterator *it);
+#define git_hashtable_node_at(nodes, pos) ((git_hashtable_node *)(&nodes[pos]))
+
+#define GIT_HASHTABLE_FOREACH(self, pkey, pvalue, code) {\
+ git_hashtable *_self = (self);\
+ git_hashtable_node *_nodes = _self->nodes;\
+ unsigned int _i, _size = _self->size;\
+ for (_i = 0; _i < _size; _i ++) {\
+ git_hashtable_node *_node = git_hashtable_node_at(_nodes, _i);\
+ if (_node->key)\
+ {\
+ pkey = _node->key;\
+ pvalue = _node->value;\
+ code;\
+ }\
+ }\
+}
+
#endif
diff --git a/src/refs.c b/src/refs.c
index 1f434ea03..b95ec70cf 100644
--- a/src/refs.c
+++ b/src/refs.c
@@ -28,28 +28,21 @@
#include "repository.h"
#include "fileops.h"
-#define HASH_SEED 2147483647
#define MAX_NESTING_LEVEL 5
static const int default_table_size = 32;
-static uint32_t reftable_hash(const void *key)
+static uint32_t reftable_hash(const void *key, int hash_id)
{
- return git__hash(key, strlen((const char *)key), HASH_SEED);
-}
-
-static int reftable_haskey(void *reference, const void *key)
-{
- git_reference *ref;
- char *name;
+ static uint32_t hash_seeds[GIT_HASHTABLE_HASHES] = {
+ 2147483647,
+ 0x5d20bb23,
+ 0x7daaab3c
+ };
- ref = (git_reference *)reference;
- name = (char *)key;
-
- return strcmp(name, ref->name) == 0;
+ return git__hash(key, strlen((const char *)key), hash_seeds[hash_id]);
}
-
static int check_refname(const char *name)
{
/*
@@ -641,24 +634,21 @@ int git_repository__refcache_init(git_refcache *refs)
refs->cache = git_hashtable_alloc(
default_table_size,
reftable_hash,
- reftable_haskey);
+ (git_hash_keyeq_ptr)strcmp);
return refs->cache ? GIT_SUCCESS : GIT_ENOMEM;
}
void git_repository__refcache_free(git_refcache *refs)
{
- git_hashtable_iterator it;
+ const char *ref_name;
git_reference *reference;
assert(refs);
- git_hashtable_iterator_init(refs->cache, &it);
-
- while ((reference = (git_reference *)git_hashtable_iterator_next(&it)) != NULL) {
- git_hashtable_remove(refs->cache, reference->name);
- reference_free(reference);
- }
+ GIT_HASHTABLE_FOREACH(refs->cache, ref_name, reference,
+ reference_free(reference)
+ );
git_hashtable_free(refs->cache);
}
diff --git a/src/repository.c b/src/repository.c
index 2b17ba455..a1d2798b3 100644
--- a/src/repository.c
+++ b/src/repository.c
@@ -58,28 +58,16 @@ typedef struct {
* Callbacks for the ODB cache, implemented
* as a hash table
*/
-uint32_t object_table_hash(const void *key)
+uint32_t object_table_hash(const void *key, int hash_id)
{
uint32_t r;
git_oid *id;
id = (git_oid *)key;
- memcpy(&r, id->id, sizeof(r));
+ memcpy(&r, id->id + (hash_id * sizeof(uint32_t)), sizeof(r));
return r;
}
-int object_table_hashkey(void *object, const void *key)
-{
- git_object *obj;
- git_oid *oid;
-
- obj = (git_object *)object;
- oid = (git_oid *)key;
-
- return (git_oid_cmp(oid, &obj->id) == 0);
-}
-
-
/*
* Git repository open methods
*
@@ -245,9 +233,9 @@ static git_repository *repository_alloc()
repo->objects = git_hashtable_alloc(
OBJECT_TABLE_SIZE,
object_table_hash,
- object_table_hashkey);
+ (git_hash_keyeq_ptr)git_oid_cmp);
- if (repo->objects == NULL) {
+ if (repo->objects == NULL) {
free(repo);
return NULL;
}
@@ -369,8 +357,8 @@ cleanup:
void git_repository_free(git_repository *repo)
{
- git_hashtable_iterator it;
git_object *object;
+ const git_oid *oid;
if (repo == NULL)
return;
@@ -380,11 +368,9 @@ void git_repository_free(git_repository *repo)
free(repo->path_repository);
free(repo->path_odb);
- git_hashtable_iterator_init(repo->objects, &it);
-
- while ((object = (git_object *)
- git_hashtable_iterator_next(&it)) != NULL)
+ GIT_HASHTABLE_FOREACH(repo->objects, oid, object, {
git_object_free(object);
+ });
git_hashtable_free(repo->objects);
diff --git a/src/revwalk.c b/src/revwalk.c
index e30e543a8..c073be13f 100644
--- a/src/revwalk.c
+++ b/src/revwalk.c
@@ -28,28 +28,23 @@
#include "revwalk.h"
#include "hashtable.h"
-uint32_t git_revwalk__commit_hash(const void *key)
+uint32_t git_revwalk__commit_hash(const void *key, int hash_id)
{
uint32_t r;
git_commit *commit;
commit = (git_commit *)key;
- memcpy(&r, commit->object.id.id, sizeof(r));
+ memcpy(&r, commit->object.id.id + (hash_id * sizeof(uint32_t)), sizeof(r));
return r;
}
-int git_revwalk__commit_haskey(void *object, const void *key)
+int git_revwalk__commit_keycmp(const void *key_a, const void *key_b)
{
- git_revwalk_commit *walk_commit;
- git_commit *commit_object;
-
- walk_commit = (git_revwalk_commit *)object;
- commit_object = (git_commit *)key;
-
- return (walk_commit->commit_object == commit_object);
+ git_commit *a = (git_commit *)key_a;
+ git_commit *b = (git_commit *)key_b;
+ return git_oid_cmp(&a->object.id, &b->object.id);
}
-
int git_revwalk_new(git_revwalk **revwalk_out, git_repository *repo)
{
git_revwalk *walk;
@@ -62,7 +57,7 @@ int git_revwalk_new(git_revwalk **revwalk_out, git_repository *repo)
walk->commits = git_hashtable_alloc(64,
git_revwalk__commit_hash,
- git_revwalk__commit_haskey);
+ git_revwalk__commit_keycmp);
if (walk->commits == NULL) {
free(walk);
@@ -245,18 +240,15 @@ int git_revwalk_next(git_commit **commit, git_revwalk *walk)
void git_revwalk_reset(git_revwalk *walk)
{
- git_hashtable_iterator it;
+ const void *_unused;
git_revwalk_commit *commit;
assert(walk);
- git_hashtable_iterator_init(walk->commits, &it);
-
- while ((commit = (git_revwalk_commit *)
- git_hashtable_iterator_next(&it)) != NULL) {
+ GIT_HASHTABLE_FOREACH(walk->commits, _unused, commit, {
git_revwalk_list_clear(&commit->parents);
free(commit);
- }
+ });
git_hashtable_clear(walk->commits);
git_revwalk_list_clear(&walk->iterator);