Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.kernel.org/pub/scm/git/git.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeff King <peff@peff.net>2017-02-08 23:53:07 +0300
committerJunio C Hamano <gitster@pobox.com>2017-02-09 02:39:55 +0300
commit29c2bd5fa8cb97eedcd463d49cfc7e753feb3145 (patch)
tree8822280e8073c9af3dccb363d81a881382f6f879
parent41a078c60b82bad4edf9d1bd8e826aae5f020ee5 (diff)
add oidset API
This is similar to many of our uses of sha1-array, but it overcomes one limitation of a sha1-array: when you are de-duplicating a large input with relatively few unique entries, sha1-array uses 20 bytes per non-unique entry. Whereas this set will use memory linear in the number of unique entries (albeit a few more than 20 bytes due to hashmap overhead). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r--Makefile1
-rw-r--r--oidset.c49
-rw-r--r--oidset.h45
3 files changed, 95 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index 8e4081e061..a5433978e3 100644
--- a/Makefile
+++ b/Makefile
@@ -781,6 +781,7 @@ LIB_OBJS += notes-cache.o
LIB_OBJS += notes-merge.o
LIB_OBJS += notes-utils.o
LIB_OBJS += object.o
+LIB_OBJS += oidset.o
LIB_OBJS += pack-bitmap.o
LIB_OBJS += pack-bitmap-write.o
LIB_OBJS += pack-check.o
diff --git a/oidset.c b/oidset.c
new file mode 100644
index 0000000000..ac169f05d3
--- /dev/null
+++ b/oidset.c
@@ -0,0 +1,49 @@
+#include "cache.h"
+#include "oidset.h"
+
+struct oidset_entry {
+ struct hashmap_entry hash;
+ struct object_id oid;
+};
+
+static int oidset_hashcmp(const void *va, const void *vb,
+ const void *vkey)
+{
+ const struct oidset_entry *a = va, *b = vb;
+ const struct object_id *key = vkey;
+ return oidcmp(&a->oid, key ? key : &b->oid);
+}
+
+int oidset_contains(const struct oidset *set, const struct object_id *oid)
+{
+ struct hashmap_entry key;
+
+ if (!set->map.cmpfn)
+ return 0;
+
+ hashmap_entry_init(&key, sha1hash(oid->hash));
+ return !!hashmap_get(&set->map, &key, oid);
+}
+
+int oidset_insert(struct oidset *set, const struct object_id *oid)
+{
+ struct oidset_entry *entry;
+
+ if (!set->map.cmpfn)
+ hashmap_init(&set->map, oidset_hashcmp, 0);
+
+ if (oidset_contains(set, oid))
+ return 1;
+
+ entry = xmalloc(sizeof(*entry));
+ hashmap_entry_init(&entry->hash, sha1hash(oid->hash));
+ oidcpy(&entry->oid, oid);
+
+ hashmap_add(&set->map, entry);
+ return 0;
+}
+
+void oidset_clear(struct oidset *set)
+{
+ hashmap_free(&set->map, 1);
+}
diff --git a/oidset.h b/oidset.h
new file mode 100644
index 0000000000..b7eaab5b88
--- /dev/null
+++ b/oidset.h
@@ -0,0 +1,45 @@
+#ifndef OIDSET_H
+#define OIDSET_H
+
+/**
+ * This API is similar to sha1-array, in that it maintains a set of object ids
+ * in a memory-efficient way. The major differences are:
+ *
+ * 1. It uses a hash, so we can do online duplicate removal, rather than
+ * sort-and-uniq at the end. This can reduce memory footprint if you have
+ * a large list of oids with many duplicates.
+ *
+ * 2. The per-unique-oid memory footprint is slightly higher due to hash
+ * table overhead.
+ */
+
+/**
+ * A single oidset; should be zero-initialized (or use OIDSET_INIT).
+ */
+struct oidset {
+ struct hashmap map;
+};
+
+#define OIDSET_INIT { { NULL } }
+
+/**
+ * Returns true iff `set` contains `oid`.
+ */
+int oidset_contains(const struct oidset *set, const struct object_id *oid);
+
+/**
+ * Insert the oid into the set; a copy is made, so "oid" does not need
+ * to persist after this function is called.
+ *
+ * Returns 1 if the oid was already in the set, 0 otherwise. This can be used
+ * to perform an efficient check-and-add.
+ */
+int oidset_insert(struct oidset *set, const struct object_id *oid);
+
+/**
+ * Remove all entries from the oidset, freeing any resources associated with
+ * it.
+ */
+void oidset_clear(struct oidset *set);
+
+#endif /* OIDSET_H */