Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/libgit2.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVicent Marti <tanoku@gmail.com>2011-07-07 03:46:20 +0400
committerVicent Marti <tanoku@gmail.com>2011-07-07 04:54:07 +0400
commitde18f276683a5cf3d85d001f6521e52c8c802e60 (patch)
tree01db449c1fa2d11568aca66bf4a5ef9d1a5c60f3
parentc63aa494595a6d6e6d97cfa1bbc1741a0b2e0cc6 (diff)
vector: Timsort all of the things
Drop the GLibc implementation of Merge Sort and replace it with Timsort. The algorithm has been tuned to work on arrays of pointers (void **), so there's no longer a need to abstract the byte-width of each element in the array. All the comparison callbacks now take pointers-to-elements, not pointers-to-pointers, so there's now one less level of dereferencing. E.g. int index_cmp(const void *a, const void *b) { - const git_index_entry *entry_a = *(const git_index_entry **)(a); + const git_index_entry *entry_a = (const git_index_entry *)(a); The result is up to a 40% speed-up when sorting vectors. Memory usage remains lineal. A new `bsearch` implementation has been added, whose callback also supplies pointer-to-elements, to uniform the Vector API again.
-rw-r--r--src/config.c4
-rw-r--r--src/index.c12
-rw-r--r--src/odb.c4
-rw-r--r--src/odb_pack.c4
-rw-r--r--src/refs.c4
-rw-r--r--src/tree.c6
-rw-r--r--src/tsort.c380
-rw-r--r--src/util.c79
-rw-r--r--src/util.h5
-rw-r--r--src/vector.c6
-rw-r--r--tests/t00-core.c5
11 files changed, 424 insertions, 85 deletions
diff --git a/src/config.c b/src/config.c
index 730917634..815b08c55 100644
--- a/src/config.c
+++ b/src/config.c
@@ -56,8 +56,8 @@ void git_config_free(git_config *cfg)
static int config_backend_cmp(const void *a, const void *b)
{
- const file_internal *bk_a = *(const file_internal **)(a);
- const file_internal *bk_b = *(const file_internal **)(b);
+ const file_internal *bk_a = (const file_internal *)(a);
+ const file_internal *bk_b = (const file_internal *)(b);
return bk_b->priority - bk_a->priority;
}
diff --git a/src/index.c b/src/index.c
index dd33db92a..353c30025 100644
--- a/src/index.c
+++ b/src/index.c
@@ -109,15 +109,15 @@ static int write_index(git_index *index, git_filebuf *file);
int index_srch(const void *key, const void *array_member)
{
const char *filename = (const char *)key;
- const git_index_entry *entry = *(const git_index_entry **)(array_member);
+ const git_index_entry *entry = (const git_index_entry *)(array_member);
return strcmp(filename, entry->path);
}
int index_cmp(const void *a, const void *b)
{
- const git_index_entry *entry_a = *(const git_index_entry **)(a);
- const git_index_entry *entry_b = *(const git_index_entry **)(b);
+ const git_index_entry *entry_a = (const git_index_entry *)(a);
+ const git_index_entry *entry_b = (const git_index_entry *)(b);
return strcmp(entry_a->path, entry_b->path);
}
@@ -125,15 +125,15 @@ int index_cmp(const void *a, const void *b)
int unmerged_srch(const void *key, const void *array_member)
{
const char *path = (const char *) key;
- const git_index_entry_unmerged *entry = *(const git_index_entry_unmerged **) (array_member);
+ const git_index_entry_unmerged *entry = (const git_index_entry_unmerged *)(array_member);
return strcmp(path, entry->path);
}
int unmerged_cmp(const void *a, const void *b)
{
- const git_index_entry_unmerged *info_a = *(const git_index_entry_unmerged **)(a);
- const git_index_entry_unmerged *info_b = *(const git_index_entry_unmerged **)(b);
+ const git_index_entry_unmerged *info_a = (const git_index_entry_unmerged *)(a);
+ const git_index_entry_unmerged *info_b = (const git_index_entry_unmerged *)(b);
return strcmp(info_a->path, info_b->path);
}
diff --git a/src/odb.c b/src/odb.c
index b09931660..caa4e1bef 100644
--- a/src/odb.c
+++ b/src/odb.c
@@ -223,8 +223,8 @@ static int init_fake_wstream(git_odb_stream **stream_p, git_odb_backend *backend
static int backend_sort_cmp(const void *a, const void *b)
{
- const backend_internal *backend_a = *(const backend_internal **)(a);
- const backend_internal *backend_b = *(const backend_internal **)(b);
+ const backend_internal *backend_a = (const backend_internal *)(a);
+ const backend_internal *backend_b = (const backend_internal *)(b);
if (backend_a->is_alternate == backend_b->is_alternate)
return (backend_b->priority - backend_a->priority);
diff --git a/src/odb_pack.c b/src/odb_pack.c
index 44604ef91..20b0f1e4d 100644
--- a/src/odb_pack.c
+++ b/src/odb_pack.c
@@ -699,8 +699,8 @@ static int pack_index_open(struct pack_file *p)
static int packfile_sort__cb(const void *a_, const void *b_)
{
- struct pack_file *a = *((struct pack_file **)a_);
- struct pack_file *b = *((struct pack_file **)b_);
+ struct pack_file *a = (struct pack_file *)a_;
+ struct pack_file *b = (struct pack_file *)b_;
int st;
/*
diff --git a/src/refs.c b/src/refs.c
index 84224e88e..5e0fe09b2 100644
--- a/src/refs.c
+++ b/src/refs.c
@@ -825,8 +825,8 @@ static int packed_remove_loose(git_repository *repo, git_vector *packing_list)
static int packed_sort(const void *a, const void *b)
{
- const git_reference *ref_a = *(const git_reference **)a;
- const git_reference *ref_b = *(const git_reference **)b;
+ const git_reference *ref_a = (const git_reference *)a;
+ const git_reference *ref_b = (const git_reference *)b;
return strcmp(ref_a->name, ref_b->name);
}
diff --git a/src/tree.c b/src/tree.c
index 51b19f535..fff5068f8 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -40,7 +40,7 @@ static int valid_attributes(const int attributes) {
int entry_search_cmp(const void *key, const void *array_member)
{
const char *filename = (const char *)key;
- const git_tree_entry *entry = *(const git_tree_entry **)(array_member);
+ const git_tree_entry *entry = (const git_tree_entry *)(array_member);
return strcmp(filename, entry->filename);
}
@@ -53,8 +53,8 @@ static int valid_attributes(const int attributes) {
int entry_sort_cmp(const void *a, const void *b)
{
- const git_tree_entry *entry_a = *(const git_tree_entry **)(a);
- const git_tree_entry *entry_b = *(const git_tree_entry **)(b);
+ const git_tree_entry *entry_a = (const git_tree_entry *)(a);
+ const git_tree_entry *entry_b = (const git_tree_entry *)(b);
return git_futils_cmp_path(entry_a->filename, strlen(entry_a->filename),
entry_a->attr & 040000,
diff --git a/src/tsort.c b/src/tsort.c
new file mode 100644
index 000000000..eda9a2fd2
--- /dev/null
+++ b/src/tsort.c
@@ -0,0 +1,380 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+/**
+ * An array-of-pointers implementation of Python's Timsort
+ * Based on code by Christopher Swenson under the MIT license
+ *
+ * Copyright (c) 2010 Christopher Swenson
+ * Copyright (c) 2011 Vicent Marti
+ */
+
+#ifndef MAX
+# define MAX(x,y) (((x) > (y) ? (x) : (y)))
+#endif
+
+#ifndef MIN
+# define MIN(x,y) (((x) < (y) ? (x) : (y)))
+#endif
+
+#if defined(__GNUC__)
+# define CLZ(x) __builtin_clz(x)
+#elif defined(_MSV_VER)
+# define CLZ(x) _CountLeadingZeros(x)
+#else
+int CLZ(int32_t x)
+{
+ int e = 31;
+
+ if (!x) return 32;
+ if (x&0xFFFF0000) { e -=16; x >>=16; }
+ if (x&0x0000FF00) { e -= 8; x >>= 8; }
+ if (x&0x000000F0) { e -= 4; x >>= 4; }
+ if (x&0x0000000C) { e -= 2; x >>= 2; }
+ if (x&0x00000002) { e -= 1; }
+ return e;
+}
+#endif
+
+typedef int (*cmp_ptr_t)(const void *, const void *);
+
+static int binsearch(void **dst, const void *x, size_t size, cmp_ptr_t cmp)
+{
+ int l, c, r;
+ void *lx, *cx, *rx;
+
+ l = 0;
+ r = size - 1;
+ c = r >> 1;
+ lx = dst[l];
+
+ /* check for beginning conditions */
+ if (cmp(x, lx) < 0)
+ return 0;
+
+ else if (cmp(x, lx) == 0) {
+ int i = 1;
+ while (cmp(x, dst[i]) == 0)
+ i++;
+ return i;
+ }
+
+ rx = dst[r];
+ /* guaranteed not to be >= rx */
+ cx = dst[c];
+ while (1) {
+ const int val = cmp(x, cx);
+ if (val < 0) {
+ if (c - l <= 1) return c;
+ r = c;
+ rx = cx;
+ } else if (val > 0) {
+ if (r - c <= 1) return c + 1;
+ l = c;
+ lx = cx;
+ } else {
+ do {
+ cx = dst[++c];
+ } while (cmp(x, cx) == 0);
+ return c;
+ }
+ c = l + ((r - l) >> 1);
+ cx = dst[c];
+ }
+}
+
+/* Binary insertion sort, but knowing that the first "start" entries are sorted. Used in timsort. */
+static void bisort(void **dst, size_t start, size_t size, cmp_ptr_t cmp)
+{
+ size_t i;
+
+ for (i = start; i < size; i++) {
+ int j;
+ /* If this entry is already correct, just move along */
+ if (cmp(dst[i - 1], dst[i]) <= 0)
+ continue;
+
+ /* Else we need to find the right place, shift everything over, and squeeze in */
+ void *x = dst[i];
+ int location = binsearch(dst, x, i, cmp);
+ for (j = i - 1; j >= location; j--) {
+ dst[j + 1] = dst[j];
+ }
+ dst[location] = x;
+ }
+}
+
+
+/* timsort implementation, based on timsort.txt */
+struct tsort_run {
+ ssize_t start;
+ ssize_t length;
+};
+
+struct tsort_store {
+ size_t alloc;
+ cmp_ptr_t cmp;
+ void **storage;
+};
+
+static void reverse_elements(void **dst, int start, int end)
+{
+ while (start < end) {
+ void *tmp = dst[start];
+ dst[start] = dst[end];
+ dst[end] = tmp;
+
+ start++;
+ end--;
+ }
+}
+
+static int count_run(void **dst, ssize_t start, ssize_t size, struct tsort_store *store)
+{
+ ssize_t curr = start + 2;
+
+ if (size - start == 1)
+ return 1;
+
+ if (start >= size - 2) {
+ if (store->cmp(dst[size - 2], dst[size - 1]) > 0) {
+ void *tmp = dst[size - 1];
+ dst[size - 1] = dst[size - 2];
+ dst[size - 2] = tmp;
+ }
+
+ return 2;
+ }
+
+ if (store->cmp(dst[start], dst[start + 1]) <= 0) {
+ while (curr < size - 1 && store->cmp(dst[curr - 1], dst[curr]) <= 0)
+ curr++;
+
+ return curr - start;
+ } else {
+ while (curr < size - 1 && store->cmp(dst[curr - 1], dst[curr]) > 0)
+ curr++;
+
+ /* reverse in-place */
+ reverse_elements(dst, start, curr - 1);
+ return curr - start;
+ }
+}
+
+static int compute_minrun(size_t n)
+{
+ int r = 0;
+ while (n >= 64) {
+ r |= n & 1;
+ n >>= 1;
+ }
+ return n + r;
+}
+
+static int check_invariant(struct tsort_run *stack, int stack_curr)
+{
+ if (stack_curr < 2)
+ return 1;
+
+ else if (stack_curr == 2) {
+ const int A = stack[stack_curr - 2].length;
+ const int B = stack[stack_curr - 1].length;
+ return (A > B);
+ } else {
+ const int A = stack[stack_curr - 3].length;
+ const int B = stack[stack_curr - 2].length;
+ const int C = stack[stack_curr - 1].length;
+ return !((A <= B + C) || (B <= C));
+ }
+}
+
+static int resize(struct tsort_store *store, size_t new_size)
+{
+ if (store->alloc < new_size) {
+ void **tempstore = realloc(store->storage, new_size * sizeof(void *));
+
+ /**
+ * Do not propagate on OOM; this will abort the sort and
+ * leave the array unsorted, but no error code will be
+ * raised
+ */
+ if (tempstore == NULL)
+ return -1;
+
+ store->storage = tempstore;
+ store->alloc = new_size;
+ }
+
+ return 0;
+}
+
+static void merge(void **dst, const struct tsort_run *stack, int stack_curr, struct tsort_store *store)
+{
+ const ssize_t A = stack[stack_curr - 2].length;
+ const ssize_t B = stack[stack_curr - 1].length;
+ const ssize_t curr = stack[stack_curr - 2].start;
+
+ void **storage;
+ ssize_t i, j, k;
+
+ if (resize(store, MIN(A, B)) < 0)
+ return;
+
+ storage = store->storage;
+
+ /* left merge */
+ if (A < B) {
+ memcpy(storage, &dst[curr], A * sizeof(void *));
+ i = 0;
+ j = curr + A;
+
+ for (k = curr; k < curr + A + B; k++) {
+ if ((i < A) && (j < curr + A + B)) {
+ if (store->cmp(storage[i], dst[j]) <= 0)
+ dst[k] = storage[i++];
+ else
+ dst[k] = dst[j++];
+ } else if (i < A) {
+ dst[k] = storage[i++];
+ } else
+ dst[k] = dst[j++];
+ }
+ } else {
+ memcpy(storage, &dst[curr + A], B * sizeof(void *));
+ i = B - 1;
+ j = curr + A - 1;
+
+ for (k = curr + A + B - 1; k >= curr; k--) {
+ if ((i >= 0) && (j >= curr)) {
+ if (store->cmp(dst[j], storage[i]) > 0)
+ dst[k] = dst[j--];
+ else
+ dst[k] = storage[i--];
+ } else if (i >= 0)
+ dst[k] = storage[i--];
+ else
+ dst[k] = dst[j--];
+ }
+ }
+}
+
+static ssize_t collapse(void **dst, struct tsort_run *stack, ssize_t stack_curr, struct tsort_store *store, ssize_t size)
+{
+ ssize_t A, B, C;
+
+ while (1) {
+ /* if the stack only has one thing on it, we are done with the collapse */
+ if (stack_curr <= 1)
+ break;
+
+ /* if this is the last merge, just do it */
+ if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) {
+ merge(dst, stack, stack_curr, store);
+ stack[0].length += stack[1].length;
+ stack_curr--;
+ break;
+ }
+
+ /* check if the invariant is off for a stack of 2 elements */
+ else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) {
+ merge(dst, stack, stack_curr, store);
+ stack[0].length += stack[1].length;
+ stack_curr--;
+ break;
+ }
+ else if (stack_curr == 2)
+ break;
+
+ A = stack[stack_curr - 3].length;
+ B = stack[stack_curr - 2].length;
+ C = stack[stack_curr - 1].length;
+
+ /* check first invariant */
+ if (A <= B + C) {
+ if (A < C) {
+ merge(dst, stack, stack_curr - 1, store);
+ stack[stack_curr - 3].length += stack[stack_curr - 2].length;
+ stack[stack_curr - 2] = stack[stack_curr - 1];
+ stack_curr--;
+ } else {
+ merge(dst, stack, stack_curr, store);
+ stack[stack_curr - 2].length += stack[stack_curr - 1].length;
+ stack_curr--;
+ }
+ } else if (B <= C) {
+ merge(dst, stack, stack_curr, store);
+ stack[stack_curr - 2].length += stack[stack_curr - 1].length;
+ stack_curr--;
+ } else
+ break;
+ }
+
+ return stack_curr;
+}
+
+#define PUSH_NEXT() do {\
+ len = count_run(dst, curr, size, store);\
+ run = minrun;\
+ if (run < minrun) run = minrun;\
+ if (run > (ssize_t)size - curr) run = size - curr;\
+ if (run > len) {\
+ bisort(&dst[curr], len, run, cmp);\
+ len = run;\
+ }\
+ run_stack[stack_curr++] = (struct tsort_run) {curr, len};\
+ curr += len;\
+ if (curr == (ssize_t)size) {\
+ /* finish up */ \
+ while (stack_curr > 1) { \
+ merge(dst, run_stack, stack_curr, store); \
+ run_stack[stack_curr - 2].length += run_stack[stack_curr - 1].length; \
+ stack_curr--; \
+ } \
+ if (store->storage != NULL) {\
+ free(store->storage);\
+ store->storage = NULL;\
+ }\
+ return;\
+ }\
+}\
+while (0)
+
+
+void git__tsort(void **dst, size_t size, cmp_ptr_t cmp)
+{
+ struct tsort_store _store, *store = &_store;
+ struct tsort_run run_stack[128];
+
+ ssize_t stack_curr = 0;
+ ssize_t len, run;
+ ssize_t curr = 0;
+ ssize_t minrun;
+
+ if (size < 64) {
+ bisort(dst, 1, size, cmp);
+ return;
+ }
+
+ /* compute the minimum run length */
+ minrun = compute_minrun(size);
+
+ /* temporary storage for merges */
+ store->alloc = 0;
+ store->storage = NULL;
+ store->cmp = cmp;
+
+ PUSH_NEXT();
+ PUSH_NEXT();
+ PUSH_NEXT();
+
+ while (1) {
+ if (!check_invariant(run_stack, stack_curr)) {
+ stack_curr = collapse(dst, run_stack, stack_curr, store, size);
+ continue;
+ }
+
+ PUSH_NEXT();
+ }
+}
diff --git a/src/util.c b/src/util.c
index 03e911241..4a44f9988 100644
--- a/src/util.c
+++ b/src/util.c
@@ -331,66 +331,27 @@ uint32_t git__hash(const void *key, int len, uint32_t seed)
}
#endif
-/*
- * A merge sort implementation, simplified from the qsort implementation
- * by Mike Haertel, which is a part of the GNU C Library.
+/**
+ * A modified `bsearch` from the BSD glibc.
+ *
+ * Copyright (c) 1990 Regents of the University of California.
+ * All rights reserved.
*/
-static void msort_with_tmp(void *b, size_t n, size_t s,
- int (*cmp)(const void *, const void *),
- char *t)
+void **git__bsearch(const void *key, void **base, size_t nmemb, int (*compar)(const void *, const void *))
{
- char *tmp;
- char *b1, *b2;
- size_t n1, n2;
-
- if (n <= 1)
- return;
-
- n1 = n / 2;
- n2 = n - n1;
- b1 = b;
- b2 = (char *)b + (n1 * s);
-
- msort_with_tmp(b1, n1, s, cmp, t);
- msort_with_tmp(b2, n2, s, cmp, t);
-
- tmp = t;
-
- while (n1 > 0 && n2 > 0) {
- if (cmp(b1, b2) <= 0) {
- memcpy(tmp, b1, s);
- tmp += s;
- b1 += s;
- --n1;
- } else {
- memcpy(tmp, b2, s);
- tmp += s;
- b2 += s;
- --n2;
- }
- }
- if (n1 > 0)
- memcpy(tmp, b1, n1 * s);
- memcpy(b, t, (n - n2) * s);
+ int lim, cmp;
+ void **p;
+
+ for (lim = nmemb; lim != 0; lim >>= 1) {
+ p = base + (lim >> 1);
+ cmp = (*compar)(key, *p);
+ if (cmp > 0) { /* key > p: move right */
+ base = p + 1;
+ lim--;
+ } else if (cmp == 0) {
+ return (void **)p;
+ } /* else move left */
+ }
+ return NULL;
}
-int git__msort(void *b, size_t n, size_t s,
- int (*cmp)(const void *, const void *))
-{
- const size_t size = n * s;
- char buf[1024];
-
- if (size < sizeof(buf)) {
- /* The temporary array fits on the small on-stack buffer. */
- msort_with_tmp(b, n, s, cmp, buf);
- } else {
- /* It's somewhat large, so malloc it. */
- char *tmp = git__malloc(size);
- if (tmp == NULL)
- return GIT_ENOMEM;
- msort_with_tmp(b, n, s, cmp, tmp);
- free(tmp);
- }
-
- return GIT_SUCCESS;
-}
diff --git a/src/util.h b/src/util.h
index c9ca4dec0..410ebfb26 100644
--- a/src/util.h
+++ b/src/util.h
@@ -118,7 +118,8 @@ extern int git__fnmatch(const char *pattern, const char *name, int flags);
} \
} while (0)
-extern int git__msort(void *b, size_t n, size_t s,
- int (*cmp)(const void *, const void *));
+extern int git__tsort(void **b, size_t n, int (*cmp)(const void *, const void *));
+extern void **git__bsearch(const void *key, void **base, size_t nmemb,
+ int (*compar)(const void *, const void *));
#endif /* INCLUDE_util_h__ */
diff --git a/src/vector.c b/src/vector.c
index 2fc051f0c..0b83b8b0d 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -94,7 +94,7 @@ void git_vector_sort(git_vector *v)
if (v->sorted || v->_cmp == NULL)
return;
- git__msort(v->contents, v->length, sizeof(void *), v->_cmp);
+ git__tsort(v->contents, v->length, v->_cmp);
v->sorted = 1;
}
@@ -110,7 +110,7 @@ int git_vector_bsearch2(git_vector *v, git_vector_cmp key_lookup, const void *ke
git_vector_sort(v);
- find = bsearch(key, v->contents, v->length, sizeof(void *), key_lookup);
+ find = git__bsearch(key, v->contents, v->length, key_lookup);
if (find != NULL)
return (int)(find - v->contents);
@@ -174,7 +174,7 @@ void git_vector_uniq(git_vector *v)
cmp = v->_cmp ? v->_cmp : strict_comparison;
for (i = 0, j = 1 ; j < v->length; ++j)
- if (!cmp(v->contents + i, v->contents + j))
+ if (!cmp(v->contents[i], v->contents[j]))
v->contents[i] = v->contents[j];
else
v->contents[++i] = v->contents[j];
diff --git a/tests/t00-core.c b/tests/t00-core.c
index 7bcc77aeb..660f10c89 100644
--- a/tests/t00-core.c
+++ b/tests/t00-core.c
@@ -75,10 +75,7 @@ END_TEST
static int test_cmp(const void *a, const void *b)
{
- int n1 = *(int *)a;
- int n2 = *(int *)b;
-
- return n1 - n2;
+ return a - b;
}
BEGIN_TEST(vector2, "remove duplicates")