diff options
author | Vicent Marti <tanoku@gmail.com> | 2011-07-07 03:46:20 +0400 |
---|---|---|
committer | Vicent Marti <tanoku@gmail.com> | 2011-07-07 04:54:07 +0400 |
commit | de18f276683a5cf3d85d001f6521e52c8c802e60 (patch) | |
tree | 01db449c1fa2d11568aca66bf4a5ef9d1a5c60f3 /src/tsort.c | |
parent | c63aa494595a6d6e6d97cfa1bbc1741a0b2e0cc6 (diff) |
vector: Timsort all of the things
Drop the GLibc implementation of Merge Sort and replace it with Timsort.
The algorithm has been tuned to work on arrays of pointers (void **),
so there's no longer a need to abstract the byte-width of each element
in the array.
All the comparison callbacks now take pointers-to-elements, not
pointers-to-pointers, so there's now one less level of dereferencing.
E.g.
int index_cmp(const void *a, const void *b)
{
- const git_index_entry *entry_a = *(const git_index_entry **)(a);
+ const git_index_entry *entry_a = (const git_index_entry *)(a);
The result is up to a 40% speed-up when sorting vectors. Memory usage
remains lineal.
A new `bsearch` implementation has been added, whose callback also
supplies pointer-to-elements, to uniform the Vector API again.
Diffstat (limited to 'src/tsort.c')
-rw-r--r-- | src/tsort.c | 380 |
1 files changed, 380 insertions, 0 deletions
diff --git a/src/tsort.c b/src/tsort.c new file mode 100644 index 000000000..eda9a2fd2 --- /dev/null +++ b/src/tsort.c @@ -0,0 +1,380 @@ +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <stdlib.h> + +/** + * An array-of-pointers implementation of Python's Timsort + * Based on code by Christopher Swenson under the MIT license + * + * Copyright (c) 2010 Christopher Swenson + * Copyright (c) 2011 Vicent Marti + */ + +#ifndef MAX +# define MAX(x,y) (((x) > (y) ? (x) : (y))) +#endif + +#ifndef MIN +# define MIN(x,y) (((x) < (y) ? (x) : (y))) +#endif + +#if defined(__GNUC__) +# define CLZ(x) __builtin_clz(x) +#elif defined(_MSV_VER) +# define CLZ(x) _CountLeadingZeros(x) +#else +int CLZ(int32_t x) +{ + int e = 31; + + if (!x) return 32; + if (x&0xFFFF0000) { e -=16; x >>=16; } + if (x&0x0000FF00) { e -= 8; x >>= 8; } + if (x&0x000000F0) { e -= 4; x >>= 4; } + if (x&0x0000000C) { e -= 2; x >>= 2; } + if (x&0x00000002) { e -= 1; } + return e; +} +#endif + +typedef int (*cmp_ptr_t)(const void *, const void *); + +static int binsearch(void **dst, const void *x, size_t size, cmp_ptr_t cmp) +{ + int l, c, r; + void *lx, *cx, *rx; + + l = 0; + r = size - 1; + c = r >> 1; + lx = dst[l]; + + /* check for beginning conditions */ + if (cmp(x, lx) < 0) + return 0; + + else if (cmp(x, lx) == 0) { + int i = 1; + while (cmp(x, dst[i]) == 0) + i++; + return i; + } + + rx = dst[r]; + /* guaranteed not to be >= rx */ + cx = dst[c]; + while (1) { + const int val = cmp(x, cx); + if (val < 0) { + if (c - l <= 1) return c; + r = c; + rx = cx; + } else if (val > 0) { + if (r - c <= 1) return c + 1; + l = c; + lx = cx; + } else { + do { + cx = dst[++c]; + } while (cmp(x, cx) == 0); + return c; + } + c = l + ((r - l) >> 1); + cx = dst[c]; + } +} + +/* Binary insertion sort, but knowing that the first "start" entries are sorted. Used in timsort. */ +static void bisort(void **dst, size_t start, size_t size, cmp_ptr_t cmp) +{ + size_t i; + + for (i = start; i < size; i++) { + int j; + /* If this entry is already correct, just move along */ + if (cmp(dst[i - 1], dst[i]) <= 0) + continue; + + /* Else we need to find the right place, shift everything over, and squeeze in */ + void *x = dst[i]; + int location = binsearch(dst, x, i, cmp); + for (j = i - 1; j >= location; j--) { + dst[j + 1] = dst[j]; + } + dst[location] = x; + } +} + + +/* timsort implementation, based on timsort.txt */ +struct tsort_run { + ssize_t start; + ssize_t length; +}; + +struct tsort_store { + size_t alloc; + cmp_ptr_t cmp; + void **storage; +}; + +static void reverse_elements(void **dst, int start, int end) +{ + while (start < end) { + void *tmp = dst[start]; + dst[start] = dst[end]; + dst[end] = tmp; + + start++; + end--; + } +} + +static int count_run(void **dst, ssize_t start, ssize_t size, struct tsort_store *store) +{ + ssize_t curr = start + 2; + + if (size - start == 1) + return 1; + + if (start >= size - 2) { + if (store->cmp(dst[size - 2], dst[size - 1]) > 0) { + void *tmp = dst[size - 1]; + dst[size - 1] = dst[size - 2]; + dst[size - 2] = tmp; + } + + return 2; + } + + if (store->cmp(dst[start], dst[start + 1]) <= 0) { + while (curr < size - 1 && store->cmp(dst[curr - 1], dst[curr]) <= 0) + curr++; + + return curr - start; + } else { + while (curr < size - 1 && store->cmp(dst[curr - 1], dst[curr]) > 0) + curr++; + + /* reverse in-place */ + reverse_elements(dst, start, curr - 1); + return curr - start; + } +} + +static int compute_minrun(size_t n) +{ + int r = 0; + while (n >= 64) { + r |= n & 1; + n >>= 1; + } + return n + r; +} + +static int check_invariant(struct tsort_run *stack, int stack_curr) +{ + if (stack_curr < 2) + return 1; + + else if (stack_curr == 2) { + const int A = stack[stack_curr - 2].length; + const int B = stack[stack_curr - 1].length; + return (A > B); + } else { + const int A = stack[stack_curr - 3].length; + const int B = stack[stack_curr - 2].length; + const int C = stack[stack_curr - 1].length; + return !((A <= B + C) || (B <= C)); + } +} + +static int resize(struct tsort_store *store, size_t new_size) +{ + if (store->alloc < new_size) { + void **tempstore = realloc(store->storage, new_size * sizeof(void *)); + + /** + * Do not propagate on OOM; this will abort the sort and + * leave the array unsorted, but no error code will be + * raised + */ + if (tempstore == NULL) + return -1; + + store->storage = tempstore; + store->alloc = new_size; + } + + return 0; +} + +static void merge(void **dst, const struct tsort_run *stack, int stack_curr, struct tsort_store *store) +{ + const ssize_t A = stack[stack_curr - 2].length; + const ssize_t B = stack[stack_curr - 1].length; + const ssize_t curr = stack[stack_curr - 2].start; + + void **storage; + ssize_t i, j, k; + + if (resize(store, MIN(A, B)) < 0) + return; + + storage = store->storage; + + /* left merge */ + if (A < B) { + memcpy(storage, &dst[curr], A * sizeof(void *)); + i = 0; + j = curr + A; + + for (k = curr; k < curr + A + B; k++) { + if ((i < A) && (j < curr + A + B)) { + if (store->cmp(storage[i], dst[j]) <= 0) + dst[k] = storage[i++]; + else + dst[k] = dst[j++]; + } else if (i < A) { + dst[k] = storage[i++]; + } else + dst[k] = dst[j++]; + } + } else { + memcpy(storage, &dst[curr + A], B * sizeof(void *)); + i = B - 1; + j = curr + A - 1; + + for (k = curr + A + B - 1; k >= curr; k--) { + if ((i >= 0) && (j >= curr)) { + if (store->cmp(dst[j], storage[i]) > 0) + dst[k] = dst[j--]; + else + dst[k] = storage[i--]; + } else if (i >= 0) + dst[k] = storage[i--]; + else + dst[k] = dst[j--]; + } + } +} + +static ssize_t collapse(void **dst, struct tsort_run *stack, ssize_t stack_curr, struct tsort_store *store, ssize_t size) +{ + ssize_t A, B, C; + + while (1) { + /* if the stack only has one thing on it, we are done with the collapse */ + if (stack_curr <= 1) + break; + + /* if this is the last merge, just do it */ + if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) { + merge(dst, stack, stack_curr, store); + stack[0].length += stack[1].length; + stack_curr--; + break; + } + + /* check if the invariant is off for a stack of 2 elements */ + else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) { + merge(dst, stack, stack_curr, store); + stack[0].length += stack[1].length; + stack_curr--; + break; + } + else if (stack_curr == 2) + break; + + A = stack[stack_curr - 3].length; + B = stack[stack_curr - 2].length; + C = stack[stack_curr - 1].length; + + /* check first invariant */ + if (A <= B + C) { + if (A < C) { + merge(dst, stack, stack_curr - 1, store); + stack[stack_curr - 3].length += stack[stack_curr - 2].length; + stack[stack_curr - 2] = stack[stack_curr - 1]; + stack_curr--; + } else { + merge(dst, stack, stack_curr, store); + stack[stack_curr - 2].length += stack[stack_curr - 1].length; + stack_curr--; + } + } else if (B <= C) { + merge(dst, stack, stack_curr, store); + stack[stack_curr - 2].length += stack[stack_curr - 1].length; + stack_curr--; + } else + break; + } + + return stack_curr; +} + +#define PUSH_NEXT() do {\ + len = count_run(dst, curr, size, store);\ + run = minrun;\ + if (run < minrun) run = minrun;\ + if (run > (ssize_t)size - curr) run = size - curr;\ + if (run > len) {\ + bisort(&dst[curr], len, run, cmp);\ + len = run;\ + }\ + run_stack[stack_curr++] = (struct tsort_run) {curr, len};\ + curr += len;\ + if (curr == (ssize_t)size) {\ + /* finish up */ \ + while (stack_curr > 1) { \ + merge(dst, run_stack, stack_curr, store); \ + run_stack[stack_curr - 2].length += run_stack[stack_curr - 1].length; \ + stack_curr--; \ + } \ + if (store->storage != NULL) {\ + free(store->storage);\ + store->storage = NULL;\ + }\ + return;\ + }\ +}\ +while (0) + + +void git__tsort(void **dst, size_t size, cmp_ptr_t cmp) +{ + struct tsort_store _store, *store = &_store; + struct tsort_run run_stack[128]; + + ssize_t stack_curr = 0; + ssize_t len, run; + ssize_t curr = 0; + ssize_t minrun; + + if (size < 64) { + bisort(dst, 1, size, cmp); + return; + } + + /* compute the minimum run length */ + minrun = compute_minrun(size); + + /* temporary storage for merges */ + store->alloc = 0; + store->storage = NULL; + store->cmp = cmp; + + PUSH_NEXT(); + PUSH_NEXT(); + PUSH_NEXT(); + + while (1) { + if (!check_invariant(run_stack, stack_curr)) { + stack_curr = collapse(dst, run_stack, stack_curr, store, size); + continue; + } + + PUSH_NEXT(); + } +} |