From fee6ab18e7e9a38203bf8eb95d114ac837578aa7 Mon Sep 17 00:00:00 2001 From: Alexander Gavrilov Date: Mon, 5 Nov 2018 19:14:40 +0300 Subject: BLI_heap: implement a limited but faster version of heap. If the user only needs insertion and removal from top, there is no need to allocate and manage separate HeapNode objects: the data can be stored directly in the main tree array. This measured a 24% FPS increase on a ~50% heap-heavy workload. Reviewers: brecht Differential Revision: https://developer.blender.org/D3898 --- source/blender/blenlib/intern/BLI_heap.c | 199 +++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) (limited to 'source/blender/blenlib/intern/BLI_heap.c') diff --git a/source/blender/blenlib/intern/BLI_heap.c b/source/blender/blenlib/intern/BLI_heap.c index c785c1ac012..cef3eb2dafb 100644 --- a/source/blender/blenlib/intern/BLI_heap.c +++ b/source/blender/blenlib/intern/BLI_heap.c @@ -73,6 +73,17 @@ struct Heap { } nodes; }; +typedef struct FastHeapNode { + float value; + void *ptr; +} FastHeapNode; + +struct FastHeap { + uint size; + uint bufsize; + FastHeapNode *tree; +}; + /** \name Internal Functions * \{ */ @@ -441,3 +452,191 @@ bool BLI_heap_is_valid(const Heap *heap) } /** \} */ + +/** \name FastHeap Internal Functions + * \{ */ + +static void fastheap_down(FastHeap *heap, uint start_i, const FastHeapNode *init) +{ +#if 1 + /* The compiler isn't smart enough to realize that all computations + * using index here can be modified to work with byte offset. */ + uint8_t *const tree_buf = (uint8_t*)heap->tree; + +#define OFFSET(i) (i * (uint)sizeof(FastHeapNode)) +#define NODE(offset) (*(FastHeapNode*)(tree_buf + (offset))) +#else + FastHeapNode *const tree = heap->tree; + +#define OFFSET(i) (i) +#define NODE(i) tree[i] +#endif + +#define HEAP_LEFT_OFFSET(i) (((i) << 1) + OFFSET(1)) + + const uint size = OFFSET(heap->size); + + /* Pull the active node values into locals. This allows spilling + * the data from registers instead of literally swapping nodes. */ + float active_val = init->value; + void *active_ptr = init->ptr; + + /* Prepare the first iteration and spill value. */ + uint i = OFFSET(start_i); + + NODE(i).value = active_val; + + for (;;) { + const uint l = HEAP_LEFT_OFFSET(i); + const uint r = l + OFFSET(1); /* right */ + + /* Find the child with the smallest value. */ + uint smallest = i; + + if (LIKELY(l < size) && NODE(l).value < active_val) { + smallest = l; + } + if (LIKELY(r < size) && NODE(r).value < NODE(smallest).value) { + smallest = r; + } + + if (UNLIKELY(smallest == i)) { + break; + } + + /* Move the smallest child into the current node. + * Skip padding: for some reason that makes it faster here. */ + NODE(i).value = NODE(smallest).value; + NODE(i).ptr = NODE(smallest).ptr; + + /* Proceed to next iteration and spill value. */ + i = smallest; + NODE(i).value = active_val; + } + + /* Spill the pointer into the final position of the node. */ + NODE(i).ptr = active_ptr; + +#undef NODE +#undef OFFSET +#undef HEAP_LEFT_OFFSET +} + +static void fastheap_up(FastHeap *heap, uint i, float active_val, void *active_ptr) +{ + FastHeapNode *const tree = heap->tree; + + while (LIKELY(i > 0)) { + const uint p = HEAP_PARENT(i); + + if (active_val >= tree[p].value) { + break; + } + + tree[i] = tree[p]; + i = p; + } + + tree[i].value = active_val; + tree[i].ptr = active_ptr; +} + +/** \} */ + +/** \name Public FastHeap API + * \{ */ + +/** + * Creates a new fast heap, which only supports insertion and removal from top. + * + * \note Use when the size of the heap is known in advance. + */ +FastHeap *BLI_fastheap_new_ex(uint tot_reserve) +{ + FastHeap *heap = MEM_mallocN(sizeof(FastHeap), __func__); + /* ensure we have at least one so we can keep doubling it */ + heap->size = 0; + heap->bufsize = MAX2(1u, tot_reserve); + heap->tree = MEM_mallocN(heap->bufsize * sizeof(FastHeapNode), "BLIFastHeapTree"); + return heap; +} + +FastHeap *BLI_fastheap_new(void) +{ + return BLI_fastheap_new_ex(1); +} + +void BLI_fastheap_free(FastHeap *heap, HeapFreeFP ptrfreefp) +{ + if (ptrfreefp) { + for (uint i = 0; i < heap->size; i++) { + ptrfreefp(heap->tree[i].ptr); + } + } + + MEM_freeN(heap->tree); + MEM_freeN(heap); +} + +void BLI_fastheap_clear(FastHeap *heap, HeapFreeFP ptrfreefp) +{ + if (ptrfreefp) { + for (uint i = 0; i < heap->size; i++) { + ptrfreefp(heap->tree[i].ptr); + } + } + + heap->size = 0; +} + +/** + * Insert heap node with a value (often a 'cost') and pointer into the heap, + * duplicate values are allowed. + */ +void BLI_fastheap_insert(FastHeap *heap, float value, void *ptr) +{ + if (UNLIKELY(heap->size >= heap->bufsize)) { + heap->bufsize *= 2; + heap->tree = MEM_reallocN(heap->tree, heap->bufsize * sizeof(*heap->tree)); + } + + fastheap_up(heap, heap->size++, value, ptr); +} + +bool BLI_fastheap_is_empty(const FastHeap *heap) +{ + return (heap->size == 0); +} + +uint BLI_fastheap_len(const FastHeap *heap) +{ + return heap->size; +} + +/** + * Return the lowest value of the heap. + */ +float BLI_fastheap_top_value(const FastHeap *heap) +{ + BLI_assert(heap->size != 0); + + return heap->tree[0].value; +} + +/** + * Pop the top node off the heap and return it's pointer. + */ +void *BLI_fastheap_pop_min(FastHeap *heap) +{ + BLI_assert(heap->size != 0); + + void *ptr = heap->tree[0].ptr; + + if (--heap->size) { + fastheap_down(heap, 0, &heap->tree[heap->size]); + } + + return ptr; +} + +/** \} */ -- cgit v1.2.3