From fee6ab18e7e9a38203bf8eb95d114ac837578aa7 Mon Sep 17 00:00:00 2001 From: Alexander Gavrilov Date: Mon, 5 Nov 2018 19:14:40 +0300 Subject: BLI_heap: implement a limited but faster version of heap. If the user only needs insertion and removal from top, there is no need to allocate and manage separate HeapNode objects: the data can be stored directly in the main tree array. This measured a 24% FPS increase on a ~50% heap-heavy workload. Reviewers: brecht Differential Revision: https://developer.blender.org/D3898 --- source/blender/blenlib/BLI_heap.h | 15 +++ source/blender/blenlib/intern/BLI_heap.c | 199 +++++++++++++++++++++++++++++++ source/blender/blenlib/intern/astar.c | 24 ++-- 3 files changed, 226 insertions(+), 12 deletions(-) (limited to 'source/blender/blenlib') diff --git a/source/blender/blenlib/BLI_heap.h b/source/blender/blenlib/BLI_heap.h index 35c8df3075c..08adb0d538c 100644 --- a/source/blender/blenlib/BLI_heap.h +++ b/source/blender/blenlib/BLI_heap.h @@ -54,4 +54,19 @@ void *BLI_heap_node_ptr(const HeapNode *heap) ATTR_WARN_UNUSED_RESULT /* only for gtest */ bool BLI_heap_is_valid(const Heap *heap); +/* Simplified version of the heap that only supports insertion and removal from top. 
*/ + +struct FastHeap; +typedef struct FastHeap FastHeap; + +FastHeap *BLI_fastheap_new_ex(unsigned int tot_reserve) ATTR_WARN_UNUSED_RESULT; +FastHeap *BLI_fastheap_new(void) ATTR_WARN_UNUSED_RESULT; +void BLI_fastheap_clear(FastHeap *heap, HeapFreeFP ptrfreefp) ATTR_NONNULL(1); +void BLI_fastheap_free(FastHeap *heap, HeapFreeFP ptrfreefp) ATTR_NONNULL(1); +void BLI_fastheap_insert(FastHeap *heap, float value, void *ptr) ATTR_NONNULL(1); +bool BLI_fastheap_is_empty(const FastHeap *heap) ATTR_NONNULL(1); +unsigned int BLI_fastheap_len(const FastHeap *heap) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1); +float BLI_fastheap_top_value(const FastHeap *heap) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1); +void *BLI_fastheap_pop_min(FastHeap *heap) ATTR_NONNULL(1); + #endif /* __BLI_HEAP_H__ */ diff --git a/source/blender/blenlib/intern/BLI_heap.c b/source/blender/blenlib/intern/BLI_heap.c index c785c1ac012..cef3eb2dafb 100644 --- a/source/blender/blenlib/intern/BLI_heap.c +++ b/source/blender/blenlib/intern/BLI_heap.c @@ -73,6 +73,17 @@ struct Heap { } nodes; }; +typedef struct FastHeapNode { + float value; + void *ptr; +} FastHeapNode; + +struct FastHeap { + uint size; + uint bufsize; + FastHeapNode *tree; +}; + /** \name Internal Functions * \{ */ @@ -441,3 +452,191 @@ bool BLI_heap_is_valid(const Heap *heap) } /** \} */ + +/** \name FastHeap Internal Functions + * \{ */ + +static void fastheap_down(FastHeap *heap, uint start_i, const FastHeapNode *init) +{ +#if 1 + /* The compiler isn't smart enough to realize that all computations + * using index here can be modified to work with byte offset. 
*/ + uint8_t *const tree_buf = (uint8_t*)heap->tree; + +#define OFFSET(i) (i * (uint)sizeof(FastHeapNode)) +#define NODE(offset) (*(FastHeapNode*)(tree_buf + (offset))) +#else + FastHeapNode *const tree = heap->tree; + +#define OFFSET(i) (i) +#define NODE(i) tree[i] +#endif + +#define HEAP_LEFT_OFFSET(i) (((i) << 1) + OFFSET(1)) + + const uint size = OFFSET(heap->size); + + /* Pull the active node values into locals. This allows spilling + * the data from registers instead of literally swapping nodes. */ + float active_val = init->value; + void *active_ptr = init->ptr; + + /* Prepare the first iteration and spill value. */ + uint i = OFFSET(start_i); + + NODE(i).value = active_val; + + for (;;) { + const uint l = HEAP_LEFT_OFFSET(i); + const uint r = l + OFFSET(1); /* right */ + + /* Find the child with the smallest value. */ + uint smallest = i; + + if (LIKELY(l < size) && NODE(l).value < active_val) { + smallest = l; + } + if (LIKELY(r < size) && NODE(r).value < NODE(smallest).value) { + smallest = r; + } + + if (UNLIKELY(smallest == i)) { + break; + } + + /* Move the smallest child into the current node. + * Skip padding: for some reason that makes it faster here. */ + NODE(i).value = NODE(smallest).value; + NODE(i).ptr = NODE(smallest).ptr; + + /* Proceed to next iteration and spill value. */ + i = smallest; + NODE(i).value = active_val; + } + + /* Spill the pointer into the final position of the node. 
*/ + NODE(i).ptr = active_ptr; + +#undef NODE +#undef OFFSET +#undef HEAP_LEFT_OFFSET +} + +static void fastheap_up(FastHeap *heap, uint i, float active_val, void *active_ptr) +{ + FastHeapNode *const tree = heap->tree; + + while (LIKELY(i > 0)) { + const uint p = HEAP_PARENT(i); + + if (active_val >= tree[p].value) { + break; + } + + tree[i] = tree[p]; + i = p; + } + + tree[i].value = active_val; + tree[i].ptr = active_ptr; +} + +/** \} */ + +/** \name Public FastHeap API + * \{ */ + +/** + * Creates a new fast heap, which only supports insertion and removal from top. + * + * \note Use when the size of the heap is known in advance. + */ +FastHeap *BLI_fastheap_new_ex(uint tot_reserve) +{ + FastHeap *heap = MEM_mallocN(sizeof(FastHeap), __func__); + /* ensure we have at least one so we can keep doubling it */ + heap->size = 0; + heap->bufsize = MAX2(1u, tot_reserve); + heap->tree = MEM_mallocN(heap->bufsize * sizeof(FastHeapNode), "BLIFastHeapTree"); + return heap; +} + +FastHeap *BLI_fastheap_new(void) +{ + return BLI_fastheap_new_ex(1); +} + +void BLI_fastheap_free(FastHeap *heap, HeapFreeFP ptrfreefp) +{ + if (ptrfreefp) { + for (uint i = 0; i < heap->size; i++) { + ptrfreefp(heap->tree[i].ptr); + } + } + + MEM_freeN(heap->tree); + MEM_freeN(heap); +} + +void BLI_fastheap_clear(FastHeap *heap, HeapFreeFP ptrfreefp) +{ + if (ptrfreefp) { + for (uint i = 0; i < heap->size; i++) { + ptrfreefp(heap->tree[i].ptr); + } + } + + heap->size = 0; +} + +/** + * Insert heap node with a value (often a 'cost') and pointer into the heap, + * duplicate values are allowed. 
+ */ +void BLI_fastheap_insert(FastHeap *heap, float value, void *ptr) +{ + if (UNLIKELY(heap->size >= heap->bufsize)) { + heap->bufsize *= 2; + heap->tree = MEM_reallocN(heap->tree, heap->bufsize * sizeof(*heap->tree)); + } + + fastheap_up(heap, heap->size++, value, ptr); +} + +bool BLI_fastheap_is_empty(const FastHeap *heap) +{ + return (heap->size == 0); +} + +uint BLI_fastheap_len(const FastHeap *heap) +{ + return heap->size; +} + +/** + * Return the lowest value of the heap. + */ +float BLI_fastheap_top_value(const FastHeap *heap) +{ + BLI_assert(heap->size != 0); + + return heap->tree[0].value; +} + +/** + * Pop the top node off the heap and return its pointer. + */ +void *BLI_fastheap_pop_min(FastHeap *heap) +{ + BLI_assert(heap->size != 0); + + void *ptr = heap->tree[0].ptr; + + if (--heap->size) { + fastheap_down(heap, 0, &heap->tree[heap->size]); + } + + return ptr; +} + +/** \} */ diff --git a/source/blender/blenlib/intern/astar.c b/source/blender/blenlib/intern/astar.c index 86c1faad096..54c80def972 100644 --- a/source/blender/blenlib/intern/astar.c +++ b/source/blender/blenlib/intern/astar.c @@ -206,7 +206,7 @@ bool BLI_astar_graph_solve( BLI_AStarGraph *as_graph, const int node_index_src, const int node_index_dst, astar_f_cost f_cost_cb, BLI_AStarSolution *r_solution, const int max_steps) { - Heap *todo_nodes; + FastHeap *todo_nodes; BLI_bitmap *done_nodes = r_solution->done_nodes; int *prev_nodes = r_solution->prev_nodes; @@ -225,13 +225,13 @@ bool BLI_astar_graph_solve( return true; } - todo_nodes = BLI_heap_new(); - BLI_heap_insert(todo_nodes, - f_cost_cb(as_graph, r_solution, NULL, -1, node_index_src, node_index_dst), - POINTER_FROM_INT(node_index_src)); + todo_nodes = BLI_fastheap_new(); + BLI_fastheap_insert(todo_nodes, + f_cost_cb(as_graph, r_solution, NULL, -1, node_index_src, node_index_dst), + POINTER_FROM_INT(node_index_src)); - while (!BLI_heap_is_empty(todo_nodes)) { - const int node_curr_idx = 
POINTER_AS_INT(BLI_heap_pop_min(todo_nodes)); + while (!BLI_fastheap_is_empty(todo_nodes)) { + const int node_curr_idx = POINTER_AS_INT(BLI_fastheap_pop_min(todo_nodes)); BLI_AStarGNode *node_curr = &as_graph->nodes[node_curr_idx]; LinkData *ld; @@ -249,7 +249,7 @@ bool BLI_astar_graph_solve( /* Success! Path found... */ r_solution->steps = g_steps[node_curr_idx] + 1; - BLI_heap_free(todo_nodes, NULL); + BLI_fastheap_free(todo_nodes, NULL); return true; } @@ -269,14 +269,14 @@ bool BLI_astar_graph_solve( g_steps[node_next_idx] = g_steps[node_curr_idx] + 1; /* We might have this node already in heap, but since this 'instance' will be evaluated first, * no problem. */ - BLI_heap_insert(todo_nodes, - f_cost_cb(as_graph, r_solution, link, node_curr_idx, node_next_idx, node_index_dst), - POINTER_FROM_INT(node_next_idx)); + BLI_fastheap_insert(todo_nodes, + f_cost_cb(as_graph, r_solution, link, node_curr_idx, node_next_idx, node_index_dst), + POINTER_FROM_INT(node_next_idx)); } } } } - BLI_heap_free(todo_nodes, NULL); + BLI_fastheap_free(todo_nodes, NULL); return false; } -- cgit v1.2.3