-rw-r--r--   source/blender/blenlib/BLI_task.h                 |   4
-rw-r--r--   source/blender/blenlib/intern/task.c              | 204
-rw-r--r--   source/blender/depsgraph/intern/depsgraph_eval.cc |  26
3 files changed, 215 insertions(+), 19 deletions(-)
diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h
index 63a07957336..4cf1d8baaf0 100644
--- a/source/blender/blenlib/BLI_task.h
+++ b/source/blender/blenlib/BLI_task.h
@@ -84,7 +84,9 @@ void BLI_task_pool_push_ex(
         TaskPool *pool, TaskRunFunction run, void *taskdata,
         bool free_taskdata, TaskFreeFunction freedata, TaskPriority priority);
 void BLI_task_pool_push(TaskPool *pool, TaskRunFunction run,
-        void *taskdata, bool free_taskdata, TaskPriority priority);
+        void *taskdata, bool free_taskdata, TaskPriority priority);
+void BLI_task_pool_push_from_thread(TaskPool *pool, TaskRunFunction run,
+        void *taskdata, bool free_taskdata, TaskPriority priority, int thread_id);
 
 /* work and wait until all tasks are done */
 void BLI_task_pool_work_and_wait(TaskPool *pool);
diff --git a/source/blender/blenlib/intern/task.c b/source/blender/blenlib/intern/task.c
index b657120244b..9fefd715d76 100644
--- a/source/blender/blenlib/intern/task.c
+++ b/source/blender/blenlib/intern/task.c
@@ -35,8 +35,17 @@
 
 #include "atomic_ops.h"
 
+/* Define this to enable some detailed statistics printing. */
+#undef DEBUG_STATS
+
 /* Types */
 
+/* Number of per-thread pre-allocated tasks.
+ *
+ * For more details see the description of TaskMemPool.
+ */
+#define MEMPOOL_SIZE 256
+
 typedef struct Task {
     struct Task *next, *prev;
 
@@ -47,6 +56,50 @@ typedef struct Task {
     TaskPool *pool;
 } Task;
 
+/* This is a per-thread storage of pre-allocated tasks.
+ *
+ * The idea behind this is simple: reduce the amount of malloc() calls when
+ * pushing a new task to the pool. This is done by keeping memory from tasks
+ * which have finished already, so instead of freeing that memory we put it
+ * into the pool for later re-use.
+ *
+ * The tricky part here is to avoid any inter-thread synchronization, hence no
+ * lock must exist around this pool. The pool becomes the owner of the pointer
+ * from the freed task, and only the corresponding thread is able to use this
+ * pool (no memory stealing and such).
+ *
+ * This leads to the following use of the pool:
+ *
+ * - task_push() should provide the ID of the thread the task is being pushed
+ *   from.
+ *
+ * - The task allocation function checks the corresponding memory pool; if
+ *   there is any memory in there it marks the memory as re-used, removes it
+ *   from the pool and uses that memory for the new task.
+ *
+ *   At this moment the task queue owns the memory.
+ *
+ * - When the task is done and task_free() is called, the memory is put into
+ *   the pool which corresponds to the thread which handled the task.
+ */
+typedef struct TaskMemPool {
+    /* Number of pre-allocated tasks in the pool. */
+    int num_tasks;
+    /* Pre-allocated task memory pointers. */
+    Task *tasks[MEMPOOL_SIZE];
+} TaskMemPool;
+
+#ifdef DEBUG_STATS
+typedef struct TaskMemPoolStats {
+    /* Number of allocations. */
+    int num_alloc;
+    /* Number of avoided allocations (pointer was re-used from the pool). */
+    int num_reuse;
+    /* Number of pointers discarded due to pool saturation. */
+    int num_discard;
+} TaskMemPoolStats;
+#endif
+
 struct TaskPool {
     TaskScheduler *scheduler;
 
@@ -62,14 +115,32 @@ struct TaskPool {
 
     volatile bool do_cancel;
 
-    /* If set, this pool may never be work_and_wait'ed, which means TaskScheduler has to use its special
-     * background fallback thread in case we are in single-threaded situation. */
+    /* If set, this pool may never be work_and_wait'ed, which means TaskScheduler
+     * has to use its special background fallback thread in case we are in a
+     * single-threaded situation.
+     */
     bool run_in_background;
+
+    /* This pool is used for caching task pointers for thread id 0.
+     * It either points to the global scheduler's task_mempool[0] if the
+     * pool is handled from the main thread, or points to task_mempool_local
+     * otherwise.
+     *
+     * This way we solve possible threading conflicts when accessing the same
+     * global memory pool from multiple threads from which work_and_wait() is
+     * called.
+     */
+    TaskMemPool *task_mempool;
+    TaskMemPool task_mempool_local;
+
+#ifdef DEBUG_STATS
+    TaskMemPoolStats *mempool_stats;
+#endif
 };
 
 struct TaskScheduler {
     pthread_t *threads;
     struct TaskThread *task_threads;
+    TaskMemPool *task_mempool;
     int num_threads;
     bool background_thread_only;
 
@@ -98,6 +169,63 @@ static void task_data_free(Task *task, const int thread_id)
     }
 }
 
+BLI_INLINE TaskMemPool *get_task_mempool(TaskPool *pool, const int thread_id)
+{
+    if (thread_id == 0) {
+        return pool->task_mempool;
+    }
+    return &pool->scheduler->task_mempool[thread_id];
+}
+
+static Task *task_alloc(TaskPool *pool, const int thread_id)
+{
+    assert(thread_id <= pool->scheduler->num_threads);
+    if (thread_id != -1) {
+        assert(thread_id >= 0);
+        TaskMemPool *mem_pool = get_task_mempool(pool, thread_id);
+        /* Try to re-use task memory from thread-local storage. */
+        if (mem_pool->num_tasks > 0) {
+            --mem_pool->num_tasks;
+            /* Success! We've just avoided a task allocation. */
+#ifdef DEBUG_STATS
+            pool->mempool_stats[thread_id].num_reuse++;
+#endif
+            return mem_pool->tasks[mem_pool->num_tasks];
+        }
+        /* We are doomed to allocate new task data. */
+#ifdef DEBUG_STATS
+        pool->mempool_stats[thread_id].num_alloc++;
+#endif
+    }
+    return MEM_mallocN(sizeof(Task), "New task");
+}
+
+static void task_free(TaskPool *pool, Task *task, const int thread_id)
+{
+    task_data_free(task, thread_id);
+    assert(thread_id >= 0);
+    assert(thread_id <= pool->scheduler->num_threads);
+    TaskMemPool *mem_pool = get_task_mempool(pool, thread_id);
+    if (mem_pool->num_tasks < MEMPOOL_SIZE - 1) {
+        /* Successfully allowed the task to be re-used later. */
+        mem_pool->tasks[mem_pool->num_tasks] = task;
+        ++mem_pool->num_tasks;
+    }
+    else {
+        /* Local storage is saturated; no other way than to just discard
+         * the memory.
+         *
+         * TODO(sergey): We could perhaps store such a pointer in the global
+         * scheduler pool; maybe it'd be faster than discarding and
+         * allocating again.
+         */
+        MEM_freeN(task);
+#ifdef DEBUG_STATS
+        pool->mempool_stats[thread_id].num_discard++;
+#endif
+    }
+}
+
 /* Task Scheduler */
 
 static void task_pool_num_decrease(TaskPool *pool, size_t done)
@@ -196,8 +324,7 @@ static void *task_scheduler_thread_run(void *thread_p)
         task->run(pool, task->taskdata, thread_id);
 
         /* delete task */
-        task_data_free(task, thread_id);
-        MEM_freeN(task);
+        task_free(pool, task, thread_id);
 
         /* notify pool task was done */
         task_pool_num_decrease(pool, 1);
@@ -249,6 +376,9 @@ TaskScheduler *BLI_task_scheduler_create(int num_threads)
                 fprintf(stderr, "TaskScheduler failed to launch thread %d/%d\n", i, num_threads);
             }
         }
+
+        scheduler->task_mempool = MEM_callocN(sizeof(*scheduler->task_mempool) * (num_threads + 1),
+                                              "TaskScheduler task_mempool");
     }
 
     return scheduler;
@@ -281,6 +411,16 @@ void BLI_task_scheduler_free(TaskScheduler *scheduler)
         MEM_freeN(scheduler->task_threads);
     }
 
+    /* Delete task memory pool */
+    if (scheduler->task_mempool) {
+        for (int i = 0; i <= scheduler->num_threads; ++i) {
+            for (int j = 0; j < scheduler->task_mempool[i].num_tasks; ++j) {
+                MEM_freeN(scheduler->task_mempool[i].tasks[j]);
+            }
+        }
+        MEM_freeN(scheduler->task_mempool);
+    }
+
     /* delete leftover tasks */
     for (task = scheduler->queue.first; task; task = task->next) {
         task_data_free(task, 0);
@@ -372,6 +512,20 @@ static TaskPool *task_pool_create_ex(TaskScheduler *scheduler, void *userdata, c
     pool->userdata = userdata;
     BLI_mutex_init(&pool->user_mutex);
 
+    if (BLI_thread_is_main()) {
+        pool->task_mempool = scheduler->task_mempool;
+    }
+    else {
+        pool->task_mempool = &pool->task_mempool_local;
+        pool->task_mempool_local.num_tasks = 0;
+    }
+
+#ifdef DEBUG_STATS
+    pool->mempool_stats =
+            MEM_callocN(sizeof(*pool->mempool_stats) * (scheduler->num_threads + 1),
+                        "per-taskpool mempool stats");
+#endif
+
     /* Ensure malloc will go fine from threads,
      *
      * This is needed because we could be in main thread here
@@ -417,16 +571,36 @@ void BLI_task_pool_free(TaskPool *pool)
 
     BLI_mutex_end(&pool->user_mutex);
 
+    /* Free the local memory pool; those pointers are lost forever.
+     */
+    if (pool->task_mempool == &pool->task_mempool_local) {
+        for (int i = 0; i < pool->task_mempool_local.num_tasks; i++) {
+            MEM_freeN(pool->task_mempool_local.tasks[i]);
+        }
+    }
+
+#ifdef DEBUG_STATS
+    printf("Thread ID    Allocated   Reused   Discarded\n");
+    for (int i = 0; i < pool->scheduler->num_threads + 1; ++i) {
+        printf("%02d           %05d       %05d    %05d\n",
+               i,
+               pool->mempool_stats[i].num_alloc,
+               pool->mempool_stats[i].num_reuse,
+               pool->mempool_stats[i].num_discard);
    }
+    MEM_freeN(pool->mempool_stats);
+#endif
+
     MEM_freeN(pool);
 
     BLI_end_threaded_malloc();
 }
 
-void BLI_task_pool_push_ex(
+static void task_pool_push(
         TaskPool *pool, TaskRunFunction run, void *taskdata,
-        bool free_taskdata, TaskFreeFunction freedata, TaskPriority priority)
+        bool free_taskdata, TaskFreeFunction freedata, TaskPriority priority,
+        int thread_id)
 {
-    Task *task = MEM_mallocN(sizeof(Task), "Task");
+    Task *task = task_alloc(pool, thread_id);
 
     task->run = run;
     task->taskdata = taskdata;
@@ -437,12 +611,25 @@ void BLI_task_pool_push_ex(
     task_scheduler_push(pool->scheduler, task, priority);
 }
 
+void BLI_task_pool_push_ex(
+        TaskPool *pool, TaskRunFunction run, void *taskdata,
+        bool free_taskdata, TaskFreeFunction freedata, TaskPriority priority)
+{
+    task_pool_push(pool, run, taskdata, free_taskdata, freedata, priority, -1);
+}
+
 void BLI_task_pool_push(
         TaskPool *pool, TaskRunFunction run, void *taskdata, bool free_taskdata, TaskPriority priority)
 {
     BLI_task_pool_push_ex(pool, run, taskdata, free_taskdata, NULL, priority);
 }
 
+void BLI_task_pool_push_from_thread(TaskPool *pool, TaskRunFunction run,
+        void *taskdata, bool free_taskdata, TaskPriority priority, int thread_id)
+{
+    task_pool_push(pool, run, taskdata, free_taskdata, NULL, priority, thread_id);
+}
+
 void BLI_task_pool_work_and_wait(TaskPool *pool)
 {
     TaskScheduler *scheduler = pool->scheduler;
@@ -482,8 +669,7 @@ void BLI_task_pool_work_and_wait(TaskPool *pool)
             work_task->run(pool, work_task->taskdata, 0);
 
             /* delete task */
-            task_data_free(task, 0);
-            MEM_freeN(work_task);
+            task_free(pool, task, 0);
 
             /* notify pool task was done */
             task_pool_num_decrease(pool, 1);
diff --git a/source/blender/depsgraph/intern/depsgraph_eval.cc b/source/blender/depsgraph/intern/depsgraph_eval.cc
index adacbb6cb1a..e96ee613f37 100644
--- a/source/blender/depsgraph/intern/depsgraph_eval.cc
+++ b/source/blender/depsgraph/intern/depsgraph_eval.cc
@@ -122,7 +122,8 @@ void DEG_evaluation_context_free(EvaluationContext *eval_ctx)
 static void schedule_children(TaskPool *pool,
                               Depsgraph *graph,
                               OperationDepsNode *node,
-                              const int layers);
+                              const int layers,
+                              const int thread_id);
 
 struct DepsgraphEvalState {
     EvaluationContext *eval_ctx;
@@ -132,7 +133,7 @@ struct DepsgraphEvalState {
 
 static void deg_task_run_func(TaskPool *pool,
                               void *taskdata,
-                              int UNUSED(threadid))
+                              int thread_id)
 {
     DepsgraphEvalState *state = (DepsgraphEvalState *)BLI_task_pool_userdata(pool);
     OperationDepsNode *node = (OperationDepsNode *)taskdata;
@@ -161,7 +162,7 @@ static void deg_task_run_func(TaskPool *pool,
                                   node,
                                   end_time - start_time);
 
-    schedule_children(pool, state->graph, node, state->layers);
+    schedule_children(pool, state->graph, node, state->layers, thread_id);
 }
 
 static void calculate_pending_parents(Depsgraph *graph, int layers)
@@ -235,7 +236,8 @@ static void calculate_eval_priority(OperationDepsNode *node)
  * after a task has been completed.
  */
 static void schedule_node(TaskPool *pool, Depsgraph *graph, int layers,
-                          OperationDepsNode *node, bool dec_parents)
+                          OperationDepsNode *node, bool dec_parents,
+                          const int thread_id)
 {
     int id_layers = node->owner->owner->layers;
 
@@ -252,11 +254,16 @@ static void schedule_node(TaskPool *pool, Depsgraph *graph, int layers,
         if (!is_scheduled) {
             if (node->is_noop()) {
                 /* skip NOOP node, schedule children right away */
-                schedule_children(pool, graph, node, layers);
+                schedule_children(pool, graph, node, layers, thread_id);
             }
             else {
                 /* children are scheduled once this task is completed */
-                BLI_task_pool_push(pool, deg_task_run_func, node, false, TASK_PRIORITY_LOW);
+                BLI_task_pool_push_from_thread(pool,
+                                               deg_task_run_func,
+                                               node,
+                                               false,
+                                               TASK_PRIORITY_LOW,
+                                               thread_id);
             }
         }
     }
@@ -272,14 +279,15 @@ static void schedule_graph(TaskPool *pool,
          ++it)
     {
         OperationDepsNode *node = *it;
-        schedule_node(pool, graph, layers, node, false);
+        schedule_node(pool, graph, layers, node, false, 0);
     }
 }
 
 static void schedule_children(TaskPool *pool,
                               Depsgraph *graph,
                               OperationDepsNode *node,
-                              const int layers)
+                              const int layers,
+                              const int thread_id)
 {
     DEPSNODE_RELATIONS_ITER_BEGIN(node->outlinks, rel)
     {
@@ -289,7 +297,7 @@ static void schedule_children(TaskPool *pool,
             /* Happens when having cyclic dependencies. */
             continue;
         }
-        schedule_node(pool, graph, layers, child, (rel->flag & DEPSREL_FLAG_CYCLIC) == 0);
+        schedule_node(pool, graph, layers, child, (rel->flag & DEPSREL_FLAG_CYCLIC) == 0, thread_id);
     }
     DEPSNODE_RELATIONS_ITER_END;
 }
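To make the lock-free re-use scheme above easier to follow, here is a minimal standalone sketch of the same idea outside of Blender: each thread owns a fixed-size stack of recycled allocations, so alloc/free on the same thread never touch a lock. All names here (DemoTask, ThreadLocalCache, demo_alloc, demo_free) are hypothetical and only mirror the structure of task_alloc()/task_free() in the patch.

/* Minimal sketch of a per-thread free-list allocator, mirroring the
 * TaskMemPool idea from the patch. Hypothetical demo code, not Blender API. */
#include <stdio.h>
#include <stdlib.h>

#define CACHE_SIZE 256   /* Counterpart of MEMPOOL_SIZE. */
#define NUM_THREADS 8

typedef struct DemoTask {
    void (*run)(void *data);
    void *data;
} DemoTask;

/* One cache per thread; a thread only ever touches its own slot,
 * so no locking is required (counterpart of TaskMemPool). */
typedef struct ThreadLocalCache {
    int num_tasks;
    DemoTask *tasks[CACHE_SIZE];
} ThreadLocalCache;

static ThreadLocalCache caches[NUM_THREADS + 1];  /* +1 for the main thread, id 0. */

static DemoTask *demo_alloc(int thread_id)
{
    ThreadLocalCache *cache = &caches[thread_id];
    if (cache->num_tasks > 0) {
        /* Re-use memory from a task this thread finished earlier. */
        return cache->tasks[--cache->num_tasks];
    }
    /* Cache is empty: fall back to a real allocation. */
    return malloc(sizeof(DemoTask));
}

static void demo_free(int thread_id, DemoTask *task)
{
    ThreadLocalCache *cache = &caches[thread_id];
    if (cache->num_tasks < CACHE_SIZE) {
        /* Keep the pointer for later re-use instead of freeing it. */
        cache->tasks[cache->num_tasks++] = task;
    }
    else {
        /* Cache is saturated: give the memory back to the allocator. */
        free(task);
    }
}

int main(void)
{
    /* Same thread allocates and frees: the second alloc re-uses the pointer. */
    DemoTask *a = demo_alloc(0);
    demo_free(0, a);
    DemoTask *b = demo_alloc(0);
    printf("re-used: %s\n", a == b ? "yes" : "no");
    demo_free(0, b);
    /* Flush the cache so the process exits leak-free. */
    while (caches[0].num_tasks > 0) {
        free(caches[0].tasks[--caches[0].num_tasks]);
    }
    return 0;
}

One small difference worth noting: task_free() in the patch only stores a pointer while num_tasks < MEMPOOL_SIZE - 1, so one slot of the tasks[] array is never occupied; the sketch above uses the full capacity instead.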
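The depsgraph changes show the intended calling pattern: a task that spawns follow-up work passes along the thread_id it was given, so the new Task's memory comes from that thread's pool. Below is a hedged usage sketch of the same pattern; count_down and demo are hypothetical, the BLI_task_pool_push* signatures and TASK_PRIORITY_LOW come from the patch, and BLI_task_pool_create's (scheduler, userdata) signature is assumed from the surrounding code of this era.

#include <stdint.h>
#include "BLI_task.h"

static void count_down(TaskPool *pool, void *taskdata, int thread_id)
{
    const int depth = (int)(intptr_t)taskdata;
    if (depth > 0) {
        /* Re-push from inside a worker: thread_id lets task_alloc() take
         * Task memory from this thread's pool instead of calling malloc(). */
        BLI_task_pool_push_from_thread(pool, count_down,
                                       (void *)(intptr_t)(depth - 1),
                                       false, TASK_PRIORITY_LOW, thread_id);
    }
}

void demo(TaskScheduler *scheduler)
{
    TaskPool *pool = BLI_task_pool_create(scheduler, NULL);
    /* Initial push from outside a worker: no thread id is known, so the
     * plain push is used (internally it passes thread_id = -1 and the
     * allocation falls through to MEM_mallocN()). */
    BLI_task_pool_push(pool, count_down, (void *)(intptr_t)1000,
                       false, TASK_PRIORITY_LOW);
    BLI_task_pool_work_and_wait(pool);
    BLI_task_pool_free(pool);
}

This is also why schedule_graph() passes a literal 0: it runs on the thread that called work_and_wait(), which the pool treats as thread id 0, with the TaskPool.task_mempool pointer disambiguating the main thread from other callers.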