diff options
Diffstat (limited to 'source/blender/blenlib')
-rw-r--r-- | source/blender/blenlib/BLI_task.h    |  3 |
-rw-r--r-- | source/blender/blenlib/intern/task.c | 55 |
2 files changed, 38 insertions(+), 20 deletions(-)
diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h index 8c22a25fe14..28da673ea97 100644 --- a/source/blender/blenlib/BLI_task.h +++ b/source/blender/blenlib/BLI_task.h @@ -106,7 +106,8 @@ void BLI_task_parallel_range_ex( int start, int stop, void *userdata, TaskParallelRangeFunc func, - const int range_threshold); + const int range_threshold, + const bool use_dynamic_scheduling); void BLI_task_parallel_range( int start, int stop, void *userdata, diff --git a/source/blender/blenlib/intern/task.c b/source/blender/blenlib/intern/task.c index 07c67f001f9..219ccb18d98 100644 --- a/source/blender/blenlib/intern/task.c +++ b/source/blender/blenlib/intern/task.c @@ -29,6 +29,7 @@ #include "MEM_guardedalloc.h" #include "BLI_listbase.h" +#include "BLI_math.h" #include "BLI_task.h" #include "BLI_threads.h" @@ -452,18 +453,21 @@ typedef struct ParallelRangeState { TaskParallelRangeFunc func; int iter; + int chunk_size; SpinLock lock; } ParallelRangeState; BLI_INLINE bool parallel_range_next_iter_get( - ParallelRangeState *state, - int *iter) + ParallelRangeState * __restrict state, + int * __restrict iter, int * __restrict count) { bool result = false; if (state->iter < state->stop) { BLI_spin_lock(&state->lock); if (state->iter < state->stop) { - *iter = state->iter++; + *count = min_ii(state->chunk_size, state->stop - state->iter); + *iter = state->iter; + state->iter += *count; result = true; } BLI_spin_unlock(&state->lock); @@ -472,14 +476,17 @@ BLI_INLINE bool parallel_range_next_iter_get( } static void parallel_range_func( - TaskPool *pool, + TaskPool * __restrict pool, void *UNUSED(taskdata), int UNUSED(threadid)) { - ParallelRangeState *state = BLI_task_pool_userdata(pool); - int iter; - while (parallel_range_next_iter_get(state, &iter)) { - state->func(state->userdata, iter); + ParallelRangeState * __restrict state = BLI_task_pool_userdata(pool); + int iter, count; + while (parallel_range_next_iter_get(state, &iter, &count)) { + int i; 
+ for (i = 0; i < count; ++i) { + state->func(state->userdata, iter + i); + } } } @@ -487,12 +494,13 @@ void BLI_task_parallel_range_ex( int start, int stop, void *userdata, TaskParallelRangeFunc func, - const int range_threshold) + const int range_threshold, + const bool use_dynamic_scheduling) { TaskScheduler *task_scheduler; TaskPool *task_pool; ParallelRangeState state; - int i; + int i, num_threads, num_tasks; BLI_assert(start < stop); @@ -506,21 +514,30 @@ void BLI_task_parallel_range_ex( return; } - BLI_spin_init(&state.lock); - state.start = start; - state.stop = stop; - state.userdata = userdata; - state.func = func; - state.iter = start; - task_scheduler = BLI_task_scheduler_get(); task_pool = BLI_task_pool_create(task_scheduler, &state); + num_threads = BLI_task_scheduler_num_threads(task_scheduler); /* The idea here is to prevent creating task for each of the loop iterations * and instead have tasks which are evenly distributed across CPU cores and * pull next iter to be crunched using the queue. */ - for (i = 0; i < 2 * BLI_task_scheduler_num_threads(task_scheduler); i++) { + num_tasks = num_threads * 2; + + BLI_spin_init(&state.lock); + state.start = start; + state.stop = stop; + state.userdata = userdata; + state.func = func; + state.iter = start; + if (use_dynamic_scheduling) { + state.chunk_size = 32; + } + else { + state.chunk_size = (stop - start) / (num_tasks); + } + + for (i = 0; i < num_tasks; i++) { BLI_task_pool_push(task_pool, parallel_range_func, NULL, false, @@ -538,5 +555,5 @@ void BLI_task_parallel_range( void *userdata, TaskParallelRangeFunc func) { - BLI_task_parallel_range_ex(start, stop, userdata, func, 64); + BLI_task_parallel_range_ex(start, stop, userdata, func, 64, false); } |