diff options
Diffstat (limited to 'source/blender/blenlib/intern/task.c')
-rw-r--r-- | source/blender/blenlib/intern/task.c | 61 |
1 files changed, 48 insertions, 13 deletions
/* Review notes (free text ahead of the `diff --git` header; not part of any hunk).
 *
 * What this patch does, as far as the hunks below show:
 *
 * - Adds task_parallel_range_calc_chunk_size(), which writes state->chunk_size.
 *   It first picks a base chunk size:
 *     - settings->min_iter_per_thread when that is > 0;
 *     - otherwise the whole range (tot_items) when the range has 1..255 items;
 *     - otherwise 128, 64 or 32 for num_tasks > 32, > 16, or anything else.
 *   Then, for TASK_SCHEDULING_STATIC it stores the larger of that base value and
 *   tot_items / num_tasks; for TASK_SCHEDULING_DYNAMIC it stores the base value as-is.
 *
 * - BLI_task_parallel_range() calls the new helper instead of its inline switch.
 *   The removed code used min_iter_per_thread only for static scheduling and a
 *   fixed 32 for dynamic scheduling.
 *
 * - At the use sites shown, ParallelRangeTLS becomes TaskParallelTLS and
 *   ParallelRangeSettings becomes TaskParallelSettings.
 *
 * NOTE(review): the switch in the new helper has no default case, so state->chunk_size
 * is not assigned for any scheduling_mode value other than the two listed.
 *
 * NOTE(review): the helper reads state->start and state->stop and divides by num_tasks;
 * the hunks do not show where those are set or that num_tasks is non-zero -- confirm
 * against the full file.
 */
diff --git a/source/blender/blenlib/intern/task.c b/source/blender/blenlib/intern/task.c
index 85d39f2f98e..bea38a232cc 100644
--- a/source/blender/blenlib/intern/task.c
+++ b/source/blender/blenlib/intern/task.c
@@ -1054,6 +1054,49 @@ typedef struct ParallelRangeState {
   int chunk_size;
 } ParallelRangeState;
 
+BLI_INLINE void task_parallel_range_calc_chunk_size(const TaskParallelSettings *settings,
+                                                    const int num_tasks,
+                                                    ParallelRangeState *state)
+{
+  const int tot_items = state->stop - state->start; /* Size of the range, read from state. */
+  int chunk_size = 0; /* Base value; every branch below assigns it before the assert. */
+
+  if (settings->min_iter_per_thread > 0) {
+    /* Already set by user, no need to do anything here. */
+    chunk_size = settings->min_iter_per_thread;
+  }
+  else {
+    /* Basic heuristic to avoid threading on low amount of items. We could make that limit
+     * configurable in settings too... */
+    if (tot_items > 0 && tot_items < 256) {
+      chunk_size = tot_items; /* One chunk covers the whole range. */
+    }
+    /* NOTE: The idea here is to compensate for rather measurable threading
+     * overhead caused by fetching tasks. With too many CPU threads we are starting
+     * to spend too much time in those overheads. */
+    else if (num_tasks > 32) {
+      chunk_size = 128;
+    }
+    else if (num_tasks > 16) {
+      chunk_size = 64;
+    }
+    else {
+      chunk_size = 32;
+    }
+  }
+
+  BLI_assert(chunk_size > 0);
+
+  switch (settings->scheduling_mode) { /* NOTE(review): no default case. */
+    case TASK_SCHEDULING_STATIC:
+      state->chunk_size = max_ii(chunk_size, tot_items / (num_tasks)); /* Never below the base value. */
+      break;
+    case TASK_SCHEDULING_DYNAMIC:
+      state->chunk_size = chunk_size;
+      break;
+  }
+}
+
 BLI_INLINE bool parallel_range_next_iter_get(ParallelRangeState *__restrict state,
                                              int *__restrict iter,
                                              int *__restrict count)
@@ -1069,7 +1112,7 @@ BLI_INLINE bool parallel_range_next_iter_get(ParallelRangeState *__restrict stat
 static void parallel_range_func(TaskPool *__restrict pool, void *userdata_chunk, int thread_id)
 {
   ParallelRangeState *__restrict state = BLI_task_pool_userdata(pool);
-  ParallelRangeTLS tls = {
+  TaskParallelTLS tls = {
       .thread_id = thread_id,
       .userdata_chunk = userdata_chunk,
   };
@@ -1085,7 +1128,7 @@ static void parallel_range_single_thread(const int start,
                                          int const stop,
                                          void *userdata,
                                          TaskParallelRangeFunc func,
-                                         const ParallelRangeSettings *settings)
+                                         const TaskParallelSettings *settings)
 {
   void *userdata_chunk = settings->userdata_chunk;
   const size_t userdata_chunk_size = settings->userdata_chunk_size;
@@ -1095,7 +1138,7 @@ static void parallel_range_single_thread(const int start,
     userdata_chunk_local = MALLOCA(userdata_chunk_size);
     memcpy(userdata_chunk_local, userdata_chunk, userdata_chunk_size);
   }
-  ParallelRangeTLS tls = {
+  TaskParallelTLS tls = {
       .thread_id = 0,
       .userdata_chunk = userdata_chunk_local,
   };
@@ -1118,7 +1161,7 @@ void BLI_task_parallel_range(const int start,
                              const int stop,
                              void *userdata,
                              TaskParallelRangeFunc func,
-                             const ParallelRangeSettings *settings)
+                             const TaskParallelSettings *settings)
 {
   TaskScheduler *task_scheduler;
   TaskPool *task_pool;
@@ -1162,16 +1205,8 @@ void BLI_task_parallel_range(const int start,
   state.userdata = userdata;
   state.func = func;
   state.iter = start;
-  switch (settings->scheduling_mode) {
-    case TASK_SCHEDULING_STATIC:
-      state.chunk_size = max_ii(settings->min_iter_per_thread, (stop - start) / (num_tasks));
-      break;
-    case TASK_SCHEDULING_DYNAMIC:
-      /* TODO(sergey): Make it configurable from min_iter_per_thread. */
-      state.chunk_size = 32;
-      break;
-  }
 
+  task_parallel_range_calc_chunk_size(settings, num_tasks, &state); /* Takes over from the switch removed above. */
   num_tasks = min_ii(num_tasks, max_ii(1, (stop - start) / state.chunk_size));
 
   if (num_tasks == 1) { |