diff options
-rw-r--r-- | source/blender/blenlib/BLI_task.h | 12 | ||||
-rw-r--r-- | source/blender/blenlib/intern/task_iterator.c | 204 | ||||
-rw-r--r-- | tests/gtests/blenlib/BLI_task_performance_test.cc | 86 | ||||
-rw-r--r-- | tests/gtests/blenlib/BLI_task_test.cc | 74 |
4 files changed, 0 insertions, 376 deletions
diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h index 2b5964d091c..42dd47266dc 100644 --- a/source/blender/blenlib/BLI_task.h +++ b/source/blender/blenlib/BLI_task.h @@ -201,18 +201,6 @@ void BLI_task_parallel_range(const int start, TaskParallelRangeFunc func, TaskParallelSettings *settings); -typedef struct TaskParallelRangePool TaskParallelRangePool; -struct TaskParallelRangePool *BLI_task_parallel_range_pool_init( - const struct TaskParallelSettings *settings); -void BLI_task_parallel_range_pool_push(struct TaskParallelRangePool *range_pool, - const int start, - const int stop, - void *userdata, - TaskParallelRangeFunc func, - const struct TaskParallelSettings *settings); -void BLI_task_parallel_range_pool_work_and_wait(struct TaskParallelRangePool *range_pool); -void BLI_task_parallel_range_pool_free(struct TaskParallelRangePool *range_pool); - /* This data is shared between all tasks, its access needs thread lock or similar protection. */ typedef struct TaskParallelIteratorStateShared { diff --git a/source/blender/blenlib/intern/task_iterator.c b/source/blender/blenlib/intern/task_iterator.c index 8a596879b2c..1189ec0d0c0 100644 --- a/source/blender/blenlib/intern/task_iterator.c +++ b/source/blender/blenlib/intern/task_iterator.c @@ -382,210 +382,6 @@ void BLI_task_parallel_range(const int start, } } -/** - * Initialize a task pool to parallelize several for loops at the same time. - * - * See public API doc of ParallelRangeSettings for description of all settings. - * Note that loop-specific settings (like 'tls' data or reduce/free functions) must be left NULL - * here. Only settings controlling how iteration is parallelized must be defined, as those will - * affect all loops added to that pool. 
- */ -TaskParallelRangePool *BLI_task_parallel_range_pool_init(const TaskParallelSettings *settings) -{ - TaskParallelRangePool *range_pool = MEM_callocN(sizeof(*range_pool), __func__); - - BLI_assert(settings->userdata_chunk == NULL); - BLI_assert(settings->func_reduce == NULL); - BLI_assert(settings->func_free == NULL); - range_pool->settings = MEM_mallocN(sizeof(*range_pool->settings), __func__); - *range_pool->settings = *settings; - - return range_pool; -} - -/** - * Add a loop task to the pool. It does not execute it at all. - * - * See public API doc of ParallelRangeSettings for description of all settings. - * Note that only 'tls'-related data are used here. - */ -void BLI_task_parallel_range_pool_push(TaskParallelRangePool *range_pool, - const int start, - const int stop, - void *userdata, - TaskParallelRangeFunc func, - const TaskParallelSettings *settings) -{ - BLI_assert(range_pool->pool == NULL); - - if (start == stop) { - return; - } - - BLI_assert(start < stop); - if (settings->userdata_chunk_size != 0) { - BLI_assert(settings->userdata_chunk != NULL); - } - - TaskParallelRangeState *state = MEM_callocN(sizeof(*state), __func__); - state->start = start; - state->stop = stop; - state->userdata_shared = userdata; - state->func = func; - state->iter_value = start; - state->initial_tls_memory = settings->userdata_chunk; - state->tls_data_size = settings->userdata_chunk_size; - state->func_reduce = settings->func_reduce; - state->func_free = settings->func_free; - - state->next = range_pool->parallel_range_states; - range_pool->parallel_range_states = state; -} - -static void parallel_range_func_finalize(TaskPool *__restrict pool, - void *v_state, - int UNUSED(thread_id)) -{ - TaskParallelRangePool *__restrict range_pool = BLI_task_pool_user_data(pool); - TaskParallelRangeState *state = v_state; - - for (int i = 0; i < range_pool->num_tasks; i++) { - void *tls_data = (char *)state->flatten_tls_storage + (state->tls_data_size * (size_t)i); - if 
(state->func_reduce != NULL) { - state->func_reduce(state->userdata_shared, state->initial_tls_memory, tls_data); - } - if (state->func_free != NULL) { - /* `func_free` should only free data that was created during execution of `func`. */ - state->func_free(state->userdata_shared, tls_data); - } - } -} - -/** - * Run all tasks pushed to the range_pool. - * - * Note that the range pool is re-usable (you may push new tasks into it and call this function - * again). - */ -void BLI_task_parallel_range_pool_work_and_wait(TaskParallelRangePool *range_pool) -{ - BLI_assert(range_pool->pool == NULL); - - /* If it's not enough data to be crunched, don't bother with tasks at all, - * do everything from the current thread. - */ - if (!range_pool->settings->use_threading) { - parallel_range_single_thread(range_pool); - return; - } - - TaskScheduler *task_scheduler = BLI_task_scheduler_get(); - const int num_threads = BLI_task_scheduler_num_threads(task_scheduler); - - /* The idea here is to prevent creating task for each of the loop iterations - * and instead have tasks which are evenly distributed across CPU cores and - * pull next iter to be crunched using the queue. - */ - int num_tasks = num_threads + 2; - range_pool->num_tasks = num_tasks; - - task_parallel_range_calc_chunk_size(range_pool); - range_pool->num_tasks = num_tasks = min_ii( - num_tasks, max_ii(1, range_pool->num_total_iters / range_pool->chunk_size)); - - if (num_tasks == 1) { - parallel_range_single_thread(range_pool); - return; - } - - /* We create all 'tls' data here in a single loop. 
*/ - for (TaskParallelRangeState *state = range_pool->parallel_range_states; state != NULL; - state = state->next) { - void *userdata_chunk = state->initial_tls_memory; - const size_t userdata_chunk_size = state->tls_data_size; - if (userdata_chunk_size == 0) { - BLI_assert(userdata_chunk == NULL); - continue; - } - - void *userdata_chunk_array = NULL; - state->flatten_tls_storage = userdata_chunk_array = MALLOCA(userdata_chunk_size * - (size_t)num_tasks); - for (int i = 0; i < num_tasks; i++) { - void *userdata_chunk_local = (char *)userdata_chunk_array + - (userdata_chunk_size * (size_t)i); - memcpy(userdata_chunk_local, userdata_chunk, userdata_chunk_size); - } - } - - TaskPool *task_pool = range_pool->pool = BLI_task_pool_create_suspended( - task_scheduler, range_pool, TASK_PRIORITY_HIGH); - - range_pool->current_state = range_pool->parallel_range_states; - const int thread_id = BLI_task_pool_creator_thread_id(task_pool); - for (int i = 0; i < num_tasks; i++) { - BLI_task_pool_push_from_thread( - task_pool, parallel_range_func, POINTER_FROM_INT(i), false, NULL, thread_id); - } - - BLI_task_pool_work_and_wait(task_pool); - - BLI_assert(atomic_cas_ptr((void **)&range_pool->current_state, NULL, NULL) == NULL); - - /* Finalize all tasks. */ - for (TaskParallelRangeState *state = range_pool->parallel_range_states; state != NULL; - state = state->next) { - const size_t userdata_chunk_size = state->tls_data_size; - void *userdata_chunk_array = state->flatten_tls_storage; - UNUSED_VARS_NDEBUG(userdata_chunk_array); - if (userdata_chunk_size == 0) { - BLI_assert(userdata_chunk_array == NULL); - continue; - } - - if (state->func_reduce != NULL || state->func_free != NULL) { - BLI_task_pool_push_from_thread( - task_pool, parallel_range_func_finalize, state, false, NULL, thread_id); - } - } - - BLI_task_pool_work_and_wait(task_pool); - BLI_task_pool_free(task_pool); - range_pool->pool = NULL; - - /* Cleanup all tasks. 
*/ - TaskParallelRangeState *state_next; - for (TaskParallelRangeState *state = range_pool->parallel_range_states; state != NULL; - state = state_next) { - state_next = state->next; - - const size_t userdata_chunk_size = state->tls_data_size; - void *userdata_chunk_array = state->flatten_tls_storage; - if (userdata_chunk_size != 0) { - BLI_assert(userdata_chunk_array != NULL); - MALLOCA_FREE(userdata_chunk_array, userdata_chunk_size * (size_t)num_tasks); - } - - MEM_freeN(state); - } - range_pool->parallel_range_states = NULL; -} - -/** - * Clear/free given \a range_pool. - */ -void BLI_task_parallel_range_pool_free(TaskParallelRangePool *range_pool) -{ - TaskParallelRangeState *state_next = NULL; - for (TaskParallelRangeState *state = range_pool->parallel_range_states; state != NULL; - state = state_next) { - state_next = state->next; - MEM_freeN(state); - } - MEM_freeN(range_pool->settings); - MEM_freeN(range_pool); -} - typedef struct TaskParallelIteratorState { void *userdata; TaskParallelIteratorIterFunc iter_func; diff --git a/tests/gtests/blenlib/BLI_task_performance_test.cc b/tests/gtests/blenlib/BLI_task_performance_test.cc index db596340464..06e832bdb5e 100644 --- a/tests/gtests/blenlib/BLI_task_performance_test.cc +++ b/tests/gtests/blenlib/BLI_task_performance_test.cc @@ -36,92 +36,6 @@ static uint gen_pseudo_random_number(uint num) return ((num & 255) << 6) + 1; } -/* *** Parallel iterations over range of indices. 
*** */ - -static void task_parallel_range_func(void *UNUSED(userdata), - int index, - const TaskParallelTLS *__restrict UNUSED(tls)) -{ - const uint limit = gen_pseudo_random_number((uint)index); - for (uint i = (uint)index; i < limit;) { - i += gen_pseudo_random_number(i); - } -} - -static void task_parallel_range_test_do(const char *id, - const int num_items, - const bool use_threads) -{ - TaskParallelSettings settings; - BLI_parallel_range_settings_defaults(&settings); - settings.use_threading = use_threads; - - double averaged_timing = 0.0; - for (int i = 0; i < NUM_RUN_AVERAGED; i++) { - const double init_time = PIL_check_seconds_timer(); - for (int j = 0; j < 10; j++) { - BLI_task_parallel_range(i + j, i + j + num_items, NULL, task_parallel_range_func, &settings); - } - averaged_timing += PIL_check_seconds_timer() - init_time; - } - - printf("\t%s: non-pooled done in %fs on average over %d runs\n", - id, - averaged_timing / NUM_RUN_AVERAGED, - NUM_RUN_AVERAGED); - - averaged_timing = 0.0; - for (int i = 0; i < NUM_RUN_AVERAGED; i++) { - const double init_time = PIL_check_seconds_timer(); - TaskParallelRangePool *range_pool = BLI_task_parallel_range_pool_init(&settings); - for (int j = 0; j < 10; j++) { - BLI_task_parallel_range_pool_push( - range_pool, i + j, i + j + num_items, NULL, task_parallel_range_func, &settings); - } - BLI_task_parallel_range_pool_work_and_wait(range_pool); - BLI_task_parallel_range_pool_free(range_pool); - averaged_timing += PIL_check_seconds_timer() - init_time; - } - - printf("\t%s: pooled done in %fs on average over %d runs\n", - id, - averaged_timing / NUM_RUN_AVERAGED, - NUM_RUN_AVERAGED); -} - -TEST(task, RangeIter10KNoThread) -{ - task_parallel_range_test_do( - "Range parallel iteration - Single thread - 10K items", 10000, false); -} - -TEST(task, RangeIter10k) -{ - task_parallel_range_test_do("Range parallel iteration - Threaded - 10K items", 10000, true); -} - -TEST(task, RangeIter100KNoThread) -{ - 
task_parallel_range_test_do( - "Range parallel iteration - Single thread - 100K items", 100000, false); -} - -TEST(task, RangeIter100k) -{ - task_parallel_range_test_do("Range parallel iteration - Threaded - 100K items", 100000, true); -} - -TEST(task, RangeIter1000KNoThread) -{ - task_parallel_range_test_do( - "Range parallel iteration - Single thread - 1000K items", 1000000, false); -} - -TEST(task, RangeIter1000k) -{ - task_parallel_range_test_do("Range parallel iteration - Threaded - 1000K items", 1000000, true); -} - /* *** Parallel iterations over double-linked list items. *** */ static void task_listbase_light_iter_func(void *UNUSED(userdata), diff --git a/tests/gtests/blenlib/BLI_task_test.cc b/tests/gtests/blenlib/BLI_task_test.cc index 348aa7305f9..fe0f481d469 100644 --- a/tests/gtests/blenlib/BLI_task_test.cc +++ b/tests/gtests/blenlib/BLI_task_test.cc @@ -67,80 +67,6 @@ TEST(task, RangeIter) BLI_threadapi_exit(); } -TEST(task, RangeIterPool) -{ - const int num_tasks = 10; - int data[num_tasks][NUM_ITEMS] = {{0}}; - int sum = 0; - - BLI_threadapi_init(); - - TaskParallelSettings settings; - BLI_parallel_range_settings_defaults(&settings); - settings.min_iter_per_thread = 1; - - TaskParallelRangePool *range_pool = BLI_task_parallel_range_pool_init(&settings); - - for (int j = 0; j < num_tasks; j++) { - settings.userdata_chunk = &sum; - settings.userdata_chunk_size = sizeof(sum); - settings.func_reduce = task_range_iter_reduce_func; - - BLI_task_parallel_range_pool_push( - range_pool, 0, NUM_ITEMS, data[j], task_range_iter_func, &settings); - } - - BLI_task_parallel_range_pool_work_and_wait(range_pool); - - /* Those checks should ensure us all items of the listbase were processed once, and only once - - * as expected.
*/ - { - int expected_sum = 0; - for (int j = 0; j < num_tasks; j++) { - for (int i = 0; i < NUM_ITEMS; i++) { - // EXPECT_EQ(data[j][i], i); - expected_sum += i; - } - } - EXPECT_EQ(sum, expected_sum); - } - - /* A pool can be re-used until it is freed. */ - - for (int j = 0; j < num_tasks; j++) { - memset(data[j], 0, sizeof(data[j])); - } - sum = 0; - - for (int j = 0; j < num_tasks; j++) { - settings.userdata_chunk = &sum; - settings.userdata_chunk_size = sizeof(sum); - settings.func_reduce = task_range_iter_reduce_func; - - BLI_task_parallel_range_pool_push( - range_pool, 0, NUM_ITEMS, data[j], task_range_iter_func, &settings); - } - - BLI_task_parallel_range_pool_work_and_wait(range_pool); - - BLI_task_parallel_range_pool_free(range_pool); - - /* Those checks should ensure us all items of the listbase were processed once, and only once - - * as expected. */ - { - int expected_sum = 0; - for (int j = 0; j < num_tasks; j++) { - for (int i = 0; i < NUM_ITEMS; i++) { - // EXPECT_EQ(data[j][i], i); - expected_sum += i; - } - } - EXPECT_EQ(sum, expected_sum); - } - - BLI_threadapi_exit(); -} - /* *** Parallel iterations over mempool items. *** */ static void task_mempool_iter_func(void *userdata, MempoolIterData *item) |