From f9028a3be1f77c01edca44a68894e2ba9d9cfb14 Mon Sep 17 00:00:00 2001 From: Bastien Montagne Date: Mon, 25 Nov 2019 11:58:09 +0100 Subject: BLI_task: Add pooled threaded index range iterator. This code allows to push a set of different operations all based on iterations over a range of indices, and then process them all at once over multiple threads. This commit also adds unit tests for both old un-pooled, and new pooled `task_parallel_range` family of functions, as well as some basic performances tests. This is mainly interesting for relatively low amount of individual tasks, as expected. E.g. performance tests on a 32 threads machine, for a set of 10 different tasks, shows following improvements when using pooled version instead of ten sequential calls to `BLI_task_parallel_range()`: | Num Items | Sequential | Pooled | Speed-up | | --------- | ---------- | ------- | -------- | | 10K | 365 us | 138 us | 2.5 x | | 100K | 877 us | 530 us | 1.66 x | | 1000K | 5521 us | 4625 us | 1.25 x | Differential Revision: https://developer.blender.org/D6189 --- tests/gtests/blenlib/BLI_task_performance_test.cc | 90 +++++++++++++++- tests/gtests/blenlib/BLI_task_test.cc | 120 ++++++++++++++++++++++ 2 files changed, 208 insertions(+), 2 deletions(-) (limited to 'tests/gtests') diff --git a/tests/gtests/blenlib/BLI_task_performance_test.cc b/tests/gtests/blenlib/BLI_task_performance_test.cc index dc8981f8064..84b7b8b6439 100644 --- a/tests/gtests/blenlib/BLI_task_performance_test.cc +++ b/tests/gtests/blenlib/BLI_task_performance_test.cc @@ -19,8 +19,6 @@ extern "C" { #include "MEM_guardedalloc.h" } -/* *** Parallel iterations over double-linked list items. *** */ - #define NUM_RUN_AVERAGED 100 static uint gen_pseudo_random_number(uint num) @@ -38,6 +36,94 @@ static uint gen_pseudo_random_number(uint num) return ((num & 255) << 6) + 1; } +/* *** Parallel iterations over range of indices. 
*** */ + +static void task_parallel_range_func(void *UNUSED(userdata), + int index, + const TaskParallelTLS *__restrict UNUSED(tls)) +{ + const uint limit = gen_pseudo_random_number((uint)index); + for (uint i = (uint)index; i < limit;) { + i += gen_pseudo_random_number(i); + } +} + +static void task_parallel_range_test_do(const char *id, + const int num_items, + const bool use_threads) +{ + TaskParallelSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = use_threads; + + double averaged_timing = 0.0; + for (int i = 0; i < NUM_RUN_AVERAGED; i++) { + const double init_time = PIL_check_seconds_timer(); + for (int j = 0; j < 10; j++) { + BLI_task_parallel_range(i + j, i + j + num_items, NULL, task_parallel_range_func, &settings); + } + averaged_timing += PIL_check_seconds_timer() - init_time; + } + + printf("\t%s: non-pooled done in %fs on average over %d runs\n", + id, + averaged_timing / NUM_RUN_AVERAGED, + NUM_RUN_AVERAGED); + + averaged_timing = 0.0; + for (int i = 0; i < NUM_RUN_AVERAGED; i++) { + const double init_time = PIL_check_seconds_timer(); + TaskParallelRangePool *range_pool = BLI_task_parallel_range_pool_init(&settings); + for (int j = 0; j < 10; j++) { + BLI_task_parallel_range_pool_push( + range_pool, i + j, i + j + num_items, NULL, task_parallel_range_func, &settings); + } + BLI_task_parallel_range_pool_work_and_wait(range_pool); + BLI_task_parallel_range_pool_free(range_pool); + averaged_timing += PIL_check_seconds_timer() - init_time; + } + + printf("\t%s: pooled done in %fs on average over %d runs\n", + id, + averaged_timing / NUM_RUN_AVERAGED, + NUM_RUN_AVERAGED); +} + +TEST(task, RangeIter10KNoThread) +{ + task_parallel_range_test_do( + "Range parallel iteration - Single thread - 10K items", 10000, false); +} + +TEST(task, RangeIter10k) +{ + task_parallel_range_test_do("Range parallel iteration - Threaded - 10K items", 10000, true); +} + +TEST(task, RangeIter100KNoThread) +{ + 
task_parallel_range_test_do( + "Range parallel iteration - Single thread - 100K items", 100000, false); +} + +TEST(task, RangeIter100k) +{ + task_parallel_range_test_do("Range parallel iteration - Threaded - 100K items", 100000, true); +} + +TEST(task, RangeIter1000KNoThread) +{ + task_parallel_range_test_do( + "Range parallel iteration - Single thread - 1000K items", 1000000, false); +} + +TEST(task, RangeIter1000k) +{ + task_parallel_range_test_do("Range parallel iteration - Threaded - 1000K items", 1000000, true); +} + +/* *** Parallel iterations over double-linked list items. *** */ + static void task_listbase_light_iter_func(void *UNUSED(userdata), void *item, int index, diff --git a/tests/gtests/blenlib/BLI_task_test.cc b/tests/gtests/blenlib/BLI_task_test.cc index 62ae0baaec9..a682e8aedf6 100644 --- a/tests/gtests/blenlib/BLI_task_test.cc +++ b/tests/gtests/blenlib/BLI_task_test.cc @@ -17,6 +17,126 @@ extern "C" { #define NUM_ITEMS 10000 +/* *** Parallel iterations over range of integer values. 
*** */
+
+static void task_range_iter_func(void *userdata, int index, const TaskParallelTLS *__restrict tls)
+{
+  int *data = (int *)userdata;
+  data[index] = index;
+  *((int *)tls->userdata_chunk) += index;
+  // printf("%d, %d, %d\n", index, data[index], *((int *)tls->userdata_chunk));
+}
+
+static void task_range_iter_finalize_func(void *__restrict userdata,
+                                          void *__restrict userdata_chunk)
+{
+  int *data = (int *)userdata;
+  data[NUM_ITEMS] += *(int *)userdata_chunk;
+  // printf("%d, %d\n", data[NUM_ITEMS], *((int *)userdata_chunk));
+}
+
+TEST(task, RangeIter)
+{
+  int data[NUM_ITEMS + 1] = {0};
+  int sum = 0;
+
+  BLI_threadapi_init();
+
+  TaskParallelSettings settings;
+  BLI_parallel_range_settings_defaults(&settings);
+  settings.min_iter_per_thread = 1;
+
+  settings.userdata_chunk = &sum;
+  settings.userdata_chunk_size = sizeof(sum);
+  settings.func_finalize = task_range_iter_finalize_func;
+
+  BLI_task_parallel_range(0, NUM_ITEMS, data, task_range_iter_func, &settings);
+
+  /* Those checks should ensure that all items of the range were processed once, and only once -
+   * as expected. 
*/
+
+  int expected_sum = 0;
+  for (int i = 0; i < NUM_ITEMS; i++) {
+    EXPECT_EQ(data[i], i);
+    expected_sum += i;
+  }
+  EXPECT_EQ(data[NUM_ITEMS], expected_sum);
+
+  BLI_threadapi_exit();
+}
+
+TEST(task, RangeIterPool)
+{
+  const int num_tasks = 10;
+  int data[num_tasks][NUM_ITEMS + 1] = {{0}};
+  int sum = 0;
+
+  BLI_threadapi_init();
+
+  TaskParallelSettings settings;
+  BLI_parallel_range_settings_defaults(&settings);
+  settings.min_iter_per_thread = 1;
+
+  TaskParallelRangePool *range_pool = BLI_task_parallel_range_pool_init(&settings);
+
+  for (int j = 0; j < num_tasks; j++) {
+    settings.userdata_chunk = &sum;
+    settings.userdata_chunk_size = sizeof(sum);
+    settings.func_finalize = task_range_iter_finalize_func;
+
+    BLI_task_parallel_range_pool_push(
+        range_pool, 0, NUM_ITEMS, data[j], task_range_iter_func, &settings);
+  }
+
+  BLI_task_parallel_range_pool_work_and_wait(range_pool);
+
+  /* Those checks should ensure that all items of the range were processed once, and only once -
+   * as expected. */
+
+  for (int j = 0; j < num_tasks; j++) {
+    int expected_sum = 0;
+    for (int i = 0; i < NUM_ITEMS; i++) {
+      // EXPECT_EQ(data[j][i], i);
+      expected_sum += i;
+    }
+    EXPECT_EQ(data[j][NUM_ITEMS], expected_sum);
+  }
+
+  /* A pool can be re-used until it is freed. */
+
+  for (int j = 0; j < num_tasks; j++) {
+    memset(data[j], 0, sizeof(data[j]));
+  }
+  sum = 0;
+
+  for (int j = 0; j < num_tasks; j++) {
+    settings.userdata_chunk = &sum;
+    settings.userdata_chunk_size = sizeof(sum);
+    settings.func_finalize = task_range_iter_finalize_func;
+
+    BLI_task_parallel_range_pool_push(
+        range_pool, 0, NUM_ITEMS, data[j], task_range_iter_func, &settings);
+  }
+
+  BLI_task_parallel_range_pool_work_and_wait(range_pool);
+
+  BLI_task_parallel_range_pool_free(range_pool);
+
+  /* Those checks should ensure that all items of the range were processed once, and only once -
+   * as expected. 
*/ + + for (int j = 0; j < num_tasks; j++) { + int expected_sum = 0; + for (int i = 0; i < NUM_ITEMS; i++) { + // EXPECT_EQ(data[j][i], i); + expected_sum += i; + } + EXPECT_EQ(data[j][NUM_ITEMS], expected_sum); + } + + BLI_threadapi_exit(); +} + /* *** Parallel iterations over mempool items. *** */ static void task_mempool_iter_func(void *userdata, MempoolIterData *item) -- cgit v1.2.3