diff options
author | Bastien Montagne <b.mont29@gmail.com> | 2019-11-26 16:26:47 +0300 |
---|---|---|
committer | Bastien Montagne <b.mont29@gmail.com> | 2019-11-26 16:30:41 +0300 |
commit | fcbec6e97e649eee33f06e0202455ee11dcfe46e (patch) | |
tree | 05b21434a9a5974be893326ad378f69d4d3521b4 /tests/gtests/blenlib/BLI_task_performance_test.cc | |
parent | 9ecc30250aaea4e102e869f181e4c896c315e5ef (diff) |
BLI_task: Add pooled threaded index range iterator, Take II.
This code makes it possible to push a set of different operations, all based on
iterations over a range of indices, and then process them all at once
over multiple threads.
This commit also adds unit tests for both the old un-pooled and the new pooled
task_parallel_range family of functions, as well as some basic
performance tests.
This is mainly interesting for relatively low numbers of individual
tasks, as expected.
E.g. performance tests on a 32 threads machine, for a set of 10
different tasks, shows following improvements when using pooled version
instead of ten sequential calls to BLI_task_parallel_range():
| Num Items | Sequential | Pooled | Speed-up |
| --------- | ---------- | ------- | -------- |
| 10K | 365 us | 138 us | 2.5 x |
| 100K | 877 us | 530 us | 1.66 x |
| 1000K | 5521 us | 4625 us | 1.25 x |
Differential Revision: https://developer.blender.org/D6189
Note: Compared to previous commit yesterday, this reworks atomic handling in
parallel iter code, and fixes a dummy double-free bug.
Now we should only use the two critical values for synchronization from
atomic calls results, which is the proper way to do things.
Reading a value after an atomic operation does not guarantee you will
get the latest value in all cases (especially on Windows release builds
it seems).
Diffstat (limited to 'tests/gtests/blenlib/BLI_task_performance_test.cc')
-rw-r--r-- | tests/gtests/blenlib/BLI_task_performance_test.cc | 90 |
1 file changed, 88 insertions, 2 deletions
diff --git a/tests/gtests/blenlib/BLI_task_performance_test.cc b/tests/gtests/blenlib/BLI_task_performance_test.cc index dc8981f8064..84b7b8b6439 100644 --- a/tests/gtests/blenlib/BLI_task_performance_test.cc +++ b/tests/gtests/blenlib/BLI_task_performance_test.cc @@ -19,8 +19,6 @@ extern "C" { #include "MEM_guardedalloc.h" } -/* *** Parallel iterations over double-linked list items. *** */ - #define NUM_RUN_AVERAGED 100 static uint gen_pseudo_random_number(uint num) @@ -38,6 +36,94 @@ static uint gen_pseudo_random_number(uint num) return ((num & 255) << 6) + 1; } +/* *** Parallel iterations over range of indices. *** */ + +static void task_parallel_range_func(void *UNUSED(userdata), + int index, + const TaskParallelTLS *__restrict UNUSED(tls)) +{ + const uint limit = gen_pseudo_random_number((uint)index); + for (uint i = (uint)index; i < limit;) { + i += gen_pseudo_random_number(i); + } +} + +static void task_parallel_range_test_do(const char *id, + const int num_items, + const bool use_threads) +{ + TaskParallelSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = use_threads; + + double averaged_timing = 0.0; + for (int i = 0; i < NUM_RUN_AVERAGED; i++) { + const double init_time = PIL_check_seconds_timer(); + for (int j = 0; j < 10; j++) { + BLI_task_parallel_range(i + j, i + j + num_items, NULL, task_parallel_range_func, &settings); + } + averaged_timing += PIL_check_seconds_timer() - init_time; + } + + printf("\t%s: non-pooled done in %fs on average over %d runs\n", + id, + averaged_timing / NUM_RUN_AVERAGED, + NUM_RUN_AVERAGED); + + averaged_timing = 0.0; + for (int i = 0; i < NUM_RUN_AVERAGED; i++) { + const double init_time = PIL_check_seconds_timer(); + TaskParallelRangePool *range_pool = BLI_task_parallel_range_pool_init(&settings); + for (int j = 0; j < 10; j++) { + BLI_task_parallel_range_pool_push( + range_pool, i + j, i + j + num_items, NULL, task_parallel_range_func, &settings); + } + 
BLI_task_parallel_range_pool_work_and_wait(range_pool); + BLI_task_parallel_range_pool_free(range_pool); + averaged_timing += PIL_check_seconds_timer() - init_time; + } + + printf("\t%s: pooled done in %fs on average over %d runs\n", + id, + averaged_timing / NUM_RUN_AVERAGED, + NUM_RUN_AVERAGED); +} + +TEST(task, RangeIter10KNoThread) +{ + task_parallel_range_test_do( + "Range parallel iteration - Single thread - 10K items", 10000, false); +} + +TEST(task, RangeIter10k) +{ + task_parallel_range_test_do("Range parallel iteration - Threaded - 10K items", 10000, true); +} + +TEST(task, RangeIter100KNoThread) +{ + task_parallel_range_test_do( + "Range parallel iteration - Single thread - 100K items", 100000, false); +} + +TEST(task, RangeIter100k) +{ + task_parallel_range_test_do("Range parallel iteration - Threaded - 100K items", 100000, true); +} + +TEST(task, RangeIter1000KNoThread) +{ + task_parallel_range_test_do( + "Range parallel iteration - Single thread - 1000K items", 1000000, false); +} + +TEST(task, RangeIter1000k) +{ + task_parallel_range_test_do("Range parallel iteration - Threaded - 1000K items", 1000000, true); +} + +/* *** Parallel iterations over double-linked list items. *** */ + static void task_listbase_light_iter_func(void *UNUSED(userdata), void *item, int index, |