diff options
author | Bastien Montagne <b.mont29@gmail.com> | 2019-11-26 16:26:47 +0300 |
---|---|---|
committer | Bastien Montagne <b.mont29@gmail.com> | 2019-11-26 16:30:41 +0300 |
commit | fcbec6e97e649eee33f06e0202455ee11dcfe46e (patch) | |
tree | 05b21434a9a5974be893326ad378f69d4d3521b4 /tests/gtests/blenlib/BLI_task_performance_test.cc | |
parent | 9ecc30250aaea4e102e869f181e4c896c315e5ef (diff) |
BLI_task: Add pooled threaded index range iterator, Take II.
This code makes it possible to push a set of different operations, all based on
iterations over a range of indices, and then process them all at once
over multiple threads.
This commit also adds unit tests for both the old un-pooled and the new pooled
task_parallel_range family of functions, as well as some basic
performance tests.
This is mainly interesting for relatively low numbers of individual
tasks, as expected.
E.g. performance tests on a 32 threads machine, for a set of 10
different tasks, shows following improvements when using pooled version
instead of ten sequential calls to BLI_task_parallel_range():
| Num Items | Sequential | Pooled | Speed-up |
| --------- | ---------- | ------- | -------- |
| 10K | 365 us | 138 us | 2.5 x |
| 100K | 877 us | 530 us | 1.66 x |
| 1000K | 5521 us | 4625 us | 1.25 x |
Differential Revision: https://developer.blender.org/D6189
Note: Compared to previous commit yesterday, this reworks atomic handling in
parallel iter code, and fixes a dummy double-free bug.
Now we should only use the two critical values for synchronization from
atomic calls results, which is the proper way to do things.
Reading a value after an atomic operation does not guarantee you will
get the latest value in all cases (especially on Windows release builds
it seems).
Diffstat (limited to 'tests/gtests/blenlib/BLI_task_performance_test.cc')
-rw-r--r-- | tests/gtests/blenlib/BLI_task_performance_test.cc | 90 |
1 file changed, 88 insertions, 2 deletions
diff --git a/tests/gtests/blenlib/BLI_task_performance_test.cc b/tests/gtests/blenlib/BLI_task_performance_test.cc index dc8981f8064..84b7b8b6439 100644 --- a/tests/gtests/blenlib/BLI_task_performance_test.cc +++ b/tests/gtests/blenlib/BLI_task_performance_test.cc @@ -19,8 +19,6 @@ extern "C" { #include "MEM_guardedalloc.h" } -/* *** Parallel iterations over double-linked list items. *** */ - #define NUM_RUN_AVERAGED 100 static uint gen_pseudo_random_number(uint num) @@ -38,6 +36,94 @@ static uint gen_pseudo_random_number(uint num) return ((num & 255) << 6) + 1; } +/* *** Parallel iterations over range of indices. *** */ + +static void task_parallel_range_func(void *UNUSED(userdata), + int index, + const TaskParallelTLS *__restrict UNUSED(tls)) +{ + const uint limit = gen_pseudo_random_number((uint)index); + for (uint i = (uint)index; i < limit;) { + i += gen_pseudo_random_number(i); + } +} + +static void task_parallel_range_test_do(const char *id, + const int num_items, + const bool use_threads) +{ + TaskParallelSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = use_threads; + + double averaged_timing = 0.0; + for (int i = 0; i < NUM_RUN_AVERAGED; i++) { + const double init_time = PIL_check_seconds_timer(); + for (int j = 0; j < 10; j++) { + BLI_task_parallel_range(i + j, i + j + num_items, NULL, task_parallel_range_func, &settings); + } + averaged_timing += PIL_check_seconds_timer() - init_time; + } + + printf("\t%s: non-pooled done in %fs on average over %d runs\n", + id, + averaged_timing / NUM_RUN_AVERAGED, + NUM_RUN_AVERAGED); + + averaged_timing = 0.0; + for (int i = 0; i < NUM_RUN_AVERAGED; i++) { + const double init_time = PIL_check_seconds_timer(); + TaskParallelRangePool *range_pool = BLI_task_parallel_range_pool_init(&settings); + for (int j = 0; j < 10; j++) { + BLI_task_parallel_range_pool_push( + range_pool, i + j, i + j + num_items, NULL, task_parallel_range_func, &settings); + } + 
BLI_task_parallel_range_pool_work_and_wait(range_pool); + BLI_task_parallel_range_pool_free(range_pool); + averaged_timing += PIL_check_seconds_timer() - init_time; + } + + printf("\t%s: pooled done in %fs on average over %d runs\n", + id, + averaged_timing / NUM_RUN_AVERAGED, + NUM_RUN_AVERAGED); +} + +TEST(task, RangeIter10KNoThread) +{ + task_parallel_range_test_do( + "Range parallel iteration - Single thread - 10K items", 10000, false); +} + +TEST(task, RangeIter10k) +{ + task_parallel_range_test_do("Range parallel iteration - Threaded - 10K items", 10000, true); +} + +TEST(task, RangeIter100KNoThread) +{ + task_parallel_range_test_do( + "Range parallel iteration - Single thread - 100K items", 100000, false); +} + +TEST(task, RangeIter100k) +{ + task_parallel_range_test_do("Range parallel iteration - Threaded - 100K items", 100000, true); +} + +TEST(task, RangeIter1000KNoThread) +{ + task_parallel_range_test_do( + "Range parallel iteration - Single thread - 1000K items", 1000000, false); +} + +TEST(task, RangeIter1000k) +{ + task_parallel_range_test_do("Range parallel iteration - Threaded - 1000K items", 1000000, true); +} + +/* *** Parallel iterations over double-linked list items. *** */ + static void task_listbase_light_iter_func(void *UNUSED(userdata), void *item, int index, |