From e1309030603980c6b2f33486adf6ae5c2e4eb965 Mon Sep 17 00:00:00 2001
From: Jacques Lucke <jacques@blender.org>
Date: Thu, 2 Dec 2021 12:54:35 +0100
Subject: BLI: avoid invoking tbb for small workloads

We often call `parallel_for` in places where the workload
size varies widely. When many elements are processed,
multi-threading is great, but when only a few elements are
processed (possibly many times), using `parallel_for`
can result in significant overhead.

I measured that this improves performance by >20% in
the refactored realize instances code I'm working on
separately. The change might also help with debugging
sometimes, because the stack trace is smaller and contains
fewer irrelevant symbols.
---
 source/blender/blenlib/BLI_task.hh | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'source/blender')
diff --git a/source/blender/blenlib/BLI_task.hh b/source/blender/blenlib/BLI_task.hh
index da7309837c8..84d5cd39bb4 100644
--- a/source/blender/blenlib/BLI_task.hh
+++ b/source/blender/blenlib/BLI_task.hh
@@ -67,14 +67,19 @@ void parallel_for(IndexRange range, int64_t grain_size, const Function &function
     return;
   }
 #ifdef WITH_TBB
-  tbb::parallel_for(tbb::blocked_range<int64_t>(range.first(), range.one_after_last(), grain_size),
-                    [&](const tbb::blocked_range<int64_t> &subrange) {
-                      function(IndexRange(subrange.begin(), subrange.size()));
-                    });
+  /* Invoking tbb for small workloads has a large overhead. */
+  if (range.size() >= grain_size) {
+    tbb::parallel_for(
+        tbb::blocked_range<int64_t>(range.first(), range.one_after_last(), grain_size),
+        [&](const tbb::blocked_range<int64_t> &subrange) {
+          function(IndexRange(subrange.begin(), subrange.size()));
+        });
+    return;
+  }
 #else
   UNUSED_VARS(grain_size);
-  function(range);
 #endif
+  function(range);
 }
 
 template<typename Value, typename Function, typename Reduction>
-- 
cgit v1.2.3