From e1309030603980c6b2f33486adf6ae5c2e4eb965 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 2 Dec 2021 12:54:35 +0100 Subject: BLI: avoid invoking tbb for small workloads We often call `parallel_for` in places with very variable sized workloads. When many elements are processed, using multi-threading is great, but when processing few elements (possibly many times) using `parallel_for` can result in significant overhead. I measured that this improves performance by >20% in the refactored realize instances code I'm working on separately. The change might also help with debugging sometimes, because the stack trace is smaller and contains fewer irrelevant symbols. --- source/blender/blenlib/BLI_task.hh | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'source/blender') diff --git a/source/blender/blenlib/BLI_task.hh b/source/blender/blenlib/BLI_task.hh index da7309837c8..84d5cd39bb4 100644 --- a/source/blender/blenlib/BLI_task.hh +++ b/source/blender/blenlib/BLI_task.hh @@ -67,14 +67,19 @@ void parallel_for(IndexRange range, int64_t grain_size, const Function &function return; } #ifdef WITH_TBB - tbb::parallel_for(tbb::blocked_range(range.first(), range.one_after_last(), grain_size), - [&](const tbb::blocked_range &subrange) { - function(IndexRange(subrange.begin(), subrange.size())); - }); + /* Invoking tbb for small workloads has a large overhead. */ + if (range.size() >= grain_size) { + tbb::parallel_for( + tbb::blocked_range(range.first(), range.one_after_last(), grain_size), + [&](const tbb::blocked_range &subrange) { + function(IndexRange(subrange.begin(), subrange.size())); + }); + return; + } #else UNUSED_VARS(grain_size); - function(range); #endif + function(range); } template -- cgit v1.2.3