Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJacques Lucke <jacques@blender.org>2021-11-26 13:05:47 +0300
committerJacques Lucke <jacques@blender.org>2021-11-26 13:06:16 +0300
commit658fd8df0bd2427cd77e7fc4bcca8a102f67b626 (patch)
tree574c5a6f4c11db7047a98ca38c6d6f129a4b10e2 /source/blender/functions/intern
parent004172de38d5483b715a5b13d06c2aa5dd3de3f5 (diff)
Geometry Nodes: refactor multi-threading in field evaluation
Previously, there was a fixed grain size for all multi-functions. That was not sufficient because some functions could benefit a lot from smaller grain sizes. This refactor adds a new `MultiFunction::call_auto` method which has the same effect as just calling `MultiFunction::call` but additionally figures out how to execute the specific multi-function efficiently. It determines a good grain size and decides whether the mask indices should be shifted or not. Most multi-function evaluations benefit from this, but medium-sized workloads (1000 - 50000 elements) benefit from it the most, especially when expensive multi-functions (e.g. noise) are involved. This is because for smaller workloads, threading is rarely used, and for larger workloads threading already worked fine before. With this patch, multi-functions can specify execution hints that allow the caller to execute them most efficiently. These execution hints still have to be added to more functions. Some performance measurements of a field evaluation involving noise and math nodes, ordered by the number of elements being evaluated: ``` 1,000,000: 133 ms -> 120 ms 100,000: 30 ms -> 18 ms 10,000: 20 ms -> 2.7 ms 1,000: 4 ms -> 0.5 ms 100: 0.5 ms -> 0.4 ms ```
Diffstat (limited to 'source/blender/functions/intern')
-rw-r--r--source/blender/functions/intern/field.cc10
-rw-r--r--source/blender/functions/intern/multi_function.cc133
-rw-r--r--source/blender/functions/intern/multi_function_parallel.cc93
-rw-r--r--source/blender/functions/intern/multi_function_params.cc44
-rw-r--r--source/blender/functions/intern/multi_function_procedure_executor.cc10
5 files changed, 188 insertions, 102 deletions
diff --git a/source/blender/functions/intern/field.cc b/source/blender/functions/intern/field.cc
index 7934490a6d9..297df3c15cf 100644
--- a/source/blender/functions/intern/field.cc
+++ b/source/blender/functions/intern/field.cc
@@ -21,7 +21,6 @@
#include "BLI_vector_set.hh"
#include "FN_field.hh"
-#include "FN_multi_function_parallel.hh"
namespace blender::fn {
@@ -358,13 +357,8 @@ Vector<GVArray> evaluate_fields(ResourceScope &scope,
build_multi_function_procedure_for_fields(
procedure, scope, field_tree_info, varying_fields_to_evaluate);
MFProcedureExecutor procedure_executor{procedure};
- /* Add multi threading capabilities to the field evaluation. */
- const int grain_size = 10000;
- fn::ParallelMultiFunction parallel_procedure_executor{procedure_executor, grain_size};
- /* Utility variable to make easy to switch the executor. */
- const MultiFunction &executor_fn = parallel_procedure_executor;
- MFParamsBuilder mf_params{executor_fn, &mask};
+ MFParamsBuilder mf_params{procedure_executor, &mask};
MFContextBuilder mf_context;
/* Provide inputs to the procedure executor. */
@@ -405,7 +399,7 @@ Vector<GVArray> evaluate_fields(ResourceScope &scope,
mf_params.add_uninitialized_single_output(span);
}
- executor_fn.call(mask, mf_params, mf_context);
+ procedure_executor.call_auto(mask, mf_params, mf_context);
}
/* Evaluate constant fields if necessary. */
diff --git a/source/blender/functions/intern/multi_function.cc b/source/blender/functions/intern/multi_function.cc
index ee2c69068db..3e5539d4248 100644
--- a/source/blender/functions/intern/multi_function.cc
+++ b/source/blender/functions/intern/multi_function.cc
@@ -16,8 +16,141 @@
#include "FN_multi_function.hh"
+#include "BLI_task.hh"
+#include "BLI_threads.h"
+
namespace blender::fn {
+using ExecutionHints = MultiFunction::ExecutionHints;
+
+ExecutionHints MultiFunction::execution_hints() const
+{
+ return this->get_execution_hints();
+}
+
+ExecutionHints MultiFunction::get_execution_hints() const
+{
+ return ExecutionHints{};
+}
+
+static bool supports_threading_by_slicing_params(const MultiFunction &fn)
+{
+ for (const int i : fn.param_indices()) {
+ const MFParamType param_type = fn.param_type(i);
+ if (ELEM(param_type.interface_type(),
+ MFParamType::InterfaceType::Mutable,
+ MFParamType::InterfaceType::Output)) {
+ if (param_type.data_type().is_vector()) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+static int64_t compute_grain_size(const ExecutionHints &hints, const IndexMask mask)
+{
+ int64_t grain_size = hints.min_grain_size;
+ if (hints.uniform_execution_time) {
+ const int thread_count = BLI_system_thread_count();
+ /* Avoid using a small grain size even if it is not necessary. */
+ const int64_t thread_based_grain_size = mask.size() / thread_count / 4;
+ grain_size = std::max(grain_size, thread_based_grain_size);
+ }
+ if (hints.allocates_array) {
+ const int64_t max_grain_size = 10000;
+ /* Avoid allocating many large intermediate arrays. Better process data in smaller chunks to
+ * keep peak memory usage lower. */
+ grain_size = std::min(grain_size, max_grain_size);
+ }
+ return grain_size;
+}
+
+/**
+ * The result is the same as using #call directly but this method has some additional features.
+ * - Automatic multi-threading when possible and appropriate.
+ * - Automatic index mask offsetting to avoid large temporary intermediate arrays that are mostly
+ * unused.
+ */
+void MultiFunction::call_auto(IndexMask mask, MFParams params, MFContext context) const
+{
+ if (mask.is_empty()) {
+ return;
+ }
+ const ExecutionHints hints = this->execution_hints();
+ const int64_t grain_size = compute_grain_size(hints, mask);
+
+ if (mask.size() <= grain_size) {
+ this->call(mask, params, context);
+ return;
+ }
+
+ const bool supports_threading = supports_threading_by_slicing_params(*this);
+ if (!supports_threading) {
+ this->call(mask, params, context);
+ return;
+ }
+
+ threading::parallel_for(mask.index_range(), grain_size, [&](const IndexRange sub_range) {
+ const IndexMask sliced_mask = mask.slice(sub_range);
+ if (!hints.allocates_array) {
+ /* There is no benefit to changing indices in this case. */
+ this->call(sliced_mask, params, context);
+ return;
+ }
+ if (sliced_mask[0] < grain_size) {
+ /* The indices are low, no need to offset them. */
+ this->call(sliced_mask, params, context);
+ return;
+ }
+ const int64_t input_slice_start = sliced_mask[0];
+ const int64_t input_slice_size = sliced_mask.last() - input_slice_start + 1;
+ const IndexRange input_slice_range{input_slice_start, input_slice_size};
+
+ Vector<int64_t> offset_mask_indices;
+ const IndexMask offset_mask = mask.slice_and_offset(sub_range, offset_mask_indices);
+
+ MFParamsBuilder offset_params{*this, offset_mask.min_array_size()};
+
+ /* Slice all parameters so that they line up with the offset mask used for the actual function call. */
+ for (const int param_index : this->param_indices()) {
+ const MFParamType param_type = this->param_type(param_index);
+ switch (param_type.category()) {
+ case MFParamType::SingleInput: {
+ const GVArray &varray = params.readonly_single_input(param_index);
+ offset_params.add_readonly_single_input(varray.slice(input_slice_range));
+ break;
+ }
+ case MFParamType::SingleMutable: {
+ const GMutableSpan span = params.single_mutable(param_index);
+ const GMutableSpan sliced_span = span.slice(input_slice_range);
+ offset_params.add_single_mutable(sliced_span);
+ break;
+ }
+ case MFParamType::SingleOutput: {
+ const GMutableSpan span = params.uninitialized_single_output_if_required(param_index);
+ if (span.is_empty()) {
+ offset_params.add_ignored_single_output();
+ }
+ else {
+ const GMutableSpan sliced_span = span.slice(input_slice_range);
+ offset_params.add_uninitialized_single_output(sliced_span);
+ }
+ break;
+ }
+ case MFParamType::VectorInput:
+ case MFParamType::VectorMutable:
+ case MFParamType::VectorOutput: {
+ BLI_assert_unreachable();
+ break;
+ }
+ }
+ }
+
+ this->call(offset_mask, offset_params, context);
+ });
+}
+
std::string MultiFunction::debug_name() const
{
return signature_ref_->function_name;
diff --git a/source/blender/functions/intern/multi_function_parallel.cc b/source/blender/functions/intern/multi_function_parallel.cc
deleted file mode 100644
index eefe647644d..00000000000
--- a/source/blender/functions/intern/multi_function_parallel.cc
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#include "FN_multi_function_parallel.hh"
-
-#include "BLI_task.hh"
-
-namespace blender::fn {
-
-ParallelMultiFunction::ParallelMultiFunction(const MultiFunction &fn, const int64_t grain_size)
- : fn_(fn), grain_size_(grain_size)
-{
- this->set_signature(&fn.signature());
-
- threading_supported_ = true;
- for (const int param_index : fn.param_indices()) {
- const MFParamType param_type = fn.param_type(param_index);
- if (param_type.data_type().category() == MFDataType::Vector) {
- /* Vector parameters do not support threading yet. */
- threading_supported_ = false;
- break;
- }
- }
-}
-
-void ParallelMultiFunction::call(IndexMask full_mask, MFParams params, MFContext context) const
-{
- if (full_mask.size() <= grain_size_ || !threading_supported_) {
- fn_.call(full_mask, params, context);
- return;
- }
-
- threading::parallel_for(full_mask.index_range(), grain_size_, [&](const IndexRange mask_slice) {
- Vector<int64_t> sub_mask_indices;
- const IndexMask sub_mask = full_mask.slice_and_offset(mask_slice, sub_mask_indices);
- if (sub_mask.is_empty()) {
- return;
- }
- const int64_t input_slice_start = full_mask[mask_slice.first()];
- const int64_t input_slice_size = full_mask[mask_slice.last()] - input_slice_start + 1;
- const IndexRange input_slice_range{input_slice_start, input_slice_size};
-
- MFParamsBuilder sub_params{fn_, sub_mask.min_array_size()};
-
- /* All parameters are sliced so that the wrapped multi-function does not have to take care of
- * the index offset. */
- for (const int param_index : fn_.param_indices()) {
- const MFParamType param_type = fn_.param_type(param_index);
- switch (param_type.category()) {
- case MFParamType::SingleInput: {
- const GVArray &varray = params.readonly_single_input(param_index);
- sub_params.add_readonly_single_input(varray.slice(input_slice_range));
- break;
- }
- case MFParamType::SingleMutable: {
- const GMutableSpan span = params.single_mutable(param_index);
- const GMutableSpan sliced_span = span.slice(input_slice_start, input_slice_size);
- sub_params.add_single_mutable(sliced_span);
- break;
- }
- case MFParamType::SingleOutput: {
- const GMutableSpan span = params.uninitialized_single_output(param_index);
- const GMutableSpan sliced_span = span.slice(input_slice_start, input_slice_size);
- sub_params.add_uninitialized_single_output(sliced_span);
- break;
- }
- case MFParamType::VectorInput:
- case MFParamType::VectorMutable:
- case MFParamType::VectorOutput: {
- BLI_assert_unreachable();
- break;
- }
- }
- }
-
- fn_.call(sub_mask, sub_params, context);
- });
-}
-
-} // namespace blender::fn
diff --git a/source/blender/functions/intern/multi_function_params.cc b/source/blender/functions/intern/multi_function_params.cc
new file mode 100644
index 00000000000..376c5b2deb7
--- /dev/null
+++ b/source/blender/functions/intern/multi_function_params.cc
@@ -0,0 +1,44 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "FN_multi_function_params.hh"
+
+namespace blender::fn {
+
+GMutableSpan MFParams::ensure_dummy_single_output(int data_index)
+{
+ /* Lock because we are actually modifying #builder_ and it may be used by multiple threads. */
+ std::lock_guard lock{builder_->mutex_};
+
+ for (const std::pair<int, GMutableSpan> &items : builder_->dummy_output_spans_) {
+ if (items.first == data_index) {
+ return items.second;
+ }
+ }
+
+ const CPPType &type = builder_->mutable_spans_[data_index].type();
+ void *buffer = builder_->scope_.linear_allocator().allocate(
+ builder_->min_array_size_ * type.size(), type.alignment());
+ if (!type.is_trivially_destructible()) {
+ builder_->scope_.add_destruct_call(
+ [&type, buffer, mask = builder_->mask_]() { type.destruct_indices(buffer, mask); });
+ }
+ const GMutableSpan span{type, buffer, builder_->min_array_size_};
+ builder_->dummy_output_spans_.append({data_index, span});
+ return span;
+}
+
+} // namespace blender::fn
diff --git a/source/blender/functions/intern/multi_function_procedure_executor.cc b/source/blender/functions/intern/multi_function_procedure_executor.cc
index 06c97fd1173..ab2fd7c098c 100644
--- a/source/blender/functions/intern/multi_function_procedure_executor.cc
+++ b/source/blender/functions/intern/multi_function_procedure_executor.cc
@@ -1045,7 +1045,7 @@ static void execute_call_instruction(const MFCallInstruction &instruction,
}
try {
- fn.call(mask, params, context);
+ fn.call_auto(mask, params, context);
}
catch (...) {
/* Multi-functions must not throw exceptions. */
@@ -1236,4 +1236,12 @@ void MFProcedureExecutor::call(IndexMask full_mask, MFParams params, MFContext c
}
}
+MultiFunction::ExecutionHints MFProcedureExecutor::get_execution_hints() const
+{
+ ExecutionHints hints;
+ hints.allocates_array = true;
+ hints.min_grain_size = 10000;
+ return hints;
+}
+
} // namespace blender::fn