Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJacques Lucke <jacques@blender.org>2021-09-09 12:19:09 +0300
committerJacques Lucke <jacques@blender.org>2021-09-09 12:19:09 +0300
commit2920a569b527c3543dd393a96bca2362ee04feef (patch)
tree329255ec6ea0335db49ed7c1ae03f92181f79ff0
parent068f0122213b1574ccad7ec1335969ad65bbf0d2 (diff)
progress
-rw-r--r--source/blender/blenlib/BLI_virtual_array.hh79
-rw-r--r--source/blender/functions/CMakeLists.txt18
-rw-r--r--source/blender/functions/FN_multi_function_parallel.hh17
-rw-r--r--source/blender/functions/intern/field.cc11
-rw-r--r--source/blender/functions/intern/multi_function_parallel.cc109
-rw-r--r--source/blender/functions/tests/FN_multi_function_test.cc27
6 files changed, 223 insertions, 38 deletions
diff --git a/source/blender/blenlib/BLI_virtual_array.hh b/source/blender/blenlib/BLI_virtual_array.hh
index 1c02bce8411..e99036d06a9 100644
--- a/source/blender/blenlib/BLI_virtual_array.hh
+++ b/source/blender/blenlib/BLI_virtual_array.hh
@@ -622,41 +622,50 @@ inline void devirtualize_varray2(const VArray<T1> &varray1,
const Func &func,
bool enable = true)
{
- /* Support disabling the devirtualization to simplify benchmarking. */
- if (enable) {
- const bool is_span1 = varray1.is_span();
- const bool is_span2 = varray2.is_span();
- const bool is_single1 = varray1.is_single();
- const bool is_single2 = varray2.is_single();
- if (is_span1 && is_span2) {
- const VArray_For_Span<T1> varray1_span{varray1.get_internal_span()};
- const VArray_For_Span<T2> varray2_span{varray2.get_internal_span()};
- func(varray1_span, varray2_span);
- return;
- }
- if (is_span1 && is_single2) {
- const VArray_For_Span<T1> varray1_span{varray1.get_internal_span()};
- const VArray_For_Single<T2> varray2_single{varray2.get_internal_single(), varray2.size()};
- func(varray1_span, varray2_single);
- return;
- }
- if (is_single1 && is_span2) {
- const VArray_For_Single<T1> varray1_single{varray1.get_internal_single(), varray1.size()};
- const VArray_For_Span<T2> varray2_span{varray2.get_internal_span()};
- func(varray1_single, varray2_span);
- return;
- }
- if (is_single1 && is_single2) {
- const VArray_For_Single<T1> varray1_single{varray1.get_internal_single(), varray1.size()};
- const VArray_For_Single<T2> varray2_single{varray2.get_internal_single(), varray2.size()};
- func(varray1_single, varray2_single);
- return;
- }
- }
- /* This fallback is used even when one of the inputs could be optimized. It's probably not worth
- * it to optimize just one of the inputs, because then the compiler still has to call into
- * unknown code, which inhibits many compiler optimizations. */
- func(varray1, varray2);
+ devirtualize_varray(
+ varray1,
+ [&](const auto &varray1) {
+ devirtualize_varray(
+ varray2, [&](const auto &varray2) { func(varray1, varray2); }, enable);
+ },
+ enable);
+
+ // /* Support disabling the devirtualization to simplify benchmarking. */
+ // if (enable) {
+ // const bool is_span1 = varray1.is_span();
+ // const bool is_span2 = varray2.is_span();
+ // const bool is_single1 = varray1.is_single();
+ // const bool is_single2 = varray2.is_single();
+ // if (is_span1 && is_span2) {
+ // const VArray_For_Span<T1> varray1_span{varray1.get_internal_span()};
+ // const VArray_For_Span<T2> varray2_span{varray2.get_internal_span()};
+ // func(varray1_span, varray2_span);
+ // return;
+ // }
+ // if (is_span1 && is_single2) {
+ // const VArray_For_Span<T1> varray1_span{varray1.get_internal_span()};
+ // const VArray_For_Single<T2> varray2_single{varray2.get_internal_single(), varray2.size()};
+ // func(varray1_span, varray2_single);
+ // return;
+ // }
+ // if (is_single1 && is_span2) {
+ // const VArray_For_Single<T1> varray1_single{varray1.get_internal_single(), varray1.size()};
+ // const VArray_For_Span<T2> varray2_span{varray2.get_internal_span()};
+ // func(varray1_single, varray2_span);
+ // return;
+ // }
+ // if (is_single1 && is_single2) {
+ // const VArray_For_Single<T1> varray1_single{varray1.get_internal_single(), varray1.size()};
+ // const VArray_For_Single<T2> varray2_single{varray2.get_internal_single(), varray2.size()};
+ // func(varray1_single, varray2_single);
+ // return;
+ // }
+ // }
+ // /* This fallback is used even when one of the inputs could be optimized. It's probably not
+ // worth
+ // * it to optimize just one of the inputs, because then the compiler still has to call into
+ // * unknown code, which inhibits many compiler optimizations. */
+ // func(varray1, varray2);
}
} // namespace blender
diff --git a/source/blender/functions/CMakeLists.txt b/source/blender/functions/CMakeLists.txt
index 3c27e9d5e19..856668f01d7 100644
--- a/source/blender/functions/CMakeLists.txt
+++ b/source/blender/functions/CMakeLists.txt
@@ -34,6 +34,7 @@ set(SRC
intern/generic_virtual_vector_array.cc
intern/multi_function.cc
intern/multi_function_builder.cc
+ intern/multi_function_parallel.cc
intern/multi_function_procedure.cc
intern/multi_function_procedure_builder.cc
intern/multi_function_procedure_executor.cc
@@ -54,6 +55,7 @@ set(SRC
FN_multi_function_data_type.hh
FN_multi_function_param_type.hh
FN_multi_function_params.hh
+ FN_multi_function_parallel.hh
FN_multi_function_procedure.hh
FN_multi_function_procedure_builder.hh
FN_multi_function_procedure_executor.hh
@@ -64,6 +66,22 @@ set(LIB
bf_blenlib
)
+# When TBB is enabled, expose `WITH_TBB` to the sources in this directory and add the TBB
+# include directories and libraries to this library's dependencies.
+# NOTE(review): presumably required because the new parallel multi-function uses the
+# threading utilities from BLI_task.hh - confirm.
+if(WITH_TBB)
+ add_definitions(-DWITH_TBB)
+ if(WIN32)
+ # TBB includes Windows.h which will define min/max macros
+ # that will collide with the stl versions.
+ add_definitions(-DNOMINMAX)
+ endif()
+ list(APPEND INC_SYS
+ ${TBB_INCLUDE_DIRS}
+ )
+
+ list(APPEND LIB
+ ${TBB_LIBRARIES}
+ )
+endif()
+
blender_add_lib(bf_functions "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
if(WITH_GTESTS)
diff --git a/source/blender/functions/FN_multi_function_parallel.hh b/source/blender/functions/FN_multi_function_parallel.hh
index b5b3e2f2f94..84c57efd434 100644
--- a/source/blender/functions/FN_multi_function_parallel.hh
+++ b/source/blender/functions/FN_multi_function_parallel.hh
@@ -20,5 +20,20 @@
* \ingroup fn
*/
+#include "FN_multi_function.hh"
+
namespace blender::fn {
-}
+
+/**
+ * A multi-function that wraps another multi-function and has the same signature. Calling it
+ * evaluates the wrapped function, potentially split into multiple chunks that are processed in
+ * parallel.
+ */
+class ParallelMultiFunction : public MultiFunction {
+ private:
+ /* The wrapped function. Stored by reference, so it has to outlive this object. */
+ const MultiFunction &fn_;
+ /* Masks with at most this many indices are evaluated with a single call to the wrapped
+ * function. It is also passed on as grain size to the parallel loop. */
+ const int64_t grain_size_;
+ /* Set in the constructor. False when the wrapped function has a parameter with a vector data
+ * type, in which case the call is always forwarded directly to the wrapped function. */
+ bool threading_supported_;
+
+ public:
+ /* Wraps `fn` and takes over its signature. */
+ ParallelMultiFunction(const MultiFunction &fn, const int64_t grain_size);
+
+ void call(IndexMask mask, MFParams params, MFContext context) const override;
+};
+
+} // namespace blender::fn
diff --git a/source/blender/functions/intern/field.cc b/source/blender/functions/intern/field.cc
index a27c5e4e3dc..7b35593ad75 100644
--- a/source/blender/functions/intern/field.cc
+++ b/source/blender/functions/intern/field.cc
@@ -18,9 +18,11 @@
#include "BLI_multi_value_map.hh"
#include "BLI_set.hh"
#include "BLI_stack.hh"
+#include "BLI_timeit.hh"
#include "BLI_vector_set.hh"
#include "FN_field.hh"
+#include "FN_multi_function_parallel.hh"
namespace blender::fn {
@@ -271,6 +273,8 @@ Vector<const GVArray *> evaluate_fields(ResourceScope &scope,
const FieldContext &context,
Span<GVMutableArray *> dst_hints)
{
+ SCOPED_TIMER(__func__);
+
Vector<const GVArray *> r_varrays(fields_to_evaluate.size(), nullptr);
/* Destination hints are optional. Create a small utility method to access them. */
@@ -334,7 +338,10 @@ Vector<const GVArray *> evaluate_fields(ResourceScope &scope,
build_multi_function_procedure_for_fields(
procedure, scope, field_tree_info, varying_fields_to_evaluate);
MFProcedureExecutor procedure_executor{"Procedure", procedure};
- MFParamsBuilder mf_params{procedure_executor, array_size};
+ fn::ParallelMultiFunction parallel_fn{procedure_executor, 20000};
+ const MultiFunction &fn_to_execute = procedure_executor;
+
+ MFParamsBuilder mf_params{fn_to_execute, array_size};
MFContextBuilder mf_context;
/* Provide inputs to the procedure executor. */
@@ -376,7 +383,7 @@ Vector<const GVArray *> evaluate_fields(ResourceScope &scope,
mf_params.add_uninitialized_single_output(span);
}
- procedure_executor.call(mask, mf_params, mf_context);
+ fn_to_execute.call(mask, mf_params, mf_context);
}
/* Evaluate constant fields if necessary. */
diff --git a/source/blender/functions/intern/multi_function_parallel.cc b/source/blender/functions/intern/multi_function_parallel.cc
new file mode 100644
index 00000000000..6843c4a233b
--- /dev/null
+++ b/source/blender/functions/intern/multi_function_parallel.cc
@@ -0,0 +1,109 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "FN_multi_function_parallel.hh"
+
+#include "BLI_task.hh"
+
+#include <mutex>
+
+namespace blender::fn {
+
+/**
+ * Wrap `fn` and reuse its signature. Threading is only enabled when none of the parameters of
+ * `fn` has a vector data type.
+ */
+ParallelMultiFunction::ParallelMultiFunction(const MultiFunction &fn, const int64_t grain_size)
+    : fn_(fn), grain_size_(grain_size)
+{
+  this->set_signature(&fn.signature());
+
+  /* Search for the first parameter with a vector data type. */
+  bool has_vector_param = false;
+  for (const int index : fn.param_indices()) {
+    if (fn.param_type(index).data_type().category() == MFDataType::Vector) {
+      has_vector_param = true;
+      break;
+    }
+  }
+  threading_supported_ = !has_vector_param;
+}
+
+/**
+ * Evaluate the wrapped function for the indices in `mask`.
+ *
+ * When the mask contains at most `grain_size_` indices, or when threading is not supported for
+ * the wrapped function, the call is forwarded unchanged. Otherwise the mask is split into chunks
+ * that are passed to #threading::parallel_for. For every chunk, the single-value parameters are
+ * sliced to the index range covered by the chunk and the wrapped function is called with a mask
+ * that is relative to the start of that slice.
+ */
+void ParallelMultiFunction::call(IndexMask mask, MFParams params, MFContext context) const
+{
+  if (mask.size() <= grain_size_ || !threading_supported_) {
+    fn_.call(mask, params, context);
+    return;
+  }
+
+  threading::parallel_for(mask.index_range(), grain_size_, [&](const IndexRange range) {
+    /* Use 64 bit integers consistently, a chunk size does not necessarily fit into an `int`. */
+    const int64_t size = range.size();
+    if (size == 0) {
+      /* Nothing to evaluate. This also avoids accessing the first and last element of an empty
+       * span below. */
+      return;
+    }
+
+    const IndexMask original_sub_mask{mask.indices().slice(range)};
+    /* Index range spanned by the first and last index of this chunk.
+     * NOTE(review): assumes that the indices in the mask are sorted in ascending order. */
+    const int64_t offset = original_sub_mask.indices().first();
+    const int64_t slice_size = original_sub_mask.indices().last() - offset + 1;
+    const IndexRange slice_range{offset, slice_size};
+
+    /* Build a mask whose indices are relative to the start of the slice. */
+    IndexMask sub_mask;
+    Vector<int64_t> sub_mask_indices;
+    if (original_sub_mask.is_range()) {
+      sub_mask = IndexMask(size);
+    }
+    else {
+      sub_mask_indices.resize(size);
+      for (const int64_t i : IndexRange(size)) {
+        sub_mask_indices[i] = original_sub_mask[i] - offset;
+      }
+      sub_mask = sub_mask_indices.as_span();
+    }
+
+    /* Owns the sliced virtual arrays. Declared before the params so that it is destructed after
+     * the params that reference those arrays. */
+    ResourceScope scope;
+    MFParamsBuilder sub_params{fn_, sub_mask.min_array_size()};
+
+    /* Forward every parameter, restricted to the slice handled by this chunk. */
+    for (const int param_index : fn_.param_indices()) {
+      const MFParamType param_type = fn_.param_type(param_index);
+      switch (param_type.category()) {
+        case MFParamType::SingleInput: {
+          const GVArray &varray = params.readonly_single_input(param_index);
+          const GVArray &sliced_varray = scope.construct<GVArray_Slice>(
+              "sliced varray", varray, slice_range);
+          sub_params.add_readonly_single_input(sliced_varray);
+          break;
+        }
+        case MFParamType::SingleMutable: {
+          const GMutableSpan span = params.single_mutable(param_index);
+          const GMutableSpan sliced_span = span.slice(slice_range.start(), slice_range.size());
+          sub_params.add_single_mutable(sliced_span);
+          break;
+        }
+        case MFParamType::SingleOutput: {
+          const GMutableSpan span = params.uninitialized_single_output(param_index);
+          const GMutableSpan sliced_span = span.slice(slice_range.start(), slice_range.size());
+          sub_params.add_uninitialized_single_output(sliced_span);
+          break;
+        }
+        case MFParamType::VectorInput:
+        case MFParamType::VectorMutable:
+        case MFParamType::VectorOutput: {
+          /* Functions with vector parameters have threading disabled in the constructor. */
+          BLI_assert_unreachable();
+          break;
+        }
+      }
+    }
+
+    fn_.call(sub_mask, sub_params, context);
+  });
+}
+
+} // namespace blender::fn
diff --git a/source/blender/functions/tests/FN_multi_function_test.cc b/source/blender/functions/tests/FN_multi_function_test.cc
index 91c72a51dd6..9deeaf8d3bd 100644
--- a/source/blender/functions/tests/FN_multi_function_test.cc
+++ b/source/blender/functions/tests/FN_multi_function_test.cc
@@ -2,8 +2,11 @@
#include "testing/testing.h"
+#include "BLI_timeit.hh"
+
#include "FN_multi_function.hh"
#include "FN_multi_function_builder.hh"
+#include "FN_multi_function_parallel.hh"
#include "FN_multi_function_test_common.hh"
namespace blender::fn::tests {
@@ -328,5 +331,29 @@ TEST(multi_function, CustomMF_Convert)
EXPECT_EQ(outputs[2], 9);
}
+/**
+ * Checks that evaluating a function through #ParallelMultiFunction gives the same results as
+ * calling the element function directly. The array is larger than the grain size so that the
+ * call is not simply forwarded, and the inputs vary per index so that a wrong slice offset
+ * results in wrong outputs.
+ */
+TEST(multi_function, Parallel)
+{
+  const auto element_fn = [](float a, float b) {
+    return std::tan(std::sin(a)) * std::tanh(std::cos(b));
+  };
+  CustomMF_SI_SI_SO<float, float, float> add_fn{"add", element_fn};
+  const int64_t grain_size = 1000;
+  ParallelMultiFunction parallel_fn{add_fn, grain_size};
+  const MultiFunction &fn_to_evaluate = parallel_fn;
+
+  /* Kept small on purpose, a unit test should neither need gigabytes of memory nor run for
+   * seconds. */
+  const int amount = 100000;
+  Array<float> inputs_a(amount);
+  Array<float> inputs_b(amount);
+  Array<float> outputs(amount, 0.0f);
+  for (const int i : IndexRange(amount)) {
+    inputs_a[i] = float(i % 100) * 0.01f;
+    inputs_b[i] = float(i % 37) * 0.1f;
+  }
+
+  MFParamsBuilder params(fn_to_evaluate, amount);
+  params.add_readonly_single_input(inputs_a.as_span());
+  params.add_readonly_single_input(inputs_b.as_span());
+  params.add_uninitialized_single_output(outputs.as_mutable_span());
+
+  MFContextBuilder context;
+  fn_to_evaluate.call(IndexRange(amount), params, context);
+
+  /* Count mismatches instead of expecting on every element to keep the failure output short. */
+  int64_t mismatches = 0;
+  for (const int i : IndexRange(amount)) {
+    const float expected = element_fn(inputs_a[i], inputs_b[i]);
+    if (std::abs(outputs[i] - expected) > 1e-5f) {
+      mismatches++;
+    }
+  }
+  EXPECT_EQ(mismatches, 0);
+}
+
} // namespace
} // namespace blender::fn::tests