From 2920a569b527c3543dd393a96bca2362ee04feef Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 9 Sep 2021 11:19:09 +0200 Subject: progress --- source/blender/blenlib/BLI_virtual_array.hh | 79 ++++++++------- source/blender/functions/CMakeLists.txt | 18 ++++ .../functions/FN_multi_function_parallel.hh | 17 +++- source/blender/functions/intern/field.cc | 11 ++- .../functions/intern/multi_function_parallel.cc | 109 +++++++++++++++++++++ .../functions/tests/FN_multi_function_test.cc | 27 +++++ 6 files changed, 223 insertions(+), 38 deletions(-) create mode 100644 source/blender/functions/intern/multi_function_parallel.cc diff --git a/source/blender/blenlib/BLI_virtual_array.hh b/source/blender/blenlib/BLI_virtual_array.hh index 1c02bce8411..e99036d06a9 100644 --- a/source/blender/blenlib/BLI_virtual_array.hh +++ b/source/blender/blenlib/BLI_virtual_array.hh @@ -622,41 +622,50 @@ inline void devirtualize_varray2(const VArray &varray1, const Func &func, bool enable = true) { - /* Support disabling the devirtualization to simplify benchmarking. */ - if (enable) { - const bool is_span1 = varray1.is_span(); - const bool is_span2 = varray2.is_span(); - const bool is_single1 = varray1.is_single(); - const bool is_single2 = varray2.is_single(); - if (is_span1 && is_span2) { - const VArray_For_Span varray1_span{varray1.get_internal_span()}; - const VArray_For_Span varray2_span{varray2.get_internal_span()}; - func(varray1_span, varray2_span); - return; - } - if (is_span1 && is_single2) { - const VArray_For_Span varray1_span{varray1.get_internal_span()}; - const VArray_For_Single varray2_single{varray2.get_internal_single(), varray2.size()}; - func(varray1_span, varray2_single); - return; - } - if (is_single1 && is_span2) { - const VArray_For_Single varray1_single{varray1.get_internal_single(), varray1.size()}; - const VArray_For_Span varray2_span{varray2.get_internal_span()}; - func(varray1_single, varray2_span); - return; - } - if (is_single1 && is_single2) { - const VArray_For_Single varray1_single{varray1.get_internal_single(), varray1.size()}; - const VArray_For_Single varray2_single{varray2.get_internal_single(), varray2.size()}; - func(varray1_single, varray2_single); - return; - } - } - /* This fallback is used even when one of the inputs could be optimized. It's probably not worth - * it to optimize just one of the inputs, because then the compiler still has to call into - * unknown code, which inhibits many compiler optimizations. */ - func(varray1, varray2); + devirtualize_varray( + varray1, + [&](const auto &varray1) { + devirtualize_varray( + varray2, [&](const auto &varray2) { func(varray1, varray2); }, enable); + }, + enable); + + // /* Support disabling the devirtualization to simplify benchmarking. */ + // if (enable) { + // const bool is_span1 = varray1.is_span(); + // const bool is_span2 = varray2.is_span(); + // const bool is_single1 = varray1.is_single(); + // const bool is_single2 = varray2.is_single(); + // if (is_span1 && is_span2) { + // const VArray_For_Span varray1_span{varray1.get_internal_span()}; + // const VArray_For_Span varray2_span{varray2.get_internal_span()}; + // func(varray1_span, varray2_span); + // return; + // } + // if (is_span1 && is_single2) { + // const VArray_For_Span varray1_span{varray1.get_internal_span()}; + // const VArray_For_Single varray2_single{varray2.get_internal_single(), varray2.size()}; + // func(varray1_span, varray2_single); + // return; + // } + // if (is_single1 && is_span2) { + // const VArray_For_Single varray1_single{varray1.get_internal_single(), varray1.size()}; + // const VArray_For_Span varray2_span{varray2.get_internal_span()}; + // func(varray1_single, varray2_span); + // return; + // } + // if (is_single1 && is_single2) { + // const VArray_For_Single varray1_single{varray1.get_internal_single(), varray1.size()}; + // const VArray_For_Single varray2_single{varray2.get_internal_single(), varray2.size()}; + // func(varray1_single, varray2_single); + // return; + // } + // } + // /* This fallback is used even when one of the inputs could be optimized. It's probably not + // worth + // * it to optimize just one of the inputs, because then the compiler still has to call into + // * unknown code, which inhibits many compiler optimizations. */ + // func(varray1, varray2); } } // namespace blender diff --git a/source/blender/functions/CMakeLists.txt b/source/blender/functions/CMakeLists.txt index 3c27e9d5e19..856668f01d7 100644 --- a/source/blender/functions/CMakeLists.txt +++ b/source/blender/functions/CMakeLists.txt @@ -34,6 +34,7 @@ set(SRC intern/generic_virtual_vector_array.cc intern/multi_function.cc intern/multi_function_builder.cc + intern/multi_function_parallel.cc intern/multi_function_procedure.cc intern/multi_function_procedure_builder.cc intern/multi_function_procedure_executor.cc @@ -54,6 +55,7 @@ set(SRC FN_multi_function_data_type.hh FN_multi_function_param_type.hh FN_multi_function_params.hh + FN_multi_function_parallel.hh FN_multi_function_procedure.hh FN_multi_function_procedure_builder.hh FN_multi_function_procedure_executor.hh @@ -64,6 +66,22 @@ set(LIB bf_blenlib ) +if(WITH_TBB) + add_definitions(-DWITH_TBB) + if(WIN32) + # TBB includes Windows.h which will define min/max macros + # that will collide with the stl versions. + add_definitions(-DNOMINMAX) + endif() + list(APPEND INC_SYS + ${TBB_INCLUDE_DIRS} + ) + + list(APPEND LIB + ${TBB_LIBRARIES} + ) +endif() + blender_add_lib(bf_functions "${SRC}" "${INC}" "${INC_SYS}" "${LIB}") if(WITH_GTESTS) diff --git a/source/blender/functions/FN_multi_function_parallel.hh b/source/blender/functions/FN_multi_function_parallel.hh index b5b3e2f2f94..84c57efd434 100644 --- a/source/blender/functions/FN_multi_function_parallel.hh +++ b/source/blender/functions/FN_multi_function_parallel.hh @@ -20,5 +20,20 @@ * \ingroup fn */ +#include "FN_multi_function.hh" + namespace blender::fn { -} + +class ParallelMultiFunction : public MultiFunction { + private: + const MultiFunction &fn_; + const int64_t grain_size_; + bool threading_supported_; + + public: + ParallelMultiFunction(const MultiFunction &fn, const int64_t grain_size); + + void call(IndexMask mask, MFParams params, MFContext context) const override; +}; + +} // namespace blender::fn diff --git a/source/blender/functions/intern/field.cc b/source/blender/functions/intern/field.cc index a27c5e4e3dc..7b35593ad75 100644 --- a/source/blender/functions/intern/field.cc +++ b/source/blender/functions/intern/field.cc @@ -18,9 +18,11 @@ #include "BLI_multi_value_map.hh" #include "BLI_set.hh" #include "BLI_stack.hh" +#include "BLI_timeit.hh" #include "BLI_vector_set.hh" #include "FN_field.hh" +#include "FN_multi_function_parallel.hh" namespace blender::fn { @@ -271,6 +273,8 @@ Vector evaluate_fields(ResourceScope &scope, const FieldContext &context, Span dst_hints) { + SCOPED_TIMER(__func__); + Vector r_varrays(fields_to_evaluate.size(), nullptr); /* Destination hints are optional. Create a small utility method to access them. */ @@ -334,7 +338,10 @@ Vector evaluate_fields(ResourceScope &scope, build_multi_function_procedure_for_fields( procedure, scope, field_tree_info, varying_fields_to_evaluate); MFProcedureExecutor procedure_executor{"Procedure", procedure}; - MFParamsBuilder mf_params{procedure_executor, array_size}; + fn::ParallelMultiFunction parallel_fn{procedure_executor, 20000}; + const MultiFunction &fn_to_execute = procedure_executor; + + MFParamsBuilder mf_params{fn_to_execute, array_size}; MFContextBuilder mf_context; /* Provide inputs to the procedure executor. */ @@ -376,7 +383,7 @@ Vector evaluate_fields(ResourceScope &scope, mf_params.add_uninitialized_single_output(span); } - procedure_executor.call(mask, mf_params, mf_context); + fn_to_execute.call(mask, mf_params, mf_context); } /* Evaluate constant fields if necessary. */ diff --git a/source/blender/functions/intern/multi_function_parallel.cc b/source/blender/functions/intern/multi_function_parallel.cc new file mode 100644 index 00000000000..6843c4a233b --- /dev/null +++ b/source/blender/functions/intern/multi_function_parallel.cc @@ -0,0 +1,109 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include "FN_multi_function_parallel.hh" + +#include "BLI_task.hh" + +#include + +namespace blender::fn { + +ParallelMultiFunction::ParallelMultiFunction(const MultiFunction &fn, const int64_t grain_size) + : fn_(fn), grain_size_(grain_size) +{ + this->set_signature(&fn.signature()); + + threading_supported_ = true; + for (const int param_index : fn.param_indices()) { + const MFParamType param_type = fn.param_type(param_index); + if (param_type.data_type().category() == MFDataType::Vector) { + threading_supported_ = false; + break; + } + } +} + +void ParallelMultiFunction::call(IndexMask mask, MFParams params, MFContext context) const +{ + if (mask.size() <= grain_size_ || !threading_supported_) { + fn_.call(mask, params, context); + return; + } + + threading::parallel_for(mask.index_range(), grain_size_, [&](const IndexRange range) { + const int size = range.size(); + IndexMask original_sub_mask{mask.indices().slice(range)}; + const int64_t offset = original_sub_mask.indices().first(); + const int64_t slice_size = original_sub_mask.indices().last() - offset + 1; + const IndexRange slice_range{offset, slice_size}; + IndexMask sub_mask; + Vector sub_mask_indices; + if (original_sub_mask.is_range()) { + sub_mask = IndexMask(size); + } + else { + sub_mask_indices.resize(size); + for (const int i : IndexRange(size)) { + sub_mask_indices[i] = original_sub_mask[i] - offset; + } + sub_mask = sub_mask_indices.as_span(); + } + + MFParamsBuilder sub_params{fn_, sub_mask.min_array_size()}; + ResourceScope scope; + // static std::mutex mutex; + // { + // std::lock_guard lock{mutex}; + // std::cout << range << " " << sub_mask.min_array_size() << "\n"; + // } + + for (const int param_index : fn_.param_indices()) { + const MFParamType param_type = fn_.param_type(param_index); + switch (param_type.category()) { + case MFParamType::SingleInput: { + const GVArray &varray = params.readonly_single_input(param_index); + const GVArray &sliced_varray = scope.construct( + "sliced varray", varray, slice_range); + sub_params.add_readonly_single_input(sliced_varray); + break; + } + case MFParamType::SingleMutable: { + const GMutableSpan span = params.single_mutable(param_index); + const GMutableSpan sliced_span = span.slice(slice_range.start(), slice_range.size()); + sub_params.add_single_mutable(sliced_span); + break; + } + case MFParamType::SingleOutput: { + const GMutableSpan span = params.uninitialized_single_output(param_index); + const GMutableSpan sliced_span = span.slice(slice_range.start(), slice_range.size()); + sub_params.add_uninitialized_single_output(sliced_span); + break; + } + case MFParamType::VectorInput: + case MFParamType::VectorMutable: + case MFParamType::VectorOutput: { + BLI_assert_unreachable(); + break; + } + } + } + + fn_.call(sub_mask, sub_params, context); + }); +} + +} // namespace blender::fn diff --git a/source/blender/functions/tests/FN_multi_function_test.cc b/source/blender/functions/tests/FN_multi_function_test.cc index 91c72a51dd6..9deeaf8d3bd 100644 --- a/source/blender/functions/tests/FN_multi_function_test.cc +++ b/source/blender/functions/tests/FN_multi_function_test.cc @@ -2,8 +2,11 @@ #include "testing/testing.h" +#include "BLI_timeit.hh" + #include "FN_multi_function.hh" #include "FN_multi_function_builder.hh" +#include "FN_multi_function_parallel.hh" #include "FN_multi_function_test_common.hh" namespace blender::fn::tests { @@ -328,5 +331,29 @@ TEST(multi_function, CustomMF_Convert) EXPECT_EQ(outputs[2], 9); } +TEST(multi_function, Parallel) +{ + CustomMF_SI_SI_SO add_fn{ + "add", [](float a, float b) { return std::tan(std::sin(a)) * std::tanh(std::cos(b)); }}; + ParallelMultiFunction parallel_fn{add_fn, int64_t(1e5)}; + const MultiFunction &fn_to_evaluate = parallel_fn; + + const int amount = 1e8; + Array inputs_a(amount, 1); + Array inputs_b(amount, 1); + Array outputs(amount, 1); + + for (int i = 0; i < 10; i++) { + SCOPED_TIMER(__func__); + MFParamsBuilder params(fn_to_evaluate, amount); + params.add_readonly_single_input(inputs_a.as_span()); + params.add_readonly_single_input(inputs_b.as_span()); + params.add_uninitialized_single_output(outputs.as_mutable_span()); + + MFContextBuilder context; + fn_to_evaluate.call(IndexRange(amount), params, context); + } +} + } // namespace } // namespace blender::fn::tests -- cgit v1.2.3