progress

author: Jacques Lucke <jacques@blender.org> 2021-09-09 12:19:09 +0300
committer: Jacques Lucke <jacques@blender.org> 2021-09-09 12:19:09 +0300
commit: 2920a569b527c3543dd393a96bca2362ee04feef (patch)
tree: 329255ec6ea0335db49ed7c1ae03f92181f79ff0
parent: 068f0122213b1574ccad7ec1335969ad65bbf0d2 (diff)
6 files changed, 223 insertions, 38 deletions
diff --git a/source/blender/blenlib/BLI_virtual_array.hh b/source/blender/blenlib/BLI_virtual_array.hh
index 1c02bce8411..e99036d06a9 100644
--- a/source/blender/blenlib/BLI_virtual_array.hh
+++ b/source/blender/blenlib/BLI_virtual_array.hh
@@ -622,41 +622,20 @@ inline void devirtualize_varray2(const VArray<T1> &varray1,
                                  const Func &func,
                                  bool enable = true)
 {
-  /* Support disabling the devirtualization to simplify benchmarking. */
-  if (enable) {
-    const bool is_span1 = varray1.is_span();
-    const bool is_span2 = varray2.is_span();
-    const bool is_single1 = varray1.is_single();
-    const bool is_single2 = varray2.is_single();
-    if (is_span1 && is_span2) {
-      const VArray_For_Span<T1> varray1_span{varray1.get_internal_span()};
-      const VArray_For_Span<T2> varray2_span{varray2.get_internal_span()};
-      func(varray1_span, varray2_span);
-      return;
-    }
-    if (is_span1 && is_single2) {
-      const VArray_For_Span<T1> varray1_span{varray1.get_internal_span()};
-      const VArray_For_Single<T2> varray2_single{varray2.get_internal_single(), varray2.size()};
-      func(varray1_span, varray2_single);
-      return;
-    }
-    if (is_single1 && is_span2) {
-      const VArray_For_Single<T1> varray1_single{varray1.get_internal_single(), varray1.size()};
-      const VArray_For_Span<T2> varray2_span{varray2.get_internal_span()};
-      func(varray1_single, varray2_span);
-      return;
-    }
-    if (is_single1 && is_single2) {
-      const VArray_For_Single<T1> varray1_single{varray1.get_internal_single(), varray1.size()};
-      const VArray_For_Single<T2> varray2_single{varray2.get_internal_single(), varray2.size()};
-      func(varray1_single, varray2_single);
-      return;
-    }
-  }
-  /* This fallback is used even when one of the inputs could be optimized. It's probably not worth
-   * it to optimize just one of the inputs, because then the compiler still has to call into
-   * unknown code, which inhibits many compiler optimizations. */
-  func(varray1, varray2);
+  /* Devirtualize the arrays one after the other by nesting the single-array utility, so that
+   * `func` is called with whatever `devirtualize_varray` passes on for each of them. `enable` is
+   * forwarded unchanged to both calls. */
+  devirtualize_varray(
+      varray1,
+      [&](const auto &devirtualized_varray1) {
+        devirtualize_varray(
+            varray2,
+            [&](const auto &devirtualized_varray2) {
+              func(devirtualized_varray1, devirtualized_varray2);
+            },
+            enable);
+      },
+      enable);
 }
 
 }  // namespace blender
diff --git a/source/blender/functions/CMakeLists.txt b/source/blender/functions/CMakeLists.txt
index 3c27e9d5e19..856668f01d7 100644
--- a/source/blender/functions/CMakeLists.txt
+++ b/source/blender/functions/CMakeLists.txt
@@ -34,6 +34,7 @@ set(SRC
   intern/generic_virtual_vector_array.cc
   intern/multi_function.cc
   intern/multi_function_builder.cc
+  intern/multi_function_parallel.cc
   intern/multi_function_procedure.cc
   intern/multi_function_procedure_builder.cc
   intern/multi_function_procedure_executor.cc
@@ -54,6 +55,7 @@ set(SRC
   FN_multi_function_data_type.hh
   FN_multi_function_param_type.hh
   FN_multi_function_params.hh
+  FN_multi_function_parallel.hh
   FN_multi_function_procedure.hh
   FN_multi_function_procedure_builder.hh
   FN_multi_function_procedure_executor.hh
@@ -64,6 +66,22 @@ set(LIB
   bf_blenlib
 )
 
+if(WITH_TBB)  # Add the TBB define, include directories and libraries to this library.
+  add_definitions(-DWITH_TBB)
+  if(WIN32)
+    # TBB includes Windows.h, which defines min/max macros that collide
+    # with the STL versions; NOMINMAX is defined to avoid that.
+    add_definitions(-DNOMINMAX)
+  endif()
+  list(APPEND INC_SYS
+    ${TBB_INCLUDE_DIRS}
+  )
+
+  list(APPEND LIB
+    ${TBB_LIBRARIES}
+  )
+endif()
+
 blender_add_lib(bf_functions "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
 
 if(WITH_GTESTS)
diff --git a/source/blender/functions/FN_multi_function_parallel.hh b/source/blender/functions/FN_multi_function_parallel.hh
index b5b3e2f2f94..84c57efd434 100644
--- a/source/blender/functions/FN_multi_function_parallel.hh
+++ b/source/blender/functions/FN_multi_function_parallel.hh
@@ -20,5 +20,20 @@
  * \ingroup fn
  */
 
+#include "FN_multi_function.hh"
+
 namespace blender::fn {
-}
+/** Wraps a multi-function and evaluates large masks in chunks via `threading::parallel_for`. */
+class ParallelMultiFunction : public MultiFunction {
+ private:
+  const MultiFunction &fn_;   /* Wrapped function; stored by reference, not owned. */
+  const int64_t grain_size_;  /* Masks of at most this size are evaluated in a single call. */
+  bool threading_supported_;  /* Set to false when `fn_` has a vector parameter. */
+
+ public:
+  ParallelMultiFunction(const MultiFunction &fn, const int64_t grain_size);
+
+  void call(IndexMask mask, MFParams params, MFContext context) const override;
+};
+
+}  // namespace blender::fn
diff --git a/source/blender/functions/intern/field.cc b/source/blender/functions/intern/field.cc
index a27c5e4e3dc..7b35593ad75 100644
--- a/source/blender/functions/intern/field.cc
+++ b/source/blender/functions/intern/field.cc
@@ -18,9 +18,11 @@
 #include "BLI_multi_value_map.hh"
 #include "BLI_set.hh"
 #include "BLI_stack.hh"
+#include "BLI_timeit.hh"
 #include "BLI_vector_set.hh"
 
 #include "FN_field.hh"
+#include "FN_multi_function_parallel.hh"
 
 namespace blender::fn {
 
@@ -271,6 +273,8 @@ Vector<const GVArray *> evaluate_fields(ResourceScope &scope,
                                         const FieldContext &context,
                                         Span<GVMutableArray *> dst_hints)
 {
+  SCOPED_TIMER(__func__);
+
   Vector<const GVArray *> r_varrays(fields_to_evaluate.size(), nullptr);
 
   /* Destination hints are optional. Create a small utility method to access them. */
@@ -334,7 +338,10 @@ Vector<const GVArray *> evaluate_fields(ResourceScope &scope,
     build_multi_function_procedure_for_fields(
         procedure, scope, field_tree_info, varying_fields_to_evaluate);
     MFProcedureExecutor procedure_executor{"Procedure", procedure};
-    MFParamsBuilder mf_params{procedure_executor, array_size};
+    fn::ParallelMultiFunction parallel_fn{procedure_executor, 20000};
+    const MultiFunction &fn_to_execute = procedure_executor;
+
+    MFParamsBuilder mf_params{fn_to_execute, array_size};
     MFContextBuilder mf_context;
 
     /* Provide inputs to the procedure executor. */
@@ -376,7 +383,7 @@ Vector<const GVArray *> evaluate_fields(ResourceScope &scope,
       mf_params.add_uninitialized_single_output(span);
     }
 
-    procedure_executor.call(mask, mf_params, mf_context);
+    fn_to_execute.call(mask, mf_params, mf_context);
   }
 
   /* Evaluate constant fields if necessary. */
diff --git a/source/blender/functions/intern/multi_function_parallel.cc b/source/blender/functions/intern/multi_function_parallel.cc
new file mode 100644
index 00000000000..6843c4a233b
--- /dev/null
+++ b/source/blender/functions/intern/multi_function_parallel.cc
@@ -0,0 +1,109 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "FN_multi_function_parallel.hh"
+
+#include "BLI_task.hh"
+
+#include <mutex>
+
+namespace blender::fn {
+
+ParallelMultiFunction::ParallelMultiFunction(const MultiFunction &fn, const int64_t grain_size)
+    : fn_(fn), grain_size_(grain_size), threading_supported_(true)
+{
+  this->set_signature(&fn.signature());
+
+  /* Only single-value parameters can be sliced, so a vector parameter disables threading. */
+  for (const int index : fn.param_indices()) {
+    if (fn.param_type(index).data_type().category() != MFDataType::Vector) {
+      continue;
+    }
+    threading_supported_ = false;
+    break;
+  }
+}
+
+/* Evaluate the wrapped function on `mask`. Small masks and functions with vector parameters
+ * are forwarded in a single call. Otherwise every range handed out by
+ * `threading::parallel_for` is evaluated separately on sliced parameters. */
+void ParallelMultiFunction::call(IndexMask mask, MFParams params, MFContext context) const
+{
+  if (mask.size() <= grain_size_ || !threading_supported_) {
+    fn_.call(mask, params, context);
+    return;
+  }
+
+  threading::parallel_for(mask.index_range(), grain_size_, [&](const IndexRange range) {
+    const int64_t size = range.size();
+    IndexMask original_sub_mask{mask.indices().slice(range)};
+    /* Parameters are sliced to the index span covered by this chunk below, so the mask for the
+     * sub-call is expressed relative to `offset`. */
+    const int64_t offset = original_sub_mask.indices().first();
+    const int64_t slice_size = original_sub_mask.indices().last() - offset + 1;
+    const IndexRange slice_range{offset, slice_size};
+    IndexMask sub_mask;
+    Vector<int64_t> sub_mask_indices;
+    if (original_sub_mask.is_range()) {
+      sub_mask = IndexMask(size);
+    }
+    else {
+      sub_mask_indices.resize(size);
+      for (const int64_t i : IndexRange(size)) {
+        sub_mask_indices[i] = original_sub_mask[i] - offset;
+      }
+      sub_mask = sub_mask_indices.as_span();
+    }
+
+    MFParamsBuilder sub_params{fn_, sub_mask.min_array_size()};
+    ResourceScope scope;
+
+    for (const int param_index : fn_.param_indices()) {
+      const MFParamType param_type = fn_.param_type(param_index);
+      switch (param_type.category()) {
+        case MFParamType::SingleInput: {
+          const GVArray &varray = params.readonly_single_input(param_index);
+          const GVArray &sliced_varray = scope.construct<GVArray_Slice>(
+              "sliced varray", varray, slice_range);
+          sub_params.add_readonly_single_input(sliced_varray);
+          break;
+        }
+        case MFParamType::SingleMutable: {
+          const GMutableSpan span = params.single_mutable(param_index);
+          const GMutableSpan sliced_span = span.slice(slice_range.start(), slice_range.size());
+          sub_params.add_single_mutable(sliced_span);
+          break;
+        }
+        case MFParamType::SingleOutput: {
+          const GMutableSpan span = params.uninitialized_single_output(param_index);
+          const GMutableSpan sliced_span = span.slice(slice_range.start(), slice_range.size());
+          sub_params.add_uninitialized_single_output(sliced_span);
+          break;
+        }
+        case MFParamType::VectorInput:
+        case MFParamType::VectorMutable:
+        case MFParamType::VectorOutput: {
+          BLI_assert_unreachable();
+          break;
+        }
+      }
+    }
+
+    fn_.call(sub_mask, sub_params, context);
+  });
+}
+
+}  // namespace blender::fn
diff --git a/source/blender/functions/tests/FN_multi_function_test.cc b/source/blender/functions/tests/FN_multi_function_test.cc
index 91c72a51dd6..9deeaf8d3bd 100644
--- a/source/blender/functions/tests/FN_multi_function_test.cc
+++ b/source/blender/functions/tests/FN_multi_function_test.cc
@@ -2,8 +2,11 @@
 
 #include "testing/testing.h"
 
+#include "BLI_timeit.hh"
+
 #include "FN_multi_function.hh"
 #include "FN_multi_function_builder.hh"
+#include "FN_multi_function_parallel.hh"
 #include "FN_multi_function_test_common.hh"
 
 namespace blender::fn::tests {
@@ -328,5 +331,29 @@ TEST(multi_function, CustomMF_Convert)
   EXPECT_EQ(outputs[2], 9);
 }
 
+TEST(multi_function, Parallel)
+{
+  /* Exceeds the grain size to reach the threaded path; per-index inputs catch offset bugs. */
+  CustomMF_SI_SI_SO<float, float, float> add_fn{"add", [](float a, float b) { return a + b; }};
+  ParallelMultiFunction parallel_fn{add_fn, 1000};
+  const int amount = 10000;
+  Array<float> inputs(amount);
+  Array<float> outputs(amount, 0.0f);
+  for (const int i : IndexRange(amount)) {
+    inputs[i] = float(i);
+  }
+
+  MFParamsBuilder params(parallel_fn, amount);
+  params.add_readonly_single_input(inputs.as_span());
+  params.add_readonly_single_input(inputs.as_span());
+  params.add_uninitialized_single_output(outputs.as_mutable_span());
+  MFContextBuilder context;
+  parallel_fn.call(IndexRange(amount), params, context);
+  for (const int i : IndexRange(amount)) {
+    EXPECT_EQ(outputs[i], 2.0f * float(i));
+  }
+}
+
+
 }  // namespace
 }  // namespace blender::fn::tests
author	Jacques Lucke <jacques@blender.org>	2021-09-09 12:19:09 +0300
committer	Jacques Lucke <jacques@blender.org>	2021-09-09 12:19:09 +0300
commit	2920a569b527c3543dd393a96bca2362ee04feef (patch)
tree	329255ec6ea0335db49ed7c1ae03f92181f79ff0
parent	068f0122213b1574ccad7ec1335969ad65bbf0d2 (diff)