diff options
Diffstat (limited to 'source/blender/blenlib/intern/index_mask.cc')
-rw-r--r-- | source/blender/blenlib/intern/index_mask.cc | 68 +
1 file changed, 68 insertions(+), 0 deletions(-)
diff --git a/source/blender/blenlib/intern/index_mask.cc b/source/blender/blenlib/intern/index_mask.cc index 8c03df2d4c3..1e301bc5fb9 100644 --- a/source/blender/blenlib/intern/index_mask.cc +++ b/source/blender/blenlib/intern/index_mask.cc @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ #include "BLI_index_mask.hh" +#include "BLI_index_mask_ops.hh" namespace blender { @@ -126,3 +127,70 @@ Vector<IndexRange> IndexMask::extract_ranges_invert(const IndexRange full_range, } } // namespace blender + +namespace blender::index_mask_ops::detail { + +IndexMask find_indices_based_on_predicate__merge( + IndexMask indices_to_check, + threading::EnumerableThreadSpecific<Vector<Vector<int64_t>>> &sub_masks, + Vector<int64_t> &r_indices) +{ + /* Gather vectors that have been generated by possibly multiple threads. */ + Vector<Vector<int64_t> *> all_vectors; + int64_t result_mask_size = 0; + for (Vector<Vector<int64_t>> &local_sub_masks : sub_masks) { + for (Vector<int64_t> &sub_mask : local_sub_masks) { + all_vectors.append(&sub_mask); + result_mask_size += sub_mask.size(); + } + } + + if (all_vectors.is_empty()) { + /* Special case when the predicate was false for all elements. */ + return {}; + } + if (result_mask_size == indices_to_check.size()) { + /* Special case when the predicate was true for all elements. */ + return indices_to_check; + } + if (all_vectors.size() == 1) { + /* Special case when all indices for which the predicate is true happen to be in a single + * vector. */ + r_indices = std::move(*all_vectors[0]); + return r_indices.as_span(); + } + + /* Indices in separate vectors don't overlap. So it is ok to sort the vectors just by looking at + * the first element. */ + std::sort(all_vectors.begin(), + all_vectors.end(), + [](const Vector<int64_t> *a, const Vector<int64_t> *b) { return (*a)[0] < (*b)[0]; }); + + /* Precompute the offsets for the individual vectors, so that the indices can be copied into the + * final vector in parallel. 
*/ + Vector<int64_t> offsets; + offsets.reserve(all_vectors.size() + 1); + offsets.append(0); + for (Vector<int64_t> *vector : all_vectors) { + offsets.append(offsets.last() + vector->size()); + } + + r_indices.resize(result_mask_size); + + /* Fill the final index mask in parallel again. */ + threading::parallel_for(all_vectors.index_range(), 100, [&](const IndexRange all_vectors_range) { + for (const int64_t vector_index : all_vectors_range) { + Vector<int64_t> &vector = *all_vectors[vector_index]; + const int64_t offset = offsets[vector_index]; + threading::parallel_for(vector.index_range(), 1024, [&](const IndexRange range) { + initialized_copy_n(vector.data() + range.start(), + range.size(), + r_indices.data() + offset + range.start()); + }); + } + }); + + return r_indices.as_span(); +} + +} // namespace blender::index_mask_ops::detail |