5 files changed, 144 insertions, 0 deletions
diff --git a/source/blender/blenlib/BLI_index_mask_ops.hh b/source/blender/blenlib/BLI_index_mask_ops.hh
new file mode 100644
index 00000000000..48a1f27a2fa
--- /dev/null
+++ b/source/blender/blenlib/BLI_index_mask_ops.hh
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+/** \file
+ * \ingroup bli
+ *
+ * This is separate from `BLI_index_mask.hh` because it includes headers just `IndexMask` shouldn't
+ * depend on.
+ */
+
+#include "BLI_enumerable_thread_specific.hh"
+#include "BLI_index_mask.hh"
+#include "BLI_task.hh"
+#include "BLI_vector.hh"
+
+namespace blender::index_mask_ops {
+
+namespace detail {
+IndexMask find_indices_based_on_predicate__merge(
+    IndexMask indices_to_check,
+    threading::EnumerableThreadSpecific<Vector<Vector<int64_t>>> &sub_masks,
+    Vector<int64_t> &r_indices);
+}  // namespace detail
+
+/**
+ * Evaluate the #predicate for all indices in #indices_to_check and return a mask that contains all
+ * indices where the predicate was true.
+ *
+ * #r_indices indices is only used if necessary.
+ */
+template<typename Predicate>
+inline IndexMask find_indices_based_on_predicate(const IndexMask indices_to_check,
+                                                 const int64_t parallel_grain_size,
+                                                 Vector<int64_t> &r_indices,
+                                                 const Predicate &predicate)
+{
+  /* Evaluate predicate in parallel. Since the size of the final mask is not known yet, many
+   * smaller vectors have to be filled with all indices where the predicate is true. Those smaller
+   * vectors are joined afterwards. */
+  threading::EnumerableThreadSpecific<Vector<Vector<int64_t>>> sub_masks;
+  threading::parallel_for(
+      indices_to_check.index_range(), parallel_grain_size, [&](const IndexRange range) {
+        const IndexMask sub_mask = indices_to_check.slice(range);
+        Vector<int64_t> masked_indices;
+        for (const int64_t i : sub_mask) {
+          if (predicate(i)) {
+            masked_indices.append(i);
+          }
+        }
+        if (!masked_indices.is_empty()) {
+          sub_masks.local().append(std::move(masked_indices));
+        }
+      });
+
+  /* This part doesn't have to be in the header. */
+  return detail::find_indices_based_on_predicate__merge(indices_to_check, sub_masks, r_indices);
+}
+
+}  // namespace blender::index_mask_ops
diff --git a/source/blender/blenlib/BLI_math_geom.h b/source/blender/blenlib/BLI_math_geom.h
index 3d2ac5688ff..4bba84f2e29 100644
--- a/source/blender/blenlib/BLI_math_geom.h
+++ b/source/blender/blenlib/BLI_math_geom.h
@@ -303,6 +303,9 @@ float dist_squared_to_projected_aabb_simple(const float projmat[4][4],
                                             const float bbmin[3],
                                             const float bbmax[3]);
 
+/** Returns the distance between two 2D line segments. */
+float dist_seg_seg_v2(const float a1[3], const float a2[3], const float b1[3], const float b2[3]);
+
 float closest_to_ray_v3(float r_close[3],
                         const float p[3],
                         const float ray_orig[3],
diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt
index ca22315b2ed..6e3e84f6495 100644
--- a/source/blender/blenlib/CMakeLists.txt
+++ b/source/blender/blenlib/CMakeLists.txt
@@ -203,6 +203,7 @@ set(SRC
   BLI_heap.h
   BLI_heap_simple.h
   BLI_index_mask.hh
+  BLI_index_mask_ops.hh
   BLI_index_range.hh
   BLI_inplace_priority_queue.hh
   BLI_iterator.h
diff --git a/source/blender/blenlib/intern/index_mask.cc b/source/blender/blenlib/intern/index_mask.cc
index 8c03df2d4c3..1e301bc5fb9 100644
--- a/source/blender/blenlib/intern/index_mask.cc
+++ b/source/blender/blenlib/intern/index_mask.cc
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 
 #include "BLI_index_mask.hh"
+#include "BLI_index_mask_ops.hh"
 
 namespace blender {
 
@@ -126,3 +127,70 @@ Vector<IndexRange> IndexMask::extract_ranges_invert(const IndexRange full_range,
 }
 
 }  // namespace blender
+
+namespace blender::index_mask_ops::detail {
+
+IndexMask find_indices_based_on_predicate__merge(
+    IndexMask indices_to_check,
+    threading::EnumerableThreadSpecific<Vector<Vector<int64_t>>> &sub_masks,
+    Vector<int64_t> &r_indices)
+{
+  /* Gather vectors that have been generated by possibly multiple threads. */
+  Vector<Vector<int64_t> *> all_vectors;
+  int64_t result_mask_size = 0;
+  for (Vector<Vector<int64_t>> &local_sub_masks : sub_masks) {
+    for (Vector<int64_t> &sub_mask : local_sub_masks) {
+      all_vectors.append(&sub_mask);
+      result_mask_size += sub_mask.size();
+    }
+  }
+
+  if (all_vectors.is_empty()) {
+    /* Special case when the predicate was false for all elements. */
+    return {};
+  }
+  if (result_mask_size == indices_to_check.size()) {
+    /* Special case when the predicate was true for all elements. */
+    return indices_to_check;
+  }
+  if (all_vectors.size() == 1) {
+    /* Special case when all indices for which the predicate is true happen to be in a single
+     * vector. */
+    r_indices = std::move(*all_vectors[0]);
+    return r_indices.as_span();
+  }
+
+  /* Indices in separate vectors don't overlap. So it is ok to sort the vectors just by looking at
+   * the first element. */
+  std::sort(all_vectors.begin(),
+            all_vectors.end(),
+            [](const Vector<int64_t> *a, const Vector<int64_t> *b) { return (*a)[0] < (*b)[0]; });
+
+  /* Precompute the offsets for the individual vectors, so that the indices can be copied into the
+   * final vector in parallel. */
+  Vector<int64_t> offsets;
+  offsets.reserve(all_vectors.size() + 1);
+  offsets.append(0);
+  for (Vector<int64_t> *vector : all_vectors) {
+    offsets.append(offsets.last() + vector->size());
+  }
+
+  r_indices.resize(result_mask_size);
+
+  /* Fill the final index mask in parallel again. */
+  threading::parallel_for(all_vectors.index_range(), 100, [&](const IndexRange all_vectors_range) {
+    for (const int64_t vector_index : all_vectors_range) {
+      Vector<int64_t> &vector = *all_vectors[vector_index];
+      const int64_t offset = offsets[vector_index];
+      threading::parallel_for(vector.index_range(), 1024, [&](const IndexRange range) {
+        initialized_copy_n(vector.data() + range.start(),
+                           range.size(),
+                           r_indices.data() + offset + range.start());
+      });
+    }
+  });
+
+  return r_indices.as_span();
+}
+
+}  // namespace blender::index_mask_ops::detail
diff --git a/source/blender/blenlib/intern/math_geom.c b/source/blender/blenlib/intern/math_geom.c
index f96c80185b1..bc3ed099fd5 100644
--- a/source/blender/blenlib/intern/math_geom.c
+++ b/source/blender/blenlib/intern/math_geom.c
@@ -903,6 +903,18 @@ float dist_squared_to_projected_aabb_simple(const float projmat[4][4],
 
 /** \} */
 
+float dist_seg_seg_v2(const float a1[3], const float a2[3], const float b1[3], const float b2[3])
+{
+  if (isect_seg_seg_v2_simple(a1, a2, b1, b2)) {
+    return 0.0f;
+  }
+  const float d1 = dist_squared_to_line_segment_v2(a1, b1, b2);
+  const float d2 = dist_squared_to_line_segment_v2(a2, b1, b2);
+  const float d3 = dist_squared_to_line_segment_v2(b1, a1, a2);
+  const float d4 = dist_squared_to_line_segment_v2(b2, a1, a2);
+  return sqrtf(min_ffff(d1, d2, d3, d4));
+}
+
 void closest_on_tri_to_point_v3(
     float r[3], const float p[3], const float v1[3], const float v2[3], const float v3[3])
 {