Various Exact Boolean parallelizations and optimizations.

From patch D11780 from Erik Abrahamsson. It parallelizes making the vertices, destruction of map entries, finding if the result is PWN, finding triangle adjacencies, and finding the ambient cell. The latter needs a parallel_reduce from tbb, so added one into BLI_task.hh so that if WITH_TBB is false, the code will still work. On Erik's 6-core machine, the elapsed time went from 17.5s to 11.8s (33% faster) on an intersection of two spheres with 3.1M faces. On Howard's 24-core machine, the elapsed time went from 18.7s to 10.8s for the same test.
author: Erik Abrahamsson <ecke101@gmail.com> 2021-07-06 01:09:36 +0300
committer: Howard Trickey <howard.trickey@gmail.com> 2021-07-06 01:09:36 +0300
commit: ceff86aafe46a6fb66e023500f5a47260964b0a2 (patch)
tree: a34e6772226719f0f22806f475cdb07bfbf458c7 /source/blender/blenlib/intern/mesh_intersect.cc
parent: cf17f7e0cc6efb6f14a271e37d2ea1b3f10bb66d (diff)
1 files changed, 96 insertions, 14 deletions
diff --git a/source/blender/blenlib/intern/mesh_intersect.cc b/source/blender/blenlib/intern/mesh_intersect.cc
index c8028231e81..f91dd762e70 100644
--- a/source/blender/blenlib/intern/mesh_intersect.cc
+++ b/source/blender/blenlib/intern/mesh_intersect.cc
@@ -43,6 +43,7 @@
 #  include "BLI_set.hh"
 #  include "BLI_span.hh"
 #  include "BLI_task.h"
+#  include "BLI_task.hh"
 #  include "BLI_threads.h"
 #  include "BLI_vector.hh"
 #  include "BLI_vector_set.hh"
@@ -51,6 +52,10 @@
 
 #  include "BLI_mesh_intersect.hh"
 
+#  ifdef WITH_TBB
+#    include "tbb/parallel_sort.h"
+#  endif
+
 // #  define PERFDEBUG
 
 namespace blender::meshintersect {
@@ -406,6 +411,11 @@ class IMeshArena::IMeshArenaImpl : NonCopyable, NonMovable {
     return add_or_find_vert(mco, co, orig);
   }
 
+  const Vert *add_or_find_vert(Vert *vert)
+  {
+    return add_or_find_vert_(vert);
+  }
+
   Face *add_face(Span<const Vert *> verts, int orig, Span<int> edge_origs, Span<bool> is_intersect)
   {
     Face *f = new Face(verts, next_face_id_++, orig, edge_origs, is_intersect);
@@ -486,10 +496,48 @@ class IMeshArena::IMeshArenaImpl : NonCopyable, NonMovable {
  private:
   const Vert *add_or_find_vert(const mpq3 &mco, const double3 &dco, int orig)
   {
-    /* Don't allocate Vert yet, in case it is already there. */
-    Vert vtry(mco, dco, NO_INDEX, NO_INDEX);
+    Vert *vtry = new Vert(mco, dco, NO_INDEX, NO_INDEX);
     const Vert *ans;
-    VSetKey vskey(&vtry);
+    VSetKey vskey(vtry);
+    if (intersect_use_threading) {
+#  ifdef USE_SPINLOCK
+      BLI_spin_lock(&lock_);
+#  else
+      BLI_mutex_lock(mutex_);
+#  endif
+    }
+    const VSetKey *lookup = vset_.lookup_key_ptr(vskey);
+    if (!lookup) {
+      vtry->id = next_vert_id_++;
+      vtry->orig = orig;
+      vskey.vert = vtry;  // new Vert(mco, dco, next_vert_id_++, orig);
+      vset_.add_new(vskey);
+      allocated_verts_.append(std::unique_ptr<Vert>(vskey.vert));
+      ans = vskey.vert;
+    }
+    else {
+      /* It was a duplicate, so return the existing one.
+       * Note that the returned Vert may have a different orig.
+       * This is the intended semantics: if the Vert already
+       * exists then we are merging verts and using the first-seen
+       * one as the canonical one. */
+      delete vtry;
+      ans = lookup->vert;
+    }
+    if (intersect_use_threading) {
+#  ifdef USE_SPINLOCK
+      BLI_spin_unlock(&lock_);
+#  else
+      BLI_mutex_unlock(mutex_);
+#  endif
+    }
+    return ans;
+  };
+
+  const Vert *add_or_find_vert_(Vert *vtry)
+  {
+    const Vert *ans;
+    VSetKey vskey(vtry);
     if (intersect_use_threading) {
 #  ifdef USE_SPINLOCK
       BLI_spin_lock(&lock_);
@@ -499,7 +547,8 @@ class IMeshArena::IMeshArenaImpl : NonCopyable, NonMovable {
     }
     const VSetKey *lookup = vset_.lookup_key_ptr(vskey);
     if (!lookup) {
-      vskey.vert = new Vert(mco, dco, next_vert_id_++, orig);
+      vtry->id = next_vert_id_++;
+      vskey.vert = vtry;  // new Vert(mco, dco, next_vert_id_++, orig);
       vset_.add_new(vskey);
       allocated_verts_.append(std::unique_ptr<Vert>(vskey.vert));
       ans = vskey.vert;
@@ -510,6 +559,7 @@ class IMeshArena::IMeshArenaImpl : NonCopyable, NonMovable {
        * This is the intended semantics: if the Vert already
        * exists then we are merging verts and using the first-seen
        * one as the canonical one. */
+      delete vtry;
       ans = lookup->vert;
     }
     if (intersect_use_threading) {
@@ -550,6 +600,11 @@ const Vert *IMeshArena::add_or_find_vert(const mpq3 &co, int orig)
   return pimpl_->add_or_find_vert(co, orig);
 }
 
+const Vert *IMeshArena::add_or_find_vert(Vert *vert)
+{
+  return pimpl_->add_or_find_vert(vert);
+}
+
 Face *IMeshArena::add_face(Span<const Vert *> verts,
                            int orig,
                            Span<int> edge_origs,
@@ -633,7 +688,11 @@ void IMesh::populate_vert(int max_verts)
    * TODO: when all debugged, set fix_order = false. */
   const bool fix_order = true;
   if (fix_order) {
+#  ifdef WITH_TBB
+    tbb::parallel_sort(vert_.begin(), vert_.end(), [](const Vert *a, const Vert *b) {
+#  else
     std::sort(vert_.begin(), vert_.end(), [](const Vert *a, const Vert *b) {
+#  endif
       if (a->orig != NO_INDEX && b->orig != NO_INDEX) {
         return a->orig < b->orig;
       }
@@ -2187,6 +2246,14 @@ IMesh triangulate_polymesh(IMesh &imesh, IMeshArena *arena)
   Vector<Face *> face_tris;
   constexpr int estimated_tris_per_face = 3;
   face_tris.reserve(estimated_tris_per_face * imesh.face_size());
+  threading::parallel_for(imesh.face_index_range(), 2048, [&](IndexRange range) {
+    for (int i : range) {
+      Face *f = imesh.face(i);
+      if (!f->plane_populated() && f->size() >= 4) {
+        f->populate_plane(false);
+      }
+    }
+  });
   for (Face *f : imesh.faces()) {
     /* Tessellate face f, following plan similar to #BM_face_calc_tessellation. */
     int flen = f->size();
@@ -2278,12 +2345,22 @@ class TriOverlaps {
     if (two_trees_no_self) {
       tree_b_ = BLI_bvhtree_new(tm.face_size(), FLT_EPSILON, 8, 6);
     }
+
+    /* Create a Vector containing face shape. */
+    Vector<int> shapes;
+    shapes.resize(tm.face_size());
+    threading::parallel_for(tm.face_index_range(), 2048, [&](IndexRange range) {
+      for (int t : range) {
+        shapes[t] = shape_fn(tm.face(t)->orig);
+      }
+    });
+
     float bbpts[6];
     for (int t : tm.face_index_range()) {
       const BoundingBox &bb = tri_bb[t];
       copy_v3_v3(bbpts, bb.min);
       copy_v3_v3(bbpts + 3, bb.max);
-      int shape = shape_fn(tm.face(t)->orig);
+      int shape = shapes[t];
       if (two_trees_no_self) {
         if (shape == 0) {
           BLI_bvhtree_insert(tree_, t, bbpts, 2);
@@ -2575,11 +2652,13 @@ static void calc_subdivided_non_cluster_tris(Array<IMesh> &r_tri_subdivided,
       0, overlap_tri_range_tot, &data, calc_subdivided_tri_range_func, &settings);
   /* Now have to put in the triangles that are the same as the input ones, and not in clusters.
    */
-  for (int t : tm.face_index_range()) {
-    if (r_tri_subdivided[t].face_size() == 0 && clinfo.tri_cluster(t) == NO_INDEX) {
-      r_tri_subdivided[t] = IMesh({tm.face(t)});
+  threading::parallel_for(tm.face_index_range(), 2048, [&](IndexRange range) {
+    for (int t : range) {
+      if (r_tri_subdivided[t].face_size() == 0 && clinfo.tri_cluster(t) == NO_INDEX) {
+        r_tri_subdivided[t] = IMesh({tm.face(t)});
+      }
     }
-  }
+  });
 }
 
 /**
@@ -2936,11 +3015,15 @@ IMesh trimesh_nary_intersect(const IMesh &tm_in,
   double overlap_time = PIL_check_seconds_timer();
   std::cout << "intersect overlaps calculated, time = " << overlap_time - bb_calc_time << "\n";
 #  endif
-  for (int t : tm_clean->face_index_range()) {
-    if (tri_ov.first_overlap_index(t) != -1) {
-      tm_clean->face(t)->populate_plane(true);
+  Array<IMesh> tri_subdivided(tm_clean->face_size(), NoInitialization());
+  threading::parallel_for(tm_clean->face_index_range(), 1024, [&](IndexRange range) {
+    for (int t : range) {
+      if (tri_ov.first_overlap_index(t) != -1) {
+        tm_clean->face(t)->populate_plane(true);
+      }
+      new (static_cast<void *>(&tri_subdivided[t])) IMesh;
     }
-  }
+  });
 #  ifdef PERFDEBUG
   double plane_populate = PIL_check_seconds_timer();
   std::cout << "planes populated, time = " << plane_populate - overlap_time << "\n";
@@ -2965,7 +3048,6 @@ IMesh trimesh_nary_intersect(const IMesh &tm_in,
   doperfmax(1, clinfo.tot_cluster());
   doperfmax(2, tri_ov.overlap().size());
 #  endif
-  Array<IMesh> tri_subdivided(tm_clean->face_size());
   calc_subdivided_non_cluster_tris(tri_subdivided, *tm_clean, itt_map, clinfo, tri_ov, arena);
 #  ifdef PERFDEBUG
   double subdivided_tris_time = PIL_check_seconds_timer();
author	Erik Abrahamsson <ecke101@gmail.com>	2021-07-06 01:09:36 +0300
committer	Howard Trickey <howard.trickey@gmail.com>	2021-07-06 01:09:36 +0300
commit	ceff86aafe46a6fb66e023500f5a47260964b0a2 (patch)
tree	a34e6772226719f0f22806f475cdb07bfbf458c7 /source/blender/blenlib/intern/mesh_intersect.cc
parent	cf17f7e0cc6efb6f14a271e37d2ea1b3f10bb66d (diff)