Better performance when there are clusters of coplanar triangles.

author: Howard Trickey <howard.trickey@gmail.com> 2020-08-19 04:51:35 +0300
committer: Howard Trickey <howard.trickey@gmail.com> 2020-08-19 04:51:35 +0300
commit: 108e6d4ef2bc62752f8071179092c0cc80b3f867 (patch)
tree: 39c946e99369efd0bcf1c2320a2884be4bffbbe7 /source/blender/blenlib
parent: 5cd49e46f4117a96e68ae736eec8c4871eafff6e (diff)
2 files changed, 198 insertions, 54 deletions
diff --git a/source/blender/blenlib/intern/mesh_intersect.cc b/source/blender/blenlib/intern/mesh_intersect.cc
index db70f74ecb1..ddd305b31f2 100644
--- a/source/blender/blenlib/intern/mesh_intersect.cc
+++ b/source/blender/blenlib/intern/mesh_intersect.cc
@@ -53,7 +53,7 @@ static void dump_perfdata(void);
 #  endif
 
 /* For debugging, can disable threading in intersect code with this static constant. */
-static constexpr bool intersect_use_threading = false; /*DEBUG!!*/
+static constexpr bool intersect_use_threading = true;
 
 Vert::Vert(const mpq3 &mco, const double3 &dco, int id, int orig)
     : co_exact(mco), co(dco), id(id), orig(orig)
@@ -2459,6 +2459,79 @@ class TriOverlaps {
   }
 };
 
+/* Data needed for parallelization of calc_overlap_itts. */
+struct OverlapIttsData {
+  Vector<std::pair<int, int>> intersect_pairs;
+  Map<std::pair<int, int>, ITT_value> &itt_map;
+  const Mesh &tm;
+  MArena *arena;
+
+  OverlapIttsData(Map<std::pair<int, int>, ITT_value> &itt_map, const Mesh &tm, MArena *arena)
+      : itt_map(itt_map), tm(tm), arena(arena)
+  {
+  }
+};
+
+static void calc_overlap_itts_range_func(void *__restrict userdata,
+                                         const int iter,
+                                         const TaskParallelTLS *__restrict UNUSED(tls))
+{
+  constexpr int dbg_level = 0;
+  OverlapIttsData *data = static_cast<OverlapIttsData *>(userdata);
+  std::pair<int, int> tri_pair = data->intersect_pairs[iter];
+  int a = tri_pair.first;
+  int b = tri_pair.second;
+  if (dbg_level > 0) {
+    std::cout << "calc_overlap_itts_range_func a=" << a << ", b=" << b << "\n";
+  }
+  ITT_value itt = intersect_tri_tri(data->tm, a, b);
+  if (dbg_level > 0) {
+    std::cout << "result of intersecting " << a << " and " << b << " = " << itt << "\n";
+  }
+  BLI_assert(data->itt_map.contains(tri_pair));
+  data->itt_map.add_overwrite(tri_pair, itt);
+}
+
+/* Fill in itt_map with the vector of ITT_values that result from intersecting the triangles in ov.
+ * Use a canonical order for triangles: (a,b) where  a < b.
+ * Don't bother doing this if both a and b are part of the same coplanar cluster, as the
+ * intersection for those will be handled by CDT, later.
+ */
+static void calc_overlap_itts(Map<std::pair<int, int>, ITT_value> &itt_map,
+                              const Mesh &tm,
+                              const CoplanarClusterInfo &clinfo,
+                              const TriOverlaps &ov,
+                              MArena *arena)
+{
+  OverlapIttsData data(itt_map, tm, arena);
+  /* Put dummy values in itt_map intially, so map entries will exist when doing the range function.
+   * This means we won't have to protect the itt_map.add_overwrite function with a lock.
+   */
+  for (const BVHTreeOverlap &olap : ov.overlap()) {
+    int a = olap.indexA;
+    int b = olap.indexB;
+    if (a >= b) {
+      std::swap(a, b);
+    }
+    std::pair<int, int> key(a, b);
+    if (!itt_map.contains(key)) {
+      int ca = clinfo.tri_cluster(a);
+      int cb = clinfo.tri_cluster(b);
+      if (ca == NO_INDEX || ca != cb) {
+        itt_map.add_new(key, ITT_value());
+        data.intersect_pairs.append(key);
+      }
+    }
+  }
+  int tot_intersect_pairs = data.intersect_pairs.size();
+  TaskParallelSettings settings;
+  BLI_parallel_range_settings_defaults(&settings);
+  constexpr int intersect_threading_threshold = 100;
+  settings.use_threading = (intersect_use_threading &&
+                            tot_intersect_pairs > intersect_threading_threshold);
+  BLI_task_parallel_range(0, tot_intersect_pairs, &data, calc_overlap_itts_range_func, &settings);
+}
+
 /* Data needed for parallelization of calc_subdivided_tris. */
 
 struct OverlapTriRange {
@@ -2469,6 +2542,7 @@ struct OverlapTriRange {
 struct SubdivideTrisData {
   Array<Mesh> &r_tri_subdivided;
   const Mesh &tm;
+  const Map<std::pair<int, int>, ITT_value> &itt_map;
   Span<BVHTreeOverlap> overlap;
   MArena *arena;
 
@@ -2481,10 +2555,12 @@ struct SubdivideTrisData {
 
   SubdivideTrisData(Array<Mesh> &r_tri_subdivided,
                     const Mesh &tm,
+                    const Map<std::pair<int, int>, ITT_value> &itt_map,
                     Span<BVHTreeOverlap> overlap,
                     MArena *arena)
       : r_tri_subdivided(r_tri_subdivided),
         tm(tm),
+        itt_map(itt_map),
         overlap(overlap),
         arena(arena),
         overlap_tri_range{}
@@ -2508,7 +2584,16 @@ static void calc_subdivided_tri_range_func(void *__restrict userdata,
   Vector<ITT_value, inline_capacity> itts(otr.len);
   for (int j = otr.overlap_start; j < otr.overlap_start + otr.len; ++j) {
     int t_other = data->overlap[j].indexB;
-    ITT_value itt = intersect_tri_tri(data->tm, t, t_other);
+    int a = t;
+    int b = t_other;
+    if (a >= b) {
+      std::swap(a, b);
+    }
+    std::pair<int, int> key(a, b);
+    ITT_value itt;
+    if (data->itt_map.contains(key)) {
+      itt = data->itt_map.lookup(key);
+    }
     if (itt.kind != INONE) {
       itts.append(itt);
     }
@@ -2531,10 +2616,10 @@ static void calc_subdivided_tri_range_func(void *__restrict userdata,
  * all the other triangles in the mesh, if it intersects any others.
  * But don't do this for triangles that are part of a cluster.
  * Also, do nothing here if the answer is just the triangle itself.
- * TODO: parallelize this loop.
  */
 static void calc_subdivided_tris(Array<Mesh> &r_tri_subdivided,
                                  const Mesh &tm,
+                                 const Map<std::pair<int, int>, ITT_value> &itt_map,
                                  const CoplanarClusterInfo &clinfo,
                                  const TriOverlaps &ov,
                                  MArena *arena)
@@ -2544,10 +2629,7 @@ static void calc_subdivided_tris(Array<Mesh> &r_tri_subdivided,
     std::cout << "\nCALC_SUBDIVIDED_TRIS\n\n";
   }
   Span<BVHTreeOverlap> overlap = ov.overlap();
-  SubdivideTrisData data(r_tri_subdivided, tm, overlap, arena);
-  data.r_tri_subdivided = r_tri_subdivided;
-  data.overlap = overlap;
-  data.arena = arena;
+  SubdivideTrisData data(r_tri_subdivided, tm, itt_map, overlap, arena);
   int overlap_tot = overlap.size();
   data.overlap_tri_range = Vector<OverlapTriRange>();
   data.overlap_tri_range.reserve(overlap_tot);
@@ -2577,14 +2659,39 @@ static void calc_subdivided_tris(Array<Mesh> &r_tri_subdivided,
   int overlap_tri_range_tot = data.overlap_tri_range.size();
   TaskParallelSettings settings;
   BLI_parallel_range_settings_defaults(&settings);
-  settings.use_threading = (intersect_use_threading && overlap_tri_range_tot > 1000);
+  constexpr int trisubdiv_threading_threshold = 100;
+  settings.use_threading = (intersect_use_threading &&
+                            overlap_tri_range_tot > trisubdiv_threading_threshold);
   BLI_task_parallel_range(
       0, overlap_tri_range_tot, &data, calc_subdivided_tri_range_func, &settings);
 }
 
+/* Get first index in ov where indexA == t. Assuming sorted on indexA. */
+static int find_first_overlap_index(const TriOverlaps &ov, int t)
+{
+  Span<BVHTreeOverlap> span = ov.overlap();
+  int min = 0;
+  int max = static_cast<int>(span.size()) - 1;
+  while (min < max) {
+    int mid = (min + max) / 2;
+    if (t <= span[mid].indexA) {
+      max = mid;
+    }
+    else {
+      min = mid + 1;
+    }
+  }
+  if (span[min].indexA == t) {
+    return min;
+  }
+  return -1;
+}
+
 static CDT_data calc_cluster_subdivided(const CoplanarClusterInfo &clinfo,
                                         int c,
                                         const Mesh &tm,
+                                        const TriOverlaps &ov,
+                                        const Map<std::pair<int, int>, ITT_value> &itt_map,
                                         MArena *UNUSED(arena))
 {
   constexpr int dbg_level = 0;
@@ -2592,28 +2699,42 @@ static CDT_data calc_cluster_subdivided(const CoplanarClusterInfo &clinfo,
   const CoplanarCluster &cl = clinfo.cluster(c);
   /* Make a CDT input with triangles from C and intersects from other triangles in tm. */
   if (dbg_level > 0) {
-    std::cout << "calc_cluster_subdivided for cluster " << c << " = " << cl << "\n";
+    std::cout << "CALC_CLUSTER_SUBDIVIDED for cluster " << c << " = " << cl << "\n";
   }
   /* Get vector itts of all intersections of a triangle of cl with any triangle of tm not
    * in cl and not coplanar with it (for that latter, if there were an intersection,
    * it should already be in cluster cl).
    */
   Vector<ITT_value> itts;
-  for (int t_other : tm.face_index_range()) {
-    if (clinfo.tri_cluster(t_other) != c) {
-      if (dbg_level > 0) {
-        std::cout << "intersect cluster " << c << " with tri " << t_other << "\n";
-      }
-      for (const int t : cl) {
-        ITT_value itt = intersect_tri_tri(tm, t, t_other);
-#  ifdef PERFDEBUG
-        incperfcount(5); /* intersect_tri_tri calls from calc_cluster_subdivided. */
-#  endif
+  Span<BVHTreeOverlap> ovspan = ov.overlap();
+  for (int t : cl) {
+    if (dbg_level > 0) {
+      std::cout << "find intersects with triangle " << t << " of cluster\n";
+    }
+    int first_i = find_first_overlap_index(ov, t);
+    if (first_i == -1) {
+      continue;
+    }
+    for (int i = first_i; i < ovspan.size() && ovspan[i].indexA == t; ++i) {
+      int t_other = ovspan[i].indexB;
+      if (clinfo.tri_cluster(t_other) != c) {
         if (dbg_level > 0) {
-          std::cout << "intersect tri " << t << " with tri " << t_other << " = " << itt << "\n";
+          std::cout << "use intersect(" << t << "," << t_other << "\n";
         }
-        if (itt.kind != INONE && itt.kind != ICOPLANAR) {
-          itts.append(itt);
+        int a = t;
+        int b = t_other;
+        if (a >= b) {
+          std::swap(a, b);
+        }
+        std::pair<int, int> key(a, b);
+        if (itt_map.contains(key)) {
+          ITT_value itt = itt_map.lookup(key);
+          if (itt.kind != INONE && itt.kind != ICOPLANAR) {
+            itts.append(itt);
+            if (dbg_level > 0) {
+              std::cout << "  itt = " << itt << "\n";
+            }
+          }
         }
       }
     }
@@ -2834,12 +2955,18 @@ Mesh trimesh_nary_intersect(
   doperfmax(1, static_cast<int>(clinfo.tot_cluster()));
   doperfmax(2, static_cast<int>(tri_ov.overlap().size()));
 #  endif
+  /* itt_map((a,b)) will hold the intersection value resulting from intersecting
+   * triangles with indices a and b, where a < b.
+   */
+  Map<std::pair<int, int>, ITT_value> itt_map;
+  itt_map.reserve(tri_ov.overlap().size());
+  calc_overlap_itts(itt_map, *tm_clean, clinfo, tri_ov, arena);
+  Array<Mesh> tri_subdivided(tm_clean->face_size());
+  calc_subdivided_tris(tri_subdivided, *tm_clean, itt_map, clinfo, tri_ov, arena);
   Array<CDT_data> cluster_subdivided(clinfo.tot_cluster());
   for (int c : clinfo.index_range()) {
-    cluster_subdivided[c] = calc_cluster_subdivided(clinfo, c, *tm_clean, arena);
+    cluster_subdivided[c] = calc_cluster_subdivided(clinfo, c, *tm_clean, tri_ov, itt_map, arena);
   }
-  blender::Array<Mesh> tri_subdivided(tm_clean->face_size());
-  calc_subdivided_tris(tri_subdivided, *tm_clean, clinfo, tri_ov, arena);
   for (int t : tm_clean->face_index_range()) {
     int c = clinfo.tri_cluster(t);
     if (c != NO_INDEX) {
@@ -2982,10 +3109,6 @@ static void perfdata_init(void)
   perfdata.count.append(0);
   perfdata.count_name.append("final non-NONE intersects");
 
-  /* count 5. */
-  perfdata.count.append(0);
-  perfdata.count_name.append("intersect_tri_tri calls from calc_cluster_subdivided");
-
   /* max 0. */
   perfdata.max.append(0);
   perfdata.max_name.append("total faces");
diff --git a/source/blender/blenlib/tests/BLI_mesh_intersect_test.cc b/source/blender/blenlib/tests/BLI_mesh_intersect_test.cc
index 305afc383a1..014e0e29ade 100644
--- a/source/blender/blenlib/tests/BLI_mesh_intersect_test.cc
+++ b/source/blender/blenlib/tests/BLI_mesh_intersect_test.cc
@@ -16,7 +16,7 @@
 #include "BLI_vector.hh"
 
 #define DO_REGULAR_TESTS 1
-#define DO_PERF_TESTS 0
+#define DO_PERF_TESTS 1
 
 namespace blender::meshintersect {
 
@@ -711,11 +711,8 @@ TEST(mesh_intersect, CubeCubeStep)
 
 #if DO_PERF_TESTS
 
-static void get_sphere_params(int nrings,
-                              int nsegs,
-                              bool triangulate,
-                              int *r_num_verts,
-                              int *r_num_faces)
+static void get_sphere_params(
+    int nrings, int nsegs, bool triangulate, int *r_num_verts, int *r_num_faces)
 {
   *r_num_verts = nsegs * (nrings - 1) + 2;
   if (triangulate) {
@@ -760,9 +757,7 @@ static void fill_sphere_data(int nrings,
     }
     return seg * (nrings - 1) + (ring - 1);
   };
-  auto face_index_fn = [nrings](int seg, int ring) {
-    return seg * nrings + ring;
-  };
+  auto face_index_fn = [nrings](int seg, int ring) { return seg * nrings + ring; };
   auto tri_index_fn = [nrings, nsegs](int seg, int ring, int tri) {
     if (ring == 0) {
       return seg;
@@ -890,9 +885,25 @@ static void spheresphere_test(int nrings, double y_offset, bool use_self)
   Array<Facep> tris(2 * num_sphere_tris);
   arena.reserve(2 * num_sphere_verts, 2 * num_sphere_tris);
   double3 center1(0.0, 0.0, 0.0);
-  fill_sphere_data(nrings, nsegs, center1, 1.0, true, MutableSpan<Facep>(tris.begin(), num_sphere_tris), 0, 0, &arena);
+  fill_sphere_data(nrings,
+                   nsegs,
+                   center1,
+                   1.0,
+                   true,
+                   MutableSpan<Facep>(tris.begin(), num_sphere_tris),
+                   0,
+                   0,
+                   &arena);
   double3 center2(0.0, y_offset, 0.0);
-  fill_sphere_data(nrings, nsegs, center2, 1.0, true, MutableSpan<Facep>(tris.begin() + num_sphere_tris, num_sphere_tris), num_sphere_verts, num_sphere_verts, &arena);
+  fill_sphere_data(nrings,
+                   nsegs,
+                   center2,
+                   1.0,
+                   true,
+                   MutableSpan<Facep>(tris.begin() + num_sphere_tris, num_sphere_tris),
+                   num_sphere_verts,
+                   num_sphere_verts,
+                   &arena);
   Mesh mesh(tris);
   double time_create = PIL_check_seconds_timer();
   write_obj_mesh(mesh, "spheresphere_in");
@@ -913,8 +924,8 @@ static void spheresphere_test(int nrings, double y_offset, bool use_self)
   BLI_task_scheduler_exit();
 }
 
-
-static void get_grid_params(int x_subdiv, int y_subdiv, bool triangulate, int *r_num_verts, int *r_num_faces)
+static void get_grid_params(
+    int x_subdiv, int y_subdiv, bool triangulate, int *r_num_verts, int *r_num_faces)
 {
   *r_num_verts = x_subdiv * y_subdiv;
   if (triangulate) {
@@ -943,12 +954,8 @@ static void fill_grid_data(int x_subdiv,
   get_grid_params(x_subdiv, y_subdiv, triangulate, &num_verts, &num_faces);
   BLI_assert(face.size() == num_faces);
   Array<Vertp> vert(num_verts);
-  auto vert_index_fn = [x_subdiv](int ix, int iy) {
-    return iy * x_subdiv + ix;
-  };
-  auto face_index_fn = [x_subdiv](int ix, int iy) {
-    return iy * (x_subdiv - 1) + ix;
-  };
+  auto vert_index_fn = [x_subdiv](int ix, int iy) { return iy * x_subdiv + ix; };
+  auto face_index_fn = [x_subdiv](int ix, int iy) { return iy * (x_subdiv - 1) + ix; };
   auto tri_index_fn = [x_subdiv](int ix, int iy, int tri) {
     return 2 * iy * (x_subdiv - 1) + 2 * ix + tri;
   };
@@ -972,7 +979,7 @@ static void fill_grid_data(int x_subdiv,
       int i0 = vert_index_fn(ix, iy);
       int i1 = vert_index_fn(ix, iy + 1);
       int i2 = vert_index_fn(ix + 1, iy + 1);
-      int i3 = vert_index_fn(ix +1, iy);
+      int i3 = vert_index_fn(ix + 1, iy);
       if (triangulate) {
         Facep f = arena->add_face({vert[i0], vert[i1], vert[i2]}, fid++, eid);
         Facep f2 = arena->add_face({vert[i2], vert[i3], vert[i0]}, fid++, eid);
@@ -1012,8 +1019,24 @@ static void spheregrid_test(int nrings, int grid_level, double z_offset, bool us
   Array<Facep> tris(num_sphere_tris + num_grid_tris);
   arena.reserve(num_sphere_verts + num_grid_verts, num_sphere_tris + num_grid_tris);
   double3 center(0.0, 0.0, z_offset);
-  fill_sphere_data(nrings, nsegs, center, 1.0, true, MutableSpan<Facep>(tris.begin(), num_sphere_tris), 0, 0, &arena);
-  fill_grid_data(subdivs, subdivs, true, 4.0, double3(0,0,0), MutableSpan<Facep>(tris.begin() + num_sphere_tris, num_grid_tris), num_sphere_verts, num_sphere_tris, &arena);
+  fill_sphere_data(nrings,
+                   nsegs,
+                   center,
+                   1.0,
+                   true,
+                   MutableSpan<Facep>(tris.begin(), num_sphere_tris),
+                   0,
+                   0,
+                   &arena);
+  fill_grid_data(subdivs,
+                 subdivs,
+                 true,
+                 4.0,
+                 double3(0, 0, 0),
+                 MutableSpan<Facep>(tris.begin() + num_sphere_tris, num_grid_tris),
+                 num_sphere_verts,
+                 num_sphere_tris,
+                 &arena);
   Mesh mesh(tris);
   double time_create = PIL_check_seconds_timer();
   // write_obj_mesh(mesh, "spheregrid_in");
@@ -1030,16 +1053,14 @@ static void spheregrid_test(int nrings, int grid_level, double z_offset, bool us
   std::cout << "Create time: " << time_create - time_start << "\n";
   std::cout << "Intersect time: " << time_intersect - time_create << "\n";
   std::cout << "Total time: " << time_intersect - time_start << "\n";
-  write_obj_mesh(out, "spheregrid");
+  // write_obj_mesh(out, "spheregrid");
   BLI_task_scheduler_exit();
 }
 
-#if 0
 TEST(mesh_intersect_perf, SphereSphere)
 {
   spheresphere_test(64, 0.5, false);
 }
-#endif
 
 TEST(mesh_intersect_perf, SphereGrid)
 {
author	Howard Trickey <howard.trickey@gmail.com>	2020-08-19 04:51:35 +0300
committer	Howard Trickey <howard.trickey@gmail.com>	2020-08-19 04:51:35 +0300
commit	108e6d4ef2bc62752f8071179092c0cc80b3f867 (patch)
tree	39c946e99369efd0bcf1c2320a2884be4bffbbe7 /source/blender/blenlib
parent	5cd49e46f4117a96e68ae736eec8c4871eafff6e (diff)