Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorErik Abrahamsson <ecke101@gmail.com>2021-07-06 01:09:36 +0300
committerHoward Trickey <howard.trickey@gmail.com>2021-07-06 01:09:36 +0300
commitceff86aafe46a6fb66e023500f5a47260964b0a2 (patch)
treea34e6772226719f0f22806f475cdb07bfbf458c7 /source/blender/blenlib/intern/mesh_intersect.cc
parentcf17f7e0cc6efb6f14a271e37d2ea1b3f10bb66d (diff)
Various Exact Boolean parallelizations and optimizations.
From patch D11780 from Erik Abrahamsson. It parallelizes making the vertices, destruction of map entries, finding if the result is PWN, finding triangle adjacencies, and finding the ambient cell. The latter needs a parallel_reduce from tbb, so added one into BLI_task.hh so that if WITH_TBB is false, the code will still work. On Erik's 6-core machine, the elapsed time went from 17.5s to 11.8s (33% faster) on an intersection of two spheres with 3.1M faces. On Howard's 24-core machine, the elapsed time went from 18.7s to 10.8s for the same test.
Diffstat (limited to 'source/blender/blenlib/intern/mesh_intersect.cc')
-rw-r--r--source/blender/blenlib/intern/mesh_intersect.cc110
1 files changed, 96 insertions, 14 deletions
diff --git a/source/blender/blenlib/intern/mesh_intersect.cc b/source/blender/blenlib/intern/mesh_intersect.cc
index c8028231e81..f91dd762e70 100644
--- a/source/blender/blenlib/intern/mesh_intersect.cc
+++ b/source/blender/blenlib/intern/mesh_intersect.cc
@@ -43,6 +43,7 @@
# include "BLI_set.hh"
# include "BLI_span.hh"
# include "BLI_task.h"
+# include "BLI_task.hh"
# include "BLI_threads.h"
# include "BLI_vector.hh"
# include "BLI_vector_set.hh"
@@ -51,6 +52,10 @@
# include "BLI_mesh_intersect.hh"
+# ifdef WITH_TBB
+# include "tbb/parallel_sort.h"
+# endif
+
// # define PERFDEBUG
namespace blender::meshintersect {
@@ -406,6 +411,11 @@ class IMeshArena::IMeshArenaImpl : NonCopyable, NonMovable {
return add_or_find_vert(mco, co, orig);
}
+ const Vert *add_or_find_vert(Vert *vert)
+ {
+ return add_or_find_vert_(vert);
+ }
+
Face *add_face(Span<const Vert *> verts, int orig, Span<int> edge_origs, Span<bool> is_intersect)
{
Face *f = new Face(verts, next_face_id_++, orig, edge_origs, is_intersect);
@@ -486,10 +496,48 @@ class IMeshArena::IMeshArenaImpl : NonCopyable, NonMovable {
private:
const Vert *add_or_find_vert(const mpq3 &mco, const double3 &dco, int orig)
{
- /* Don't allocate Vert yet, in case it is already there. */
- Vert vtry(mco, dco, NO_INDEX, NO_INDEX);
+ Vert *vtry = new Vert(mco, dco, NO_INDEX, NO_INDEX);
const Vert *ans;
- VSetKey vskey(&vtry);
+ VSetKey vskey(vtry);
+ if (intersect_use_threading) {
+# ifdef USE_SPINLOCK
+ BLI_spin_lock(&lock_);
+# else
+ BLI_mutex_lock(mutex_);
+# endif
+ }
+ const VSetKey *lookup = vset_.lookup_key_ptr(vskey);
+ if (!lookup) {
+ vtry->id = next_vert_id_++;
+ vtry->orig = orig;
+ vskey.vert = vtry; // new Vert(mco, dco, next_vert_id_++, orig);
+ vset_.add_new(vskey);
+ allocated_verts_.append(std::unique_ptr<Vert>(vskey.vert));
+ ans = vskey.vert;
+ }
+ else {
+ /* It was a duplicate, so return the existing one.
+ * Note that the returned Vert may have a different orig.
+ * This is the intended semantics: if the Vert already
+ * exists then we are merging verts and using the first-seen
+ * one as the canonical one. */
+ delete vtry;
+ ans = lookup->vert;
+ }
+ if (intersect_use_threading) {
+# ifdef USE_SPINLOCK
+ BLI_spin_unlock(&lock_);
+# else
+ BLI_mutex_unlock(mutex_);
+# endif
+ }
+ return ans;
+ };
+
+ const Vert *add_or_find_vert_(Vert *vtry)
+ {
+ const Vert *ans;
+ VSetKey vskey(vtry);
if (intersect_use_threading) {
# ifdef USE_SPINLOCK
BLI_spin_lock(&lock_);
@@ -499,7 +547,8 @@ class IMeshArena::IMeshArenaImpl : NonCopyable, NonMovable {
}
const VSetKey *lookup = vset_.lookup_key_ptr(vskey);
if (!lookup) {
- vskey.vert = new Vert(mco, dco, next_vert_id_++, orig);
+ vtry->id = next_vert_id_++;
+ vskey.vert = vtry; // new Vert(mco, dco, next_vert_id_++, orig);
vset_.add_new(vskey);
allocated_verts_.append(std::unique_ptr<Vert>(vskey.vert));
ans = vskey.vert;
@@ -510,6 +559,7 @@ class IMeshArena::IMeshArenaImpl : NonCopyable, NonMovable {
* This is the intended semantics: if the Vert already
* exists then we are merging verts and using the first-seen
* one as the canonical one. */
+ delete vtry;
ans = lookup->vert;
}
if (intersect_use_threading) {
@@ -550,6 +600,11 @@ const Vert *IMeshArena::add_or_find_vert(const mpq3 &co, int orig)
return pimpl_->add_or_find_vert(co, orig);
}
+const Vert *IMeshArena::add_or_find_vert(Vert *vert)
+{
+ return pimpl_->add_or_find_vert(vert);
+}
+
Face *IMeshArena::add_face(Span<const Vert *> verts,
int orig,
Span<int> edge_origs,
@@ -633,7 +688,11 @@ void IMesh::populate_vert(int max_verts)
* TODO: when all debugged, set fix_order = false. */
const bool fix_order = true;
if (fix_order) {
+# ifdef WITH_TBB
+ tbb::parallel_sort(vert_.begin(), vert_.end(), [](const Vert *a, const Vert *b) {
+# else
std::sort(vert_.begin(), vert_.end(), [](const Vert *a, const Vert *b) {
+# endif
if (a->orig != NO_INDEX && b->orig != NO_INDEX) {
return a->orig < b->orig;
}
@@ -2187,6 +2246,14 @@ IMesh triangulate_polymesh(IMesh &imesh, IMeshArena *arena)
Vector<Face *> face_tris;
constexpr int estimated_tris_per_face = 3;
face_tris.reserve(estimated_tris_per_face * imesh.face_size());
+ threading::parallel_for(imesh.face_index_range(), 2048, [&](IndexRange range) {
+ for (int i : range) {
+ Face *f = imesh.face(i);
+ if (!f->plane_populated() && f->size() >= 4) {
+ f->populate_plane(false);
+ }
+ }
+ });
for (Face *f : imesh.faces()) {
/* Tessellate face f, following plan similar to #BM_face_calc_tessellation. */
int flen = f->size();
@@ -2278,12 +2345,22 @@ class TriOverlaps {
if (two_trees_no_self) {
tree_b_ = BLI_bvhtree_new(tm.face_size(), FLT_EPSILON, 8, 6);
}
+
+ /* Create a Vector containing face shape. */
+ Vector<int> shapes;
+ shapes.resize(tm.face_size());
+ threading::parallel_for(tm.face_index_range(), 2048, [&](IndexRange range) {
+ for (int t : range) {
+ shapes[t] = shape_fn(tm.face(t)->orig);
+ }
+ });
+
float bbpts[6];
for (int t : tm.face_index_range()) {
const BoundingBox &bb = tri_bb[t];
copy_v3_v3(bbpts, bb.min);
copy_v3_v3(bbpts + 3, bb.max);
- int shape = shape_fn(tm.face(t)->orig);
+ int shape = shapes[t];
if (two_trees_no_self) {
if (shape == 0) {
BLI_bvhtree_insert(tree_, t, bbpts, 2);
@@ -2575,11 +2652,13 @@ static void calc_subdivided_non_cluster_tris(Array<IMesh> &r_tri_subdivided,
0, overlap_tri_range_tot, &data, calc_subdivided_tri_range_func, &settings);
/* Now have to put in the triangles that are the same as the input ones, and not in clusters.
*/
- for (int t : tm.face_index_range()) {
- if (r_tri_subdivided[t].face_size() == 0 && clinfo.tri_cluster(t) == NO_INDEX) {
- r_tri_subdivided[t] = IMesh({tm.face(t)});
+ threading::parallel_for(tm.face_index_range(), 2048, [&](IndexRange range) {
+ for (int t : range) {
+ if (r_tri_subdivided[t].face_size() == 0 && clinfo.tri_cluster(t) == NO_INDEX) {
+ r_tri_subdivided[t] = IMesh({tm.face(t)});
+ }
}
- }
+ });
}
/**
@@ -2936,11 +3015,15 @@ IMesh trimesh_nary_intersect(const IMesh &tm_in,
double overlap_time = PIL_check_seconds_timer();
std::cout << "intersect overlaps calculated, time = " << overlap_time - bb_calc_time << "\n";
# endif
- for (int t : tm_clean->face_index_range()) {
- if (tri_ov.first_overlap_index(t) != -1) {
- tm_clean->face(t)->populate_plane(true);
+ Array<IMesh> tri_subdivided(tm_clean->face_size(), NoInitialization());
+ threading::parallel_for(tm_clean->face_index_range(), 1024, [&](IndexRange range) {
+ for (int t : range) {
+ if (tri_ov.first_overlap_index(t) != -1) {
+ tm_clean->face(t)->populate_plane(true);
+ }
+ new (static_cast<void *>(&tri_subdivided[t])) IMesh;
}
- }
+ });
# ifdef PERFDEBUG
double plane_populate = PIL_check_seconds_timer();
std::cout << "planes populated, time = " << plane_populate - overlap_time << "\n";
@@ -2965,7 +3048,6 @@ IMesh trimesh_nary_intersect(const IMesh &tm_in,
doperfmax(1, clinfo.tot_cluster());
doperfmax(2, tri_ov.overlap().size());
# endif
- Array<IMesh> tri_subdivided(tm_clean->face_size());
calc_subdivided_non_cluster_tris(tri_subdivided, *tm_clean, itt_map, clinfo, tri_ov, arena);
# ifdef PERFDEBUG
double subdivided_tris_time = PIL_check_seconds_timer();