Mesh: optimize object mode face tessellation

- Multi-thread BKE_mesh_recalc_looptri. - Add BKE_mesh_recalc_looptri_with_normals, this skips having to calculate normals for ngons. Exact performance depends on number of faces, size of ngons and available CPU cores. For high poly meshes the isolated improvement to BKE_mesh_recalc_looptri in my tests was between 6.7x .. 25.0x, with the largest gains seen in meshes containing ngons with many sides. The overall speedup for high poly meshes containing quads and triangles is only ~20% although ngon heavy meshes can be much faster.
author: Campbell Barton <ideasman42@gmail.com> 2021-06-20 06:21:11 +0300
committer: Campbell Barton <ideasman42@gmail.com> 2021-06-20 07:39:13 +0300
commit: 513f566b40a5bde4d89797aecf7c0ad3e4d1a20b (patch)
tree: 40949ae61aef858a7e3f8710abdc56f4e7da4f70
parent: b5e5fbcfc81b6ca4968d0788b5e6e330205d298b (diff)
4 files changed, 295 insertions, 105 deletions
diff --git a/source/blender/blenkernel/BKE_mesh.h b/source/blender/blenkernel/BKE_mesh.h
index 81d3fc683b7..87964307105 100644
--- a/source/blender/blenkernel/BKE_mesh.h
+++ b/source/blender/blenkernel/BKE_mesh.h
@@ -297,6 +297,13 @@ void BKE_mesh_recalc_looptri(const struct MLoop *mloop,
                              int totloop,
                              int totpoly,
                              struct MLoopTri *mlooptri);
+void BKE_mesh_recalc_looptri_with_normals(const struct MLoop *mloop,
+                                          const struct MPoly *mpoly,
+                                          const struct MVert *mvert,
+                                          int totloop,
+                                          int totpoly,
+                                          struct MLoopTri *mlooptri,
+                                          const float (*poly_normals)[3]);
 
 /* *** mesh_evaluate.c *** */
 
diff --git a/source/blender/blenkernel/intern/mesh_tessellate.c b/source/blender/blenkernel/intern/mesh_tessellate.c
index 358f3413104..98a93dbc4b5 100644
--- a/source/blender/blenkernel/intern/mesh_tessellate.c
+++ b/source/blender/blenkernel/intern/mesh_tessellate.c
@@ -36,6 +36,7 @@
 #include "BLI_math.h"
 #include "BLI_memarena.h"
 #include "BLI_polyfill_2d.h"
+#include "BLI_task.h"
 #include "BLI_utildefines.h"
 
 #include "BKE_customdata.h"
@@ -43,6 +44,9 @@
 
 #include "BLI_strict_flags.h"
 
+/** Compared against total loops. */
+#define MESH_FACE_TESSELLATE_THREADED_LIMIT 4096
+
 /* -------------------------------------------------------------------- */
 /** \name MFace Tessellation
  * \{ */
@@ -440,145 +444,307 @@ void BKE_mesh_tessface_calc(Mesh *mesh)
  * \{ */
 
 /**
- * Calculate tessellation into #MLoopTri which exist only for this purpose.
+ * \param face_normal: This will be optimized out as a constant.
  */
-void BKE_mesh_recalc_looptri(const MLoop *mloop,
-                             const MPoly *mpoly,
-                             const MVert *mvert,
-                             int totloop,
-                             int totpoly,
-                             MLoopTri *mlooptri)
+BLI_INLINE void mesh_calc_tessellation_for_face_impl(const MLoop *mloop,
+                                                     const MPoly *mpoly,
+                                                     const MVert *mvert,
+                                                     uint poly_index,
+                                                     MLoopTri *mlt,
+                                                     MemArena **pf_arena_p,
+                                                     const bool face_normal,
+                                                     const float normal_precalc[3])
 {
-  /* use this to avoid locking pthread for _every_ polygon
-   * and calling the fill function */
-
-#define USE_TESSFACE_SPEEDUP
-
-  const MPoly *mp;
-  const MLoop *ml;
-  MLoopTri *mlt;
-  MemArena *arena = NULL;
-  int poly_index, mlooptri_index;
-  uint j;
-
-  mlooptri_index = 0;
-  mp = mpoly;
-  for (poly_index = 0; poly_index < totpoly; poly_index++, mp++) {
-    const uint mp_loopstart = (uint)mp->loopstart;
-    const uint mp_totloop = (uint)mp->totloop;
-    uint l1, l2, l3;
-    if (mp_totloop < 3) {
-      /* do nothing */
-    }
-
-#ifdef USE_TESSFACE_SPEEDUP
-
-#  define ML_TO_MLT(i1, i2, i3) \
-    { \
-      mlt = &mlooptri[mlooptri_index]; \
-      l1 = mp_loopstart + i1; \
-      l2 = mp_loopstart + i2; \
-      l3 = mp_loopstart + i3; \
-      ARRAY_SET_ITEMS(mlt->tri, l1, l2, l3); \
-      mlt->poly = (uint)poly_index; \
-    } \
-    ((void)0)
-
-    else if (mp_totloop == 3) {
+  const uint mp_loopstart = (uint)mpoly[poly_index].loopstart;
+  const uint mp_totloop = (uint)mpoly[poly_index].totloop;
+
+#define ML_TO_MLT(i1, i2, i3) \
+  { \
+    ARRAY_SET_ITEMS(mlt->tri, mp_loopstart + i1, mp_loopstart + i2, mp_loopstart + i3); \
+    mlt->poly = poly_index; \
+  } \
+  ((void)0)
+
+  switch (mp_totloop) {
+    case 3: {
       ML_TO_MLT(0, 1, 2);
-      mlooptri_index++;
+      break;
     }
-    else if (mp_totloop == 4) {
+    case 4: {
       ML_TO_MLT(0, 1, 2);
-      MLoopTri *mlt_a = mlt;
-      mlooptri_index++;
+      MLoopTri *mlt_a = mlt++;
       ML_TO_MLT(0, 2, 3);
       MLoopTri *mlt_b = mlt;
-      mlooptri_index++;
 
       if (UNLIKELY(is_quad_flip_v3_first_third_fast(mvert[mloop[mlt_a->tri[0]].v].co,
                                                     mvert[mloop[mlt_a->tri[1]].v].co,
                                                     mvert[mloop[mlt_a->tri[2]].v].co,
                                                     mvert[mloop[mlt_b->tri[2]].v].co))) {
-        /* flip out of degenerate 0-2 state. */
+        /* Flip out of degenerate 0-2 state. */
         mlt_a->tri[2] = mlt_b->tri[2];
         mlt_b->tri[0] = mlt_a->tri[1];
       }
+      break;
     }
-#endif /* USE_TESSFACE_SPEEDUP */
-    else {
-      const float *co_curr, *co_prev;
-
-      float normal[3];
-
+    default: {
+      const MLoop *ml;
       float axis_mat[3][3];
-      float(*projverts)[2];
-      uint(*tris)[3];
-
-      const uint totfilltri = mp_totloop - 2;
 
-      if (UNLIKELY(arena == NULL)) {
-        arena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, __func__);
+      /* Calculate `axis_mat` to project verts to 2D. */
+      if (face_normal == false) {
+        float normal[3];
+        const float *co_curr, *co_prev;
+
+        zero_v3(normal);
+
+        /* Calc normal, flipped: to get a positive 2D cross product. */
+        ml = mloop + mp_loopstart;
+        co_prev = mvert[ml[mp_totloop - 1].v].co;
+        for (uint j = 0; j < mp_totloop; j++, ml++) {
+          co_curr = mvert[ml->v].co;
+          add_newell_cross_v3_v3v3(normal, co_prev, co_curr);
+          co_prev = co_curr;
+        }
+        if (UNLIKELY(normalize_v3(normal) == 0.0f)) {
+          normal[2] = 1.0f;
+        }
+        axis_dominant_v3_to_m3_negate(axis_mat, normal);
+      }
+      else {
+        axis_dominant_v3_to_m3_negate(axis_mat, normal_precalc);
       }
 
-      tris = BLI_memarena_alloc(arena, sizeof(*tris) * (size_t)totfilltri);
-      projverts = BLI_memarena_alloc(arena, sizeof(*projverts) * (size_t)mp_totloop);
-
-      zero_v3(normal);
+      const uint totfilltri = mp_totloop - 2;
 
-      /* calc normal, flipped: to get a positive 2d cross product */
-      ml = mloop + mp_loopstart;
-      co_prev = mvert[ml[mp_totloop - 1].v].co;
-      for (j = 0; j < mp_totloop; j++, ml++) {
-        co_curr = mvert[ml->v].co;
-        add_newell_cross_v3_v3v3(normal, co_prev, co_curr);
-        co_prev = co_curr;
-      }
-      if (UNLIKELY(normalize_v3(normal) == 0.0f)) {
-        normal[2] = 1.0f;
+      MemArena *pf_arena = *pf_arena_p;
+      if (UNLIKELY(pf_arena == NULL)) {
+        pf_arena = *pf_arena_p = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, __func__);
       }
 
-      /* project verts to 2d */
-      axis_dominant_v3_to_m3_negate(axis_mat, normal);
+      uint(*tris)[3] = tris = BLI_memarena_alloc(pf_arena, sizeof(*tris) * (size_t)totfilltri);
+      float(*projverts)[2] = projverts = BLI_memarena_alloc(
+          pf_arena, sizeof(*projverts) * (size_t)mp_totloop);
 
       ml = mloop + mp_loopstart;
-      for (j = 0; j < mp_totloop; j++, ml++) {
+      for (uint j = 0; j < mp_totloop; j++, ml++) {
         mul_v2_m3v3(projverts[j], axis_mat, mvert[ml->v].co);
       }
 
-      BLI_polyfill_calc_arena(projverts, mp_totloop, 1, tris, arena);
+      BLI_polyfill_calc_arena(projverts, mp_totloop, 1, tris, pf_arena);
 
-      /* apply fill */
-      for (j = 0; j < totfilltri; j++) {
-        uint *tri = tris[j];
+      /* Apply fill. */
+      for (uint j = 0; j < totfilltri; j++, mlt++) {
+        const uint *tri = tris[j];
+        ML_TO_MLT(tri[0], tri[1], tri[2]);
+      }
 
-        mlt = &mlooptri[mlooptri_index];
+      BLI_memarena_clear(pf_arena);
 
-        /* set loop indices, transformed to vert indices later */
-        l1 = mp_loopstart + tri[0];
-        l2 = mp_loopstart + tri[1];
-        l3 = mp_loopstart + tri[2];
+      break;
+    }
+  }
+#undef ML_TO_MLT
+}
 
-        ARRAY_SET_ITEMS(mlt->tri, l1, l2, l3);
-        mlt->poly = (uint)poly_index;
+static void mesh_calc_tessellation_for_face(const MLoop *mloop,
+                                            const MPoly *mpoly,
+                                            const MVert *mvert,
+                                            uint poly_index,
+                                            MLoopTri *mlt,
+                                            MemArena **pf_arena_p)
+{
+  mesh_calc_tessellation_for_face_impl(
+      mloop, mpoly, mvert, poly_index, mlt, pf_arena_p, false, NULL);
+}
 
-        mlooptri_index++;
-      }
+static void mesh_calc_tessellation_for_face_with_normal(const MLoop *mloop,
+                                                        const MPoly *mpoly,
+                                                        const MVert *mvert,
+                                                        uint poly_index,
+                                                        MLoopTri *mlt,
+                                                        MemArena **pf_arena_p,
+                                                        const float normal_precalc[3])
+{
+  mesh_calc_tessellation_for_face_impl(
+      mloop, mpoly, mvert, poly_index, mlt, pf_arena_p, true, normal_precalc);
+}
 
-      BLI_memarena_clear(arena);
+static void mesh_recalc_looptri__single_threaded(const MLoop *mloop,
+                                                 const MPoly *mpoly,
+                                                 const MVert *mvert,
+                                                 int totloop,
+                                                 int totpoly,
+                                                 MLoopTri *mlooptri,
+                                                 const float (*poly_normals)[3])
+{
+  MemArena *pf_arena = NULL;
+  const MPoly *mp = mpoly;
+  uint tri_index = 0;
+
+  if (poly_normals != NULL) {
+    for (uint poly_index = 0; poly_index < (uint)totpoly; poly_index++, mp++) {
+      mesh_calc_tessellation_for_face_with_normal(mloop,
+                                                  mpoly,
+                                                  mvert,
+                                                  poly_index,
+                                                  &mlooptri[tri_index],
+                                                  &pf_arena,
+                                                  poly_normals[poly_index]);
+      tri_index += (uint)(mp->totloop - 2);
     }
   }
-
-  if (arena) {
-    BLI_memarena_free(arena);
-    arena = NULL;
+  else {
+    for (uint poly_index = 0; poly_index < (uint)totpoly; poly_index++, mp++) {
+      mesh_calc_tessellation_for_face(
+          mloop, mpoly, mvert, poly_index, &mlooptri[tri_index], &pf_arena);
+      tri_index += (uint)(mp->totloop - 2);
+    }
   }
 
-  BLI_assert(mlooptri_index == poly_to_tri_count(totpoly, totloop));
+  if (pf_arena) {
+    BLI_memarena_free(pf_arena);
+    pf_arena = NULL;
+  }
+  BLI_assert(tri_index == (uint)poly_to_tri_count(totpoly, totloop));
   UNUSED_VARS_NDEBUG(totloop);
+}
 
-#undef USE_TESSFACE_SPEEDUP
-#undef ML_TO_MLT
+struct TessellationUserData {
+  const MLoop *mloop;
+  const MPoly *mpoly;
+  const MVert *mvert;
+
+  /** Output array. */
+  MLoopTri *mlooptri;
+
+  /** Optional pre-calculated polygon normals array. */
+  const float (*poly_normals)[3];
+};
+
+struct TessellationUserTLS {
+  MemArena *pf_arena;
+};
+
+static void mesh_calc_tessellation_for_face_fn(void *__restrict userdata,
+                                               const int index,
+                                               const TaskParallelTLS *__restrict tls)
+{
+  const struct TessellationUserData *data = userdata;
+  struct TessellationUserTLS *tls_data = tls->userdata_chunk;
+  const int tri_index = poly_to_tri_count(index, data->mpoly[index].loopstart);
+  mesh_calc_tessellation_for_face_impl(data->mloop,
+                                       data->mpoly,
+                                       data->mvert,
+                                       (uint)index,
+                                       &data->mlooptri[tri_index],
+                                       &tls_data->pf_arena,
+                                       false,
+                                       NULL);
+}
+
+static void mesh_calc_tessellation_for_face_with_normal_fn(void *__restrict userdata,
+                                                           const int index,
+                                                           const TaskParallelTLS *__restrict tls)
+{
+  const struct TessellationUserData *data = userdata;
+  struct TessellationUserTLS *tls_data = tls->userdata_chunk;
+  const int tri_index = poly_to_tri_count(index, data->mpoly[index].loopstart);
+  mesh_calc_tessellation_for_face_impl(data->mloop,
+                                       data->mpoly,
+                                       data->mvert,
+                                       (uint)index,
+                                       &data->mlooptri[tri_index],
+                                       &tls_data->pf_arena,
+                                       true,
+                                       data->poly_normals[index]);
+}
+
+static void mesh_calc_tessellation_for_face_free_fn(const void *__restrict UNUSED(userdata),
+                                                    void *__restrict tls_v)
+{
+  struct TessellationUserTLS *tls_data = tls_v;
+  if (tls_data->pf_arena) {
+    BLI_memarena_free(tls_data->pf_arena);
+  }
+}
+
+static void mesh_recalc_looptri__multi_threaded(const MLoop *mloop,
+                                                const MPoly *mpoly,
+                                                const MVert *mvert,
+                                                int UNUSED(totloop),
+                                                int totpoly,
+                                                MLoopTri *mlooptri,
+                                                const float (*poly_normals)[3])
+{
+  struct TessellationUserTLS tls_data_dummy = {NULL};
+
+  struct TessellationUserData data = {
+      .mloop = mloop,
+      .mpoly = mpoly,
+      .mvert = mvert,
+      .mlooptri = mlooptri,
+      .poly_normals = poly_normals,
+  };
+
+  TaskParallelSettings settings;
+  BLI_parallel_range_settings_defaults(&settings);
+
+  settings.userdata_chunk = &tls_data_dummy;
+  settings.userdata_chunk_size = sizeof(tls_data_dummy);
+
+  settings.func_free = mesh_calc_tessellation_for_face_free_fn;
+
+  BLI_task_parallel_range(0,
+                          totpoly,
+                          &data,
+                          poly_normals ? mesh_calc_tessellation_for_face_with_normal_fn :
+                                         mesh_calc_tessellation_for_face_fn,
+                          &settings);
+}
+
+/**
+ * Calculate tessellation into #MLoopTri which exist only for this purpose.
+ */
+void BKE_mesh_recalc_looptri(const MLoop *mloop,
+                             const MPoly *mpoly,
+                             const MVert *mvert,
+                             int totloop,
+                             int totpoly,
+                             MLoopTri *mlooptri)
+{
+  if (totloop < MESH_FACE_TESSELLATE_THREADED_LIMIT) {
+    mesh_recalc_looptri__single_threaded(mloop, mpoly, mvert, totloop, totpoly, mlooptri, NULL);
+  }
+  else {
+    mesh_recalc_looptri__multi_threaded(mloop, mpoly, mvert, totloop, totpoly, mlooptri, NULL);
+  }
+}
+
+/**
+ * A version of #BKE_mesh_recalc_looptri which takes pre-calculated polygon normals
+ * (used to avoid having to calculate the face normal for NGON tessellation).
+ *
+ * \note Only use this function if normals have already been calculated, there is no need
+ * to calculate normals just to use this function as it will cause the normals for triangles
+ * to be calculated which aren't needed for tessellation.
+ */
+void BKE_mesh_recalc_looptri_with_normals(const MLoop *mloop,
+                                          const MPoly *mpoly,
+                                          const MVert *mvert,
+                                          int totloop,
+                                          int totpoly,
+                                          MLoopTri *mlooptri,
+                                          const float (*poly_normals)[3])
+{
+  BLI_assert(poly_normals != NULL);
+  if (totloop < MESH_FACE_TESSELLATE_THREADED_LIMIT) {
+    mesh_recalc_looptri__single_threaded(
+        mloop, mpoly, mvert, totloop, totpoly, mlooptri, poly_normals);
+  }
+  else {
+    mesh_recalc_looptri__multi_threaded(
+        mloop, mpoly, mvert, totloop, totpoly, mlooptri, poly_normals);
+  }
 }
 
 /** \} */
diff --git a/source/blender/draw/intern/draw_cache_extract_mesh_render_data.c b/source/blender/draw/intern/draw_cache_extract_mesh_render_data.c
index 44026c0167b..b0eb0c8bcb3 100644
--- a/source/blender/draw/intern/draw_cache_extract_mesh_render_data.c
+++ b/source/blender/draw/intern/draw_cache_extract_mesh_render_data.c
@@ -349,8 +349,19 @@ void mesh_render_data_update_looptris(MeshRenderData *mr,
     /* Mesh */
     if ((iter_type & MR_ITER_LOOPTRI) || (data_flag & MR_DATA_LOOPTRI)) {
       mr->mlooptri = MEM_mallocN(sizeof(*mr->mlooptri) * mr->tri_len, "MR_DATATYPE_LOOPTRI");
-      BKE_mesh_recalc_looptri(
-          me->mloop, me->mpoly, me->mvert, me->totloop, me->totpoly, mr->mlooptri);
+      if (mr->poly_normals != NULL) {
+        BKE_mesh_recalc_looptri_with_normals(me->mloop,
+                                             me->mpoly,
+                                             me->mvert,
+                                             me->totloop,
+                                             me->totpoly,
+                                             mr->mlooptri,
+                                             mr->poly_normals);
+      }
+      else {
+        BKE_mesh_recalc_looptri(
+            me->mloop, me->mpoly, me->mvert, me->totloop, me->totpoly, mr->mlooptri);
+      }
     }
   }
   else {
diff --git a/source/blender/render/intern/bake.c b/source/blender/render/intern/bake.c
index 416d85ce1a3..011bdb056d8 100644
--- a/source/blender/render/intern/bake.c
+++ b/source/blender/render/intern/bake.c
@@ -466,7 +466,16 @@ static TriTessFace *mesh_calc_tri_tessface(Mesh *me, bool tangent, Mesh *me_eval
   looptri = MEM_mallocN(sizeof(*looptri) * tottri, __func__);
   triangles = MEM_callocN(sizeof(TriTessFace) * tottri, __func__);
 
-  BKE_mesh_recalc_looptri(me->mloop, me->mpoly, me->mvert, me->totloop, me->totpoly, looptri);
+  const float(*precomputed_normals)[3] = CustomData_get_layer(&me->pdata, CD_NORMAL);
+  const bool calculate_normal = precomputed_normals ? false : true;
+
+  if (precomputed_normals != NULL) {
+    BKE_mesh_recalc_looptri_with_normals(
+        me->mloop, me->mpoly, me->mvert, me->totloop, me->totpoly, looptri, precomputed_normals);
+  }
+  else {
+    BKE_mesh_recalc_looptri(me->mloop, me->mpoly, me->mvert, me->totloop, me->totpoly, looptri);
+  }
 
   if (tangent) {
     BKE_mesh_ensure_normals_for_display(me_eval);
@@ -479,9 +488,6 @@ static TriTessFace *mesh_calc_tri_tessface(Mesh *me, bool tangent, Mesh *me_eval
     loop_normals = CustomData_get_layer(&me_eval->ldata, CD_NORMAL);
   }
 
-  const float(*precomputed_normals)[3] = CustomData_get_layer(&me->pdata, CD_NORMAL);
-  const bool calculate_normal = precomputed_normals ? false : true;
-
   for (i = 0; i < tottri; i++) {
     const MLoopTri *lt = &looptri[i];
     const MPoly *mp = &me->mpoly[lt->poly];
author	Campbell Barton <ideasman42@gmail.com>	2021-06-20 06:21:11 +0300
committer	Campbell Barton <ideasman42@gmail.com>	2021-06-20 07:39:13 +0300
commit	513f566b40a5bde4d89797aecf7c0ad3e4d1a20b (patch)
tree	40949ae61aef858a7e3f8710abdc56f4e7da4f70
parent	b5e5fbcfc81b6ca4968d0788b5e6e330205d298b (diff)