diff options
author | Campbell Barton <ideasman42@gmail.com> | 2021-08-13 02:27:29 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2021-08-13 03:21:30 +0300 |
commit | 399b6ec76c80a8f343747a0459bb3d3df514555f (patch) | |
tree | fd36a240ac275e63d231a11ee1c95f1d76d85c86 | |
parent | 1275ce61b1b02261f666018866f7061a3f30ce60 (diff) |
Mesh: optimize normal calculation
Optimize mesh normal calculation.
- Remove the intermediate `lnors_weighted` array, accumulate directly
into the normal array using a spin-lock for thread safety.
- Remove single threaded iteration over loops
(normal calculation is now fully multi-threaded).
- Remove stack array (alloca) for pre-calculating edge-directions.
Summary of Performance Characteristics:
- The largest gains are for single high poly meshes, with isolated
normal-calculation benchmarks of meshes over ~1.5 million showing
2x+ speedup, ~25 million polygons are ~2.85x faster.
- Single lower poly meshes (250k polys) can be ~2x slower.
Since these meshes aren't normally a bottleneck,
and this problem isn't noticeable on large scenes,
we considered the performance trade-off reasonable.
- The performance difference reduces with larger scenes,
tests with production files from "Sprite Fight" showing
the same or slightly better overall performance.
NOTE: tested on a AMD Ryzen TR 3970X 32-Core.
For more details & benchmarking scripts, see the patch description.
Reviewed By: mont29
Ref D11993
-rw-r--r-- | source/blender/blenkernel/intern/mesh_normals.cc | 135 |
1 files changed, 82 insertions, 53 deletions
diff --git a/source/blender/blenkernel/intern/mesh_normals.cc b/source/blender/blenkernel/intern/mesh_normals.cc index 6bd08b3dbe0..b86332097fa 100644 --- a/source/blender/blenkernel/intern/mesh_normals.cc +++ b/source/blender/blenkernel/intern/mesh_normals.cc @@ -50,6 +50,8 @@ #include "BKE_global.h" #include "BKE_mesh.h" +#include "atomic_ops.h" + // #define DEBUG_TIME #ifdef DEBUG_TIME @@ -60,6 +62,46 @@ static CLG_LogRef LOG = {"bke.mesh_normals"}; /* -------------------------------------------------------------------- */ +/** \name Private Utility Functions + * \{ */ + +/** + * A thread-safe version of #add_v3_v3 that uses a spin-lock. + * + * \note Avoid using this when the chance of contention is high. + */ +static void add_v3_v3_atomic(float r[3], const float a[3]) +{ +#define FLT_EQ_NONAN(_fa, _fb) (*((const uint32_t *)&_fa) == *((const uint32_t *)&_fb)) + + float virtual_lock = r[0]; + while (true) { + /* This loops until following conditions are met: + * - `r[0]` has same value as virtual_lock (i.e. it did not change since last try). + * - `r[0]` was not `FLT_MAX`, i.e. it was not locked by another thread. */ + const float test_lock = atomic_cas_float(&r[0], virtual_lock, FLT_MAX); + if (_ATOMIC_LIKELY(FLT_EQ_NONAN(test_lock, virtual_lock) && (test_lock != FLT_MAX))) { + break; + } + virtual_lock = test_lock; + } + virtual_lock += a[0]; + r[1] += a[1]; + r[2] += a[2]; + + /* Second atomic operation to 'release' + * our lock on that vector and set its first scalar value. */ + /* Note that we do not need to loop here, since we 'locked' `r[0]`, + * nobody should have changed it in the mean time. */ + virtual_lock = atomic_cas_float(&r[0], FLT_MAX, virtual_lock); + BLI_assert(virtual_lock == FLT_MAX); + +#undef FLT_EQ_NONAN +} + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name Mesh Normal Calculation * \{ */ @@ -210,7 +252,6 @@ struct MeshCalcNormalsData { const MLoop *mloop; MVert *mverts; float (*pnors)[3]; - float (*lnors_weighted)[3]; float (*vnors)[3]; }; @@ -224,65 +265,65 @@ static void mesh_calc_normals_poly_cb(void *__restrict userdata, BKE_mesh_calc_poly_normal(mp, data->mloop + mp->loopstart, data->mverts, data->pnors[pidx]); } -static void mesh_calc_normals_poly_prepare_cb(void *__restrict userdata, - const int pidx, - const TaskParallelTLS *__restrict UNUSED(tls)) +static void mesh_calc_normals_poly_and_accum_cb(void *__restrict userdata, + const int pidx, + const TaskParallelTLS *__restrict UNUSED(tls)) { - MeshCalcNormalsData *data = (MeshCalcNormalsData *)userdata; + const MeshCalcNormalsData *data = (MeshCalcNormalsData *)userdata; const MPoly *mp = &data->mpolys[pidx]; const MLoop *ml = &data->mloop[mp->loopstart]; const MVert *mverts = data->mverts; + float(*vnors)[3] = data->vnors; float pnor_temp[3]; float *pnor = data->pnors ? data->pnors[pidx] : pnor_temp; - float(*lnors_weighted)[3] = data->lnors_weighted; - const int nverts = mp->totloop; - float(*edgevecbuf)[3] = (float(*)[3])BLI_array_alloca(edgevecbuf, (size_t)nverts); + const int i_end = mp->totloop - 1; /* Polygon Normal and edge-vector */ /* inline version of #BKE_mesh_calc_poly_normal, also does edge-vectors */ { - int i_prev = nverts - 1; - const float *v_prev = mverts[ml[i_prev].v].co; - const float *v_curr; - zero_v3(pnor); /* Newell's Method */ - for (int i = 0; i < nverts; i++) { - v_curr = mverts[ml[i].v].co; - add_newell_cross_v3_v3v3(pnor, v_prev, v_curr); - - /* Unrelated to normalize, calculate edge-vector */ - sub_v3_v3v3(edgevecbuf[i_prev], v_prev, v_curr); - normalize_v3(edgevecbuf[i_prev]); - i_prev = i; - - v_prev = v_curr; + const float *v_curr = mverts[ml[i_end].v].co; + for (int i_next = 0; i_next <= i_end; i_next++) { + const float *v_next = mverts[ml[i_next].v].co; + add_newell_cross_v3_v3v3(pnor, v_curr, v_next); + v_curr = v_next; } if (UNLIKELY(normalize_v3(pnor) == 0.0f)) { pnor[2] = 1.0f; /* other axes set to 0.0 */ } } - /* accumulate angle weighted face normal */ - /* inline version of #accumulate_vertex_normals_poly_v3, - * split between this threaded callback and #mesh_calc_normals_poly_accum_cb. */ + /* Accumulate angle weighted face normal into the vertex normal. */ + /* inline version of #accumulate_vertex_normals_poly_v3. */ { - const float *prev_edge = edgevecbuf[nverts - 1]; - - for (int i = 0; i < nverts; i++) { - const int lidx = mp->loopstart + i; - const float *cur_edge = edgevecbuf[i]; - - /* calculate angle between the two poly edges incident on - * this vertex */ - const float fac = saacos(-dot_v3v3(cur_edge, prev_edge)); + float edvec_prev[3], edvec_next[3], edvec_end[3]; + const float *v_curr = mverts[ml[i_end].v].co; + sub_v3_v3v3(edvec_prev, mverts[ml[i_end - 1].v].co, v_curr); + normalize_v3(edvec_prev); + copy_v3_v3(edvec_end, edvec_prev); + + for (int i_next = 0, i_curr = i_end; i_next <= i_end; i_curr = i_next++) { + const float *v_next = mverts[ml[i_next].v].co; + + /* Skip an extra normalization by reusing the first calculated edge. */ + if (i_next != i_end) { + sub_v3_v3v3(edvec_next, v_curr, v_next); + normalize_v3(edvec_next); + } + else { + copy_v3_v3(edvec_next, edvec_end); + } - /* Store for later accumulation */ - mul_v3_v3fl(lnors_weighted[lidx], pnor, fac); + /* Calculate angle between the two poly edges incident on this vertex. */ + const float fac = saacos(-dot_v3v3(edvec_prev, edvec_next)); + const float vnor_add[3] = {pnor[0] * fac, pnor[1] * fac, pnor[2] * fac}; - prev_edge = cur_edge; + add_v3_v3_atomic(vnors[ml[i_curr].v], vnor_add); + v_curr = v_next; + copy_v3_v3(edvec_prev, edvec_next); } } } @@ -309,7 +350,7 @@ void BKE_mesh_calc_normals_poly(MVert *mverts, int numVerts, const MLoop *mloop, const MPoly *mpolys, - int numLoops, + int UNUSED(numLoops), int numPolys, float (*r_polynors)[3], const bool only_face_normals) @@ -335,8 +376,6 @@ void BKE_mesh_calc_normals_poly(MVert *mverts, } float(*vnors)[3] = r_vertnors; - float(*lnors_weighted)[3] = (float(*)[3])MEM_malloc_arrayN( - (size_t)numLoops, sizeof(*lnors_weighted), __func__); bool free_vnors = false; /* first go through and calculate normals for all the polys */ @@ -353,27 +392,17 @@ void BKE_mesh_calc_normals_poly(MVert *mverts, data.mloop = mloop; data.mverts = mverts; data.pnors = pnors; - data.lnors_weighted = lnors_weighted; data.vnors = vnors; - /* Compute poly normals, and prepare weighted loop normals. */ - BLI_task_parallel_range(0, numPolys, &data, mesh_calc_normals_poly_prepare_cb, &settings); - - /* Actually accumulate weighted loop normals into vertex ones. */ - /* Unfortunately, not possible to thread that - * (not in a reasonable, totally lock- and barrier-free fashion), - * since several loops will point to the same vertex... */ - for (int lidx = 0; lidx < numLoops; lidx++) { - add_v3_v3(vnors[mloop[lidx].v], data.lnors_weighted[lidx]); - } + /* Compute poly normals (`pnors`), accumulating them into vertex normals (`vnors`). */ + BLI_task_parallel_range(0, numPolys, &data, mesh_calc_normals_poly_and_accum_cb, &settings); - /* Normalize and validate computed vertex normals. */ + /* Normalize and validate computed vertex normals (`vnors`). */ BLI_task_parallel_range(0, numVerts, &data, mesh_calc_normals_poly_finalize_cb, &settings); if (free_vnors) { MEM_freeN(vnors); } - MEM_freeN(lnors_weighted); } void BKE_mesh_ensure_normals(Mesh *mesh) |