diff options
author | Bastien Montagne <montagne29@wanadoo.fr> | 2017-11-23 23:36:27 +0300 |
---|---|---|
committer | Bastien Montagne <montagne29@wanadoo.fr> | 2017-11-23 23:36:27 +0300 |
commit | a786baa193d28e2193a9f2953b61e659c7df92e0 (patch) | |
tree | 0808fba080bdcdd9d362cd0797ca7fe7d521f933 | |
parent | c9477888305b647abb89702ab2316ba5f93033b1 (diff) | |
parent | 43ddf0e9a7f0f9986ed24e05df0ce7eac5f944b6 (diff) |
Merge branch 'master' into blender2.8
-rw-r--r-- | intern/atomic/atomic_ops.h | 3 | ||||
-rw-r--r-- | intern/atomic/intern/atomic_ops_ext.h | 6 | ||||
-rw-r--r-- | source/blender/blenlib/BLI_mempool.h | 5 | ||||
-rw-r--r-- | source/blender/blenlib/BLI_task.h | 11 | ||||
-rw-r--r-- | source/blender/blenlib/intern/BLI_mempool.c | 74 | ||||
-rw-r--r-- | source/blender/blenlib/intern/task.c | 87 | ||||
-rw-r--r-- | source/blender/bmesh/CMakeLists.txt | 1 | ||||
-rw-r--r-- | source/blender/bmesh/intern/bmesh_iterators_inline.h | 36 | ||||
-rw-r--r-- | source/blender/bmesh/intern/bmesh_mesh.c | 265 | ||||
-rw-r--r-- | tests/gtests/blenlib/BLI_task_test.cc | 76 | ||||
-rw-r--r-- | tests/gtests/blenlib/CMakeLists.txt | 2 |
11 files changed, 459 insertions, 107 deletions
diff --git a/intern/atomic/atomic_ops.h b/intern/atomic/atomic_ops.h index 578cfb76eb6..e849bcf6cef 100644 --- a/intern/atomic/atomic_ops.h +++ b/intern/atomic/atomic_ops.h @@ -130,6 +130,9 @@ ATOMIC_INLINE unsigned int atomic_cas_u(unsigned int *v, unsigned int old, unsig ATOMIC_INLINE void *atomic_cas_ptr(void **v, void *old, void *_new); + +ATOMIC_INLINE float atomic_cas_float(float *v, float old, float _new); + /* WARNING! Float 'atomics' are really faked ones, those are actually closer to some kind of spinlock-sync'ed operation, * which means they are only efficient if collisions are highly unlikely (i.e. if probability of two threads * working on the same pointer at the same time is very low). */ diff --git a/intern/atomic/intern/atomic_ops_ext.h b/intern/atomic/intern/atomic_ops_ext.h index 7eef20f46d3..1b1fea9642d 100644 --- a/intern/atomic/intern/atomic_ops_ext.h +++ b/intern/atomic/intern/atomic_ops_ext.h @@ -191,6 +191,12 @@ ATOMIC_INLINE void *atomic_cas_ptr(void **v, void *old, void *_new) /* float operations. */ ATOMIC_STATIC_ASSERT(sizeof(float) == sizeof(uint32_t), "sizeof(float) != sizeof(uint32_t)"); +ATOMIC_INLINE float atomic_cas_float(float *v, float old, float _new) +{ + uint32_t ret = atomic_cas_uint32((uint32_t *)v, *(uint32_t *)&old, *(uint32_t *)&_new); + return *(float *)&ret; +} + ATOMIC_INLINE float atomic_add_and_fetch_fl(float *p, const float x) { float oldval, newval; diff --git a/source/blender/blenlib/BLI_mempool.h b/source/blender/blenlib/BLI_mempool.h index 0c754f551e0..b68ca6b1f2b 100644 --- a/source/blender/blenlib/BLI_mempool.h +++ b/source/blender/blenlib/BLI_mempool.h @@ -71,6 +71,8 @@ typedef struct BLI_mempool_iter { BLI_mempool *pool; struct BLI_mempool_chunk *curchunk; unsigned int curindex; + + struct BLI_mempool_chunk **curchunk_threaded_shared; } BLI_mempool_iter; /* flag */ @@ -87,6 +89,9 @@ enum { void BLI_mempool_iternew(BLI_mempool *pool, BLI_mempool_iter *iter) ATTR_NONNULL(); void *BLI_mempool_iterstep(BLI_mempool_iter *iter) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(); +BLI_mempool_iter *BLI_mempool_iter_threadsafe_create(BLI_mempool *pool, const size_t num_iter) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(); +void BLI_mempool_iter_threadsafe_free(BLI_mempool_iter *iter_arr) ATTR_NONNULL(); + #ifdef __cplusplus } #endif diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h index 721327d26a8..ccfa2b6e2e7 100644 --- a/source/blender/blenlib/BLI_task.h +++ b/source/blender/blenlib/BLI_task.h @@ -35,6 +35,8 @@ extern "C" { #include "BLI_threads.h" #include "BLI_utildefines.h" +struct BLI_mempool; + /* Task Scheduler * * Central scheduler that holds running threads ready to execute tasks. A single @@ -150,6 +152,15 @@ void BLI_task_parallel_listbase( TaskParallelListbaseFunc func, const bool use_threading); +typedef struct MempoolIterData MempoolIterData; +typedef void (*TaskParallelMempoolFunc)(void *userdata, + MempoolIterData *iter); +void BLI_task_parallel_mempool( + struct BLI_mempool *mempool, + void *userdata, + TaskParallelMempoolFunc func, + const bool use_threading); + #ifdef __cplusplus } #endif diff --git a/source/blender/blenlib/intern/BLI_mempool.c b/source/blender/blenlib/intern/BLI_mempool.c index b02811616dd..c90f9e300b7 100644 --- a/source/blender/blenlib/intern/BLI_mempool.c +++ b/source/blender/blenlib/intern/BLI_mempool.c @@ -41,6 +41,8 @@ #include <string.h> #include <stdlib.h> +#include "atomic_ops.h" + #include "BLI_utildefines.h" #include "BLI_mempool.h" /* own include */ @@ -553,7 +555,7 @@ void *BLI_mempool_as_arrayN(BLI_mempool *pool, const char *allocstr) } /** - * Create a new mempool iterator, \a BLI_MEMPOOL_ALLOW_ITER flag must be set. + * Initialize a new mempool iterator, \a BLI_MEMPOOL_ALLOW_ITER flag must be set. */ void BLI_mempool_iternew(BLI_mempool *pool, BLI_mempool_iter *iter) { @@ -562,6 +564,47 @@ void BLI_mempool_iternew(BLI_mempool *pool, BLI_mempool_iter *iter) iter->pool = pool; iter->curchunk = pool->chunks; iter->curindex = 0; + + iter->curchunk_threaded_shared = NULL; +} + +/** + * Initialize an array of mempool iterators, \a BLI_MEMPOOL_ALLOW_ITER flag must be set. + * + * This is used in threaded code, to generate as much iterators as needed (each task should have its own), + * such that each iterator goes over its own single chunk, and only getting the next chunk to iterate over has to be + * protected against concurrency (which can be done in a lockless way). + * + * To be used when creating a task for each single item in the pool is totally overkill. + * + * See BLI_task_parallel_mempool implementation for detailed usage example. + */ +BLI_mempool_iter *BLI_mempool_iter_threadsafe_create(BLI_mempool *pool, const size_t num_iter) +{ + BLI_assert(pool->flag & BLI_MEMPOOL_ALLOW_ITER); + + BLI_mempool_iter *iter_arr = MEM_mallocN(sizeof(*iter_arr) * num_iter, __func__); + BLI_mempool_chunk **curchunk_threaded_shared = MEM_mallocN(sizeof(void *), __func__); + + BLI_mempool_iternew(pool, iter_arr); + + *curchunk_threaded_shared = iter_arr->curchunk; + iter_arr->curchunk_threaded_shared = curchunk_threaded_shared; + + for (size_t i = 1; i < num_iter; i++) { + iter_arr[i] = iter_arr[0]; + *curchunk_threaded_shared = iter_arr[i].curchunk = (*curchunk_threaded_shared) ? (*curchunk_threaded_shared)->next : NULL; + } + + return iter_arr; +} + +void BLI_mempool_iter_threadsafe_free(BLI_mempool_iter *iter_arr) +{ + BLI_assert(iter_arr->curchunk_threaded_shared != NULL); + + MEM_freeN(iter_arr->curchunk_threaded_shared); + MEM_freeN(iter_arr); } #if 0 @@ -571,15 +614,28 @@ static void *bli_mempool_iternext(BLI_mempool_iter *iter) { void *ret = NULL; - if (!iter->curchunk || !iter->pool->totused) return NULL; + if (iter->curchunk == NULL || !iter->pool->totused) { + return ret; + } ret = ((char *)CHUNK_DATA(iter->curchunk)) + (iter->pool->esize * iter->curindex); iter->curindex++; if (iter->curindex == iter->pool->pchunk) { - iter->curchunk = iter->curchunk->next; iter->curindex = 0; + if (iter->curchunk_threaded_shared) { + while (1) { + iter->curchunk = *iter->curchunk_threaded_shared; + if (iter->curchunk == NULL) { + break; + } + if (atomic_cas_ptr((void **)iter->curchunk_threaded_shared, iter->curchunk, iter->curchunk->next) == iter->curchunk) { + break; + } + } + } + iter->curchunk = iter->curchunk->next; } return ret; @@ -620,8 +676,18 @@ void *BLI_mempool_iterstep(BLI_mempool_iter *iter) } else { iter->curindex = 0; + if (iter->curchunk_threaded_shared) { + for (iter->curchunk = *iter->curchunk_threaded_shared; + (iter->curchunk != NULL) && + (atomic_cas_ptr((void **)iter->curchunk_threaded_shared, iter->curchunk, iter->curchunk->next) != iter->curchunk); + iter->curchunk = *iter->curchunk_threaded_shared); + + if (UNLIKELY(iter->curchunk == NULL)) { + return (ret->freeword == FREEWORD) ? NULL : ret; + } + } iter->curchunk = iter->curchunk->next; - if (iter->curchunk == NULL) { + if (UNLIKELY(iter->curchunk == NULL)) { return (ret->freeword == FREEWORD) ? NULL : ret; } curnode = CHUNK_DATA(iter->curchunk); diff --git a/source/blender/blenlib/intern/task.c b/source/blender/blenlib/intern/task.c index d69241c3737..eb7f186702b 100644 --- a/source/blender/blenlib/intern/task.c +++ b/source/blender/blenlib/intern/task.c @@ -32,6 +32,7 @@ #include "BLI_listbase.h" #include "BLI_math.h" +#include "BLI_mempool.h" #include "BLI_task.h" #include "BLI_threads.h" @@ -1354,3 +1355,89 @@ void BLI_task_parallel_listbase( BLI_spin_end(&state.lock); } + + +typedef struct ParallelMempoolState { + void *userdata; + TaskParallelMempoolFunc func; +} ParallelMempoolState; + +static void parallel_mempool_func( + TaskPool * __restrict pool, + void *taskdata, + int UNUSED(threadid)) +{ + ParallelMempoolState * __restrict state = BLI_task_pool_userdata(pool); + BLI_mempool_iter *iter = taskdata; + MempoolIterData *item; + + while ((item = BLI_mempool_iterstep(iter)) != NULL) { + state->func(state->userdata, item); + } +} + +/** + * This function allows to parallelize for loops over Mempool items. + * + * \param pool The iterable BLI_mempool to loop over. + * \param userdata Common userdata passed to all instances of \a func. + * \param func Callback function. + * \param use_threading If \a true, actually split-execute loop in threads, else just do a sequential forloop + * (allows caller to use any kind of test to switch on parallelization or not). + * + * \note There is no static scheduling here. + */ +void BLI_task_parallel_mempool( + BLI_mempool *mempool, + void *userdata, + TaskParallelMempoolFunc func, + const bool use_threading) +{ + TaskScheduler *task_scheduler; + TaskPool *task_pool; + ParallelMempoolState state; + int i, num_threads, num_tasks; + + if (BLI_mempool_count(mempool) == 0) { + return; + } + + if (!use_threading) { + BLI_mempool_iter iter; + BLI_mempool_iternew(mempool, &iter); + + for (void *item = BLI_mempool_iterstep(&iter); item != NULL; item = BLI_mempool_iterstep(&iter)) { + func(userdata, item); + } + return; + } + + task_scheduler = BLI_task_scheduler_get(); + task_pool = BLI_task_pool_create(task_scheduler, &state); + num_threads = BLI_task_scheduler_num_threads(task_scheduler); + + /* The idea here is to prevent creating task for each of the loop iterations + * and instead have tasks which are evenly distributed across CPU cores and + * pull next item to be crunched using the threaded-aware BLI_mempool_iter. + */ + num_tasks = num_threads * 2; + + state.userdata = userdata; + state.func = func; + + BLI_mempool_iter *mempool_iterators = BLI_mempool_iter_threadsafe_create(mempool, (size_t)num_tasks); + + for (i = 0; i < num_tasks; i++) { + /* Use this pool's pre-allocated tasks. */ + BLI_task_pool_push_from_thread(task_pool, + parallel_mempool_func, + &mempool_iterators[i], false, + TASK_PRIORITY_HIGH, + task_pool->thread_id); + } + + BLI_task_pool_work_and_wait(task_pool); + BLI_task_pool_free(task_pool); + + BLI_mempool_iter_threadsafe_free(mempool_iterators); +} diff --git a/source/blender/bmesh/CMakeLists.txt b/source/blender/bmesh/CMakeLists.txt index ea24da86626..43e45eab98f 100644 --- a/source/blender/bmesh/CMakeLists.txt +++ b/source/blender/bmesh/CMakeLists.txt @@ -30,6 +30,7 @@ set(INC ../blentranslation ../makesdna ../../../intern/guardedalloc + ../../../intern/atomic ../../../intern/eigen ../../../extern/rangetree ) diff --git a/source/blender/bmesh/intern/bmesh_iterators_inline.h b/source/blender/bmesh/intern/bmesh_iterators_inline.h index e68440021e6..32f0f4b67c4 100644 --- a/source/blender/bmesh/intern/bmesh_iterators_inline.h +++ b/source/blender/bmesh/intern/bmesh_iterators_inline.h @@ -182,4 +182,40 @@ BLI_INLINE void *BM_iter_new(BMIter *iter, BMesh *bm, const char itype, void *da } } +/** + * \brief Parallel (threaded) iterator, only available for most basic itertypes (verts/edges/faces of mesh). + * + * Uses BLI_task_parallel_mempool to iterate over all items of underlying matching mempool. + * + * \note You have to include BLI_task.h before BMesh includes to be able to use this function! + */ + +#ifdef __BLI_TASK_H__ + +ATTR_NONNULL(1) +BLI_INLINE void BM_iter_parallel( + BMesh *bm, const char itype, TaskParallelMempoolFunc func, void *userdata, const bool use_threading) +{ + BLI_assert(bm != NULL); + + /* inlining optimizes out this switch when called with the defined type */ + switch ((BMIterType)itype) { + case BM_VERTS_OF_MESH: + BLI_task_parallel_mempool(bm->vpool, userdata, func, use_threading); + break; + case BM_EDGES_OF_MESH: + BLI_task_parallel_mempool(bm->epool, userdata, func, use_threading); + break; + case BM_FACES_OF_MESH: + BLI_task_parallel_mempool(bm->fpool, userdata, func, use_threading); + break; + default: + /* should never happen */ + BLI_assert(0); + break; + } +} + +#endif /* __BLI_TASK_H__ */ + #endif /* __BMESH_ITERATORS_INLINE_H__ */ diff --git a/source/blender/bmesh/intern/bmesh_mesh.c b/source/blender/bmesh/intern/bmesh_mesh.c index 2ff670c770e..8407dc36040 100644 --- a/source/blender/bmesh/intern/bmesh_mesh.c +++ b/source/blender/bmesh/intern/bmesh_mesh.c @@ -35,6 +35,7 @@ #include "BLI_listbase.h" #include "BLI_math.h" #include "BLI_stack.h" +#include "BLI_task.h" #include "BLI_utildefines.h" #include "BKE_cdderivedmesh.h" @@ -42,6 +43,8 @@ #include "BKE_mesh.h" #include "BKE_multires.h" +#include "atomic_ops.h" + #include "intern/bmesh_private.h" /* used as an extern, defined in bmesh.h */ @@ -318,146 +321,202 @@ void BM_mesh_free(BMesh *bm) MEM_freeN(bm); } + /** * Helpers for #BM_mesh_normals_update and #BM_verts_calc_normal_vcos */ -static void bm_mesh_edges_calc_vectors(BMesh *bm, float (*edgevec)[3], const float (*vcos)[3]) + +typedef struct BMEdgesCalcVectorsData { + /* Read-only data. */ + const float (*vcos)[3]; + + /* Read-write data, but no need to protect it, no concurrency to fear here. */ + float (*edgevec)[3]; +} BMEdgesCalcVectorsData; + + +static void mesh_edges_calc_vectors_cb(void *userdata, MempoolIterData *mp_e) { - BMIter eiter; - BMEdge *e; - int index; + BMEdgesCalcVectorsData *data = userdata; + BMEdge *e = (BMEdge *)mp_e; - if (vcos) { - BM_mesh_elem_index_ensure(bm, BM_VERT); + if (e->l) { + const float *v1_co = data->vcos ? data->vcos[BM_elem_index_get(e->v1)] : e->v1->co; + const float *v2_co = data->vcos ? data->vcos[BM_elem_index_get(e->v2)] : e->v2->co; + sub_v3_v3v3(data->edgevec[BM_elem_index_get(e)], v2_co, v1_co); + normalize_v3(data->edgevec[BM_elem_index_get(e)]); } + else { + /* the edge vector will not be needed when the edge has no radial */ + } +} - BM_ITER_MESH_INDEX (e, &eiter, bm, BM_EDGES_OF_MESH, index) { - BM_elem_index_set(e, index); /* set_inline */ +static void bm_mesh_edges_calc_vectors(BMesh *bm, float (*edgevec)[3], const float (*vcos)[3]) +{ + BM_mesh_elem_index_ensure(bm, BM_EDGE | (vcos ? BM_VERT : 0)); - if (e->l) { - const float *v1_co = vcos ? vcos[BM_elem_index_get(e->v1)] : e->v1->co; - const float *v2_co = vcos ? vcos[BM_elem_index_get(e->v2)] : e->v2->co; - sub_v3_v3v3(edgevec[index], v2_co, v1_co); - normalize_v3(edgevec[index]); - } - else { - /* the edge vector will not be needed when the edge has no radial */ + BMEdgesCalcVectorsData data = { + .vcos = vcos, + .edgevec = edgevec + }; + + BM_iter_parallel(bm, BM_EDGES_OF_MESH, mesh_edges_calc_vectors_cb, &data, bm->totedge >= BM_OMP_LIMIT); +} + + +typedef struct BMVertsCalcNormalsData { + /* Read-only data. */ + const float (*fnos)[3]; + const float (*edgevec)[3]; + const float (*vcos)[3]; + + /* Read-write data, protected by an atomic-based fake spinlock-like system... */ + float (*vnos)[3]; +} BMVertsCalcNormalsData; + +static void mesh_verts_calc_normals_accum_cb(void *userdata, MempoolIterData *mp_f) +{ + BMVertsCalcNormalsData *data = userdata; + BMFace *f = (BMFace *)mp_f; + + const float *f_no = data->fnos ? data->fnos[BM_elem_index_get(f)] : f->no; + + BMLoop *l_first, *l_iter; + l_iter = l_first = BM_FACE_FIRST_LOOP(f); + do { + const float *e1diff, *e2diff; + float dotprod; + float fac; + + /* calculate the dot product of the two edges that + * meet at the loop's vertex */ + e1diff = data->edgevec[BM_elem_index_get(l_iter->prev->e)]; + e2diff = data->edgevec[BM_elem_index_get(l_iter->e)]; + dotprod = dot_v3v3(e1diff, e2diff); + + /* edge vectors are calculated from e->v1 to e->v2, so + * adjust the dot product if one but not both loops + * actually runs from from e->v2 to e->v1 */ + if ((l_iter->prev->e->v1 == l_iter->prev->v) ^ (l_iter->e->v1 == l_iter->v)) { + dotprod = -dotprod; + } + + fac = saacos(-dotprod); + + /* accumulate weighted face normal into the vertex's normal */ + float *v_no = data->vnos ? data->vnos[BM_elem_index_get(l_iter->v)] : l_iter->v->no; + + /* This block is a lockless threadsafe madd_v3_v3fl. + * It uses the first float of the vector as a sort of cheap spinlock, + * assuming FLT_MAX is a safe 'illegal' value that cannot be set here otherwise. + * It also assumes that collisions between threads are highly unlikely, + * else performances would be quite bad here. */ + float virtual_lock = v_no[0]; + while ((virtual_lock = atomic_cas_float(&v_no[0], virtual_lock, FLT_MAX)) == FLT_MAX) { + /* This loops until following conditions are met: + * - v_no[0] has same value as virtual_lock (i.e. it did not change since last try). + * - v_no_[0] was not FLT_MAX, i.e. it was not locked by another thread. + */ } + /* Now we own that normal value, and can change it. + * But first scalar of the vector must not be changed yet, it's our lock! */ + virtual_lock += f_no[0] * fac; + v_no[1] += f_no[1] * fac; + v_no[2] += f_no[2] * fac; + /* Second atomic operation to 'release' our lock on that vector and set its first scalar value. */ + virtual_lock = atomic_cas_float(&v_no[0], FLT_MAX, virtual_lock); + BLI_assert(virtual_lock == FLT_MAX); + + } while ((l_iter = l_iter->next) != l_first); +} + +static void mesh_verts_calc_normals_normalize_cb(void *userdata, MempoolIterData *mp_v) +{ + BMVertsCalcNormalsData *data = userdata; + BMVert *v = (BMVert *)mp_v; + + float *v_no = data->vnos ? data->vnos[BM_elem_index_get(v)] : v->no; + if (UNLIKELY(normalize_v3(v_no) == 0.0f)) { + const float *v_co = data->vcos ? data->vcos[BM_elem_index_get(v)] : v->co; + normalize_v3_v3(v_no, v_co); } - bm->elem_index_dirty &= ~BM_EDGE; } static void bm_mesh_verts_calc_normals( BMesh *bm, const float (*edgevec)[3], const float (*fnos)[3], const float (*vcos)[3], float (*vnos)[3]) { - BM_mesh_elem_index_ensure(bm, (vnos) ? (BM_EDGE | BM_VERT) : BM_EDGE); + BM_mesh_elem_index_ensure(bm, (BM_EDGE | BM_FACE) | ((vnos || vcos) ? BM_VERT : 0)); - /* add weighted face normals to vertices */ - { - BMIter fiter; - BMFace *f; - int i; - - BM_ITER_MESH_INDEX (f, &fiter, bm, BM_FACES_OF_MESH, i) { - BMLoop *l_first, *l_iter; - const float *f_no = fnos ? fnos[i] : f->no; - - l_iter = l_first = BM_FACE_FIRST_LOOP(f); - do { - const float *e1diff, *e2diff; - float dotprod; - float fac; - float *v_no = vnos ? vnos[BM_elem_index_get(l_iter->v)] : l_iter->v->no; - - /* calculate the dot product of the two edges that - * meet at the loop's vertex */ - e1diff = edgevec[BM_elem_index_get(l_iter->prev->e)]; - e2diff = edgevec[BM_elem_index_get(l_iter->e)]; - dotprod = dot_v3v3(e1diff, e2diff); - - /* edge vectors are calculated from e->v1 to e->v2, so - * adjust the dot product if one but not both loops - * actually runs from from e->v2 to e->v1 */ - if ((l_iter->prev->e->v1 == l_iter->prev->v) ^ (l_iter->e->v1 == l_iter->v)) { - dotprod = -dotprod; - } + BMVertsCalcNormalsData data = { + .fnos = fnos, + .edgevec = edgevec, + .vcos = vcos, + .vnos = vnos + }; - fac = saacos(-dotprod); + BM_iter_parallel(bm, BM_FACES_OF_MESH, mesh_verts_calc_normals_accum_cb, &data, bm->totface >= BM_OMP_LIMIT); - /* accumulate weighted face normal into the vertex's normal */ - madd_v3_v3fl(v_no, f_no, fac); - } while ((l_iter = l_iter->next) != l_first); - } - } + /* normalize the accumulated vertex normals */ + BM_iter_parallel(bm, BM_VERTS_OF_MESH, mesh_verts_calc_normals_normalize_cb, &data, bm->totvert >= BM_OMP_LIMIT); +} - /* normalize the accumulated vertex normals */ - { - BMIter viter; - BMVert *v; - int i; - - BM_ITER_MESH_INDEX (v, &viter, bm, BM_VERTS_OF_MESH, i) { - float *v_no = vnos ? vnos[i] : v->no; - if (UNLIKELY(normalize_v3(v_no) == 0.0f)) { - const float *v_co = vcos ? vcos[i] : v->co; - normalize_v3_v3(v_no, v_co); - } - } - } +static void mesh_faces_calc_normals_cb(void *UNUSED(userdata), MempoolIterData *mp_f) +{ + BMFace *f = (BMFace *)mp_f; + + BM_face_normal_update(f); } + /** * \brief BMesh Compute Normals * * Updates the normals of a mesh. */ +#include "PIL_time_utildefines.h" void BM_mesh_normals_update(BMesh *bm) { float (*edgevec)[3] = MEM_mallocN(sizeof(*edgevec) * bm->totedge, __func__); -#pragma omp parallel sections if (bm->totvert + bm->totedge + bm->totface >= BM_OMP_LIMIT) - { -#pragma omp section - { - /* calculate all face normals */ - BMIter fiter; - BMFace *f; - int i; - - BM_ITER_MESH_INDEX (f, &fiter, bm, BM_FACES_OF_MESH, i) { - BM_elem_index_set(f, i); /* set_inline */ - BM_face_normal_update(f); - } - bm->elem_index_dirty &= ~BM_FACE; - } -#pragma omp section - { - /* Zero out vertex normals */ - BMIter viter; - BMVert *v; - int i; - - BM_ITER_MESH_INDEX (v, &viter, bm, BM_VERTS_OF_MESH, i) { - BM_elem_index_set(v, i); /* set_inline */ - zero_v3(v->no); - } - bm->elem_index_dirty &= ~BM_VERT; - } -#pragma omp section - { - /* Compute normalized direction vectors for each edge. - * Directions will be used for calculating the weights of the face normals on the vertex normals. - */ - bm_mesh_edges_calc_vectors(bm, edgevec, NULL); - } + TIMEIT_START_AVERAGED(bmesh_nors); + + /* Parallel mempool iteration does not allow to generate indices inline anymore... */ + BM_mesh_elem_index_ensure(bm, (BM_EDGE | BM_FACE)); + + /* calculate all face normals */ + TIMEIT_START_AVERAGED(faces_nors); + BM_iter_parallel(bm, BM_FACES_OF_MESH, mesh_faces_calc_normals_cb, NULL, bm->totface >= BM_OMP_LIMIT); + TIMEIT_END_AVERAGED(faces_nors); + + /* Zero out vertex normals */ + BMIter viter; + BMVert *v; + int i; + + TIMEIT_START_AVERAGED(verts_zero_nors); + BM_ITER_MESH_INDEX (v, &viter, bm, BM_VERTS_OF_MESH, i) { + BM_elem_index_set(v, i); /* set_inline */ + zero_v3(v->no); } - /* end omp */ + bm->elem_index_dirty &= ~BM_VERT; + TIMEIT_END_AVERAGED(verts_zero_nors); + + /* Compute normalized direction vectors for each edge. + * Directions will be used for calculating the weights of the face normals on the vertex normals. + */ + TIMEIT_START_AVERAGED(edges_vecs); + bm_mesh_edges_calc_vectors(bm, edgevec, NULL); + TIMEIT_END_AVERAGED(edges_vecs); /* Add weighted face normals to vertices, and normalize vert normals. */ + TIMEIT_START_AVERAGED(verts_nors); bm_mesh_verts_calc_normals(bm, (const float(*)[3])edgevec, NULL, NULL, NULL); + TIMEIT_END_AVERAGED(verts_nors); MEM_freeN(edgevec); + + TIMEIT_END_AVERAGED(bmesh_nors); } /** diff --git a/tests/gtests/blenlib/BLI_task_test.cc b/tests/gtests/blenlib/BLI_task_test.cc new file mode 100644 index 00000000000..e6464164ecb --- /dev/null +++ b/tests/gtests/blenlib/BLI_task_test.cc @@ -0,0 +1,76 @@ +/* Apache License, Version 2.0 */ + +#include "testing/testing.h" +#include <string.h> + +#include "atomic_ops.h" + +extern "C" { +#include "BLI_mempool.h" +#include "BLI_task.h" +#include "BLI_utildefines.h" +}; + +#define NUM_ITEMS 10000 + +static void task_mempool_iter_func(void *userdata, MempoolIterData *item) { + int *data = (int *)item; + int *count = (int *)userdata; + + EXPECT_TRUE(data != NULL); + + *data += 1; + atomic_sub_and_fetch_uint32((uint32_t *)count, 1); +} + +TEST(task, MempoolIter) +{ + int *data[NUM_ITEMS]; + BLI_mempool *mempool = BLI_mempool_create(sizeof(*data[0]), NUM_ITEMS, 32, BLI_MEMPOOL_ALLOW_ITER); + + int i; + + /* 'Randomly' add and remove some items from mempool, to create a non-homogenous one. */ + int num_items = 0; + for (i = 0; i < NUM_ITEMS; i++) { + data[i] = (int *)BLI_mempool_alloc(mempool); + *data[i] = i - 1; + num_items++; + } + + for (i = 0; i < NUM_ITEMS; i += 3) { + BLI_mempool_free(mempool, data[i]); + data[i] = NULL; + num_items--; + } + + for (i = 0; i < NUM_ITEMS; i += 7) { + if (data[i] == NULL) { + data[i] = (int *)BLI_mempool_alloc(mempool); + *data[i] = i - 1; + num_items++; + } + } + + for (i = 0; i < NUM_ITEMS - 5; i += 23) { + for (int j = 0; j < 5; j++) { + if (data[i + j] != NULL) { + BLI_mempool_free(mempool, data[i + j]); + data[i + j] = NULL; + num_items--; + } + } + } + + BLI_task_parallel_mempool(mempool, &num_items, task_mempool_iter_func, true); + + /* Those checks should ensure us all items of the mempool were processed once, and only once - as expected. */ + EXPECT_EQ(num_items, 0); + for (i = 0; i < NUM_ITEMS; i++) { + if (data[i] != NULL) { + EXPECT_EQ(*data[i], i); + } + } + + BLI_mempool_destroy(mempool); +} diff --git a/tests/gtests/blenlib/CMakeLists.txt b/tests/gtests/blenlib/CMakeLists.txt index f3b2e81c61a..001f1d5f7b3 100644 --- a/tests/gtests/blenlib/CMakeLists.txt +++ b/tests/gtests/blenlib/CMakeLists.txt @@ -27,6 +27,7 @@ set(INC ../../../source/blender/blenlib ../../../source/blender/makesdna ../../../intern/guardedalloc + ../../../intern/atomic ) include_directories(${INC}) @@ -56,6 +57,7 @@ BLENDER_TEST(BLI_polyfill2d "bf_blenlib") BLENDER_TEST(BLI_stack "bf_blenlib") BLENDER_TEST(BLI_string "bf_blenlib") BLENDER_TEST(BLI_string_utf8 "bf_blenlib") +BLENDER_TEST(BLI_task "bf_blenlib") BLENDER_TEST_PERFORMANCE(BLI_ghash_performance "bf_blenlib") |