diff options
-rw-r--r-- | source/blender/blenlib/BLI_task.h | 12 | ||||
-rw-r--r-- | source/blender/blenlib/intern/task.c | 112 | ||||
-rw-r--r-- | source/blender/modifiers/intern/MOD_meshdeform.c | 139 |
3 files changed, 212 insertions, 51 deletions
diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h index c9cbaf997fb..8c22a25fe14 100644 --- a/source/blender/blenlib/BLI_task.h +++ b/source/blender/blenlib/BLI_task.h @@ -100,6 +100,18 @@ ThreadMutex *BLI_task_pool_user_mutex(TaskPool *pool); /* number of tasks done, for stats, don't use this to make decisions */ size_t BLI_task_pool_tasks_done(TaskPool *pool); +/* Parallel for routines */ +typedef void (*TaskParallelRangeFunc)(void *userdata, int iter); +void BLI_task_parallel_range_ex( + int start, int stop, + void *userdata, + TaskParallelRangeFunc func, + const int range_threshold); +void BLI_task_parallel_range( + int start, int stop, + void *userdata, + TaskParallelRangeFunc func); + #ifdef __cplusplus } #endif diff --git a/source/blender/blenlib/intern/task.c b/source/blender/blenlib/intern/task.c index 8d867b9f295..3a49abc7060 100644 --- a/source/blender/blenlib/intern/task.c +++ b/source/blender/blenlib/intern/task.c @@ -428,3 +428,115 @@ size_t BLI_task_pool_tasks_done(TaskPool *pool) return pool->done; } +/* Parallel range routines */ + +/** + * + * Main functions: + * - #BLI_task_parallel_range + * + * TODO: + * - #BLI_task_parallel_foreach_listbase (#ListBase - doubly linked list) + * - #BLI_task_parallel_foreach_link (#Link - singly linked list) + * - #BLI_task_parallel_foreach_ghash/gset (#GHash/#GSet - hash & set) + * - #BLI_task_parallel_foreach_mempool (#BLI_mempool - iterate over mempools) + * + * Possible improvements: + * + * - Chunk iterations to reduce number of spin locks. 
+ */ + +typedef struct ParallelRangeState { + int start, stop; + void *userdata; + TaskParallelRangeFunc func; + + int iter; + SpinLock lock; +} ParallelRangeState; + +BLI_INLINE bool parallel_range_next_iter_get( + ParallelRangeState *state, + int *iter) +{ + bool result = false; + if (state->iter < state->stop) { + BLI_spin_lock(&state->lock); + if (state->iter < state->stop) { + *iter = state->iter++; + result = true; + } + BLI_spin_unlock(&state->lock); + } + return result; +} + +static void parallel_range_func( + TaskPool *pool, + void *UNUSED(taskdata), + int UNUSED(threadid)) +{ + ParallelRangeState *state = BLI_task_pool_userdata(pool); + int iter; + while (parallel_range_next_iter_get(state, &iter)) { + state->func(state->userdata, iter); + } +} + +void BLI_task_parallel_range_ex( + int start, int stop, + void *userdata, + TaskParallelRangeFunc func, + const int range_threshold) +{ + TaskScheduler *task_scheduler; + TaskPool *task_pool; + ParallelRangeState state; + int i; + + BLI_assert(start < stop); + + /* If there's not enough data to be crunched, don't bother with tasks at all, + * do everything from the main thread. + */ + if (stop - start < range_threshold) { + for (i = start; i < stop; ++i) { + func(userdata, i); + } + return; + } + + BLI_spin_init(&state.lock); + state.start = start; + state.stop = stop; + state.userdata = userdata; + state.func = func; + state.iter = start; + + task_scheduler = BLI_task_scheduler_get(); + task_pool = BLI_task_pool_create(task_scheduler, &state); + + /* The idea here is to prevent creating a task for each of the loop iterations + * and instead have tasks which are evenly distributed across CPU cores and + * pull the next iter to be crunched using the queue. 
+ */ + for (i = 0; i < 2 * BLI_task_scheduler_num_threads(task_scheduler); i++) { + BLI_task_pool_push(task_pool, + parallel_range_func, + NULL, false, + TASK_PRIORITY_HIGH); + } + + BLI_task_pool_work_and_wait(task_pool); + BLI_task_pool_free(task_pool); + + BLI_spin_end(&state.lock); +} + +void BLI_task_parallel_range( + int start, int stop, + void *userdata, + TaskParallelRangeFunc func) +{ + BLI_task_parallel_range_ex(start, stop, userdata, func, 64); +} diff --git a/source/blender/modifiers/intern/MOD_meshdeform.c b/source/blender/modifiers/intern/MOD_meshdeform.c index 959bbdcbca9..bd6ce26103c 100644 --- a/source/blender/modifiers/intern/MOD_meshdeform.c +++ b/source/blender/modifiers/intern/MOD_meshdeform.c @@ -37,6 +37,7 @@ #include "DNA_scene_types.h" #include "BLI_math.h" +#include "BLI_task.h" #include "BLI_utildefines.h" #include "BLF_translation.h" @@ -181,6 +182,75 @@ static float meshdeform_dynamic_bind(MeshDeformModifierData *mmd, float (*dco)[3 return totweight; } +typedef struct MeshdeformUserdata { + /*const*/ MeshDeformModifierData *mmd; + const MDeformVert *dvert; + /*const*/ float (*dco)[3]; + int defgrp_index; + float (*vertexCos)[3]; + float (*cagemat)[4]; + float (*icagemat)[3]; + SpinLock lock; +} MeshdeformUserdata; + +static void meshdeform_vert_task(void *userdata, int iter) +{ + MeshdeformUserdata *data = userdata; + /*const*/ MeshDeformModifierData *mmd = data->mmd; + const MDeformVert *dvert = data->dvert; + const int defgrp_index = data->defgrp_index; + const int *offsets = mmd->bindoffsets; + const MDefInfluence *influences = influences = mmd->bindinfluences; + /*const*/ float (*dco)[3] = data->dco; + float (*vertexCos)[3] = data->vertexCos; + float co[3]; + float weight, totweight, fac = 1.0f; + + if (mmd->flag & MOD_MDEF_DYNAMIC_BIND) + if (!mmd->dynverts[iter]) + return; + + if (dvert) { + fac = defvert_find_weight(&dvert[iter], defgrp_index); + + if (mmd->flag & MOD_MDEF_INVERT_VGROUP) { + fac = 1.0f - fac; + } + + if (fac <= 
0.0f) { + return; + } + } + + if (mmd->flag & MOD_MDEF_DYNAMIC_BIND) { + /* transform coordinate into cage's local space */ + mul_v3_m4v3(co, data->cagemat, vertexCos[iter]); + totweight = meshdeform_dynamic_bind(mmd, dco, co); + } + else { + int a; + totweight = 0.0f; + zero_v3(co); + + for (a = offsets[iter]; a < offsets[iter + 1]; a++) { + weight = influences[a].weight; + madd_v3_v3fl(co, dco[influences[a].vertex], weight); + totweight += weight; + } + } + + if (totweight > 0.0f) { + mul_v3_fl(co, fac / totweight); + mul_m3_v3(data->icagemat, co); + BLI_spin_lock(&data->lock); + if (G.debug_value != 527) + add_v3_v3(vertexCos[iter], co); + else + copy_v3_v3(vertexCos[iter], co); + BLI_spin_unlock(&data->lock); + } +} + static void meshdeformModifier_do( ModifierData *md, Object *ob, DerivedMesh *dm, float (*vertexCos)[3], int numVerts) @@ -188,12 +258,11 @@ static void meshdeformModifier_do( MeshDeformModifierData *mmd = (MeshDeformModifierData *) md; DerivedMesh *tmpdm, *cagedm; MDeformVert *dvert = NULL; - MDefInfluence *influences; - const int *offsets; float imat[4][4], cagemat[4][4], iobmat[4][4], icagemat[3][3], cmat[4][4]; - float weight, totweight, fac, co[3], (*dco)[3], (*bindcagecos)[3]; - int a, b, totvert, totcagevert, defgrp_index; + float co[3], (*dco)[3], (*bindcagecos)[3]; + int a, totvert, totcagevert, defgrp_index; float (*cagecos)[3]; + MeshdeformUserdata data; if (!mmd->object || (!mmd->bindcagecos && !mmd->bindfunc)) return; @@ -273,8 +342,6 @@ static void meshdeformModifier_do( /* setup deformation data */ cagedm->getVertCos(cagedm, cagecos); - influences = mmd->bindinfluences; - offsets = mmd->bindoffsets; bindcagecos = (float(*)[3])mmd->bindcagecos; dco = MEM_callocN(sizeof(*dco) * totcagevert, "MDefDco"); @@ -293,51 +360,21 @@ static void meshdeformModifier_do( modifier_get_vgroup(ob, dm, mmd->defgrp_name, &dvert, &defgrp_index); - /* do deformation */ - fac = 1.0f; - - for (b = 0; b < totvert; b++) { - if (mmd->flag & 
MOD_MDEF_DYNAMIC_BIND) - if (!mmd->dynverts[b]) - continue; - - if (dvert) { - fac = defvert_find_weight(&dvert[b], defgrp_index); - - if (mmd->flag & MOD_MDEF_INVERT_VGROUP) { - fac = 1.0f - fac; - } - - if (fac <= 0.0f) { - continue; - } - } - - if (mmd->flag & MOD_MDEF_DYNAMIC_BIND) { - /* transform coordinate into cage's local space */ - mul_v3_m4v3(co, cagemat, vertexCos[b]); - totweight = meshdeform_dynamic_bind(mmd, dco, co); - } - else { - totweight = 0.0f; - zero_v3(co); - - for (a = offsets[b]; a < offsets[b + 1]; a++) { - weight = influences[a].weight; - madd_v3_v3fl(co, dco[influences[a].vertex], weight); - totweight += weight; - } - } - - if (totweight > 0.0f) { - mul_v3_fl(co, fac / totweight); - mul_m3_v3(icagemat, co); - if (G.debug_value != 527) - add_v3_v3(vertexCos[b], co); - else - copy_v3_v3(vertexCos[b], co); - } - } + /* Initialize data to be passed to the for body function. */ + data.mmd = mmd; + data.dvert = dvert; + data.dco = dco; + data.defgrp_index = defgrp_index; + data.vertexCos = vertexCos; + data.cagemat = cagemat; + data.icagemat = icagemat; + BLI_spin_init(&data.lock); + + /* Do deformation. */ + BLI_task_parallel_range(0, totvert, &data, meshdeform_vert_task); + + /* Uninitialize user data used by the task system. */ + BLI_spin_end(&data.lock); /* release cage derivedmesh */ MEM_freeN(dco); |