diff options
author | Bastien Montagne <montagne29@wanadoo.fr> | 2015-11-25 13:01:59 +0300 |
---|---|---|
committer | Bastien Montagne <montagne29@wanadoo.fr> | 2015-11-25 13:01:59 +0300 |
commit | 0f609d5d04cff4f5553fd56924f17d9b08bef1e8 (patch) | |
tree | e44c15fcc1dbc9bdf3a5742e6c0f8b8e7b849056 /source/blender | |
parent | 8294452b14fac54443f31fe11950d19370e27b43 (diff) |
BLI_task: BLI_task_parallel_range_ex: add some per-chunk userdata.
This mimics OpenMP's 'firstprivate' feature. It is sometimes handy to have some persistent local data during a whole chunk.
Reviewers: sergey
Reviewed By: sergey
Subscribers: campbellbarton
Differential Revision: https://developer.blender.org/D1635
Diffstat (limited to 'source/blender')
-rw-r--r-- | source/blender/blenlib/BLI_task.h | 4 | ||||
-rw-r--r-- | source/blender/blenlib/intern/task.c | 60 | ||||
-rw-r--r-- | source/blender/modifiers/intern/MOD_meshdeform.c | 2 |
3 files changed, 61 insertions, 5 deletions
diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h index 81c277cd956..45a6e0b02c1 100644 --- a/source/blender/blenlib/BLI_task.h +++ b/source/blender/blenlib/BLI_task.h @@ -112,10 +112,12 @@ ThreadMutex *BLI_task_pool_user_mutex(TaskPool *pool); size_t BLI_task_pool_tasks_done(TaskPool *pool); /* Parallel for routines */ -typedef void (*TaskParallelRangeFunc)(void *userdata, int iter); +typedef void (*TaskParallelRangeFunc)(void *userdata, void *userdata_chunk, int iter); void BLI_task_parallel_range_ex( int start, int stop, void *userdata, + void *userdata_chunk, + const size_t userdata_chunk_size, TaskParallelRangeFunc func, const int range_threshold, const bool use_dynamic_scheduling); diff --git a/source/blender/blenlib/intern/task.c b/source/blender/blenlib/intern/task.c index 5803448a4c0..e3d13b4ada4 100644 --- a/source/blender/blenlib/intern/task.c +++ b/source/blender/blenlib/intern/task.c @@ -575,9 +575,15 @@ size_t BLI_task_pool_tasks_done(TaskPool *pool) * - Chunk iterations to reduce number of spin locks. */ +/* Allows to avoid using malloc for userdata_chunk in tasks, when small enough. */ +#define MALLOCA(_size) ((_size) <= 8192) ? alloca((_size)) : MEM_mallocN((_size), __func__) +#define MALLOCA_FREE(_mem, _size) if (((_mem) != NULL) && ((_size) > 8192)) MEM_freeN((_mem)) + typedef struct ParallelRangeState { int start, stop; void *userdata; + void *userdata_chunk; + size_t userdata_chunk_size; TaskParallelRangeFunc func; int iter; @@ -608,17 +614,45 @@ static void parallel_range_func( { ParallelRangeState * __restrict state = BLI_task_pool_userdata(pool); int iter, count; + + const bool use_userdata_chunk = (state->userdata_chunk_size != 0) && (state->userdata_chunk != NULL); + void *userdata_chunk = use_userdata_chunk ? 
MALLOCA(state->userdata_chunk_size) : NULL; + while (parallel_range_next_iter_get(state, &iter, &count)) { int i; + + if (use_userdata_chunk) { + memcpy(userdata_chunk, state->userdata_chunk, state->userdata_chunk_size); + } + for (i = 0; i < count; ++i) { - state->func(state->userdata, iter + i); + state->func(state->userdata, userdata_chunk, iter + i); } } + + MALLOCA_FREE(userdata_chunk, state->userdata_chunk_size); } +/** + * This function allows parallelizing for loops in a similar way to OpenMP's 'parallel for' statement. + * + * \param start First index to process. + * \param stop Index to stop looping (excluded). + * \param userdata Common userdata passed to all instances of \a func. + * \param userdata_chunk Optional, each instance of looping chunks will get a copy of this data + * (similar to OpenMP's firstprivate). + * \param userdata_chunk_size Memory size of \a userdata_chunk. + * \param func Callback function. + * \param range_threshold Minimum size of processed range to start using tasks + * (below this, loop is done in main thread only). + * \param use_dynamic_scheduling If \a true, the whole range is divided in a lot of small chunks (of size 32 currently), + * otherwise whole range is split in a few big chunks (num_threads * 2 chunks currently). + */ void BLI_task_parallel_range_ex( int start, int stop, void *userdata, + void *userdata_chunk, + const size_t userdata_chunk_size, TaskParallelRangeFunc func, const int range_threshold, const bool use_dynamic_scheduling) @@ -634,9 +668,19 @@ void BLI_task_parallel_range_ex( * do everything from the main thread. 
*/ if (stop - start < range_threshold) { + const bool use_userdata_chunk = (userdata_chunk_size != 0) && (userdata_chunk != NULL); + void *userdata_chunk_local = NULL; + + if (use_userdata_chunk) { + userdata_chunk_local = MALLOCA(userdata_chunk_size); + memcpy(userdata_chunk_local, userdata_chunk, userdata_chunk_size); + } + for (i = start; i < stop; ++i) { - func(userdata, i); + func(userdata, userdata_chunk_local, i); } + + MALLOCA_FREE(userdata_chunk_local, userdata_chunk_size); return; } @@ -654,6 +698,8 @@ void BLI_task_parallel_range_ex( state.start = start; state.stop = stop; state.userdata = userdata; + state.userdata_chunk = userdata_chunk; + state.userdata_chunk_size = userdata_chunk_size; state.func = func; state.iter = start; if (use_dynamic_scheduling) { @@ -676,10 +722,18 @@ void BLI_task_parallel_range_ex( BLI_spin_end(&state.lock); } +/** + * A simpler version of \a BLI_task_parallel_range_ex, which does not use \a use_dynamic_scheduling, + * has a \a range_threshold of 64, and does not handle 'firstprivate'-like \a userdata_chunk. + */ void BLI_task_parallel_range( int start, int stop, void *userdata, TaskParallelRangeFunc func) { - BLI_task_parallel_range_ex(start, stop, userdata, func, 64, false); + BLI_task_parallel_range_ex(start, stop, userdata, NULL, 0, func, 64, false); } + +#undef MALLOCA +#undef MALLOCA_FREE + diff --git a/source/blender/modifiers/intern/MOD_meshdeform.c b/source/blender/modifiers/intern/MOD_meshdeform.c index 8aa5f281f56..cdf1f85ade5 100644 --- a/source/blender/modifiers/intern/MOD_meshdeform.c +++ b/source/blender/modifiers/intern/MOD_meshdeform.c @@ -234,7 +234,7 @@ typedef struct MeshdeformUserdata { float (*icagemat)[3]; } MeshdeformUserdata; -static void meshdeform_vert_task(void * userdata, int iter) +static void meshdeform_vert_task(void *userdata, void *UNUSED(userdata_chunck), int iter) { MeshdeformUserdata *data = userdata; /*const*/ MeshDeformModifierData *mmd = data->mmd; |