diff options
Diffstat (limited to 'source/blender/blenlib')
-rw-r--r-- | source/blender/blenlib/BLI_task.h | 72 | ||||
-rw-r--r-- | source/blender/blenlib/intern/BLI_kdopbvh.c | 18 | ||||
-rw-r--r-- | source/blender/blenlib/intern/math_statistics.c | 8 | ||||
-rw-r--r-- | source/blender/blenlib/intern/task.c | 83 |
4 files changed, 82 insertions, 99 deletions
diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h
index acfdd3729c1..b4c374d3fe7 100644
--- a/source/blender/blenlib/BLI_task.h
+++ b/source/blender/blenlib/BLI_task.h
@@ -19,7 +19,9 @@
 */
 
 #ifndef __BLI_TASK_H__
-#define __BLI_TASK_H__
+#define __BLI_TASK_H__
+
+#include <string.h> /* for memset() */
 
 struct Link;
 struct ListBase;
@@ -117,6 +119,20 @@ void BLI_task_pool_delayed_push_end(TaskPool *pool, int thread_id);
 
 /* Parallel for routines */
 
+typedef enum eTaskSchedulingMode {
+ /* Task scheduler will divide overall work into equal chunks, scheduling
+ * even chunks to all worker threads.
+ * Least run time benefit, ideal for cases when each task requires equal
+ * amount of compute power.
+ */
+ TASK_SCHEDULING_STATIC,
+ /* Task scheduler will schedule small amount of work to each worker thread.
+ * Has more run time overhead, but deals much better with cases when each
+ * part of the work requires totally different amount of compute power.
+ */
+ TASK_SCHEDULING_DYNAMIC,
+} eTaskSchedulingMode;
+
 /* Per-thread specific data passed to the callback. */
 typedef struct ParallelRangeTLS {
 /* Identifier of the thread who this data belongs to. */
@@ -131,29 +147,36 @@ typedef void (*TaskParallelRangeFunc)(void *userdata,
 const int iter,
 const ParallelRangeTLS *tls);
 typedef void (*TaskParallelRangeFuncFinalize)(void *userdata, void *userdata_chunk);
-void BLI_task_parallel_range_ex(
- int start, int stop,
- void *userdata,
- void *userdata_chunk,
- const size_t userdata_chunk_size,
- TaskParallelRangeFunc func,
- const bool use_threading,
- const bool use_dynamic_scheduling);
-void BLI_task_parallel_range(
- int start, int stop,
- void *userdata,
- TaskParallelRangeFunc func,
- const bool use_threading);
 
-void BLI_task_parallel_range_finalize(
+typedef struct ParallelRangeSettings {
+ /* Whether caller allows to do threading of the particular range.
+ * Usually set by some equation, which forces threading off when threading
+ * overhead becomes higher than speed benefit.
+ * BLI_task_parallel_range() by itself will always use threading when range
+ * is higher than a chunk size. As in, threading will always be performed.
+ */
+ bool use_threading;
+ /* Scheduling mode to use for this parallel range invocation. */
+ eTaskSchedulingMode scheduling_mode;
+ /* Each instance of looping chunks will get a copy of this data
+ * (similar to OpenMP's firstprivate).
+ */
+ void *userdata_chunk; /* Pointer to actual data. */
+ size_t userdata_chunk_size; /* Size of that data. */
+ /* Function called from calling thread once whole range have been
+ * processed.
+ */
+ TaskParallelRangeFuncFinalize func_finalize;
+} ParallelRangeSettings;
+
+BLI_INLINE void BLI_parallel_range_settings_defaults(
+ ParallelRangeSettings* settings);
+
+void BLI_task_parallel_range(
 int start, int stop,
 void *userdata,
- void *userdata_chunk,
- const size_t userdata_chunk_size,
 TaskParallelRangeFunc func,
- TaskParallelRangeFuncFinalize func_finalize,
- const bool use_threading,
- const bool use_dynamic_scheduling);
+ const ParallelRangeSettings *settings);
 
 typedef void (*TaskParallelListbaseFunc)(void *userdata,
 struct Link *iter,
@@ -173,6 +196,15 @@ void BLI_task_parallel_mempool(
 TaskParallelMempoolFunc func,
 const bool use_threading);
 
+/* TODO(sergey): Think of a better place for this. */
+BLI_INLINE void BLI_parallel_range_settings_defaults(
+ ParallelRangeSettings* settings)
+{
+ memset(settings, 0, sizeof(*settings));
+ settings->use_threading = true;
+ settings->scheduling_mode = TASK_SCHEDULING_STATIC;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/source/blender/blenlib/intern/BLI_kdopbvh.c b/source/blender/blenlib/intern/BLI_kdopbvh.c
index 6e33f75fe69..03784e31eee 100644
--- a/source/blender/blenlib/intern/BLI_kdopbvh.c
+++ b/source/blender/blenlib/intern/BLI_kdopbvh.c
@@ -923,9 +923,14 @@ static void non_recursive_bvh_div_nodes(
 cb_data.depth = depth;
 
 if (true) {
+ ParallelRangeSettings settings;
+ BLI_parallel_range_settings_defaults(&settings);
+ settings.use_threading = (num_leafs > KDOPBVH_THREAD_LEAF_THRESHOLD);
 BLI_task_parallel_range(
- i, i_stop, &cb_data, non_recursive_bvh_div_nodes_task_cb,
- num_leafs > KDOPBVH_THREAD_LEAF_THRESHOLD);
+ i, i_stop,
+ &cb_data,
+ non_recursive_bvh_div_nodes_task_cb,
+ &settings);
 }
 else {
 /* Less hassle for debugging. */
@@ -1342,9 +1347,14 @@ BVHTreeOverlap *BLI_bvhtree_overlap(
 data[j].thread = j;
 }
 
+ ParallelRangeSettings settings;
+ BLI_parallel_range_settings_defaults(&settings);
+ settings.use_threading = (tree1->totleaf > KDOPBVH_THREAD_LEAF_THRESHOLD);
 BLI_task_parallel_range(
- 0, thread_num, data, bvhtree_overlap_task_cb,
- tree1->totleaf > KDOPBVH_THREAD_LEAF_THRESHOLD);
+ 0, thread_num,
+ data,
+ bvhtree_overlap_task_cb,
+ &settings);
 
 for (j = 0; j < thread_num; j++)
 total += BLI_stack_count(data[j].overlap);
diff --git a/source/blender/blenlib/intern/math_statistics.c b/source/blender/blenlib/intern/math_statistics.c
index cfadbba028f..14e3aaea053 100644
--- a/source/blender/blenlib/intern/math_statistics.c
+++ b/source/blender/blenlib/intern/math_statistics.c
@@ -118,8 +118,14 @@ void BLI_covariance_m_vn_ex(
 .covfac = covfac, .n = n, .nbr_cos_vn = nbr_cos_vn,
 };
 
+ ParallelRangeSettings settings;
+ BLI_parallel_range_settings_defaults(&settings);
+ settings.use_threading = ((nbr_cos_vn * n * n) >= 10000);
 BLI_task_parallel_range(
- 0, n * n, &data, covariance_m_vn_ex_task_cb, (nbr_cos_vn * n * n) >= 10000);
+ 0, n * n,
+ &data,
+ covariance_m_vn_ex_task_cb,
+ &settings);
 }
 
 /**
diff --git a/source/blender/blenlib/intern/task.c b/source/blender/blenlib/intern/task.c
index 030d4124e62..6620c9822ad 100644
--- a/source/blender/blenlib/intern/task.c
+++ b/source/blender/blenlib/intern/task.c
@@ -1147,86 +1147,21 @@ static void task_parallel_range_ex(
 }
 }
 
-/**
- * This function allows to parallelize for loops in a similar way to OpenMP's 'parallel for' statement.
- *
- * \param start First index to process.
- * \param stop Index to stop looping (excluded).
- * \param userdata Common userdata passed to all instances of \a func.
- * \param userdata_chunk Optional, each instance of looping chunks will get a copy of this data
- * (similar to OpenMP's firstprivate).
- * \param userdata_chunk_size Memory size of \a userdata_chunk.
- * \param func_ex Callback function (advanced version).
- * \param use_threading If \a true, actually split-execute loop in threads, else just do a sequential forloop
- * (allows caller to use any kind of test to switch on parallelization or not).
- * \param use_dynamic_scheduling If \a true, the whole range is divided in a lot of small chunks (of size 32 currently),
- * otherwise whole range is split in a few big chunks (num_threads * 2 chunks currently).
- */
-void BLI_task_parallel_range_ex(
- int start, int stop,
- void *userdata,
- void *userdata_chunk,
- const size_t userdata_chunk_size,
- TaskParallelRangeFunc func,
- const bool use_threading,
- const bool use_dynamic_scheduling)
-{
- task_parallel_range_ex(
- start, stop, userdata, userdata_chunk, userdata_chunk_size, func, NULL,
- use_threading, use_dynamic_scheduling);
-}
-
-/**
- * A simpler version of \a BLI_task_parallel_range_ex, which does not use \a use_dynamic_scheduling,
- * and does not handle 'firstprivate'-like \a userdata_chunk.
- *
- * \param start First index to process.
- * \param stop Index to stop looping (excluded).
- * \param userdata Common userdata passed to all instances of \a func.
- * \param func Callback function (simple version).
- * \param use_threading If \a true, actually split-execute loop in threads, else just do a sequential forloop
- * (allows caller to use any kind of test to switch on parallelization or not).
- */
 void BLI_task_parallel_range(
 int start, int stop,
 void *userdata,
 TaskParallelRangeFunc func,
- const bool use_threading)
-{
- task_parallel_range_ex(start, stop, userdata, NULL, 0, func, NULL, use_threading, false);
-}
-
-/**
- * This function allows to parallelize for loops in a similar way to OpenMP's 'parallel for' statement,
- * with an additional 'finalize' func called from calling thread once whole range have been processed.
- *
- * \param start First index to process.
- * \param stop Index to stop looping (excluded).
- * \param userdata Common userdata passed to all instances of \a func.
- * \param userdata_chunk Optional, each instance of looping chunks will get a copy of this data
- * (similar to OpenMP's firstprivate).
- * \param userdata_chunk_size Memory size of \a userdata_chunk.
- * \param func_ex Callback function (advanced version).
- * \param func_finalize Callback function, called after all workers have finished,
- * useful to finalize accumulative tasks.
- * \param use_threading If \a true, actually split-execute loop in threads, else just do a sequential forloop
- * (allows caller to use any kind of test to switch on parallelization or not).
- * \param use_dynamic_scheduling If \a true, the whole range is divided in a lot of small chunks (of size 32 currently),
- * otherwise whole range is split in a few big chunks (num_threads * 2 chunks currently).
- */
-void BLI_task_parallel_range_finalize(
- int start, int stop,
- void *userdata,
- void *userdata_chunk,
- const size_t userdata_chunk_size,
- TaskParallelRangeFunc func,
- TaskParallelRangeFuncFinalize func_finalize,
- const bool use_threading,
- const bool use_dynamic_scheduling)
+ const ParallelRangeSettings *settings)
 {
 task_parallel_range_ex(
- start, stop, userdata, userdata_chunk, userdata_chunk_size, func, func_finalize,
- use_threading, use_dynamic_scheduling);
+ start, stop,
+ userdata,
+ settings->userdata_chunk,
+ settings->userdata_chunk_size,
+ func,
+ settings->func_finalize,
+ settings->use_threading,
+ (settings->scheduling_mode == TASK_SCHEDULING_DYNAMIC));
 }
 
 #undef MALLOCA
|