4 files changed, 82 insertions, 99 deletions
diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h
index acfdd3729c1..b4c374d3fe7 100644
--- a/source/blender/blenlib/BLI_task.h
+++ b/source/blender/blenlib/BLI_task.h
@@ -19,7 +19,9 @@
  */
 
 #ifndef __BLI_TASK_H__
-#define __BLI_TASK_H__ 
+#define __BLI_TASK_H__
+
+#include <string.h>  /* for memset() */
 
 struct Link;
 struct ListBase;
@@ -117,6 +119,20 @@ void BLI_task_pool_delayed_push_end(TaskPool *pool, int thread_id);
 
 /* Parallel for routines */
 
+typedef enum eTaskSchedulingMode {
+	/* Task scheduler will divide the overall work into equal chunks,
+	 * scheduling even chunks to all worker threads.
+	 * Least run time overhead, ideal for cases when each task requires an
+	 * equal amount of compute power.
+	 */
+	TASK_SCHEDULING_STATIC,
+	/* Task scheduler will schedule a small amount of work to each worker thread.
+	 * Has more run time overhead, but deals much better with cases when each
+	 * part of the work requires a totally different amount of compute power.
+	 */
+	TASK_SCHEDULING_DYNAMIC,
+} eTaskSchedulingMode;
+
 /* Per-thread specific data passed to the callback. */
 typedef struct ParallelRangeTLS {
 	/* Identifier of the thread who this data belongs to. */
@@ -131,29 +147,36 @@ typedef void (*TaskParallelRangeFunc)(void *userdata,
                                       const int iter,
                                       const ParallelRangeTLS *tls);
 typedef void (*TaskParallelRangeFuncFinalize)(void *userdata, void *userdata_chunk);
-void BLI_task_parallel_range_ex(
-        int start, int stop,
-        void *userdata,
-        void *userdata_chunk,
-        const size_t userdata_chunk_size,
-        TaskParallelRangeFunc func,
-        const bool use_threading,
-        const bool use_dynamic_scheduling);
-void BLI_task_parallel_range(
-        int start, int stop,
-        void *userdata,
-        TaskParallelRangeFunc func,
-        const bool use_threading);
 
-void BLI_task_parallel_range_finalize(
+typedef struct ParallelRangeSettings {
+	/* Whether the caller allows threading of this particular range.
+	 * Usually set by some heuristic which forces threading off when the
+	 * threading overhead becomes higher than the speed benefit.
+	 * BLI_task_parallel_range() by itself will always use threading when the
+	 * range is larger than a chunk size, i.e. threading is then always performed.
+	 */
+	bool use_threading;
+	/* Scheduling mode to use for this parallel range invocation. */
+	eTaskSchedulingMode scheduling_mode;
+	/* Each instance of looping chunks will get a copy of this data
+	 * (similar to OpenMP's firstprivate).
+	 */
+	void *userdata_chunk;        /* Pointer to actual data. */
+	size_t userdata_chunk_size;  /* Size of that data.  */
+	/* Function called from the calling thread once the whole range has been
+	 * processed.
+	 */
+	TaskParallelRangeFuncFinalize func_finalize;
+} ParallelRangeSettings;
+
+BLI_INLINE void BLI_parallel_range_settings_defaults(
+        ParallelRangeSettings* settings);
+
+void BLI_task_parallel_range(
         int start, int stop,
         void *userdata,
-        void *userdata_chunk,
-        const size_t userdata_chunk_size,
         TaskParallelRangeFunc func,
-        TaskParallelRangeFuncFinalize func_finalize,
-        const bool use_threading,
-        const bool use_dynamic_scheduling);
+        const ParallelRangeSettings *settings);
 
 typedef void (*TaskParallelListbaseFunc)(void *userdata,
                                          struct Link *iter,
@@ -173,6 +196,15 @@ void BLI_task_parallel_mempool(
         TaskParallelMempoolFunc func,
         const bool use_threading);
 
+/* TODO(sergey): Think of a better place for this. */
+BLI_INLINE void BLI_parallel_range_settings_defaults(
+        ParallelRangeSettings* settings)
+{
+	memset(settings, 0, sizeof(*settings));
+	settings->use_threading = true;
+	settings->scheduling_mode = TASK_SCHEDULING_STATIC;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/source/blender/blenlib/intern/BLI_kdopbvh.c b/source/blender/blenlib/intern/BLI_kdopbvh.c
index 6e33f75fe69..03784e31eee 100644
--- a/source/blender/blenlib/intern/BLI_kdopbvh.c
+++ b/source/blender/blenlib/intern/BLI_kdopbvh.c
@@ -923,9 +923,14 @@ static void non_recursive_bvh_div_nodes(
 		cb_data.depth = depth;
 
 		if (true) {
+			ParallelRangeSettings settings;
+			BLI_parallel_range_settings_defaults(&settings);
+			settings.use_threading = (num_leafs > KDOPBVH_THREAD_LEAF_THRESHOLD);
 			BLI_task_parallel_range(
-			        i, i_stop, &cb_data, non_recursive_bvh_div_nodes_task_cb,
-			        num_leafs > KDOPBVH_THREAD_LEAF_THRESHOLD);
+			        i, i_stop,
+			        &cb_data,
+			        non_recursive_bvh_div_nodes_task_cb,
+			        &settings);
 		}
 		else {
 			/* Less hassle for debugging. */
@@ -1342,9 +1347,14 @@ BVHTreeOverlap *BLI_bvhtree_overlap(
 		data[j].thread = j;
 	}
 
+	ParallelRangeSettings settings;
+	BLI_parallel_range_settings_defaults(&settings);
+	settings.use_threading = (tree1->totleaf > KDOPBVH_THREAD_LEAF_THRESHOLD);
 	BLI_task_parallel_range(
-	            0, thread_num, data, bvhtree_overlap_task_cb,
-	            tree1->totleaf > KDOPBVH_THREAD_LEAF_THRESHOLD);
+	            0, thread_num,
+	            data,
+	            bvhtree_overlap_task_cb,
+	            &settings);
 	
 	for (j = 0; j < thread_num; j++)
 		total += BLI_stack_count(data[j].overlap);
diff --git a/source/blender/blenlib/intern/math_statistics.c b/source/blender/blenlib/intern/math_statistics.c
index cfadbba028f..14e3aaea053 100644
--- a/source/blender/blenlib/intern/math_statistics.c
+++ b/source/blender/blenlib/intern/math_statistics.c
@@ -118,8 +118,14 @@ void BLI_covariance_m_vn_ex(
 		.covfac = covfac, .n = n, .nbr_cos_vn = nbr_cos_vn,
 	};
 
+	ParallelRangeSettings settings;
+	BLI_parallel_range_settings_defaults(&settings);
+	settings.use_threading = ((nbr_cos_vn * n * n) >= 10000);
 	BLI_task_parallel_range(
-	            0, n * n, &data, covariance_m_vn_ex_task_cb, (nbr_cos_vn * n * n) >= 10000);
+	            0, n * n,
+	            &data,
+	            covariance_m_vn_ex_task_cb,
+	            &settings);
 }
 
 /**
diff --git a/source/blender/blenlib/intern/task.c b/source/blender/blenlib/intern/task.c
index 030d4124e62..6620c9822ad 100644
--- a/source/blender/blenlib/intern/task.c
+++ b/source/blender/blenlib/intern/task.c
@@ -1147,86 +1147,21 @@ static void task_parallel_range_ex(
 	}
 }
 
-/**
- * This function allows to parallelize for loops in a similar way to OpenMP's 'parallel for' statement.
- *
- * \param start First index to process.
- * \param stop Index to stop looping (excluded).
- * \param userdata Common userdata passed to all instances of \a func.
- * \param userdata_chunk Optional, each instance of looping chunks will get a copy of this data
- *                       (similar to OpenMP's firstprivate).
- * \param userdata_chunk_size Memory size of \a userdata_chunk.
- * \param func_ex Callback function (advanced version).
- * \param use_threading If \a true, actually split-execute loop in threads, else just do a sequential forloop
- *                      (allows caller to use any kind of test to switch on parallelization or not).
- * \param use_dynamic_scheduling If \a true, the whole range is divided in a lot of small chunks (of size 32 currently),
- *                               otherwise whole range is split in a few big chunks (num_threads * 2 chunks currently).
- */
-void BLI_task_parallel_range_ex(
-        int start, int stop,
-        void *userdata,
-        void *userdata_chunk,
-        const size_t userdata_chunk_size,
-        TaskParallelRangeFunc func,
-        const bool use_threading,
-        const bool use_dynamic_scheduling)
-{
-	task_parallel_range_ex(
-	            start, stop, userdata, userdata_chunk, userdata_chunk_size, func, NULL,
-	            use_threading, use_dynamic_scheduling);
-}
-
-/**
- * A simpler version of \a BLI_task_parallel_range_ex, which does not use \a use_dynamic_scheduling,
- * and does not handle 'firstprivate'-like \a userdata_chunk.
- *
- * \param start First index to process.
- * \param stop Index to stop looping (excluded).
- * \param userdata Common userdata passed to all instances of \a func.
- * \param func Callback function (simple version).
- * \param use_threading If \a true, actually split-execute loop in threads, else just do a sequential forloop
- *                      (allows caller to use any kind of test to switch on parallelization or not).
- */
 void BLI_task_parallel_range(
         int start, int stop,
         void *userdata,
         TaskParallelRangeFunc func,
-        const bool use_threading)
-{
-	task_parallel_range_ex(start, stop, userdata, NULL, 0, func, NULL, use_threading, false);
-}
-
-/**
- * This function allows to parallelize for loops in a similar way to OpenMP's 'parallel for' statement,
- * with an additional 'finalize' func called from calling thread once whole range have been processed.
- *
- * \param start First index to process.
- * \param stop Index to stop looping (excluded).
- * \param userdata Common userdata passed to all instances of \a func.
- * \param userdata_chunk Optional, each instance of looping chunks will get a copy of this data
- *                       (similar to OpenMP's firstprivate).
- * \param userdata_chunk_size Memory size of \a userdata_chunk.
- * \param func_ex Callback function (advanced version).
- * \param func_finalize Callback function, called after all workers have finished,
- * useful to finalize accumulative tasks.
- * \param use_threading If \a true, actually split-execute loop in threads, else just do a sequential forloop
- *                      (allows caller to use any kind of test to switch on parallelization or not).
- * \param use_dynamic_scheduling If \a true, the whole range is divided in a lot of small chunks (of size 32 currently),
- *                               otherwise whole range is split in a few big chunks (num_threads * 2 chunks currently).
- */
-void BLI_task_parallel_range_finalize(
-        int start, int stop,
-        void *userdata,
-        void *userdata_chunk,
-        const size_t userdata_chunk_size,
-        TaskParallelRangeFunc func,
-        TaskParallelRangeFuncFinalize func_finalize,
-        const bool use_threading,
-        const bool use_dynamic_scheduling)
+        const ParallelRangeSettings *settings)
 {
 	task_parallel_range_ex(
-	            start, stop, userdata, userdata_chunk, userdata_chunk_size, func, func_finalize,
-	            use_threading, use_dynamic_scheduling);
+	        start, stop,
+	        userdata,
+	        settings->userdata_chunk,
+	        settings->userdata_chunk_size,
+	        func,
+	        settings->func_finalize,
+	        settings->use_threading,
+	        (settings->scheduling_mode == TASK_SCHEDULING_DYNAMIC));
 }
 
 #undef MALLOCA