diff options
Diffstat (limited to 'source/blender/blenlib')
-rw-r--r-- | source/blender/blenlib/BLI_task.h    |  3 |
-rw-r--r-- | source/blender/blenlib/intern/task.c | 55 |
2 files changed, 38 insertions(+), 20 deletions(-)
diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h index 8c22a25fe14..28da673ea97 100644 --- a/source/blender/blenlib/BLI_task.h +++ b/source/blender/blenlib/BLI_task.h @@ -106,7 +106,8 @@ void BLI_task_parallel_range_ex( int start, int stop, void *userdata, TaskParallelRangeFunc func, - const int range_threshold); + const int range_threshold, + const bool use_dynamic_scheduling); void BLI_task_parallel_range( int start, int stop, void *userdata, diff --git a/source/blender/blenlib/intern/task.c b/source/blender/blenlib/intern/task.c index 07c67f001f9..219ccb18d98 100644 --- a/source/blender/blenlib/intern/task.c +++ b/source/blender/blenlib/intern/task.c @@ -29,6 +29,7 @@ #include "MEM_guardedalloc.h" #include "BLI_listbase.h" +#include "BLI_math.h" #include "BLI_task.h" #include "BLI_threads.h" @@ -452,18 +453,21 @@ typedef struct ParallelRangeState { TaskParallelRangeFunc func; int iter; + int chunk_size; SpinLock lock; } ParallelRangeState; BLI_INLINE bool parallel_range_next_iter_get( - ParallelRangeState *state, - int *iter) + ParallelRangeState * __restrict state, + int * __restrict iter, int * __restrict count) { bool result = false; if (state->iter < state->stop) { BLI_spin_lock(&state->lock); if (state->iter < state->stop) { - *iter = state->iter++; + *count = min_ii(state->chunk_size, state->stop - state->iter); + *iter = state->iter; + state->iter += *count; result = true; } BLI_spin_unlock(&state->lock); @@ -472,14 +476,17 @@ BLI_INLINE bool parallel_range_next_iter_get( } static void parallel_range_func( - TaskPool *pool, + TaskPool * __restrict pool, void *UNUSED(taskdata), int UNUSED(threadid)) { - ParallelRangeState *state = BLI_task_pool_userdata(pool); - int iter; - while (parallel_range_next_iter_get(state, &iter)) { - state->func(state->userdata, iter); + ParallelRangeState * __restrict state = BLI_task_pool_userdata(pool); + int iter, count; + while (parallel_range_next_iter_get(state, &iter, &count)) { + int i; 
+ for (i = 0; i < count; ++i) { + state->func(state->userdata, iter + i); + } } } @@ -487,12 +494,13 @@ void BLI_task_parallel_range_ex( int start, int stop, void *userdata, TaskParallelRangeFunc func, - const int range_threshold) + const int range_threshold, + const bool use_dynamic_scheduling) { TaskScheduler *task_scheduler; TaskPool *task_pool; ParallelRangeState state; - int i; + int i, num_threads, num_tasks; BLI_assert(start < stop); @@ -506,21 +514,30 @@ void BLI_task_parallel_range_ex( return; } - BLI_spin_init(&state.lock); - state.start = start; - state.stop = stop; - state.userdata = userdata; - state.func = func; - state.iter = start; - task_scheduler = BLI_task_scheduler_get(); task_pool = BLI_task_pool_create(task_scheduler, &state); + num_threads = BLI_task_scheduler_num_threads(task_scheduler); /* The idea here is to prevent creating task for each of the loop iterations * and instead have tasks which are evenly distributed across CPU cores and * pull next iter to be crunched using the queue. */ - for (i = 0; i < 2 * BLI_task_scheduler_num_threads(task_scheduler); i++) { + num_tasks = num_threads * 2; + + BLI_spin_init(&state.lock); + state.start = start; + state.stop = stop; + state.userdata = userdata; + state.func = func; + state.iter = start; + if (use_dynamic_scheduling) { + state.chunk_size = 32; + } + else { + state.chunk_size = (stop - start) / (num_tasks); + } + + for (i = 0; i < num_tasks; i++) { BLI_task_pool_push(task_pool, parallel_range_func, NULL, false, @@ -538,5 +555,5 @@ void BLI_task_parallel_range( void *userdata, TaskParallelRangeFunc func) { - BLI_task_parallel_range_ex(start, stop, userdata, func, 64); + BLI_task_parallel_range_ex(start, stop, userdata, func, 64, false); } |