git.blender.org/blender.git
author    Bastien Montagne <montagne29@wanadoo.fr>  2016-01-16 17:59:37 +0300
committer Bastien Montagne <montagne29@wanadoo.fr>  2016-01-16 17:59:37 +0300
commit    31d907fa0ad1afa8c8a1829e16d3af394ab9e301 (patch)
tree      f889d37f917b5f4daf121d7595daafccced8e514 /source/blender/blenlib
parent    724809655c891b13bba49b60cbb2ddab3d54f18a (diff)
Cleanup: BLI_task - API changes.
Based on usages so far:

- Split the callback worker function in two, 'basic' and 'extended' versions. The former goes back to the simplest version, while the latter keeps the 'userdata_chunk' and also gets the thread_id.
- Add use_threading to the simple BLI_task_parallel_range(); it turns out we need this pretty much systematically, and it allows getting rid of most usages of BLI_task_parallel_range_ex().
- BLI_task_parallel_range() now expects the 'basic' version of the callback, while BLI_task_parallel_range_ex() expects the 'extended' one.

All in all, this should make common usage of BLI_task_parallel_range simpler (less verbose), and give the advanced callback access to the thread id, which is mandatory in some (future) cases.
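To make the split concrete, here is a minimal usage sketch of the two entry points (SumData, print_cb, sum_cb_ex, weighted_sum and the MAX_THREADS bound are hypothetical names invented for this illustration, not code from this commit):

#include <stdio.h>
#include <string.h>

#include "BLI_task.h"

#define MAX_THREADS 64  /* hypothetical upper bound, for this sketch only */

typedef struct SumData {
    const float *values;
    float partial_sums[MAX_THREADS];  /* one slot per thread, indexed by thread_id */
} SumData;

/* 'Basic' callback: gets the shared userdata and the iteration index only. */
static void print_cb(void *userdata, const int i)
{
    const SumData *data = userdata;
    printf("values[%d] = %f\n", i, (double)data->values[i]);
}

/* 'Extended' callback: additionally gets a private copy of userdata_chunk
 * ('firstprivate'-like) and the id of the executing thread, usable to index
 * per-thread slots in the shared userdata without locking. */
static void sum_cb_ex(void *userdata, void *userdata_chunk, const int i, const int thread_id)
{
    SumData *data = userdata;
    const float *weight = userdata_chunk;  /* private copy of the template passed below */
    data->partial_sums[thread_id] += data->values[i] * *weight;
}

static float weighted_sum(SumData *data, const int num_values)
{
    const float weight = 0.5f;
    float total = 0.0f;
    int i;

    memset(data->partial_sums, 0, sizeof(data->partial_sums));

    /* Simple version: 'basic' callback, caller decides whether threading pays off. */
    BLI_task_parallel_range(0, num_values, data, print_cb, num_values > 1024);

    /* Extended version: per-chunk copy of 'weight', plus the scheduling flag. */
    BLI_task_parallel_range_ex(0, num_values, data, (void *)&weight, sizeof(weight),
                               sum_cb_ex, num_values > 1024, false);

    for (i = 0; i < MAX_THREADS; i++) {
        total += data->partial_sums[i];
    }
    return total;
}

Since the chunk copies are private and discarded after the loop, results that must survive the call go through userdata, here via the thread_id-indexed slots.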
Diffstat (limited to 'source/blender/blenlib')
-rw-r--r--   source/blender/blenlib/BLI_task.h                 |   9
-rw-r--r--   source/blender/blenlib/intern/BLI_kdopbvh.c       |  16
-rw-r--r--   source/blender/blenlib/intern/math_statistics.c   |   6
-rw-r--r--   source/blender/blenlib/intern/task.c              | 100
4 files changed, 94 insertions(+), 37 deletions(-)
diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h
index 7b9a3c59430..63a07957336 100644
--- a/source/blender/blenlib/BLI_task.h
+++ b/source/blender/blenlib/BLI_task.h
@@ -112,19 +112,20 @@ ThreadMutex *BLI_task_pool_user_mutex(TaskPool *pool);
size_t BLI_task_pool_tasks_done(TaskPool *pool);
/* Parallel for routines */
-typedef void (*TaskParallelRangeFunc)(void *userdata, void *userdata_chunk, int iter);
+typedef void (*TaskParallelRangeFunc)(void *userdata, const int iter);
+typedef void (*TaskParallelRangeFuncEx)(void *userdata, void *userdata_chunk, const int iter, const int thread_id);
void BLI_task_parallel_range_ex(
int start, int stop,
void *userdata,
void *userdata_chunk,
- const size_t userdata_chunk_size,
- TaskParallelRangeFunc func,
+ const size_t userdata_chunk_size, TaskParallelRangeFuncEx func_ex,
const bool use_threading,
const bool use_dynamic_scheduling);
void BLI_task_parallel_range(
int start, int stop,
void *userdata,
- TaskParallelRangeFunc func);
+ TaskParallelRangeFunc func,
+ const bool use_threading);
#ifdef __cplusplus
}
diff --git a/source/blender/blenlib/intern/BLI_kdopbvh.c b/source/blender/blenlib/intern/BLI_kdopbvh.c
index f1ed49f74e0..c4bf2ae6910 100644
--- a/source/blender/blenlib/intern/BLI_kdopbvh.c
+++ b/source/blender/blenlib/intern/BLI_kdopbvh.c
@@ -750,7 +750,7 @@ typedef struct BVHDivNodesData {
int first_of_next_level;
} BVHDivNodesData;
-static void non_recursive_bvh_div_nodes_task_cb(void *userdata, void *UNUSED(userdata_chunk), int j)
+static void non_recursive_bvh_div_nodes_task_cb(void *userdata, const int j)
{
BVHDivNodesData *data = userdata;
@@ -873,9 +873,9 @@ static void non_recursive_bvh_div_nodes(BVHTree *tree, BVHNode *branches_array,
cb_data.i = i;
cb_data.depth = depth;
- BLI_task_parallel_range_ex(
- i, end_j, &cb_data, NULL, 0, non_recursive_bvh_div_nodes_task_cb,
- num_leafs > KDOPBVH_THREAD_LEAF_THRESHOLD, false);
+ BLI_task_parallel_range(
+ i, end_j, &cb_data, non_recursive_bvh_div_nodes_task_cb,
+ num_leafs > KDOPBVH_THREAD_LEAF_THRESHOLD);
}
}
@@ -1195,7 +1195,7 @@ int BLI_bvhtree_overlap_thread_num(const BVHTree *tree)
return (int)MIN2(tree->tree_type, tree->nodes[tree->totleaf]->totnode);
}
-static void bvhtree_overlap_task_cb(void *userdata, void *UNUSED(userdata_chunk), int j)
+static void bvhtree_overlap_task_cb(void *userdata, const int j)
{
BVHOverlapData_Thread *data = &((BVHOverlapData_Thread *)userdata)[j];
BVHOverlapData_Shared *data_shared = data->shared;
@@ -1260,9 +1260,9 @@ BVHTreeOverlap *BLI_bvhtree_overlap(
data[j].thread = j;
}
- BLI_task_parallel_range_ex(
- 0, thread_num, data, NULL, 0, bvhtree_overlap_task_cb,
- tree1->totleaf > KDOPBVH_THREAD_LEAF_THRESHOLD, false);
+ BLI_task_parallel_range(
+ 0, thread_num, data, bvhtree_overlap_task_cb,
+ tree1->totleaf > KDOPBVH_THREAD_LEAF_THRESHOLD);
for (j = 0; j < thread_num; j++)
total += BLI_stack_count(data[j].overlap);
diff --git a/source/blender/blenlib/intern/math_statistics.c b/source/blender/blenlib/intern/math_statistics.c
index 97c224e0878..fbd6563e039 100644
--- a/source/blender/blenlib/intern/math_statistics.c
+++ b/source/blender/blenlib/intern/math_statistics.c
@@ -46,7 +46,7 @@ typedef struct CovarianceData {
int nbr_cos_vn;
} CovarianceData;
-static void covariance_m_vn_ex_task_cb(void *userdata, void *UNUSED(userdata_chunk), int a)
+static void covariance_m_vn_ex_task_cb(void *userdata, const int a)
{
CovarianceData *data = userdata;
const float *cos_vn = data->cos_vn;
@@ -117,8 +117,8 @@ void BLI_covariance_m_vn_ex(
.covfac = covfac, .n = n, .nbr_cos_vn = nbr_cos_vn,
};
- BLI_task_parallel_range_ex(
- 0, n * n, &data, NULL, 0, covariance_m_vn_ex_task_cb, (nbr_cos_vn * n * n) >= 10000, false);
+ BLI_task_parallel_range(
+ 0, n * n, &data, covariance_m_vn_ex_task_cb, (nbr_cos_vn * n * n) >= 10000);
}
/**
diff --git a/source/blender/blenlib/intern/task.c b/source/blender/blenlib/intern/task.c
index 2be688a3d8c..f0edcc73345 100644
--- a/source/blender/blenlib/intern/task.c
+++ b/source/blender/blenlib/intern/task.c
@@ -584,7 +584,9 @@ typedef struct ParallelRangeState {
void *userdata;
void *userdata_chunk;
size_t userdata_chunk_size;
+
TaskParallelRangeFunc func;
+ TaskParallelRangeFuncEx func_ex;
int iter;
int chunk_size;
@@ -610,23 +612,31 @@ BLI_INLINE bool parallel_range_next_iter_get(
static void parallel_range_func(
TaskPool * __restrict pool,
void *UNUSED(taskdata),
- int UNUSED(threadid))
+ int threadid)
{
ParallelRangeState * __restrict state = BLI_task_pool_userdata(pool);
int iter, count;
- const bool use_userdata_chunk = (state->userdata_chunk_size != 0) && (state->userdata_chunk != NULL);
+ const bool use_userdata_chunk = (state->func_ex != NULL) &&
+ (state->userdata_chunk_size != 0) && (state->userdata_chunk != NULL);
void *userdata_chunk = use_userdata_chunk ? MALLOCA(state->userdata_chunk_size) : NULL;
while (parallel_range_next_iter_get(state, &iter, &count)) {
int i;
- if (use_userdata_chunk) {
- memcpy(userdata_chunk, state->userdata_chunk, state->userdata_chunk_size);
- }
+ if (state->func_ex) {
+ if (use_userdata_chunk) {
+ memcpy(userdata_chunk, state->userdata_chunk, state->userdata_chunk_size);
+ }
- for (i = 0; i < count; ++i) {
- state->func(state->userdata, userdata_chunk, iter + i);
+ for (i = 0; i < count; ++i) {
+ state->func_ex(state->userdata, userdata_chunk, iter + i, threadid);
+ }
+ }
+ else {
+ for (i = 0; i < count; ++i) {
+ state->func(state->userdata, iter + i);
+ }
}
}
@@ -642,18 +652,20 @@ static void parallel_range_func(
* \param userdata_chunk Optional, each instance of looping chunks will get a copy of this data
* (similar to OpenMP's firstprivate).
* \param userdata_chunk_size Memory size of \a userdata_chunk.
- * \param func Callback function.
+ * \param func Callback function (simple version).
+ * \param func_ex Callback function (advanced version).
* \param use_threading If \a true, actually split-execute the loop in threads, else just do a sequential for loop
* (allows caller to use any kind of test to switch on parallelization or not).
* \param use_dynamic_scheduling If \a true, the whole range is divided in a lot of small chunks (of size 32 currently),
* otherwise whole range is split in a few big chunks (num_threads * 2 chunks currently).
*/
-void BLI_task_parallel_range_ex(
+static void task_parallel_range_ex(
int start, int stop,
void *userdata,
void *userdata_chunk,
const size_t userdata_chunk_size,
TaskParallelRangeFunc func,
+ TaskParallelRangeFuncEx func_ex,
const bool use_threading,
const bool use_dynamic_scheduling)
{
@@ -666,25 +678,37 @@ void BLI_task_parallel_range_ex(
return;
}
- BLI_assert(start <= stop);
+ BLI_assert(start < stop);
+ if (userdata_chunk_size != 0) {
+ BLI_assert(func_ex != NULL && func == NULL);
+ BLI_assert(userdata_chunk != NULL);
+ }
/* If there is not enough data to be crunched, don't bother with tasks at all,
* do everything from the main thread.
*/
if (!use_threading) {
- const bool use_userdata_chunk = (userdata_chunk_size != 0) && (userdata_chunk != NULL);
- void *userdata_chunk_local = NULL;
+ if (func_ex) {
+ const bool use_userdata_chunk = (userdata_chunk_size != 0) && (userdata_chunk != NULL);
+ void *userdata_chunk_local = NULL;
- if (use_userdata_chunk) {
- userdata_chunk_local = MALLOCA(userdata_chunk_size);
- memcpy(userdata_chunk_local, userdata_chunk, userdata_chunk_size);
- }
+ if (use_userdata_chunk) {
+ userdata_chunk_local = MALLOCA(userdata_chunk_size);
+ memcpy(userdata_chunk_local, userdata_chunk, userdata_chunk_size);
+ }
- for (i = start; i < stop; ++i) {
- func(userdata, userdata_chunk_local, i);
+ for (i = start; i < stop; ++i) {
+ func_ex(userdata, userdata_chunk_local, i, 0);
+ }
+
+ MALLOCA_FREE(userdata_chunk_local, userdata_chunk_size);
+ }
+ else {
+ for (i = start; i < stop; ++i) {
+ func(userdata, i);
+ }
}
- MALLOCA_FREE(userdata_chunk_local, userdata_chunk_size);
return;
}
@@ -705,6 +729,7 @@ void BLI_task_parallel_range_ex(
state.userdata_chunk = userdata_chunk;
state.userdata_chunk_size = userdata_chunk_size;
state.func = func;
+ state.func_ex = func_ex;
state.iter = start;
if (use_dynamic_scheduling) {
state.chunk_size = 32;
@@ -729,15 +754,46 @@ void BLI_task_parallel_range_ex(
}
/**
+ * This function allows parallelizing for loops in a way similar to OpenMP's 'parallel for' statement.
+ *
+ * \param start First index to process.
+ * \param stop Index to stop looping (excluded).
+ * \param userdata Common userdata passed to all instances of \a func.
+ * \param userdata_chunk Optional, each instance of looping chunks will get a copy of this data
+ * (similar to OpenMP's firstprivate).
+ * \param userdata_chunk_size Memory size of \a userdata_chunk.
+ * \param func_ex Callback function (advanced version).
+ * \param use_threading If \a true, actually split-execute the loop in threads, else just do a sequential for loop
+ * (allows caller to use any kind of test to switch on parallelization or not).
+ * \param use_dynamic_scheduling If \a true, the whole range is divided in a lot of small chunks (of size 32 currently),
+ * otherwise whole range is split in a few big chunks (num_threads * 2 chunks currently).
+ */
+void BLI_task_parallel_range_ex(
+ int start, int stop,
+ void *userdata,
+ void *userdata_chunk,
+ const size_t userdata_chunk_size,
+ TaskParallelRangeFuncEx func_ex,
+ const bool use_threading,
+ const bool use_dynamic_scheduling)
+{
+ task_parallel_range_ex(
+ start, stop, userdata, userdata_chunk, userdata_chunk_size, NULL, func_ex,
+ use_threading, use_dynamic_scheduling);
+}
+
+/**
* A simpler version of \a BLI_task_parallel_range_ex, which does not use \a use_dynamic_scheduling,
- * has a \a range_threshold of 64, and does not handle 'firstprivate'-like \a userdata_chunk.
+ * and does not handle 'firstprivate'-like \a userdata_chunk.
*/
void BLI_task_parallel_range(
int start, int stop,
void *userdata,
- TaskParallelRangeFunc func)
+ TaskParallelRangeFunc func,
+ const bool use_threading)
{
- BLI_task_parallel_range_ex(start, stop, userdata, NULL, 0, func, (stop - start) > 64, false);
+ task_parallel_range_ex(start, stop, userdata, NULL, 0, func, NULL, use_threading, false);
}
#undef MALLOCA
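For readers of the doc comments above, 'similar to OpenMP's firstprivate' means: every chunk of iterations starts from a fresh copy of the caller's template, the callback mutates only that private copy, and nothing is merged back. A simplified, illustrative sketch of the extended worker path in parallel_range_func() (condensed from the diff above, not the literal code):

static void worker_sketch(ParallelRangeState *state, const int threadid)
{
    const bool use_userdata_chunk = (state->func_ex != NULL) &&
                                    (state->userdata_chunk_size != 0) &&
                                    (state->userdata_chunk != NULL);
    void *chunk_local = use_userdata_chunk ? MALLOCA(state->userdata_chunk_size) : NULL;
    int iter, count, i;

    while (parallel_range_next_iter_get(state, &iter, &count)) {
        if (use_userdata_chunk) {
            /* Fresh 'firstprivate' copy for this chunk of iterations. */
            memcpy(chunk_local, state->userdata_chunk, state->userdata_chunk_size);
        }
        for (i = 0; i < count; i++) {
            state->func_ex(state->userdata, chunk_local, iter + i, threadid);
        }
    }
    /* The private copy is simply discarded; this API has no reduction step. */
    MALLOCA_FREE(chunk_local, state->userdata_chunk_size);
}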