Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--source/blender/blenkernel/intern/colortools.c69
-rw-r--r--source/blender/blenkernel/intern/dynamicpaint.c33
-rw-r--r--source/blender/blenkernel/intern/particle_system.c15
-rw-r--r--source/blender/blenkernel/intern/subdiv_ccg.c18
-rw-r--r--source/blender/blenkernel/intern/subdiv_foreach.c6
-rw-r--r--source/blender/blenlib/BLI_task.h16
-rw-r--r--source/blender/blenlib/intern/task_iterator.c81
-rw-r--r--source/blender/editors/physics/particle_edit.c24
-rw-r--r--source/blender/editors/space_sequencer/sequencer_scopes.c20
-rw-r--r--tests/gtests/blenlib/BLI_task_test.cc32
10 files changed, 168 insertions, 146 deletions
diff --git a/source/blender/blenkernel/intern/colortools.c b/source/blender/blenkernel/intern/colortools.c
index f82b8b6675c..3da384a2745 100644
--- a/source/blender/blenkernel/intern/colortools.c
+++ b/source/blender/blenkernel/intern/colortools.c
@@ -1383,8 +1383,6 @@ typedef struct ScopesUpdateData {
struct ColormanageProcessor *cm_processor;
const unsigned char *display_buffer;
const int ycc_mode;
-
- unsigned int *bin_lum, *bin_r, *bin_g, *bin_b, *bin_a;
} ScopesUpdateData;
typedef struct ScopesUpdateDataChunk {
@@ -1495,23 +1493,24 @@ static void scopes_update_cb(void *__restrict userdata,
}
}
-static void scopes_update_finalize(void *__restrict userdata, void *__restrict userdata_chunk)
+static void scopes_update_reduce(const void *__restrict UNUSED(userdata),
+ void *__restrict chunk_join,
+ void *__restrict chunk)
{
- const ScopesUpdateData *data = userdata;
- const ScopesUpdateDataChunk *data_chunk = userdata_chunk;
-
- unsigned int *bin_lum = data->bin_lum;
- unsigned int *bin_r = data->bin_r;
- unsigned int *bin_g = data->bin_g;
- unsigned int *bin_b = data->bin_b;
- unsigned int *bin_a = data->bin_a;
+ ScopesUpdateDataChunk *join_chunk = chunk_join;
+ const ScopesUpdateDataChunk *data_chunk = chunk;
+
+ unsigned int *bin_lum = join_chunk->bin_lum;
+ unsigned int *bin_r = join_chunk->bin_r;
+ unsigned int *bin_g = join_chunk->bin_g;
+ unsigned int *bin_b = join_chunk->bin_b;
+ unsigned int *bin_a = join_chunk->bin_a;
const unsigned int *bin_lum_c = data_chunk->bin_lum;
const unsigned int *bin_r_c = data_chunk->bin_r;
const unsigned int *bin_g_c = data_chunk->bin_g;
const unsigned int *bin_b_c = data_chunk->bin_b;
const unsigned int *bin_a_c = data_chunk->bin_a;
- float(*minmax)[2] = data->scopes->minmax;
const float *min = data_chunk->min;
const float *max = data_chunk->max;
@@ -1524,11 +1523,11 @@ static void scopes_update_finalize(void *__restrict userdata, void *__restrict u
}
for (int c = 3; c--;) {
- if (min[c] < minmax[c][0]) {
- minmax[c][0] = min[c];
+ if (min[c] < join_chunk->min[c]) {
+ join_chunk->min[c] = min[c];
}
- if (max[c] > minmax[c][1]) {
- minmax[c][1] = max[c];
+ if (max[c] > join_chunk->max[c]) {
+ join_chunk->max[c] = max[c];
}
}
}
@@ -1542,7 +1541,6 @@ void BKE_scopes_update(Scopes *scopes,
unsigned int nl, na, nr, ng, nb;
double divl, diva, divr, divg, divb;
const unsigned char *display_buffer = NULL;
- uint bin_lum[256] = {0}, bin_r[256] = {0}, bin_g[256] = {0}, bin_b[256] = {0}, bin_a[256] = {0};
int ycc_mode = -1;
void *cache_handle = NULL;
struct ColormanageProcessor *cm_processor = NULL;
@@ -1638,11 +1636,6 @@ void BKE_scopes_update(Scopes *scopes,
.cm_processor = cm_processor,
.display_buffer = display_buffer,
.ycc_mode = ycc_mode,
- .bin_lum = bin_lum,
- .bin_r = bin_r,
- .bin_g = bin_g,
- .bin_b = bin_b,
- .bin_a = bin_a,
};
ScopesUpdateDataChunk data_chunk = {{0}};
INIT_MINMAX(data_chunk.min, data_chunk.max);
@@ -1652,26 +1645,26 @@ void BKE_scopes_update(Scopes *scopes,
settings.use_threading = (ibuf->y > 256);
settings.userdata_chunk = &data_chunk;
settings.userdata_chunk_size = sizeof(data_chunk);
- settings.func_finalize = scopes_update_finalize;
+ settings.func_reduce = scopes_update_reduce;
BLI_task_parallel_range(0, ibuf->y, &data, scopes_update_cb, &settings);
/* convert hist data to float (proportional to max count) */
nl = na = nr = nb = ng = 0;
for (a = 0; a < 256; a++) {
- if (bin_lum[a] > nl) {
- nl = bin_lum[a];
+ if (data_chunk.bin_lum[a] > nl) {
+ nl = data_chunk.bin_lum[a];
}
- if (bin_r[a] > nr) {
- nr = bin_r[a];
+ if (data_chunk.bin_r[a] > nr) {
+ nr = data_chunk.bin_r[a];
}
- if (bin_g[a] > ng) {
- ng = bin_g[a];
+ if (data_chunk.bin_g[a] > ng) {
+ ng = data_chunk.bin_g[a];
}
- if (bin_b[a] > nb) {
- nb = bin_b[a];
+ if (data_chunk.bin_b[a] > nb) {
+ nb = data_chunk.bin_b[a];
}
- if (bin_a[a] > na) {
- na = bin_a[a];
+ if (data_chunk.bin_a[a] > na) {
+ na = data_chunk.bin_a[a];
}
}
divl = nl ? 1.0 / (double)nl : 1.0;
@@ -1681,11 +1674,11 @@ void BKE_scopes_update(Scopes *scopes,
divb = nb ? 1.0 / (double)nb : 1.0;
for (a = 0; a < 256; a++) {
- scopes->hist.data_luma[a] = bin_lum[a] * divl;
- scopes->hist.data_r[a] = bin_r[a] * divr;
- scopes->hist.data_g[a] = bin_g[a] * divg;
- scopes->hist.data_b[a] = bin_b[a] * divb;
- scopes->hist.data_a[a] = bin_a[a] * diva;
+ scopes->hist.data_luma[a] = data_chunk.bin_lum[a] * divl;
+ scopes->hist.data_r[a] = data_chunk.bin_r[a] * divr;
+ scopes->hist.data_g[a] = data_chunk.bin_g[a] * divg;
+ scopes->hist.data_b[a] = data_chunk.bin_b[a] * divb;
+ scopes->hist.data_a[a] = data_chunk.bin_a[a] * diva;
}
if (cm_processor) {
diff --git a/source/blender/blenkernel/intern/dynamicpaint.c b/source/blender/blenkernel/intern/dynamicpaint.c
index 4c78c88d168..3a8b846a41d 100644
--- a/source/blender/blenkernel/intern/dynamicpaint.c
+++ b/source/blender/blenkernel/intern/dynamicpaint.c
@@ -653,15 +653,15 @@ static void grid_bound_insert_cb_ex(void *__restrict userdata,
boundInsert(grid_bound, bData->realCoord[bData->s_pos[i]].v);
}
-static void grid_bound_insert_finalize(void *__restrict userdata, void *__restrict userdata_chunk)
+static void grid_bound_insert_reduce(const void *__restrict UNUSED(userdata),
+ void *__restrict chunk_join,
+ void *__restrict chunk)
{
- PaintBakeData *bData = userdata;
- VolumeGrid *grid = bData->grid;
-
- Bounds3D *grid_bound = userdata_chunk;
+ Bounds3D *join = chunk_join;
+ Bounds3D *grid_bound = chunk;
- boundInsert(&grid->grid_bounds, grid_bound->min);
- boundInsert(&grid->grid_bounds, grid_bound->max);
+ boundInsert(join, grid_bound->min);
+ boundInsert(join, grid_bound->max);
}
static void grid_cell_points_cb_ex(void *__restrict userdata,
@@ -685,17 +685,20 @@ static void grid_cell_points_cb_ex(void *__restrict userdata,
s_num[temp_t_index[i]]++;
}
-static void grid_cell_points_finalize(void *__restrict userdata, void *__restrict userdata_chunk)
+static void grid_cell_points_reduce(const void *__restrict userdata,
+ void *__restrict chunk_join,
+ void *__restrict chunk)
{
- PaintBakeData *bData = userdata;
- VolumeGrid *grid = bData->grid;
+ const PaintBakeData *bData = userdata;
+ const VolumeGrid *grid = bData->grid;
const int grid_cells = grid->dim[0] * grid->dim[1] * grid->dim[2];
- int *s_num = userdata_chunk;
+ int *join_s_num = chunk_join;
+ int *s_num = chunk;
/* calculate grid indexes */
for (int i = 0; i < grid_cells; i++) {
- grid->s_num[i] += s_num[i];
+ join_s_num[i] += s_num[i];
}
}
@@ -753,7 +756,7 @@ static void surfaceGenerateGrid(struct DynamicPaintSurface *surface)
settings.use_threading = (sData->total_points > 1000);
settings.userdata_chunk = &grid->grid_bounds;
settings.userdata_chunk_size = sizeof(grid->grid_bounds);
- settings.func_finalize = grid_bound_insert_finalize;
+ settings.func_reduce = grid_bound_insert_reduce;
BLI_task_parallel_range(0, sData->total_points, bData, grid_bound_insert_cb_ex, &settings);
}
/* get dimensions */
@@ -814,7 +817,7 @@ static void surfaceGenerateGrid(struct DynamicPaintSurface *surface)
settings.use_threading = (sData->total_points > 1000);
settings.userdata_chunk = grid->s_num;
settings.userdata_chunk_size = sizeof(*grid->s_num) * grid_cells;
- settings.func_finalize = grid_cell_points_finalize;
+ settings.func_reduce = grid_cell_points_reduce;
BLI_task_parallel_range(0, sData->total_points, bData, grid_cell_points_cb_ex, &settings);
}
@@ -4880,7 +4883,7 @@ static void dynamicPaint_prepareAdjacencyData(DynamicPaintSurface *surface, cons
0, sData->total_points, sData, dynamic_paint_prepare_adjacency_cb, &settings);
/* calculate average values (single thread).
- * Note: tried to put this in threaded callback (using _finalize feature),
+ * Note: tried to put this in threaded callback (using _reduce feature),
* but gave ~30% slower result! */
bData->average_dist = 0.0;
for (index = 0; index < sData->total_points; index++) {
diff --git a/source/blender/blenkernel/intern/particle_system.c b/source/blender/blenkernel/intern/particle_system.c
index 5ef2f7aeeff..14b1ef7b87f 100644
--- a/source/blender/blenkernel/intern/particle_system.c
+++ b/source/blender/blenkernel/intern/particle_system.c
@@ -3692,10 +3692,11 @@ typedef struct DynamicStepSolverTaskData {
SpinLock spin;
} DynamicStepSolverTaskData;
-static void dynamics_step_finalize_sphdata(void *__restrict UNUSED(userdata),
- void *__restrict tls_userdata_chunk)
+static void dynamics_step_sphdata_reduce(const void *__restrict UNUSED(userdata),
+ void *__restrict UNUSED(join_v),
+ void *__restrict chunk_v)
{
- SPHData *sphdata = tls_userdata_chunk;
+ SPHData *sphdata = chunk_v;
psys_sph_flush_springs(sphdata);
}
@@ -3986,7 +3987,7 @@ static void dynamics_step(ParticleSimulationData *sim, float cfra)
settings.use_threading = (psys->totpart > 100);
settings.userdata_chunk = &sphdata;
settings.userdata_chunk_size = sizeof(sphdata);
- settings.func_finalize = dynamics_step_finalize_sphdata;
+ settings.func_reduce = dynamics_step_sphdata_reduce;
BLI_task_parallel_range(
0, psys->totpart, &task_data, dynamics_step_sph_ddr_task_cb_ex, &settings);
@@ -4018,7 +4019,7 @@ static void dynamics_step(ParticleSimulationData *sim, float cfra)
settings.use_threading = (psys->totpart > 100);
settings.userdata_chunk = &sphdata;
settings.userdata_chunk_size = sizeof(sphdata);
- settings.func_finalize = dynamics_step_finalize_sphdata;
+ settings.func_reduce = dynamics_step_sphdata_reduce;
BLI_task_parallel_range(0,
psys->totpart,
&task_data,
@@ -4033,7 +4034,7 @@ static void dynamics_step(ParticleSimulationData *sim, float cfra)
settings.use_threading = (psys->totpart > 100);
settings.userdata_chunk = &sphdata;
settings.userdata_chunk_size = sizeof(sphdata);
- settings.func_finalize = dynamics_step_finalize_sphdata;
+ settings.func_reduce = dynamics_step_sphdata_reduce;
BLI_task_parallel_range(0,
psys->totpart,
&task_data,
@@ -4189,7 +4190,7 @@ static void particles_fluid_step(ParticleSimulationData *sim,
ParticleSettings *part = psys->part;
ParticleData *pa = NULL;
- int p, totpart, tottypepart = 0;
+ int p, totpart = 0, tottypepart = 0;
int flagActivePart, activeParts = 0;
float posX, posY, posZ, velX, velY, velZ;
float resX, resY, resZ;
diff --git a/source/blender/blenkernel/intern/subdiv_ccg.c b/source/blender/blenkernel/intern/subdiv_ccg.c
index 521aeb60e66..d99c41eaa3e 100644
--- a/source/blender/blenkernel/intern/subdiv_ccg.c
+++ b/source/blender/blenkernel/intern/subdiv_ccg.c
@@ -770,8 +770,8 @@ static void subdiv_ccg_recalc_inner_normal_task(void *__restrict userdata_v,
subdiv_ccg_average_inner_face_normals(data->subdiv_ccg, data->key, tls, grid_index);
}
-static void subdiv_ccg_recalc_inner_normal_finalize(void *__restrict UNUSED(userdata),
- void *__restrict tls_v)
+static void subdiv_ccg_recalc_inner_normal_free(const void *__restrict UNUSED(userdata),
+ void *__restrict tls_v)
{
RecalcInnerNormalsTLSData *tls = tls_v;
MEM_SAFE_FREE(tls->face_normals);
@@ -791,7 +791,7 @@ static void subdiv_ccg_recalc_inner_grid_normals(SubdivCCG *subdiv_ccg)
BLI_parallel_range_settings_defaults(&parallel_range_settings);
parallel_range_settings.userdata_chunk = &tls_data;
parallel_range_settings.userdata_chunk_size = sizeof(tls_data);
- parallel_range_settings.func_finalize = subdiv_ccg_recalc_inner_normal_finalize;
+ parallel_range_settings.func_free = subdiv_ccg_recalc_inner_normal_free;
BLI_task_parallel_range(0,
subdiv_ccg->num_grids,
&data,
@@ -834,8 +834,8 @@ static void subdiv_ccg_recalc_modified_inner_normal_task(void *__restrict userda
subdiv_ccg_average_inner_face_grids(subdiv_ccg, key, face);
}
-static void subdiv_ccg_recalc_modified_inner_normal_finalize(void *__restrict UNUSED(userdata),
- void *__restrict tls_v)
+static void subdiv_ccg_recalc_modified_inner_normal_free(const void *__restrict UNUSED(userdata),
+ void *__restrict tls_v)
{
RecalcInnerNormalsTLSData *tls = tls_v;
MEM_SAFE_FREE(tls->face_normals);
@@ -857,7 +857,7 @@ static void subdiv_ccg_recalc_modified_inner_grid_normals(SubdivCCG *subdiv_ccg,
BLI_parallel_range_settings_defaults(&parallel_range_settings);
parallel_range_settings.userdata_chunk = &tls_data;
parallel_range_settings.userdata_chunk_size = sizeof(tls_data);
- parallel_range_settings.func_finalize = subdiv_ccg_recalc_modified_inner_normal_finalize;
+ parallel_range_settings.func_free = subdiv_ccg_recalc_modified_inner_normal_free;
BLI_task_parallel_range(0,
num_effected_faces,
&data,
@@ -1077,8 +1077,8 @@ static void subdiv_ccg_average_grids_boundaries_task(void *__restrict userdata_v
subdiv_ccg_average_grids_boundary(subdiv_ccg, key, adjacent_edge, tls);
}
-static void subdiv_ccg_average_grids_boundaries_finalize(void *__restrict UNUSED(userdata),
- void *__restrict tls_v)
+static void subdiv_ccg_average_grids_boundaries_free(const void *__restrict UNUSED(userdata),
+ void *__restrict tls_v)
{
AverageGridsBoundariesTLSData *tls = tls_v;
MEM_SAFE_FREE(tls->accumulators);
@@ -1136,7 +1136,7 @@ static void subdiv_ccg_average_all_boundaries(SubdivCCG *subdiv_ccg, CCGKey *key
AverageGridsBoundariesTLSData tls_data = {NULL};
parallel_range_settings.userdata_chunk = &tls_data;
parallel_range_settings.userdata_chunk_size = sizeof(tls_data);
- parallel_range_settings.func_finalize = subdiv_ccg_average_grids_boundaries_finalize;
+ parallel_range_settings.func_free = subdiv_ccg_average_grids_boundaries_free;
BLI_task_parallel_range(0,
subdiv_ccg->num_adjacent_edges,
&boundaries_data,
diff --git a/source/blender/blenkernel/intern/subdiv_foreach.c b/source/blender/blenkernel/intern/subdiv_foreach.c
index b31fb2c9312..0884f40952f 100644
--- a/source/blender/blenkernel/intern/subdiv_foreach.c
+++ b/source/blender/blenkernel/intern/subdiv_foreach.c
@@ -1838,9 +1838,9 @@ static void subdiv_foreach_boundary_edges_task(void *__restrict userdata,
subdiv_foreach_boundary_edges(ctx, tls->userdata_chunk, edge_index);
}
-static void subdiv_foreach_finalize(void *__restrict userdata, void *__restrict userdata_chunk)
+static void subdiv_foreach_free(const void *__restrict userdata, void *__restrict userdata_chunk)
{
- SubdivForeachTaskContext *ctx = userdata;
+ const SubdivForeachTaskContext *ctx = userdata;
ctx->foreach_context->user_data_tls_free(userdata_chunk);
}
@@ -1873,7 +1873,7 @@ bool BKE_subdiv_foreach_subdiv_geometry(Subdiv *subdiv,
parallel_range_settings.userdata_chunk_size = context->user_data_tls_size;
parallel_range_settings.min_iter_per_thread = 1;
if (context->user_data_tls_free != NULL) {
- parallel_range_settings.func_finalize = subdiv_foreach_finalize;
+ parallel_range_settings.func_free = subdiv_foreach_free;
}
/* TODO(sergey): Possible optimization is to have a single pool and push all
diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h
index 898e4be5f92..78370e98202 100644
--- a/source/blender/blenlib/BLI_task.h
+++ b/source/blender/blenlib/BLI_task.h
@@ -146,12 +146,14 @@ typedef struct TaskParallelTLS {
void *userdata_chunk;
} TaskParallelTLS;
-typedef void (*TaskParallelFinalizeFunc)(void *__restrict userdata,
- void *__restrict userdata_chunk);
-
typedef void (*TaskParallelRangeFunc)(void *__restrict userdata,
const int iter,
const TaskParallelTLS *__restrict tls);
+typedef void (*TaskParallelReduceFunc)(const void *__restrict userdata,
+ void *__restrict chunk_join,
+ void *__restrict chunk);
+
+typedef void (*TaskParallelFreeFunc)(const void *__restrict userdata, void *__restrict chunk);
typedef struct TaskParallelSettings {
/* Whether caller allows to do threading of the particular range.
@@ -171,7 +173,13 @@ typedef struct TaskParallelSettings {
/* Function called from calling thread once whole range have been
* processed.
*/
- TaskParallelFinalizeFunc func_finalize;
+ /* Function called to join user data chunk into another, to reduce
+ * the result to the original userdata_chunk memory.
+ * The reduce functions should have no side effects, so that they
+ * can be run on any thread. */
+ TaskParallelReduceFunc func_reduce;
+ /* Function called to free data created by TaskParallelRangeFunc. */
+ TaskParallelFreeFunc func_free;
/* Minimum allowed number of range iterators to be handled by a single
* thread. This allows to achieve following:
* - Reduce amount of threading overhead.
diff --git a/source/blender/blenlib/intern/task_iterator.c b/source/blender/blenlib/intern/task_iterator.c
index 4ac012fa578..0d1c96612e3 100644
--- a/source/blender/blenlib/intern/task_iterator.c
+++ b/source/blender/blenlib/intern/task_iterator.c
@@ -78,8 +78,13 @@ typedef struct TaskParallelRangeState {
/* Number of 'tls' copies in the array, i.e. number of worker threads. */
size_t num_elements_in_tls_storage;
- /* Function called from calling thread once whole range have been processed. */
- TaskParallelFinalizeFunc func_finalize;
+ /* Function called to join user data chunk into another, to reduce
+ * the result to the original userdata_chunk memory.
+ * The reduce functions should have no side effects, so that they
+ * can be run on any thread. */
+ TaskParallelReduceFunc func_reduce;
+ /* Function called to free data created by TaskParallelRangeFunc. */
+ TaskParallelFreeFunc func_free;
/* Current value of the iterator, shared between all threads (atomically updated). */
int iter_value;
@@ -256,23 +261,18 @@ static void parallel_range_single_thread(TaskParallelRangePool *range_pool)
void *initial_tls_memory = state->initial_tls_memory;
const size_t tls_data_size = state->tls_data_size;
- void *flatten_tls_storage = NULL;
const bool use_tls_data = (tls_data_size != 0) && (initial_tls_memory != NULL);
- if (use_tls_data) {
- flatten_tls_storage = MALLOCA(tls_data_size);
- memcpy(flatten_tls_storage, initial_tls_memory, tls_data_size);
- }
TaskParallelTLS tls = {
.thread_id = 0,
- .userdata_chunk = flatten_tls_storage,
+ .userdata_chunk = initial_tls_memory,
};
for (int i = start; i < stop; i++) {
func(userdata, i, &tls);
}
- if (state->func_finalize != NULL) {
- state->func_finalize(userdata, flatten_tls_storage);
+ if (use_tls_data && state->func_free != NULL) {
+ /* `func_free` should only free data that was created during execution of `func`. */
+ state->func_free(userdata, initial_tls_memory);
}
- MALLOCA_FREE(flatten_tls_storage, tls_data_size);
}
}
@@ -303,7 +303,7 @@ void BLI_task_parallel_range(const int start,
.iter_value = start,
.initial_tls_memory = settings->userdata_chunk,
.tls_data_size = settings->userdata_chunk_size,
- .func_finalize = settings->func_finalize,
+ .func_free = settings->func_free,
};
TaskParallelRangePool range_pool = {
.pool = NULL, .parallel_range_states = &state, .current_state = NULL, .settings = settings};
@@ -367,11 +367,15 @@ void BLI_task_parallel_range(const int start,
BLI_task_pool_work_and_wait(task_pool);
BLI_task_pool_free(task_pool);
- if (use_tls_data) {
- if (settings->func_finalize != NULL) {
- for (i = 0; i < num_tasks; i++) {
- void *userdata_chunk_local = (char *)flatten_tls_storage + (tls_data_size * (size_t)i);
- settings->func_finalize(userdata, userdata_chunk_local);
+ if (use_tls_data && (settings->func_free != NULL || settings->func_reduce != NULL)) {
+ for (i = 0; i < num_tasks; i++) {
+ void *userdata_chunk_local = (char *)flatten_tls_storage + (tls_data_size * (size_t)i);
+ if (settings->func_reduce) {
+ settings->func_reduce(userdata, tls_data, userdata_chunk_local);
+ }
+ if (settings->func_free) {
+ /* `func_free` should only free data that was created during execution of `func`. */
+ settings->func_free(userdata, userdata_chunk_local);
}
}
MALLOCA_FREE(flatten_tls_storage, tls_data_size * (size_t)num_tasks);
@@ -382,16 +386,17 @@ void BLI_task_parallel_range(const int start,
* Initialize a task pool to parallelize several for loops at the same time.
*
* See public API doc of ParallelRangeSettings for description of all settings.
- * Note that loop-specific settings (like 'tls' data or finalize function) must be left NULL here.
- * Only settings controlling how iteration is parallelized must be defined, as those will affect
- * all loops added to that pool.
+ * Note that loop-specific settings (like 'tls' data or reduce/free functions) must be left NULL
+ * here. Only settings controlling how iteration is parallelized must be defined, as those will
+ * affect all loops added to that pool.
*/
TaskParallelRangePool *BLI_task_parallel_range_pool_init(const TaskParallelSettings *settings)
{
TaskParallelRangePool *range_pool = MEM_callocN(sizeof(*range_pool), __func__);
BLI_assert(settings->userdata_chunk == NULL);
- BLI_assert(settings->func_finalize == NULL);
+ BLI_assert(settings->func_reduce == NULL);
+ BLI_assert(settings->func_free == NULL);
range_pool->settings = MEM_mallocN(sizeof(*range_pool->settings), __func__);
*range_pool->settings = *settings;
@@ -430,7 +435,8 @@ void BLI_task_parallel_range_pool_push(TaskParallelRangePool *range_pool,
state->iter_value = start;
state->initial_tls_memory = settings->userdata_chunk;
state->tls_data_size = settings->userdata_chunk_size;
- state->func_finalize = settings->func_finalize;
+ state->func_reduce = settings->func_reduce;
+ state->func_free = settings->func_free;
state->next = range_pool->parallel_range_states;
range_pool->parallel_range_states = state;
@@ -445,7 +451,13 @@ static void parallel_range_func_finalize(TaskPool *__restrict pool,
for (int i = 0; i < range_pool->num_tasks; i++) {
void *tls_data = (char *)state->flatten_tls_storage + (state->tls_data_size * (size_t)i);
- state->func_finalize(state->userdata_shared, tls_data);
+ if (state->func_reduce != NULL) {
+ state->func_reduce(state->userdata_shared, state->initial_tls_memory, tls_data);
+ }
+ if (state->func_free != NULL) {
+ /* `func_free` should only free data that was created during execution of `func`. */
+ state->func_free(state->userdata_shared, tls_data);
+ }
}
}
@@ -531,7 +543,7 @@ void BLI_task_parallel_range_pool_work_and_wait(TaskParallelRangePool *range_poo
continue;
}
- if (state->func_finalize != NULL) {
+ if (state->func_reduce != NULL || state->func_free != NULL) {
BLI_task_pool_push_from_thread(
task_pool, parallel_range_func_finalize, state, false, NULL, thread_id);
}
@@ -677,11 +689,9 @@ static void task_parallel_iterator_no_threads(const TaskParallelSettings *settin
parallel_iterator_func_do(state, userdata_chunk, 0);
- if (use_userdata_chunk) {
- if (settings->func_finalize != NULL) {
- settings->func_finalize(state->userdata, userdata_chunk_local);
- }
- MALLOCA_FREE(userdata_chunk_local, userdata_chunk_size);
+ if (use_userdata_chunk && settings->func_free != NULL) {
+ /* `func_free` should only free data that was created during execution of `func`. */
+ settings->func_free(state->userdata, userdata_chunk_local);
}
}
@@ -740,11 +750,14 @@ static void task_parallel_iterator_do(const TaskParallelSettings *settings,
BLI_task_pool_work_and_wait(task_pool);
BLI_task_pool_free(task_pool);
- if (use_userdata_chunk) {
- if (settings->func_finalize != NULL) {
- for (size_t i = 0; i < num_tasks; i++) {
- userdata_chunk_local = (char *)userdata_chunk_array + (userdata_chunk_size * i);
- settings->func_finalize(state->userdata, userdata_chunk_local);
+ if (use_userdata_chunk && (settings->func_reduce != NULL || settings->func_free != NULL)) {
+ for (size_t i = 0; i < num_tasks; i++) {
+ userdata_chunk_local = (char *)userdata_chunk_array + (userdata_chunk_size * i);
+ if (settings->func_reduce != NULL) {
+ settings->func_reduce(state->userdata, userdata_chunk, userdata_chunk_local);
+ }
+ if (settings->func_free != NULL) {
+ settings->func_free(state->userdata, userdata_chunk_local);
}
}
MALLOCA_FREE(userdata_chunk_array, userdata_chunk_size * num_tasks);
diff --git a/source/blender/editors/physics/particle_edit.c b/source/blender/editors/physics/particle_edit.c
index 75ae0299318..3ce5aeedae8 100644
--- a/source/blender/editors/physics/particle_edit.c
+++ b/source/blender/editors/physics/particle_edit.c
@@ -4084,7 +4084,6 @@ typedef struct BrushAddCountIterData {
short size;
float imat[4][4];
ParticleData *add_pars;
- int num_added;
} BrushAddCountIterData;
typedef struct BrushAddCountIterTLSData {
@@ -4176,12 +4175,19 @@ static void brush_add_count_iter(void *__restrict iter_data_v,
}
}
-static void brush_add_count_iter_finalize(void *__restrict userdata_v,
- void *__restrict userdata_chunk_v)
+static void brush_add_count_iter_reduce(const void *__restrict UNUSED(userdata),
+ void *__restrict join_v,
+ void *__restrict chunk_v)
+{
+ BrushAddCountIterTLSData *join = (BrushAddCountIterTLSData *)join_v;
+ BrushAddCountIterTLSData *tls = (BrushAddCountIterTLSData *)chunk_v;
+ join->num_added += tls->num_added;
+}
+
+static void brush_add_count_iter_free(const void *__restrict UNUSED(userdata_v),
+ void *__restrict chunk_v)
{
- BrushAddCountIterData *iter_data = (BrushAddCountIterData *)userdata_v;
- BrushAddCountIterTLSData *tls = (BrushAddCountIterTLSData *)userdata_chunk_v;
- iter_data->num_added += tls->num_added;
+ BrushAddCountIterTLSData *tls = (BrushAddCountIterTLSData *)chunk_v;
if (tls->rng != NULL) {
BLI_rng_free(tls->rng);
}
@@ -4245,7 +4251,6 @@ static int brush_add(const bContext *C, PEData *data, short number)
iter_data.number = number;
iter_data.size = size;
iter_data.add_pars = add_pars;
- iter_data.num_added = 0;
copy_m4_m4(iter_data.imat, imat);
BrushAddCountIterTLSData tls = {NULL};
@@ -4255,13 +4260,14 @@ static int brush_add(const bContext *C, PEData *data, short number)
settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC;
settings.userdata_chunk = &tls;
settings.userdata_chunk_size = sizeof(BrushAddCountIterTLSData);
- settings.func_finalize = brush_add_count_iter_finalize;
+ settings.func_reduce = brush_add_count_iter_reduce;
+ settings.func_free = brush_add_count_iter_free;
BLI_task_parallel_range(0, number, &iter_data, brush_add_count_iter, &settings);
/* Convert add_parse to a dense array, where all new particles are in the
* beginning of the array.
*/
- n = iter_data.num_added;
+ n = tls.num_added;
for (int current_iter = 0, new_index = 0; current_iter < number; current_iter++) {
if (add_pars[current_iter].num == DMCACHE_NOTFOUND) {
continue;
diff --git a/source/blender/editors/space_sequencer/sequencer_scopes.c b/source/blender/editors/space_sequencer/sequencer_scopes.c
index b06bc7ad8b7..243a6e193eb 100644
--- a/source/blender/editors/space_sequencer/sequencer_scopes.c
+++ b/source/blender/editors/space_sequencer/sequencer_scopes.c
@@ -447,7 +447,6 @@ static void draw_histogram_bar(ImBuf *ibuf, int x, float val, int col)
typedef struct MakeHistogramViewData {
const ImBuf *ibuf;
- uint32_t (*bins)[HIS_STEPS];
} MakeHistogramViewData;
static void make_histogram_view_from_ibuf_byte_fn(void *__restrict userdata,
@@ -469,17 +468,16 @@ static void make_histogram_view_from_ibuf_byte_fn(void *__restrict userdata,
}
}
-static void make_histogram_view_from_ibuf_finalize(void *__restrict userdata,
- void *__restrict userdata_chunk)
+static void make_histogram_view_from_ibuf_reduce(const void *__restrict UNUSED(userdata),
+ void *__restrict chunk_join,
+ void *__restrict chunk)
{
- MakeHistogramViewData *data = userdata;
- uint32_t(*bins)[HIS_STEPS] = data->bins;
-
- uint32_t(*cur_bins)[HIS_STEPS] = userdata_chunk;
+ uint32_t(*join_bins)[HIS_STEPS] = chunk_join;
+ uint32_t(*bins)[HIS_STEPS] = chunk;
for (int j = 3; j--;) {
for (int i = 0; i < HIS_STEPS; i++) {
- bins[j][i] += cur_bins[j][i];
+ join_bins[j][i] += bins[j][i];
}
}
}
@@ -496,14 +494,13 @@ static ImBuf *make_histogram_view_from_ibuf_byte(ImBuf *ibuf)
MakeHistogramViewData data = {
.ibuf = ibuf,
- .bins = bins,
};
TaskParallelSettings settings;
BLI_parallel_range_settings_defaults(&settings);
settings.use_threading = (ibuf->y >= 256);
settings.userdata_chunk = bins;
settings.userdata_chunk_size = sizeof(bins);
- settings.func_finalize = make_histogram_view_from_ibuf_finalize;
+ settings.func_reduce = make_histogram_view_from_ibuf_reduce;
BLI_task_parallel_range(0, ibuf->y, &data, make_histogram_view_from_ibuf_byte_fn, &settings);
nr = nb = ng = 0;
@@ -582,14 +579,13 @@ static ImBuf *make_histogram_view_from_ibuf_float(ImBuf *ibuf)
MakeHistogramViewData data = {
.ibuf = ibuf,
- .bins = bins,
};
TaskParallelSettings settings;
BLI_parallel_range_settings_defaults(&settings);
settings.use_threading = (ibuf->y >= 256);
settings.userdata_chunk = bins;
settings.userdata_chunk_size = sizeof(bins);
- settings.func_finalize = make_histogram_view_from_ibuf_finalize;
+ settings.func_reduce = make_histogram_view_from_ibuf_reduce;
BLI_task_parallel_range(0, ibuf->y, &data, make_histogram_view_from_ibuf_float_fn, &settings);
nr = nb = ng = 0;
diff --git a/tests/gtests/blenlib/BLI_task_test.cc b/tests/gtests/blenlib/BLI_task_test.cc
index d4ab9de13c4..1f8c092c66a 100644
--- a/tests/gtests/blenlib/BLI_task_test.cc
+++ b/tests/gtests/blenlib/BLI_task_test.cc
@@ -27,17 +27,19 @@ static void task_range_iter_func(void *userdata, int index, const TaskParallelTL
// printf("%d, %d, %d\n", index, data[index], *((int *)tls->userdata_chunk));
}
-static void task_range_iter_finalize_func(void *__restrict userdata,
- void *__restrict userdata_chunk)
+static void task_range_iter_reduce_func(const void *__restrict UNUSED(userdata),
+ void *__restrict join_v,
+ void *__restrict userdata_chunk)
{
- int *data = (int *)userdata;
- data[NUM_ITEMS] += *(int *)userdata_chunk;
+ int *join = (int *)join_v;
+ int *chunk = (int *)userdata_chunk;
+ *join += *chunk;
// printf("%d, %d\n", data[NUM_ITEMS], *((int *)userdata_chunk));
}
TEST(task, RangeIter)
{
- int data[NUM_ITEMS + 1] = {0};
+ int data[NUM_ITEMS] = {0};
int sum = 0;
BLI_threadapi_init();
@@ -48,7 +50,7 @@ TEST(task, RangeIter)
settings.userdata_chunk = &sum;
settings.userdata_chunk_size = sizeof(sum);
- settings.func_finalize = task_range_iter_finalize_func;
+ settings.func_reduce = task_range_iter_reduce_func;
BLI_task_parallel_range(0, NUM_ITEMS, data, task_range_iter_func, &settings);
@@ -60,7 +62,7 @@ TEST(task, RangeIter)
EXPECT_EQ(data[i], i);
expected_sum += i;
}
- EXPECT_EQ(data[NUM_ITEMS], expected_sum);
+ EXPECT_EQ(sum, expected_sum);
BLI_threadapi_exit();
}
@@ -68,7 +70,7 @@ TEST(task, RangeIter)
TEST(task, RangeIterPool)
{
const int num_tasks = 10;
- int data[num_tasks][NUM_ITEMS + 1] = {{0}};
+ int data[num_tasks][NUM_ITEMS] = {{0}};
int sum = 0;
BLI_threadapi_init();
@@ -82,7 +84,7 @@ TEST(task, RangeIterPool)
for (int j = 0; j < num_tasks; j++) {
settings.userdata_chunk = &sum;
settings.userdata_chunk_size = sizeof(sum);
- settings.func_finalize = task_range_iter_finalize_func;
+ settings.func_reduce = task_range_iter_reduce_func;
BLI_task_parallel_range_pool_push(
range_pool, 0, NUM_ITEMS, data[j], task_range_iter_func, &settings);
@@ -93,16 +95,16 @@ TEST(task, RangeIterPool)
/* Those checks should ensure us all items of the listbase were processed once, and only once -
* as expected. */
+ int expected_sum = 0;
for (int j = 0; j < num_tasks; j++) {
- int expected_sum = 0;
for (int i = 0; i < NUM_ITEMS; i++) {
- // EXPECT_EQ(data[j][i], i);
+ // EXPECT_EQ(data[j][i], i);
expected_sum += i;
}
- EXPECT_EQ(data[j][NUM_ITEMS], expected_sum);
}
+ EXPECT_EQ(sum, expected_sum);
- /* A pool can be re-used untill it is freed. */
+ /* A pool can be re-used until it is freed. */
for (int j = 0; j < num_tasks; j++) {
memset(data[j], 0, sizeof(data[j]));
@@ -112,7 +114,7 @@ TEST(task, RangeIterPool)
for (int j = 0; j < num_tasks; j++) {
settings.userdata_chunk = &sum;
settings.userdata_chunk_size = sizeof(sum);
- settings.func_finalize = task_range_iter_finalize_func;
+ settings.func_reduce = task_range_iter_reduce_func;
BLI_task_parallel_range_pool_push(
range_pool, 0, NUM_ITEMS, data[j], task_range_iter_func, &settings);
@@ -131,8 +133,8 @@ TEST(task, RangeIterPool)
// EXPECT_EQ(data[j][i], i);
expected_sum += i;
}
- EXPECT_EQ(data[j][NUM_ITEMS], expected_sum);
}
+ EXPECT_EQ(sum, expected_sum);
BLI_threadapi_exit();
}