diff options
Diffstat (limited to 'source/blender')
-rw-r--r-- | source/blender/blenkernel/intern/colortools.c | 69 | ||||
-rw-r--r-- | source/blender/blenkernel/intern/dynamicpaint.c | 33 | ||||
-rw-r--r-- | source/blender/blenkernel/intern/particle_system.c | 15 | ||||
-rw-r--r-- | source/blender/blenkernel/intern/subdiv_ccg.c | 18 | ||||
-rw-r--r-- | source/blender/blenkernel/intern/subdiv_foreach.c | 6 | ||||
-rw-r--r-- | source/blender/blenlib/BLI_task.h | 16 | ||||
-rw-r--r-- | source/blender/blenlib/intern/task_iterator.c | 81 | ||||
-rw-r--r-- | source/blender/editors/physics/particle_edit.c | 24 | ||||
-rw-r--r-- | source/blender/editors/space_sequencer/sequencer_scopes.c | 20 |
9 files changed, 151 insertions, 131 deletions
diff --git a/source/blender/blenkernel/intern/colortools.c b/source/blender/blenkernel/intern/colortools.c index f82b8b6675c..3da384a2745 100644 --- a/source/blender/blenkernel/intern/colortools.c +++ b/source/blender/blenkernel/intern/colortools.c @@ -1383,8 +1383,6 @@ typedef struct ScopesUpdateData { struct ColormanageProcessor *cm_processor; const unsigned char *display_buffer; const int ycc_mode; - - unsigned int *bin_lum, *bin_r, *bin_g, *bin_b, *bin_a; } ScopesUpdateData; typedef struct ScopesUpdateDataChunk { @@ -1495,23 +1493,24 @@ static void scopes_update_cb(void *__restrict userdata, } } -static void scopes_update_finalize(void *__restrict userdata, void *__restrict userdata_chunk) +static void scopes_update_reduce(const void *__restrict UNUSED(userdata), + void *__restrict chunk_join, + void *__restrict chunk) { - const ScopesUpdateData *data = userdata; - const ScopesUpdateDataChunk *data_chunk = userdata_chunk; - - unsigned int *bin_lum = data->bin_lum; - unsigned int *bin_r = data->bin_r; - unsigned int *bin_g = data->bin_g; - unsigned int *bin_b = data->bin_b; - unsigned int *bin_a = data->bin_a; + ScopesUpdateDataChunk *join_chunk = chunk_join; + const ScopesUpdateDataChunk *data_chunk = chunk; + + unsigned int *bin_lum = join_chunk->bin_lum; + unsigned int *bin_r = join_chunk->bin_r; + unsigned int *bin_g = join_chunk->bin_g; + unsigned int *bin_b = join_chunk->bin_b; + unsigned int *bin_a = join_chunk->bin_a; const unsigned int *bin_lum_c = data_chunk->bin_lum; const unsigned int *bin_r_c = data_chunk->bin_r; const unsigned int *bin_g_c = data_chunk->bin_g; const unsigned int *bin_b_c = data_chunk->bin_b; const unsigned int *bin_a_c = data_chunk->bin_a; - float(*minmax)[2] = data->scopes->minmax; const float *min = data_chunk->min; const float *max = data_chunk->max; @@ -1524,11 +1523,11 @@ static void scopes_update_finalize(void *__restrict userdata, void *__restrict u } for (int c = 3; c--;) { - if (min[c] < minmax[c][0]) { - minmax[c][0] = min[c]; + if (min[c] < join_chunk->min[c]) { + join_chunk->min[c] = min[c]; } - if (max[c] > minmax[c][1]) { - minmax[c][1] = max[c]; + if (max[c] > join_chunk->max[c]) { + join_chunk->max[c] = max[c]; } } } @@ -1542,7 +1541,6 @@ void BKE_scopes_update(Scopes *scopes, unsigned int nl, na, nr, ng, nb; double divl, diva, divr, divg, divb; const unsigned char *display_buffer = NULL; - uint bin_lum[256] = {0}, bin_r[256] = {0}, bin_g[256] = {0}, bin_b[256] = {0}, bin_a[256] = {0}; int ycc_mode = -1; void *cache_handle = NULL; struct ColormanageProcessor *cm_processor = NULL; @@ -1638,11 +1636,6 @@ void BKE_scopes_update(Scopes *scopes, .cm_processor = cm_processor, .display_buffer = display_buffer, .ycc_mode = ycc_mode, - .bin_lum = bin_lum, - .bin_r = bin_r, - .bin_g = bin_g, - .bin_b = bin_b, - .bin_a = bin_a, }; ScopesUpdateDataChunk data_chunk = {{0}}; INIT_MINMAX(data_chunk.min, data_chunk.max); @@ -1652,26 +1645,26 @@ void BKE_scopes_update(Scopes *scopes, settings.use_threading = (ibuf->y > 256); settings.userdata_chunk = &data_chunk; settings.userdata_chunk_size = sizeof(data_chunk); - settings.func_finalize = scopes_update_finalize; + settings.func_reduce = scopes_update_reduce; BLI_task_parallel_range(0, ibuf->y, &data, scopes_update_cb, &settings); /* convert hist data to float (proportional to max count) */ nl = na = nr = nb = ng = 0; for (a = 0; a < 256; a++) { - if (bin_lum[a] > nl) { - nl = bin_lum[a]; + if (data_chunk.bin_lum[a] > nl) { + nl = data_chunk.bin_lum[a]; } - if (bin_r[a] > nr) { - nr = bin_r[a]; + if (data_chunk.bin_r[a] > nr) { + nr = data_chunk.bin_r[a]; } - if (bin_g[a] > ng) { - ng = bin_g[a]; + if (data_chunk.bin_g[a] > ng) { + ng = data_chunk.bin_g[a]; } - if (bin_b[a] > nb) { - nb = bin_b[a]; + if (data_chunk.bin_b[a] > nb) { + nb = data_chunk.bin_b[a]; } - if (bin_a[a] > na) { - na = bin_a[a]; + if (data_chunk.bin_a[a] > na) { + na = data_chunk.bin_a[a]; } } divl = nl ? 1.0 / (double)nl : 1.0; @@ -1681,11 +1674,11 @@ void BKE_scopes_update(Scopes *scopes, divb = nb ? 1.0 / (double)nb : 1.0; for (a = 0; a < 256; a++) { - scopes->hist.data_luma[a] = bin_lum[a] * divl; - scopes->hist.data_r[a] = bin_r[a] * divr; - scopes->hist.data_g[a] = bin_g[a] * divg; - scopes->hist.data_b[a] = bin_b[a] * divb; - scopes->hist.data_a[a] = bin_a[a] * diva; + scopes->hist.data_luma[a] = data_chunk.bin_lum[a] * divl; + scopes->hist.data_r[a] = data_chunk.bin_r[a] * divr; + scopes->hist.data_g[a] = data_chunk.bin_g[a] * divg; + scopes->hist.data_b[a] = data_chunk.bin_b[a] * divb; + scopes->hist.data_a[a] = data_chunk.bin_a[a] * diva; } if (cm_processor) { diff --git a/source/blender/blenkernel/intern/dynamicpaint.c b/source/blender/blenkernel/intern/dynamicpaint.c index 4c78c88d168..3a8b846a41d 100644 --- a/source/blender/blenkernel/intern/dynamicpaint.c +++ b/source/blender/blenkernel/intern/dynamicpaint.c @@ -653,15 +653,15 @@ static void grid_bound_insert_cb_ex(void *__restrict userdata, boundInsert(grid_bound, bData->realCoord[bData->s_pos[i]].v); } -static void grid_bound_insert_finalize(void *__restrict userdata, void *__restrict userdata_chunk) +static void grid_bound_insert_reduce(const void *__restrict UNUSED(userdata), + void *__restrict chunk_join, + void *__restrict chunk) { - PaintBakeData *bData = userdata; - VolumeGrid *grid = bData->grid; - - Bounds3D *grid_bound = userdata_chunk; + Bounds3D *join = chunk_join; + Bounds3D *grid_bound = chunk; - boundInsert(&grid->grid_bounds, grid_bound->min); - boundInsert(&grid->grid_bounds, grid_bound->max); + boundInsert(join, grid_bound->min); + boundInsert(join, grid_bound->max); } static void grid_cell_points_cb_ex(void *__restrict userdata, @@ -685,17 +685,20 @@ static void grid_cell_points_cb_ex(void *__restrict userdata, s_num[temp_t_index[i]]++; } -static void grid_cell_points_finalize(void *__restrict userdata, void *__restrict userdata_chunk) +static void grid_cell_points_reduce(const void *__restrict userdata, + void *__restrict chunk_join, + void *__restrict chunk) { - PaintBakeData *bData = userdata; - VolumeGrid *grid = bData->grid; + const PaintBakeData *bData = userdata; + const VolumeGrid *grid = bData->grid; const int grid_cells = grid->dim[0] * grid->dim[1] * grid->dim[2]; - int *s_num = userdata_chunk; + int *join_s_num = chunk_join; + int *s_num = chunk; /* calculate grid indexes */ for (int i = 0; i < grid_cells; i++) { - grid->s_num[i] += s_num[i]; + join_s_num[i] += s_num[i]; } } @@ -753,7 +756,7 @@ static void surfaceGenerateGrid(struct DynamicPaintSurface *surface) settings.use_threading = (sData->total_points > 1000); settings.userdata_chunk = &grid->grid_bounds; settings.userdata_chunk_size = sizeof(grid->grid_bounds); - settings.func_finalize = grid_bound_insert_finalize; + settings.func_reduce = grid_bound_insert_reduce; BLI_task_parallel_range(0, sData->total_points, bData, grid_bound_insert_cb_ex, &settings); } /* get dimensions */ @@ -814,7 +817,7 @@ static void surfaceGenerateGrid(struct DynamicPaintSurface *surface) settings.use_threading = (sData->total_points > 1000); settings.userdata_chunk = grid->s_num; settings.userdata_chunk_size = sizeof(*grid->s_num) * grid_cells; - settings.func_finalize = grid_cell_points_finalize; + settings.func_reduce = grid_cell_points_reduce; BLI_task_parallel_range(0, sData->total_points, bData, grid_cell_points_cb_ex, &settings); } @@ -4880,7 +4883,7 @@ static void dynamicPaint_prepareAdjacencyData(DynamicPaintSurface *surface, cons 0, sData->total_points, sData, dynamic_paint_prepare_adjacency_cb, &settings); /* calculate average values (single thread). - * Note: tried to put this in threaded callback (using _finalize feature), + * Note: tried to put this in threaded callback (using _reduce feature), * but gave ~30% slower result! */ bData->average_dist = 0.0; for (index = 0; index < sData->total_points; index++) { diff --git a/source/blender/blenkernel/intern/particle_system.c b/source/blender/blenkernel/intern/particle_system.c index 5ef2f7aeeff..14b1ef7b87f 100644 --- a/source/blender/blenkernel/intern/particle_system.c +++ b/source/blender/blenkernel/intern/particle_system.c @@ -3692,10 +3692,11 @@ typedef struct DynamicStepSolverTaskData { SpinLock spin; } DynamicStepSolverTaskData; -static void dynamics_step_finalize_sphdata(void *__restrict UNUSED(userdata), - void *__restrict tls_userdata_chunk) +static void dynamics_step_sphdata_reduce(const void *__restrict UNUSED(userdata), + void *__restrict UNUSED(join_v), + void *__restrict chunk_v) { - SPHData *sphdata = tls_userdata_chunk; + SPHData *sphdata = chunk_v; psys_sph_flush_springs(sphdata); } @@ -3986,7 +3987,7 @@ static void dynamics_step(ParticleSimulationData *sim, float cfra) settings.use_threading = (psys->totpart > 100); settings.userdata_chunk = &sphdata; settings.userdata_chunk_size = sizeof(sphdata); - settings.func_finalize = dynamics_step_finalize_sphdata; + settings.func_reduce = dynamics_step_sphdata_reduce; BLI_task_parallel_range( 0, psys->totpart, &task_data, dynamics_step_sph_ddr_task_cb_ex, &settings); @@ -4018,7 +4019,7 @@ static void dynamics_step(ParticleSimulationData *sim, float cfra) settings.use_threading = (psys->totpart > 100); settings.userdata_chunk = &sphdata; settings.userdata_chunk_size = sizeof(sphdata); - settings.func_finalize = dynamics_step_finalize_sphdata; + settings.func_reduce = dynamics_step_sphdata_reduce; BLI_task_parallel_range(0, psys->totpart, &task_data, @@ -4033,7 +4034,7 @@ static void dynamics_step(ParticleSimulationData *sim, float cfra) settings.use_threading = (psys->totpart > 100); settings.userdata_chunk = &sphdata; settings.userdata_chunk_size = sizeof(sphdata); - settings.func_finalize = dynamics_step_finalize_sphdata; + settings.func_reduce = dynamics_step_sphdata_reduce; BLI_task_parallel_range(0, psys->totpart, &task_data, @@ -4189,7 +4190,7 @@ static void particles_fluid_step(ParticleSimulationData *sim, ParticleSettings *part = psys->part; ParticleData *pa = NULL; - int p, totpart, tottypepart = 0; + int p, totpart = 0, tottypepart = 0; int flagActivePart, activeParts = 0; float posX, posY, posZ, velX, velY, velZ; float resX, resY, resZ; diff --git a/source/blender/blenkernel/intern/subdiv_ccg.c b/source/blender/blenkernel/intern/subdiv_ccg.c index 521aeb60e66..d99c41eaa3e 100644 --- a/source/blender/blenkernel/intern/subdiv_ccg.c +++ b/source/blender/blenkernel/intern/subdiv_ccg.c @@ -770,8 +770,8 @@ static void subdiv_ccg_recalc_inner_normal_task(void *__restrict userdata_v, subdiv_ccg_average_inner_face_normals(data->subdiv_ccg, data->key, tls, grid_index); } -static void subdiv_ccg_recalc_inner_normal_finalize(void *__restrict UNUSED(userdata), - void *__restrict tls_v) +static void subdiv_ccg_recalc_inner_normal_free(const void *__restrict UNUSED(userdata), + void *__restrict tls_v) { RecalcInnerNormalsTLSData *tls = tls_v; MEM_SAFE_FREE(tls->face_normals); @@ -791,7 +791,7 @@ static void subdiv_ccg_recalc_inner_grid_normals(SubdivCCG *subdiv_ccg) BLI_parallel_range_settings_defaults(¶llel_range_settings); parallel_range_settings.userdata_chunk = &tls_data; parallel_range_settings.userdata_chunk_size = sizeof(tls_data); - parallel_range_settings.func_finalize = subdiv_ccg_recalc_inner_normal_finalize; + parallel_range_settings.func_free = subdiv_ccg_recalc_inner_normal_free; BLI_task_parallel_range(0, subdiv_ccg->num_grids, &data, @@ -834,8 +834,8 @@ static void subdiv_ccg_recalc_modified_inner_normal_task(void *__restrict userda subdiv_ccg_average_inner_face_grids(subdiv_ccg, key, face); } -static void subdiv_ccg_recalc_modified_inner_normal_finalize(void *__restrict UNUSED(userdata), - void *__restrict tls_v) +static void subdiv_ccg_recalc_modified_inner_normal_free(const void *__restrict UNUSED(userdata), + void *__restrict tls_v) { RecalcInnerNormalsTLSData *tls = tls_v; MEM_SAFE_FREE(tls->face_normals); @@ -857,7 +857,7 @@ static void subdiv_ccg_recalc_modified_inner_grid_normals(SubdivCCG *subdiv_ccg, BLI_parallel_range_settings_defaults(¶llel_range_settings); parallel_range_settings.userdata_chunk = &tls_data; parallel_range_settings.userdata_chunk_size = sizeof(tls_data); - parallel_range_settings.func_finalize = subdiv_ccg_recalc_modified_inner_normal_finalize; + parallel_range_settings.func_free = subdiv_ccg_recalc_modified_inner_normal_free; BLI_task_parallel_range(0, num_effected_faces, &data, @@ -1077,8 +1077,8 @@ static void subdiv_ccg_average_grids_boundaries_task(void *__restrict userdata_v subdiv_ccg_average_grids_boundary(subdiv_ccg, key, adjacent_edge, tls); } -static void subdiv_ccg_average_grids_boundaries_finalize(void *__restrict UNUSED(userdata), - void *__restrict tls_v) +static void subdiv_ccg_average_grids_boundaries_free(const void *__restrict UNUSED(userdata), + void *__restrict tls_v) { AverageGridsBoundariesTLSData *tls = tls_v; MEM_SAFE_FREE(tls->accumulators); @@ -1136,7 +1136,7 @@ static void subdiv_ccg_average_all_boundaries(SubdivCCG *subdiv_ccg, CCGKey *key AverageGridsBoundariesTLSData tls_data = {NULL}; parallel_range_settings.userdata_chunk = &tls_data; parallel_range_settings.userdata_chunk_size = sizeof(tls_data); - parallel_range_settings.func_finalize = subdiv_ccg_average_grids_boundaries_finalize; + parallel_range_settings.func_free = subdiv_ccg_average_grids_boundaries_free; BLI_task_parallel_range(0, subdiv_ccg->num_adjacent_edges, &boundaries_data, diff --git a/source/blender/blenkernel/intern/subdiv_foreach.c b/source/blender/blenkernel/intern/subdiv_foreach.c index b31fb2c9312..0884f40952f 100644 --- a/source/blender/blenkernel/intern/subdiv_foreach.c +++ b/source/blender/blenkernel/intern/subdiv_foreach.c @@ -1838,9 +1838,9 @@ static void subdiv_foreach_boundary_edges_task(void *__restrict userdata, subdiv_foreach_boundary_edges(ctx, tls->userdata_chunk, edge_index); } -static void subdiv_foreach_finalize(void *__restrict userdata, void *__restrict userdata_chunk) +static void subdiv_foreach_free(const void *__restrict userdata, void *__restrict userdata_chunk) { - SubdivForeachTaskContext *ctx = userdata; + const SubdivForeachTaskContext *ctx = userdata; ctx->foreach_context->user_data_tls_free(userdata_chunk); } @@ -1873,7 +1873,7 @@ bool BKE_subdiv_foreach_subdiv_geometry(Subdiv *subdiv, parallel_range_settings.userdata_chunk_size = context->user_data_tls_size; parallel_range_settings.min_iter_per_thread = 1; if (context->user_data_tls_free != NULL) { - parallel_range_settings.func_finalize = subdiv_foreach_finalize; + parallel_range_settings.func_free = subdiv_foreach_free; } /* TODO(sergey): Possible optimization is to have a single pool and push all diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h index 898e4be5f92..78370e98202 100644 --- a/source/blender/blenlib/BLI_task.h +++ b/source/blender/blenlib/BLI_task.h @@ -146,12 +146,14 @@ typedef struct TaskParallelTLS { void *userdata_chunk; } TaskParallelTLS; -typedef void (*TaskParallelFinalizeFunc)(void *__restrict userdata, - void *__restrict userdata_chunk); - typedef void (*TaskParallelRangeFunc)(void *__restrict userdata, const int iter, const TaskParallelTLS *__restrict tls); +typedef void (*TaskParallelReduceFunc)(const void *__restrict userdata, + void *__restrict chunk_join, + void *__restrict chunk); + +typedef void (*TaskParallelFreeFunc)(const void *__restrict userdata, void *__restrict chunk); typedef struct TaskParallelSettings { /* Whether caller allows to do threading of the particular range. @@ -171,7 +173,13 @@ typedef struct TaskParallelSettings { /* Function called from calling thread once whole range have been * processed. */ - TaskParallelFinalizeFunc func_finalize; + /* Function called to join user data chunk into another, to reduce + * the result to the original userdata_chunk memory. + * The reduce functions should have no side effects, so that they + * can be run on any thread. */ + TaskParallelReduceFunc func_reduce; + /* Function called to free data created by TaskParallelRangeFunc. */ + TaskParallelFreeFunc func_free; /* Minimum allowed number of range iterators to be handled by a single * thread. This allows to achieve following: * - Reduce amount of threading overhead. diff --git a/source/blender/blenlib/intern/task_iterator.c b/source/blender/blenlib/intern/task_iterator.c index 4ac012fa578..0d1c96612e3 100644 --- a/source/blender/blenlib/intern/task_iterator.c +++ b/source/blender/blenlib/intern/task_iterator.c @@ -78,8 +78,13 @@ typedef struct TaskParallelRangeState { /* Number of 'tls' copies in the array, i.e. number of worker threads. */ size_t num_elements_in_tls_storage; - /* Function called from calling thread once whole range have been processed. */ - TaskParallelFinalizeFunc func_finalize; + /* Function called to join user data chunk into another, to reduce + * the result to the original userdata_chunk memory. + * The reduce functions should have no side effects, so that they + * can be run on any thread. */ + TaskParallelReduceFunc func_reduce; + /* Function called to free data created by TaskParallelRangeFunc. */ + TaskParallelFreeFunc func_free; /* Current value of the iterator, shared between all threads (atomically updated). */ int iter_value; @@ -256,23 +261,18 @@ static void parallel_range_single_thread(TaskParallelRangePool *range_pool) void *initial_tls_memory = state->initial_tls_memory; const size_t tls_data_size = state->tls_data_size; - void *flatten_tls_storage = NULL; const bool use_tls_data = (tls_data_size != 0) && (initial_tls_memory != NULL); - if (use_tls_data) { - flatten_tls_storage = MALLOCA(tls_data_size); - memcpy(flatten_tls_storage, initial_tls_memory, tls_data_size); - } TaskParallelTLS tls = { .thread_id = 0, - .userdata_chunk = flatten_tls_storage, + .userdata_chunk = initial_tls_memory, }; for (int i = start; i < stop; i++) { func(userdata, i, &tls); } - if (state->func_finalize != NULL) { - state->func_finalize(userdata, flatten_tls_storage); + if (use_tls_data && state->func_free != NULL) { + /* `func_free` should only free data that was created during execution of `func`. */ + state->func_free(userdata, initial_tls_memory); } - MALLOCA_FREE(flatten_tls_storage, tls_data_size); } } @@ -303,7 +303,7 @@ void BLI_task_parallel_range(const int start, .iter_value = start, .initial_tls_memory = settings->userdata_chunk, .tls_data_size = settings->userdata_chunk_size, - .func_finalize = settings->func_finalize, + .func_free = settings->func_free, }; TaskParallelRangePool range_pool = { .pool = NULL, .parallel_range_states = &state, .current_state = NULL, .settings = settings}; @@ -367,11 +367,15 @@ void BLI_task_parallel_range(const int start, BLI_task_pool_work_and_wait(task_pool); BLI_task_pool_free(task_pool); - if (use_tls_data) { - if (settings->func_finalize != NULL) { - for (i = 0; i < num_tasks; i++) { - void *userdata_chunk_local = (char *)flatten_tls_storage + (tls_data_size * (size_t)i); - settings->func_finalize(userdata, userdata_chunk_local); + if (use_tls_data && (settings->func_free != NULL || settings->func_reduce != NULL)) { + for (i = 0; i < num_tasks; i++) { + void *userdata_chunk_local = (char *)flatten_tls_storage + (tls_data_size * (size_t)i); + if (settings->func_reduce) { + settings->func_reduce(userdata, tls_data, userdata_chunk_local); + } + if (settings->func_free) { + /* `func_free` should only free data that was created during execution of `func`. */ + settings->func_free(userdata, userdata_chunk_local); } } MALLOCA_FREE(flatten_tls_storage, tls_data_size * (size_t)num_tasks); @@ -382,16 +386,17 @@ void BLI_task_parallel_range(const int start, * Initialize a task pool to parallelize several for loops at the same time. * * See public API doc of ParallelRangeSettings for description of all settings. - * Note that loop-specific settings (like 'tls' data or finalize function) must be left NULL here. - * Only settings controlling how iteration is parallelized must be defined, as those will affect - * all loops added to that pool. + * Note that loop-specific settings (like 'tls' data or reduce/free functions) must be left NULL + * here. Only settings controlling how iteration is parallelized must be defined, as those will + * affect all loops added to that pool. */ TaskParallelRangePool *BLI_task_parallel_range_pool_init(const TaskParallelSettings *settings) { TaskParallelRangePool *range_pool = MEM_callocN(sizeof(*range_pool), __func__); BLI_assert(settings->userdata_chunk == NULL); - BLI_assert(settings->func_finalize == NULL); + BLI_assert(settings->func_reduce == NULL); + BLI_assert(settings->func_free == NULL); range_pool->settings = MEM_mallocN(sizeof(*range_pool->settings), __func__); *range_pool->settings = *settings; @@ -430,7 +435,8 @@ void BLI_task_parallel_range_pool_push(TaskParallelRangePool *range_pool, state->iter_value = start; state->initial_tls_memory = settings->userdata_chunk; state->tls_data_size = settings->userdata_chunk_size; - state->func_finalize = settings->func_finalize; + state->func_reduce = settings->func_reduce; + state->func_free = settings->func_free; state->next = range_pool->parallel_range_states; range_pool->parallel_range_states = state; @@ -445,7 +451,13 @@ static void parallel_range_func_finalize(TaskPool *__restrict pool, for (int i = 0; i < range_pool->num_tasks; i++) { void *tls_data = (char *)state->flatten_tls_storage + (state->tls_data_size * (size_t)i); - state->func_finalize(state->userdata_shared, tls_data); + if (state->func_reduce != NULL) { + state->func_reduce(state->userdata_shared, state->initial_tls_memory, tls_data); + } + if (state->func_free != NULL) { + /* `func_free` should only free data that was created during execution of `func`. */ + state->func_free(state->userdata_shared, tls_data); + } } } @@ -531,7 +543,7 @@ void BLI_task_parallel_range_pool_work_and_wait(TaskParallelRangePool *range_poo continue; } - if (state->func_finalize != NULL) { + if (state->func_reduce != NULL || state->func_free != NULL) { BLI_task_pool_push_from_thread( task_pool, parallel_range_func_finalize, state, false, NULL, thread_id); } @@ -677,11 +689,9 @@ static void task_parallel_iterator_no_threads(const TaskParallelSettings *settin parallel_iterator_func_do(state, userdata_chunk, 0); - if (use_userdata_chunk) { - if (settings->func_finalize != NULL) { - settings->func_finalize(state->userdata, userdata_chunk_local); - } - MALLOCA_FREE(userdata_chunk_local, userdata_chunk_size); + if (use_userdata_chunk && settings->func_free != NULL) { + /* `func_free` should only free data that was created during execution of `func`. */ + settings->func_free(state->userdata, userdata_chunk_local); } } @@ -740,11 +750,14 @@ static void task_parallel_iterator_do(const TaskParallelSettings *settings, BLI_task_pool_work_and_wait(task_pool); BLI_task_pool_free(task_pool); - if (use_userdata_chunk) { - if (settings->func_finalize != NULL) { - for (size_t i = 0; i < num_tasks; i++) { - userdata_chunk_local = (char *)userdata_chunk_array + (userdata_chunk_size * i); - settings->func_finalize(state->userdata, userdata_chunk_local); + if (use_userdata_chunk && (settings->func_reduce != NULL || settings->func_free != NULL)) { + for (size_t i = 0; i < num_tasks; i++) { + userdata_chunk_local = (char *)userdata_chunk_array + (userdata_chunk_size * i); + if (settings->func_reduce != NULL) { + settings->func_reduce(state->userdata, userdata_chunk, userdata_chunk_local); + } + if (settings->func_free != NULL) { + settings->func_free(state->userdata, userdata_chunk_local); } } MALLOCA_FREE(userdata_chunk_array, userdata_chunk_size * num_tasks); diff --git a/source/blender/editors/physics/particle_edit.c b/source/blender/editors/physics/particle_edit.c index 75ae0299318..3ce5aeedae8 100644 --- a/source/blender/editors/physics/particle_edit.c +++ b/source/blender/editors/physics/particle_edit.c @@ -4084,7 +4084,6 @@ typedef struct BrushAddCountIterData { short size; float imat[4][4]; ParticleData *add_pars; - int num_added; } BrushAddCountIterData; typedef struct BrushAddCountIterTLSData { @@ -4176,12 +4175,19 @@ static void brush_add_count_iter(void *__restrict iter_data_v, } } -static void brush_add_count_iter_finalize(void *__restrict userdata_v, - void *__restrict userdata_chunk_v) +static void brush_add_count_iter_reduce(const void *__restrict UNUSED(userdata), + void *__restrict join_v, + void *__restrict chunk_v) +{ + BrushAddCountIterTLSData *join = (BrushAddCountIterTLSData *)join_v; + BrushAddCountIterTLSData *tls = (BrushAddCountIterTLSData *)chunk_v; + join->num_added += tls->num_added; +} + +static void brush_add_count_iter_free(const void *__restrict UNUSED(userdata_v), + void *__restrict chunk_v) { - BrushAddCountIterData *iter_data = (BrushAddCountIterData *)userdata_v; - BrushAddCountIterTLSData *tls = (BrushAddCountIterTLSData *)userdata_chunk_v; - iter_data->num_added += tls->num_added; + BrushAddCountIterTLSData *tls = (BrushAddCountIterTLSData *)chunk_v; if (tls->rng != NULL) { BLI_rng_free(tls->rng); } @@ -4245,7 +4251,6 @@ static int brush_add(const bContext *C, PEData *data, short number) iter_data.number = number; iter_data.size = size; iter_data.add_pars = add_pars; - iter_data.num_added = 0; copy_m4_m4(iter_data.imat, imat); BrushAddCountIterTLSData tls = {NULL}; @@ -4255,13 +4260,14 @@ static int brush_add(const bContext *C, PEData *data, short number) settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC; settings.userdata_chunk = &tls; settings.userdata_chunk_size = sizeof(BrushAddCountIterTLSData); - settings.func_finalize = brush_add_count_iter_finalize; + settings.func_reduce = brush_add_count_iter_reduce; + settings.func_free = brush_add_count_iter_free; BLI_task_parallel_range(0, number, &iter_data, brush_add_count_iter, &settings); /* Convert add_parse to a dense array, where all new particles are in the * beginning of the array. */ - n = iter_data.num_added; + n = tls.num_added; for (int current_iter = 0, new_index = 0; current_iter < number; current_iter++) { if (add_pars[current_iter].num == DMCACHE_NOTFOUND) { continue; diff --git a/source/blender/editors/space_sequencer/sequencer_scopes.c b/source/blender/editors/space_sequencer/sequencer_scopes.c index b06bc7ad8b7..243a6e193eb 100644 --- a/source/blender/editors/space_sequencer/sequencer_scopes.c +++ b/source/blender/editors/space_sequencer/sequencer_scopes.c @@ -447,7 +447,6 @@ static void draw_histogram_bar(ImBuf *ibuf, int x, float val, int col) typedef struct MakeHistogramViewData { const ImBuf *ibuf; - uint32_t (*bins)[HIS_STEPS]; } MakeHistogramViewData; static void make_histogram_view_from_ibuf_byte_fn(void *__restrict userdata, @@ -469,17 +468,16 @@ static void make_histogram_view_from_ibuf_byte_fn(void *__restrict userdata, } } -static void make_histogram_view_from_ibuf_finalize(void *__restrict userdata, - void *__restrict userdata_chunk) +static void make_histogram_view_from_ibuf_reduce(const void *__restrict UNUSED(userdata), + void *__restrict chunk_join, + void *__restrict chunk) { - MakeHistogramViewData *data = userdata; - uint32_t(*bins)[HIS_STEPS] = data->bins; - - uint32_t(*cur_bins)[HIS_STEPS] = userdata_chunk; + uint32_t(*join_bins)[HIS_STEPS] = chunk_join; + uint32_t(*bins)[HIS_STEPS] = chunk; for (int j = 3; j--;) { for (int i = 0; i < HIS_STEPS; i++) { - bins[j][i] += cur_bins[j][i]; + join_bins[j][i] += bins[j][i]; } } } @@ -496,14 +494,13 @@ static ImBuf *make_histogram_view_from_ibuf_byte(ImBuf *ibuf) MakeHistogramViewData data = { .ibuf = ibuf, - .bins = bins, }; TaskParallelSettings settings; BLI_parallel_range_settings_defaults(&settings); settings.use_threading = (ibuf->y >= 256); settings.userdata_chunk = bins; settings.userdata_chunk_size = sizeof(bins); - settings.func_finalize = make_histogram_view_from_ibuf_finalize; + settings.func_reduce = make_histogram_view_from_ibuf_reduce; BLI_task_parallel_range(0, ibuf->y, &data, make_histogram_view_from_ibuf_byte_fn, &settings); nr = nb = ng = 0; @@ -582,14 +579,13 @@ static ImBuf *make_histogram_view_from_ibuf_float(ImBuf *ibuf) MakeHistogramViewData data = { .ibuf = ibuf, - .bins = bins, }; TaskParallelSettings settings; BLI_parallel_range_settings_defaults(&settings); settings.use_threading = (ibuf->y >= 256); settings.userdata_chunk = bins; settings.userdata_chunk_size = sizeof(bins); - settings.func_finalize = make_histogram_view_from_ibuf_finalize; + settings.func_reduce = make_histogram_view_from_ibuf_reduce; BLI_task_parallel_range(0, ibuf->y, &data, make_histogram_view_from_ibuf_float_fn, &settings); nr = nb = ng = 0; |