diff options
33 files changed, 1249 insertions, 600 deletions
diff --git a/source/blender/blenkernel/intern/CCGSubSurf_legacy.c b/source/blender/blenkernel/intern/CCGSubSurf_legacy.c index f68d1a2697c..363b7cff453 100644 --- a/source/blender/blenkernel/intern/CCGSubSurf_legacy.c +++ b/source/blender/blenkernel/intern/CCGSubSurf_legacy.c @@ -136,7 +136,8 @@ typedef struct CCGSubSurfCalcSubdivData { int curLvl; } CCGSubSurfCalcSubdivData; -static void ccgSubSurf__calcVertNormals_faces_accumulate_cb(void *userdata, int ptrIdx) +static void ccgSubSurf__calcVertNormals_faces_accumulate_cb(void *userdata, int ptrIdx, + const ParallelRangeTLS *UNUSED(tls)) { CCGSubSurfCalcSubdivData *data = userdata; @@ -227,7 +228,8 @@ static void ccgSubSurf__calcVertNormals_faces_accumulate_cb(void *userdata, int } } -static void ccgSubSurf__calcVertNormals_faces_finalize_cb(void *userdata, int ptrIdx) +static void ccgSubSurf__calcVertNormals_faces_finalize_cb(void *userdata, int ptrIdx, + const ParallelRangeTLS *UNUSED(tls)) { CCGSubSurfCalcSubdivData *data = userdata; @@ -265,7 +267,8 @@ static void ccgSubSurf__calcVertNormals_faces_finalize_cb(void *userdata, int pt } } -static void ccgSubSurf__calcVertNormals_edges_accumulate_cb(void *userdata, int ptrIdx) +static void ccgSubSurf__calcVertNormals_edges_accumulate_cb(void *userdata, int ptrIdx, + const ParallelRangeTLS *UNUSED(tls)) { CCGSubSurfCalcSubdivData *data = userdata; @@ -328,10 +331,15 @@ static void ccgSubSurf__calcVertNormals(CCGSubSurf *ss, .numEffectedF = numEffectedF }; - BLI_task_parallel_range(0, numEffectedF, - &data, - ccgSubSurf__calcVertNormals_faces_accumulate_cb, - numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT); + BLI_task_parallel_range(0, numEffectedF, + &data, + ccgSubSurf__calcVertNormals_faces_accumulate_cb, + &settings); + } /* XXX can I reduce the number of normalisations here? */ for (ptrIdx = 0; ptrIdx < numEffectedV; ptrIdx++) { @@ -357,15 +365,25 @@ static void ccgSubSurf__calcVertNormals(CCGSubSurf *ss, } } - BLI_task_parallel_range(0, numEffectedE, - &data, - ccgSubSurf__calcVertNormals_edges_accumulate_cb, - numEffectedE * edgeSize * 4 >= CCG_OMP_LIMIT); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (numEffectedE * edgeSize * 4 >= CCG_OMP_LIMIT); + BLI_task_parallel_range(0, numEffectedE, + &data, + ccgSubSurf__calcVertNormals_edges_accumulate_cb, + &settings); + } - BLI_task_parallel_range(0, numEffectedF, - &data, - ccgSubSurf__calcVertNormals_faces_finalize_cb, - numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT); + BLI_task_parallel_range(0, numEffectedF, + &data, + ccgSubSurf__calcVertNormals_faces_finalize_cb, + &settings); + } for (ptrIdx = 0; ptrIdx < numEffectedE; ptrIdx++) { CCGEdge *e = (CCGEdge *) effectedE[ptrIdx]; @@ -396,7 +414,8 @@ static void ccgSubSurf__calcVertNormals(CCGSubSurf *ss, } -static void ccgSubSurf__calcSubdivLevel_interior_faces_edges_midpoints_cb(void *userdata, int ptrIdx) +static void ccgSubSurf__calcSubdivLevel_interior_faces_edges_midpoints_cb(void *userdata, int ptrIdx, + const ParallelRangeTLS *UNUSED(tls)) { CCGSubSurfCalcSubdivData *data = userdata; @@ -483,7 +502,8 @@ static void ccgSubSurf__calcSubdivLevel_interior_faces_edges_midpoints_cb(void * } } -static void ccgSubSurf__calcSubdivLevel_interior_faces_edges_centerpoints_shift_cb(void *userdata, int ptrIdx) +static void ccgSubSurf__calcSubdivLevel_interior_faces_edges_centerpoints_shift_cb(void *userdata, int ptrIdx, + const ParallelRangeTLS *UNUSED(tls)) { CCGSubSurfCalcSubdivData *data = userdata; @@ -588,7 +608,8 @@ static void ccgSubSurf__calcSubdivLevel_interior_faces_edges_centerpoints_shift_ } } -static void ccgSubSurf__calcSubdivLevel_verts_copydata_cb(void *userdata, int ptrIdx) +static void ccgSubSurf__calcSubdivLevel_verts_copydata_cb(void *userdata, int ptrIdx, + const ParallelRangeTLS *UNUSED(tls)) { CCGSubSurfCalcSubdivData *data = userdata; @@ -647,10 +668,15 @@ static void ccgSubSurf__calcSubdivLevel( .curLvl = curLvl }; - BLI_task_parallel_range(0, numEffectedF, - &data, - ccgSubSurf__calcSubdivLevel_interior_faces_edges_midpoints_cb, - numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT); + BLI_task_parallel_range(0, numEffectedF, + &data, + ccgSubSurf__calcSubdivLevel_interior_faces_edges_midpoints_cb, + &settings); + } /* exterior edge midpoints * - old exterior edge points @@ -925,10 +951,15 @@ static void ccgSubSurf__calcSubdivLevel( } } - BLI_task_parallel_range(0, numEffectedF, - &data, - ccgSubSurf__calcSubdivLevel_interior_faces_edges_centerpoints_shift_cb, - numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT); + BLI_task_parallel_range(0, numEffectedF, + &data, + ccgSubSurf__calcSubdivLevel_interior_faces_edges_centerpoints_shift_cb, + &settings); + } /* copy down */ edgeSize = ccg_edgesize(nextLvl); @@ -940,10 +971,15 @@ static void ccgSubSurf__calcSubdivLevel( VertDataCopy(EDGE_getCo(e, nextLvl, edgeSize - 1), VERT_getCo(e->v1, nextLvl), ss); } - BLI_task_parallel_range(0, numEffectedF, - &data, - ccgSubSurf__calcSubdivLevel_verts_copydata_cb, - numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT); + BLI_task_parallel_range(0, numEffectedF, + &data, + ccgSubSurf__calcSubdivLevel_verts_copydata_cb, + &settings); + } } void ccgSubSurf__sync_legacy(CCGSubSurf *ss) diff --git a/source/blender/blenkernel/intern/colortools.c b/source/blender/blenkernel/intern/colortools.c index 310255a15c1..46d06a3ac78 100644 --- a/source/blender/blenkernel/intern/colortools.c +++ b/source/blender/blenkernel/intern/colortools.c @@ -1165,7 +1165,7 @@ typedef struct ScopesUpdateDataChunk { float min[3], max[3]; } ScopesUpdateDataChunk; -static void scopes_update_cb(void *userdata, void *userdata_chunk, const int y, const int UNUSED(threadid)) +static void scopes_update_cb(void *userdata, const int y, const ParallelRangeTLS *tls) { const ScopesUpdateData *data = userdata; @@ -1175,7 +1175,7 @@ static void scopes_update_cb(void *userdata, void *userdata_chunk, const int y, const unsigned char *display_buffer = data->display_buffer; const int ycc_mode = data->ycc_mode; - ScopesUpdateDataChunk *data_chunk = userdata_chunk; + ScopesUpdateDataChunk *data_chunk = tls->userdata_chunk; unsigned int *bin_lum = data_chunk->bin_lum; unsigned int *bin_r = data_chunk->bin_r; unsigned int *bin_g = data_chunk->bin_g; @@ -1387,8 +1387,16 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings * ScopesUpdateDataChunk data_chunk = {{0}}; INIT_MINMAX(data_chunk.min, data_chunk.max); - BLI_task_parallel_range_finalize(0, ibuf->y, &data, &data_chunk, sizeof(data_chunk), - scopes_update_cb, scopes_update_finalize, ibuf->y > 256, false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (ibuf->y > 256); + settings.userdata_chunk = &data_chunk; + settings.userdata_chunk_size = sizeof(data_chunk); + settings.func_finalize = scopes_update_finalize; + BLI_task_parallel_range(0, ibuf->y, + &data, + scopes_update_cb, + &settings); /* test for nicer distribution even - non standard, leave it out for a while */ #if 0 diff --git a/source/blender/blenkernel/intern/dynamicpaint.c b/source/blender/blenkernel/intern/dynamicpaint.c index 8d27fcd2835..0773d069767 100644 --- a/source/blender/blenkernel/intern/dynamicpaint.c +++ b/source/blender/blenkernel/intern/dynamicpaint.c @@ -604,11 +604,11 @@ static void freeGrid(PaintSurfaceData *data) bData->grid = NULL; } -static void grid_bound_insert_cb_ex(void *userdata, void *userdata_chunk, const int i, const int UNUSED(thread_id)) +static void grid_bound_insert_cb_ex(void *userdata, const int i, const ParallelRangeTLS *tls) { PaintBakeData *bData = userdata; - Bounds3D *grid_bound = userdata_chunk; + Bounds3D *grid_bound = tls->userdata_chunk; boundInsert(grid_bound, bData->realCoord[bData->s_pos[i]].v); } @@ -624,12 +624,12 @@ static void grid_bound_insert_finalize(void *userdata, void *userdata_chunk) boundInsert(&grid->grid_bounds, grid_bound->max); } -static void grid_cell_points_cb_ex(void *userdata, void *userdata_chunk, const int i, const int UNUSED(thread_id)) +static void grid_cell_points_cb_ex(void *userdata, const int i, const ParallelRangeTLS *tls) { PaintBakeData *bData = userdata; VolumeGrid *grid = bData->grid; int *temp_t_index = grid->temp_t_index; - int *s_num = userdata_chunk; + int *s_num = tls->userdata_chunk; int co[3]; @@ -657,7 +657,7 @@ static void grid_cell_points_finalize(void *userdata, void *userdata_chunk) } } -static void grid_cell_bounds_cb(void *userdata, const int x) +static void grid_cell_bounds_cb(void *userdata, const int x, const ParallelRangeTLS *UNUSED(tls)) { PaintBakeData *bData = userdata; VolumeGrid *grid = bData->grid; @@ -702,10 +702,19 @@ static void surfaceGenerateGrid(struct DynamicPaintSurface *surface) /* calculate canvas dimensions */ /* Important to init correctly our ref grid_bound... */ boundInsert(&grid->grid_bounds, bData->realCoord[bData->s_pos[0]].v); - BLI_task_parallel_range_finalize( - 0, sData->total_points, bData, &grid->grid_bounds, sizeof(grid->grid_bounds), - grid_bound_insert_cb_ex, grid_bound_insert_finalize, sData->total_points > 1000, false); - + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 1000); + settings.userdata_chunk = &grid->grid_bounds; + settings.userdata_chunk_size = sizeof(grid->grid_bounds); + settings.func_finalize = grid_bound_insert_finalize; + BLI_task_parallel_range( + 0, sData->total_points, + bData, + grid_bound_insert_cb_ex, + &settings); + } /* get dimensions */ sub_v3_v3v3(dim, grid->grid_bounds.max, grid->grid_bounds.min); copy_v3_v3(td, dim); @@ -754,9 +763,19 @@ static void surfaceGenerateGrid(struct DynamicPaintSurface *surface) if (!error) { /* calculate number of points withing each cell */ - BLI_task_parallel_range_finalize( - 0, sData->total_points, bData, grid->s_num, sizeof(*grid->s_num) * grid_cells, - grid_cell_points_cb_ex, grid_cell_points_finalize, sData->total_points > 1000, false); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 1000); + settings.userdata_chunk = grid->s_num; + settings.userdata_chunk_size = sizeof(*grid->s_num) * grid_cells; + settings.func_finalize = grid_cell_points_finalize; + BLI_task_parallel_range( + 0, sData->total_points, + bData, + grid_cell_points_cb_ex, + &settings); + } /* calculate grid indexes (not needed for first cell, which is zero). */ for (i = 1; i < grid_cells; i++) { @@ -772,7 +791,15 @@ static void surfaceGenerateGrid(struct DynamicPaintSurface *surface) } /* calculate cell bounds */ - BLI_task_parallel_range(0, grid->dim[0], bData, grid_cell_bounds_cb, grid_cells > 1000); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (grid_cells > 1000); + BLI_task_parallel_range(0, grid->dim[0], + bData, + grid_cell_bounds_cb, + &settings); + } } if (temp_s_num) @@ -1390,7 +1417,8 @@ typedef struct DynamicPaintSetInitColorData { const bool scene_color_manage; } DynamicPaintSetInitColorData; -static void dynamic_paint_set_init_color_tex_to_vcol_cb(void *userdata, const int i) +static void dynamic_paint_set_init_color_tex_to_vcol_cb(void *userdata, const int i, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintSetInitColorData *data = userdata; @@ -1424,7 +1452,8 @@ static void dynamic_paint_set_init_color_tex_to_vcol_cb(void *userdata, const in } } -static void dynamic_paint_set_init_color_tex_to_imseq_cb(void *userdata, const int i) +static void dynamic_paint_set_init_color_tex_to_imseq_cb(void *userdata, const int i, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintSetInitColorData *data = userdata; @@ -1462,7 +1491,8 @@ static void dynamic_paint_set_init_color_tex_to_imseq_cb(void *userdata, const i pPoint[i].color[3] = texres.tin; } -static void dynamic_paint_set_init_color_vcol_to_imseq_cb(void *userdata, const int i) +static void dynamic_paint_set_init_color_vcol_to_imseq_cb(void *userdata, const int i, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintSetInitColorData *data = userdata; @@ -1540,7 +1570,13 @@ static void dynamicPaint_setInitialColor(const Scene *scene, DynamicPaintSurface .mloop = mloop, .mlooptri = mlooptri, .mloopuv = mloopuv, .pool = pool, .scene_color_manage = scene_color_manage }; - BLI_task_parallel_range(0, tottri, &data, dynamic_paint_set_init_color_tex_to_vcol_cb, tottri > 1000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (tottri > 1000); + BLI_task_parallel_range(0, tottri, + &data, + dynamic_paint_set_init_color_tex_to_vcol_cb, + &settings); BKE_image_pool_free(pool); } else if (surface->format == MOD_DPAINT_SURFACE_F_IMAGESEQ) { @@ -1549,8 +1585,13 @@ static void dynamicPaint_setInitialColor(const Scene *scene, DynamicPaintSurface .mlooptri = mlooptri, .mloopuv = mloopuv, .scene_color_manage = scene_color_manage }; - BLI_task_parallel_range(0, sData->total_points, &data, dynamic_paint_set_init_color_tex_to_imseq_cb, - sData->total_points > 1000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 1000); + BLI_task_parallel_range(0, sData->total_points, + &data, + dynamic_paint_set_init_color_tex_to_imseq_cb, + &settings); } } /* vertex color layer */ @@ -1578,8 +1619,13 @@ static void dynamicPaint_setInitialColor(const Scene *scene, DynamicPaintSurface .surface = surface, .mlooptri = mlooptri, .mloopcol = col, }; - BLI_task_parallel_range(0, sData->total_points, &data, dynamic_paint_set_init_color_vcol_to_imseq_cb, - sData->total_points > 1000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 1000); + BLI_task_parallel_range(0, sData->total_points, + &data, + dynamic_paint_set_init_color_vcol_to_imseq_cb, + &settings); } } } @@ -1666,7 +1712,8 @@ typedef struct DynamicPaintModifierApplyData { MLoopCol *mloopcol_preview; } DynamicPaintModifierApplyData; -static void dynamic_paint_apply_surface_displace_cb(void *userdata, const int i) +static void dynamic_paint_apply_surface_displace_cb(void *userdata, const int i, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintModifierApplyData *data = userdata; @@ -1696,12 +1743,18 @@ static void dynamicPaint_applySurfaceDisplace(DynamicPaintSurface *surface, Deri MVert *mvert = result->getVertArray(result); DynamicPaintModifierApplyData data = {.surface = surface, .mvert = mvert}; - BLI_task_parallel_range(0, sData->total_points, &data, dynamic_paint_apply_surface_displace_cb, - sData->total_points > 10000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 10000); + BLI_task_parallel_range(0, sData->total_points, + &data, + dynamic_paint_apply_surface_displace_cb, + &settings); } } -static void dynamic_paint_apply_surface_vpaint_blend_cb(void *userdata, const int i) +static void dynamic_paint_apply_surface_vpaint_blend_cb(void *userdata, const int i, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintModifierApplyData *data = userdata; @@ -1712,7 +1765,8 @@ static void dynamic_paint_apply_surface_vpaint_blend_cb(void *userdata, const in blendColors(pPoint[i].color, pPoint[i].color[3], pPoint[i].e_color, pPoint[i].e_color[3], fcolor[i]); } -static void dynamic_paint_apply_surface_vpaint_cb(void *userdata, const int p_index) +static void dynamic_paint_apply_surface_vpaint_cb(void *userdata, const int p_index, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintModifierApplyData *data = userdata; Object *ob = data->ob; @@ -1782,7 +1836,8 @@ static void dynamic_paint_apply_surface_vpaint_cb(void *userdata, const int p_in } } -static void dynamic_paint_apply_surface_wave_cb(void *userdata, const int i) +static void dynamic_paint_apply_surface_wave_cb(void *userdata, const int i, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintModifierApplyData *data = userdata; @@ -1829,9 +1884,16 @@ static DerivedMesh *dynamicPaint_Modifier_apply( float (*fcolor)[4] = MEM_callocN(sizeof(*fcolor) * sData->total_points, "Temp paint color"); DynamicPaintModifierApplyData data = {.surface = surface, .fcolor = fcolor}; - BLI_task_parallel_range(0, sData->total_points, &data, - dynamic_paint_apply_surface_vpaint_blend_cb, - sData->total_points > 1000); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 1000); + BLI_task_parallel_range( + 0, sData->total_points, + &data, + dynamic_paint_apply_surface_vpaint_blend_cb, + &settings); + } /* paint layer */ MLoopCol *mloopcol = CustomData_get_layer_named(&result->loopData, CD_MLOOPCOL, surface->output_name); @@ -1866,8 +1928,16 @@ static DerivedMesh *dynamicPaint_Modifier_apply( data.mloopcol_wet = mloopcol_wet; data.mloopcol_preview = mloopcol_preview; - BLI_task_parallel_range(0, totpoly, &data, dynamic_paint_apply_surface_vpaint_cb, - totpoly > 1000); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (totpoly > 1000); + BLI_task_parallel_range( + 0, totpoly, + &data, + dynamic_paint_apply_surface_vpaint_cb, + &settings); + } MEM_freeN(fcolor); @@ -1917,8 +1987,14 @@ static DerivedMesh *dynamicPaint_Modifier_apply( MVert *mvert = result->getVertArray(result); DynamicPaintModifierApplyData data = {.surface = surface, .mvert = mvert}; - BLI_task_parallel_range(0, sData->total_points, &data, dynamic_paint_apply_surface_wave_cb, - sData->total_points > 1000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 1000); + BLI_task_parallel_range( + 0, sData->total_points, + &data, + dynamic_paint_apply_surface_wave_cb, + &settings); update_normals = true; } @@ -2103,7 +2179,8 @@ typedef struct DynamicPaintCreateUVSurfaceData { uint32_t *active_points; } DynamicPaintCreateUVSurfaceData; -static void dynamic_paint_create_uv_surface_direct_cb(void *userdata, const int ty) +static void dynamic_paint_create_uv_surface_direct_cb(void *userdata, const int ty, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintCreateUVSurfaceData *data = userdata; @@ -2200,7 +2277,8 @@ static void dynamic_paint_create_uv_surface_direct_cb(void *userdata, const int } } -static void dynamic_paint_create_uv_surface_neighbor_cb(void *userdata, const int ty) +static void dynamic_paint_create_uv_surface_neighbor_cb(void *userdata, const int ty, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintCreateUVSurfaceData *data = userdata; @@ -2760,7 +2838,15 @@ int dynamicPaint_createUVSurface(Scene *scene, DynamicPaintSurface *surface, flo .mlooptri = mlooptri, .mloopuv = mloopuv, .mloop = mloop, .tottri = tottri, .faceBB = faceBB, }; - BLI_task_parallel_range(0, h, &data, dynamic_paint_create_uv_surface_direct_cb, h > 64 || tottri > 1000); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (h > 64 || tottri > 1000); + BLI_task_parallel_range(0, h, + &data, + dynamic_paint_create_uv_surface_direct_cb, + &settings); + } *progress = 0.04f; *do_update = true; @@ -2772,7 +2858,15 @@ int dynamicPaint_createUVSurface(Scene *scene, DynamicPaintSurface *surface, flo * (To avoid seams on uv island edges) */ data.active_points = &active_points; - BLI_task_parallel_range(0, h, &data, dynamic_paint_create_uv_surface_neighbor_cb, h > 64); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (h > 64); + BLI_task_parallel_range(0, h, + &data, + dynamic_paint_create_uv_surface_neighbor_cb, + &settings); + } *progress = 0.06f; *do_update = true; @@ -2992,7 +3086,8 @@ typedef struct DynamicPaintOutputSurfaceImageData { ImBuf *ibuf; } DynamicPaintOutputSurfaceImageData; -static void dynamic_paint_output_surface_image_paint_cb(void *userdata, const int index) +static void dynamic_paint_output_surface_image_paint_cb(void *userdata, const int index, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintOutputSurfaceImageData *data = userdata; @@ -3012,7 +3107,8 @@ static void dynamic_paint_output_surface_image_paint_cb(void *userdata, const in } } -static void dynamic_paint_output_surface_image_displace_cb(void *userdata, const int index) +static void dynamic_paint_output_surface_image_displace_cb(void *userdata, const int index, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintOutputSurfaceImageData *data = userdata; @@ -3036,7 +3132,8 @@ static void dynamic_paint_output_surface_image_displace_cb(void *userdata, const ibuf->rect_float[pos + 3] = 1.0f; } -static void dynamic_paint_output_surface_image_wave_cb(void *userdata, const int index) +static void dynamic_paint_output_surface_image_wave_cb(void *userdata, const int index, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintOutputSurfaceImageData *data = userdata; @@ -3058,7 +3155,8 @@ static void dynamic_paint_output_surface_image_wave_cb(void *userdata, const int ibuf->rect_float[pos + 3] = 1.0f; } -static void dynamic_paint_output_surface_image_wetmap_cb(void *userdata, const int index) +static void dynamic_paint_output_surface_image_wetmap_cb(void *userdata, const int index, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintOutputSurfaceImageData *data = userdata; @@ -3109,13 +3207,29 @@ void dynamicPaint_outputSurfaceImage(DynamicPaintSurface *surface, char *filenam case MOD_DPAINT_SURFACE_T_PAINT: switch (output_layer) { case 0: - BLI_task_parallel_range(0, sData->total_points, &data, - dynamic_paint_output_surface_image_paint_cb, sData->total_points > 10000); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 10000); + BLI_task_parallel_range( + 0, sData->total_points, + &data, + dynamic_paint_output_surface_image_paint_cb, + &settings); break; + } case 1: - BLI_task_parallel_range(0, sData->total_points, &data, - dynamic_paint_output_surface_image_wetmap_cb, sData->total_points > 10000); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 10000); + BLI_task_parallel_range( + 0, sData->total_points, + &data, + dynamic_paint_output_surface_image_wetmap_cb, + &settings); break; + } default: BLI_assert(0); break; @@ -3124,9 +3238,17 @@ void dynamicPaint_outputSurfaceImage(DynamicPaintSurface *surface, char *filenam case MOD_DPAINT_SURFACE_T_DISPLACE: switch (output_layer) { case 0: - BLI_task_parallel_range(0, sData->total_points, &data, - dynamic_paint_output_surface_image_displace_cb, sData->total_points > 10000); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 10000); + BLI_task_parallel_range( + 0, sData->total_points, + &data, + dynamic_paint_output_surface_image_displace_cb, + &settings); break; + } case 1: break; default: @@ -3137,9 +3259,17 @@ void dynamicPaint_outputSurfaceImage(DynamicPaintSurface *surface, char *filenam case MOD_DPAINT_SURFACE_T_WAVE: switch (output_layer) { case 0: - BLI_task_parallel_range(0, sData->total_points, &data, - dynamic_paint_output_surface_image_wave_cb, sData->total_points > 10000); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 10000); + BLI_task_parallel_range( + 0, sData->total_points, + &data, + dynamic_paint_output_surface_image_wave_cb, + &settings); break; + } case 1: break; default: @@ -3546,7 +3676,8 @@ typedef struct DynamicPaintBrushVelocityData { const float timescale; } DynamicPaintBrushVelocityData; -static void dynamic_paint_brush_velocity_compute_cb(void *userdata, const int i) +static void dynamic_paint_brush_velocity_compute_cb(void *userdata, const int i, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintBrushVelocityData *data = userdata; @@ -3626,7 +3757,13 @@ static void dynamicPaint_brushMeshCalculateVelocity( .mvert_p = mvert_p, .mvert_c = mvert_c, .obmat = ob->obmat, .prev_obmat = prev_obmat, .timescale = timescale, }; - BLI_task_parallel_range(0, numOfVerts_c, &data, dynamic_paint_brush_velocity_compute_cb, numOfVerts_c > 10000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (numOfVerts_c > 10000); + BLI_task_parallel_range(0, numOfVerts_c, + &data, + dynamic_paint_brush_velocity_compute_cb, + &settings); dm_p->release(dm_p); } @@ -3697,7 +3834,7 @@ typedef struct DynamicPaintPaintData { * Paint a brush object mesh to the surface */ static void dynamic_paint_paint_mesh_cell_point_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int id, const int UNUSED(threadid)) + void *userdata, const int id, const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintPaintData *data = userdata; @@ -4115,9 +4252,13 @@ static int dynamicPaint_paintMesh(const struct EvaluationContext *eval_ctx, Dyna .brush_radius = brush_radius, .avg_brushNor = avg_brushNor, .brushVelocity = brushVelocity, .treeData = &treeData }; - BLI_task_parallel_range_ex(0, grid->s_num[c_index], &data, NULL, 0, - dynamic_paint_paint_mesh_cell_point_cb_ex, - grid->s_num[c_index] > 250, true); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (grid->s_num[c_index] > 250); + BLI_task_parallel_range(0, grid->s_num[c_index], + &data, + dynamic_paint_paint_mesh_cell_point_cb_ex, + &settings); } } } @@ -4138,7 +4279,7 @@ static int dynamicPaint_paintMesh(const struct EvaluationContext *eval_ctx, Dyna * Paint a particle system to the surface */ static void dynamic_paint_paint_particle_cell_point_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int id, const int UNUSED(threadid)) + void *userdata, const int id, const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintPaintData *data = userdata; @@ -4400,9 +4541,13 @@ static int dynamicPaint_paintParticles(DynamicPaintSurface *surface, .solidradius = solidradius, .timescale = timescale, .c_index = c_index, .treeData = tree, }; - BLI_task_parallel_range_ex(0, grid->s_num[c_index], &data, NULL, 0, - dynamic_paint_paint_particle_cell_point_cb_ex, - grid->s_num[c_index] > 250, true); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (grid->s_num[c_index] > 250); + BLI_task_parallel_range(0, grid->s_num[c_index], + &data, + dynamic_paint_paint_particle_cell_point_cb_ex, + &settings); } } BLI_end_threaded_malloc(); @@ -4413,7 +4558,7 @@ static int dynamicPaint_paintParticles(DynamicPaintSurface *surface, /* paint a single point of defined proximity radius to the surface */ static void dynamic_paint_paint_single_point_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int index, const int UNUSED(threadid)) + void *userdata, const int index, const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintPaintData *data = userdata; @@ -4545,9 +4690,13 @@ static int dynamicPaint_paintSinglePoint( .brush_radius = brush_radius, .brushVelocity = &brushVel, .pointCoord = pointCoord, }; - BLI_task_parallel_range_ex(0, sData->total_points, &data, NULL, 0, - dynamic_paint_paint_single_point_cb_ex, - sData->total_points > 1000, true); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 1000); + BLI_task_parallel_range(0, sData->total_points, + &data, + dynamic_paint_paint_single_point_cb_ex, + &settings); return 1; } @@ -4559,7 +4708,8 @@ static int dynamicPaint_paintSinglePoint( * Calculate current frame distances and directions for adjacency data */ -static void dynamic_paint_prepare_adjacency_cb(void *userdata, const int index) +static void dynamic_paint_prepare_adjacency_cb(void *userdata, const int index, + const ParallelRangeTLS *UNUSED(tls)) { PaintSurfaceData *sData = userdata; PaintBakeData *bData = sData->bData; @@ -4598,8 +4748,13 @@ static void dynamicPaint_prepareAdjacencyData(DynamicPaintSurface *surface, cons if (!bNeighs) return; - BLI_task_parallel_range( - 0, sData->total_points, sData, dynamic_paint_prepare_adjacency_cb, sData->total_points > 1000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 1000); + BLI_task_parallel_range(0, sData->total_points, + sData, + dynamic_paint_prepare_adjacency_cb, + &settings); /* calculate average values (single thread). * Note: tried to put this in threaded callback (using _finalize feature), but gave ~30% slower result! */ @@ -4786,7 +4941,8 @@ typedef struct DynamicPaintEffectData { * Prepare data required by effects for current frame. * Returns number of steps required */ -static void dynamic_paint_prepare_effect_cb(void *userdata, const int index) +static void dynamic_paint_prepare_effect_cb(void *userdata, const int index, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintEffectData *data = userdata; @@ -4858,8 +5014,13 @@ static int dynamicPaint_prepareEffectStep( .surface = surface, .scene = scene, .force = *force, .effectors = effectors, }; - BLI_task_parallel_range( - 0, sData->total_points, &data, dynamic_paint_prepare_effect_cb, sData->total_points > 1000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 1000); + BLI_task_parallel_range(0, sData->total_points, + &data, + dynamic_paint_prepare_effect_cb, + &settings); /* calculate average values (single thread) */ for (int index = 0; index < sData->total_points; index++) { @@ -4891,7 +5052,8 @@ static int dynamicPaint_prepareEffectStep( /** * Processes active effect step. */ -static void dynamic_paint_effect_spread_cb(void *userdata, const int index) +static void dynamic_paint_effect_spread_cb(void *userdata, const int index, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintEffectData *data = userdata; @@ -4936,7 +5098,8 @@ static void dynamic_paint_effect_spread_cb(void *userdata, const int index) } } -static void dynamic_paint_effect_shrink_cb(void *userdata, const int index) +static void dynamic_paint_effect_shrink_cb(void *userdata, const int index, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintEffectData *data = userdata; @@ -4986,7 +5149,8 @@ static void dynamic_paint_effect_shrink_cb(void *userdata, const int index) } } -static void dynamic_paint_effect_drip_cb(void *userdata, const int index) +static void dynamic_paint_effect_drip_cb(void *userdata, const int index, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintEffectData *data = userdata; @@ -5119,8 +5283,13 @@ static void dynamicPaint_doEffectStep( DynamicPaintEffectData data = { .surface = surface, .prevPoint = prevPoint, .eff_scale = eff_scale, }; - BLI_task_parallel_range( - 0, sData->total_points, &data, dynamic_paint_effect_spread_cb, sData->total_points > 1000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 1000); + BLI_task_parallel_range(0, sData->total_points, + &data, + dynamic_paint_effect_spread_cb, + &settings); } /* @@ -5135,8 +5304,13 @@ static void dynamicPaint_doEffectStep( DynamicPaintEffectData data = { .surface = surface, .prevPoint = prevPoint, .eff_scale = eff_scale, }; - BLI_task_parallel_range( - 0, sData->total_points, &data, dynamic_paint_effect_shrink_cb, sData->total_points > 1000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 1000); + BLI_task_parallel_range(0, sData->total_points, + &data, + dynamic_paint_effect_shrink_cb, + &settings); } /* @@ -5157,14 +5331,20 @@ static void dynamicPaint_doEffectStep( .eff_scale = eff_scale, .force = force, .point_locks = point_locks, }; - BLI_task_parallel_range( - 0, sData->total_points, &data, dynamic_paint_effect_drip_cb, sData->total_points > 1000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 1000); + BLI_task_parallel_range(0, sData->total_points, + &data, + dynamic_paint_effect_drip_cb, + &settings); MEM_freeN(point_locks); } } -static void dynamic_paint_border_cb(void *userdata, const int b_index) +static void dynamic_paint_border_cb(void *userdata, const int b_index, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintEffectData *data = userdata; @@ -5234,11 +5414,17 @@ static void dynamicPaint_doBorderStep(DynamicPaintSurface *surface) .surface = surface }; - BLI_task_parallel_range( - 0, sData->adj_data->total_border, &data, dynamic_paint_border_cb, sData->adj_data->total_border > 1000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->adj_data->total_border > 1000); + BLI_task_parallel_range(0, sData->adj_data->total_border, + &data, + dynamic_paint_border_cb, + &settings); } -static void dynamic_paint_wave_step_cb(void *userdata, const int index) +static void dynamic_paint_wave_step_cb(void *userdata, const int index, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintEffectData *data = userdata; @@ -5380,8 +5566,12 @@ static void dynamicPaint_doWaveStep(DynamicPaintSurface *surface, float timescal .wave_speed = wave_speed, .wave_scale = wave_scale, .wave_max_slope = wave_max_slope, .dt = dt, .min_dist = min_dist, .damp_factor = damp_factor, .reset_wave = (ss == steps - 1), }; - BLI_task_parallel_range( - 0, sData->total_points, &data, dynamic_paint_wave_step_cb, sData->total_points > 1000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 1000); + BLI_task_parallel_range(0, sData->total_points, + &data, dynamic_paint_wave_step_cb, + &settings); } MEM_freeN(prevPoint); @@ -5401,7 +5591,8 @@ typedef struct DynamicPaintDissolveDryData { const float timescale; } DynamicPaintDissolveDryData; -static void dynamic_paint_surface_pre_step_cb(void *userdata, const int index) +static void dynamic_paint_surface_pre_step_cb(void *userdata, const int index, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintDissolveDryData *data = userdata; @@ -5524,7 +5715,8 @@ typedef struct DynamicPaintGenerateBakeData { const bool new_bdata; } DynamicPaintGenerateBakeData; -static void dynamic_paint_generate_bake_data_cb(void *userdata, const int index) +static void dynamic_paint_generate_bake_data_cb(void *userdata, const int index, + const ParallelRangeTLS *UNUSED(tls)) { const DynamicPaintGenerateBakeData *data = userdata; @@ -5728,8 +5920,13 @@ static int dynamicPaint_generateBakeData(DynamicPaintSurface *surface, const Vie .mvert = mvert, .canvas_verts = canvas_verts, .do_velocity_data = do_velocity_data, .new_bdata = new_bdata, }; - BLI_task_parallel_range( - 0, sData->total_points, &data, dynamic_paint_generate_bake_data_cb, sData->total_points > 1000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 1000); + BLI_task_parallel_range(0, sData->total_points, + &data, + dynamic_paint_generate_bake_data_cb, + &settings); MEM_freeN(canvas_verts); @@ -5762,8 +5959,13 @@ static int dynamicPaint_doStep(const struct EvaluationContext *eval_ctx, Scene * if (dynamic_paint_surface_needs_dry_dissolve(surface)) { DynamicPaintDissolveDryData data = {.surface = surface, .timescale = timescale}; - BLI_task_parallel_range(0, sData->total_points, &data, - dynamic_paint_surface_pre_step_cb, sData->total_points > 1000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (sData->total_points > 1000); + BLI_task_parallel_range(0, sData->total_points, + &data, + dynamic_paint_surface_pre_step_cb, + &settings); } /* diff --git a/source/blender/blenkernel/intern/mask_rasterize.c b/source/blender/blenkernel/intern/mask_rasterize.c index 104bb0c07a6..6efcb27c0f5 100644 --- a/source/blender/blenkernel/intern/mask_rasterize.c +++ b/source/blender/blenkernel/intern/mask_rasterize.c @@ -1434,7 +1434,8 @@ typedef struct MaskRasterizeBufferData { float *buffer; } MaskRasterizeBufferData; -static void maskrasterize_buffer_cb(void *userdata, int y) +static void maskrasterize_buffer_cb(void *userdata, int y, + const ParallelRangeTLS *UNUSED(tls)) { MaskRasterizeBufferData *data = userdata; @@ -1474,5 +1475,11 @@ void BKE_maskrasterize_buffer(MaskRasterHandle *mr_handle, .width = width, .buffer = buffer }; - BLI_task_parallel_range(0, (int)height, &data, maskrasterize_buffer_cb, height * width > 10000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((size_t)height * width > 10000); + BLI_task_parallel_range(0, (int)height, + &data, + maskrasterize_buffer_cb, + &settings); } diff --git a/source/blender/blenkernel/intern/mesh_evaluate.c b/source/blender/blenkernel/intern/mesh_evaluate.c index 68283b4a3aa..f822ea836aa 100644 --- a/source/blender/blenkernel/intern/mesh_evaluate.c +++ b/source/blender/blenkernel/intern/mesh_evaluate.c @@ -177,7 +177,8 @@ typedef struct MeshCalcNormalsData { float (*vnors)[3]; } MeshCalcNormalsData; -static void mesh_calc_normals_poly_cb(void *userdata, const int pidx) +static void mesh_calc_normals_poly_cb(void *userdata, const int pidx, + const ParallelRangeTLS *UNUSED(tls)) { MeshCalcNormalsData *data = userdata; const MPoly *mp = &data->mpolys[pidx]; @@ -185,7 +186,8 @@ static void mesh_calc_normals_poly_cb(void *userdata, const int pidx) BKE_mesh_calc_poly_normal(mp, data->mloop + mp->loopstart, data->mverts, data->pnors[pidx]); } -static void mesh_calc_normals_poly_prepare_cb(void *userdata, const int pidx) +static void mesh_calc_normals_poly_prepare_cb(void *userdata, const int pidx, + const ParallelRangeTLS *UNUSED(tls)) { MeshCalcNormalsData *data = userdata; const MPoly *mp = &data->mpolys[pidx]; @@ -247,14 +249,16 @@ static void mesh_calc_normals_poly_prepare_cb(void *userdata, const int pidx) } } -static void mesh_calc_normals_poly_accum_cb(void *userdata, const int lidx) +static void mesh_calc_normals_poly_accum_cb(void *userdata, const int lidx, + const ParallelRangeTLS *UNUSED(tls)) { MeshCalcNormalsData *data = userdata; add_v3_v3(data->vnors[data->mloop[lidx].v], data->lnors_weighted[lidx]); } -static void mesh_calc_normals_poly_finalize_cb(void *userdata, const int vidx) +static void mesh_calc_normals_poly_finalize_cb(void *userdata, const int vidx, + const ParallelRangeTLS *UNUSED(tls)) { MeshCalcNormalsData *data = userdata; @@ -278,6 +282,10 @@ void BKE_mesh_calc_normals_poly( const bool do_threaded = (numPolys > BKE_MESH_OMP_LIMIT); float (*pnors)[3] = r_polynors; + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = do_threaded; + if (only_face_normals) { BLI_assert((pnors != NULL) || (numPolys == 0)); BLI_assert(r_vertnors == NULL); @@ -286,7 +294,7 @@ void BKE_mesh_calc_normals_poly( .mpolys = mpolys, .mloop = mloop, .mverts = mverts, .pnors = pnors, }; - BLI_task_parallel_range(0, numPolys, &data, mesh_calc_normals_poly_cb, do_threaded); + BLI_task_parallel_range(0, numPolys, &data, mesh_calc_normals_poly_cb, &settings); return; } @@ -309,13 +317,13 @@ void BKE_mesh_calc_normals_poly( }; /* Compute poly normals, and prepare weighted loop normals. */ - BLI_task_parallel_range(0, numPolys, &data, mesh_calc_normals_poly_prepare_cb, do_threaded); + BLI_task_parallel_range(0, numPolys, &data, mesh_calc_normals_poly_prepare_cb, &settings); /* Actually accumulate weighted loop normals into vertex ones. */ - BLI_task_parallel_range(0, numLoops, &data, mesh_calc_normals_poly_accum_cb, do_threaded); + BLI_task_parallel_range(0, numLoops, &data, mesh_calc_normals_poly_accum_cb, &settings); /* Normalize and validate computed vertex normals. */ - BLI_task_parallel_range(0, numVerts, &data, mesh_calc_normals_poly_finalize_cb, do_threaded); + BLI_task_parallel_range(0, numVerts, &data, mesh_calc_normals_poly_finalize_cb, &settings); if (free_vnors) { MEM_freeN(vnors); diff --git a/source/blender/blenkernel/intern/ocean.c b/source/blender/blenkernel/intern/ocean.c index 537c8926a5b..478b7ef21ef 100644 --- a/source/blender/blenkernel/intern/ocean.c +++ b/source/blender/blenkernel/intern/ocean.c @@ -502,7 +502,8 @@ typedef struct OceanSimulateData { float chop_amount; } OceanSimulateData; -static void ocean_compute_htilda(void *userdata, const int i) +static void ocean_compute_htilda(void *userdata, const int i, + const ParallelRangeTLS *UNUSED(tls)) { OceanSimulateData *osd = userdata; const Ocean *o = osd->o; @@ -748,7 +749,10 @@ void BKE_ocean_simulate(struct Ocean *o, float t, float scale, float chop_amount * This is not optimal in all cases, but remains reasonably simple and should be OK most of the time. */ /* compute a new htilda */ - BLI_task_parallel_range(0, o->_M, &osd, ocean_compute_htilda, o->_M > 16); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (o->_M > 16); + BLI_task_parallel_range(0, o->_M, &osd, ocean_compute_htilda, &settings); if (o->_do_disp_y) { BLI_task_pool_push(pool, ocean_compute_displacement_y, NULL, false, TASK_PRIORITY_HIGH); diff --git a/source/blender/blenkernel/intern/particle_system.c b/source/blender/blenkernel/intern/particle_system.c index da227514512..f5f175da828 100644 --- a/source/blender/blenkernel/intern/particle_system.c +++ b/source/blender/blenkernel/intern/particle_system.c @@ -3375,14 +3375,14 @@ typedef struct DynamicStepSolverTaskData { } DynamicStepSolverTaskData; static void dynamics_step_sph_ddr_task_cb_ex( - void *userdata, void *userdata_chunk, const int p, const int UNUSED(thread_id)) + void *userdata, const int p, const ParallelRangeTLS *tls) { DynamicStepSolverTaskData *data = userdata; ParticleSimulationData *sim = data->sim; ParticleSystem *psys = sim->psys; ParticleSettings *part = psys->part; - SPHData *sphdata = userdata_chunk; + SPHData *sphdata = tls->userdata_chunk; ParticleData *pa; @@ -3409,7 +3409,7 @@ static void dynamics_step_sph_ddr_task_cb_ex( } static void dynamics_step_sph_classical_basic_integrate_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int p, const int UNUSED(thread_id)) + void *userdata, const int p, const ParallelRangeTLS *UNUSED(tls)) { DynamicStepSolverTaskData *data = userdata; ParticleSimulationData *sim = data->sim; @@ -3425,13 +3425,13 @@ static void dynamics_step_sph_classical_basic_integrate_task_cb_ex( } static void dynamics_step_sph_classical_calc_density_task_cb_ex( - void *userdata, void *userdata_chunk, const int p, const int UNUSED(thread_id)) + void *userdata, const int p, const ParallelRangeTLS *tls) { DynamicStepSolverTaskData *data = userdata; ParticleSimulationData *sim = data->sim; ParticleSystem *psys = sim->psys; - SPHData *sphdata = userdata_chunk; + SPHData *sphdata = tls->userdata_chunk; ParticleData *pa; @@ -3443,14 +3443,14 @@ static void dynamics_step_sph_classical_calc_density_task_cb_ex( } static void dynamics_step_sph_classical_integrate_task_cb_ex( - void *userdata, void *userdata_chunk, const int p, const int UNUSED(thread_id)) + void *userdata, const int p, const ParallelRangeTLS *tls) { DynamicStepSolverTaskData *data = userdata; ParticleSimulationData *sim = data->sim; ParticleSystem *psys = sim->psys; ParticleSettings *part = psys->part; - SPHData *sphdata = userdata_chunk; + SPHData *sphdata = tls->userdata_chunk; ParticleData *pa; @@ -3641,9 +3641,16 @@ static void dynamics_step(ParticleSimulationData *sim, float cfra) /* Apply SPH forces using double-density relaxation algorithm * (Clavat et. al.) */ - BLI_task_parallel_range_ex( - 0, psys->totpart, &task_data, &sphdata, sizeof(sphdata), - dynamics_step_sph_ddr_task_cb_ex, psys->totpart > 100, true); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (psys->totpart > 100); + settings.userdata_chunk = &sphdata; + settings.userdata_chunk_size = sizeof(sphdata); + BLI_task_parallel_range( + 0, psys->totpart, + &task_data, + dynamics_step_sph_ddr_task_cb_ex, + &settings); sph_springs_modify(psys, timestep); } @@ -3653,21 +3660,46 @@ static void dynamics_step(ParticleSimulationData *sim, float cfra) * and Monaghan). Note that, unlike double-density relaxation, * this algorithm is separated into distinct loops. */ - BLI_task_parallel_range_ex( - 0, psys->totpart, &task_data, NULL, 0, - dynamics_step_sph_classical_basic_integrate_task_cb_ex, psys->totpart > 100, true); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (psys->totpart > 100); + BLI_task_parallel_range( + 0, psys->totpart, + &task_data, + dynamics_step_sph_classical_basic_integrate_task_cb_ex, + &settings); + } /* calculate summation density */ /* Note that we could avoid copying sphdata for each thread here (it's only read here), * but doubt this would gain us anything except confusion... */ - BLI_task_parallel_range_ex( - 0, psys->totpart, &task_data, &sphdata, sizeof(sphdata), - dynamics_step_sph_classical_calc_density_task_cb_ex, psys->totpart > 100, true); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (psys->totpart > 100); + settings.userdata_chunk = &sphdata; + settings.userdata_chunk_size = sizeof(sphdata); + BLI_task_parallel_range( + 0, psys->totpart, + &task_data, + dynamics_step_sph_classical_calc_density_task_cb_ex, + &settings); + } /* do global forces & effectors */ - BLI_task_parallel_range_ex( - 0, psys->totpart, &task_data, &sphdata, sizeof(sphdata), - dynamics_step_sph_classical_integrate_task_cb_ex, psys->totpart > 100, true); + { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (psys->totpart > 100); + settings.userdata_chunk = &sphdata; + settings.userdata_chunk_size = sizeof(sphdata); + BLI_task_parallel_range( + 0, psys->totpart, + &task_data, + dynamics_step_sph_classical_integrate_task_cb_ex, + &settings); + } } BLI_spin_end(&task_data.spin); diff --git a/source/blender/blenkernel/intern/pbvh.c b/source/blender/blenkernel/intern/pbvh.c index 585a18cdad5..0395fc9c990 100644 --- a/source/blender/blenkernel/intern/pbvh.c +++ b/source/blender/blenkernel/intern/pbvh.c @@ -939,7 +939,8 @@ typedef struct PBVHUpdateData { int flag; } PBVHUpdateData; -static void pbvh_update_normals_accum_task_cb(void *userdata, const int n) +static void pbvh_update_normals_accum_task_cb(void *userdata, const int n, + const ParallelRangeTLS *UNUSED(tls)) { PBVHUpdateData *data = userdata; @@ -992,7 +993,8 @@ static void pbvh_update_normals_accum_task_cb(void *userdata, const int n) } } -static void pbvh_update_normals_store_task_cb(void *userdata, const int n) +static void pbvh_update_normals_store_task_cb(void *userdata, const int n, + const ParallelRangeTLS *UNUSED(tls)) { PBVHUpdateData *data = userdata; PBVH *bvh = data->bvh; @@ -1051,14 +1053,19 @@ static void pbvh_update_normals(PBVH *bvh, PBVHNode **nodes, .fnors = fnors, .vnors = vnors, }; - BLI_task_parallel_range(0, totnode, &data, pbvh_update_normals_accum_task_cb, totnode > PBVH_THREADED_LIMIT); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (totnode > PBVH_THREADED_LIMIT); - BLI_task_parallel_range(0, totnode, &data, pbvh_update_normals_store_task_cb, totnode > PBVH_THREADED_LIMIT); + BLI_task_parallel_range(0, totnode, &data, pbvh_update_normals_accum_task_cb, &settings); + + BLI_task_parallel_range(0, totnode, &data, pbvh_update_normals_store_task_cb, &settings); MEM_freeN(vnors); } -static void pbvh_update_BB_redraw_task_cb(void *userdata, const int n) +static void pbvh_update_BB_redraw_task_cb(void *userdata, const int n, + const ParallelRangeTLS *UNUSED(tls)) { PBVHUpdateData *data = userdata; PBVH *bvh = data->bvh; @@ -1085,7 +1092,10 @@ void pbvh_update_BB_redraw(PBVH *bvh, PBVHNode **nodes, int totnode, int flag) .flag = flag, }; - BLI_task_parallel_range(0, totnode, &data, pbvh_update_BB_redraw_task_cb, totnode > PBVH_THREADED_LIMIT); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (totnode > PBVH_THREADED_LIMIT); + BLI_task_parallel_range(0, totnode, &data, pbvh_update_BB_redraw_task_cb, &settings); } static void pbvh_update_draw_buffers(PBVH *bvh, PBVHNode **nodes, int totnode) diff --git a/source/blender/blenkernel/intern/shrinkwrap.c b/source/blender/blenkernel/intern/shrinkwrap.c index f9d1793d7cb..0f12bb0a8da 100644 --- a/source/blender/blenkernel/intern/shrinkwrap.c +++ b/source/blender/blenkernel/intern/shrinkwrap.c @@ -88,13 +88,13 @@ typedef struct ShrinkwrapCalcCBData { * for each vertex performs a nearest vertex search on the tree */ static void shrinkwrap_calc_nearest_vertex_cb_ex( - void *userdata, void *userdata_chunk, const int i, const int UNUSED(threadid)) + void *userdata, const int i, const ParallelRangeTLS *tls) { ShrinkwrapCalcCBData *data = userdata; ShrinkwrapCalcData *calc = data->calc; BVHTreeFromMesh *treeData = data->treeData; - BVHTreeNearest *nearest = userdata_chunk; + BVHTreeNearest *nearest = tls->userdata_chunk; float *co = calc->vertexCos[i]; float tmp_co[3]; @@ -167,9 +167,14 @@ static void shrinkwrap_calc_nearest_vertex(ShrinkwrapCalcData *calc) nearest.dist_sq = FLT_MAX; ShrinkwrapCalcCBData data = {.calc = calc, .treeData = &treeData}; - BLI_task_parallel_range_ex( - 0, calc->numVerts, &data, &nearest, sizeof(nearest), shrinkwrap_calc_nearest_vertex_cb_ex, - calc->numVerts > BKE_MESH_OMP_LIMIT, false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (calc->numVerts > BKE_MESH_OMP_LIMIT); + settings.userdata_chunk = &nearest; + settings.userdata_chunk_size = sizeof(nearest); + BLI_task_parallel_range(0, calc->numVerts, + &data,shrinkwrap_calc_nearest_vertex_cb_ex, + &settings); free_bvhtree_from_mesh(&treeData); } @@ -257,7 +262,7 @@ bool BKE_shrinkwrap_project_normal( } static void shrinkwrap_calc_normal_projection_cb_ex( - void *userdata, void *userdata_chunk, const int i, const int UNUSED(threadid)) + void *userdata, const int i, const ParallelRangeTLS *tls) { ShrinkwrapCalcCBData *data = userdata; @@ -272,7 +277,7 @@ static void shrinkwrap_calc_normal_projection_cb_ex( float *proj_axis = data->proj_axis; SpaceTransform *local2aux = data->local2aux; - BVHTreeRayHit *hit = userdata_chunk; + BVHTreeRayHit *hit = tls->userdata_chunk; const float proj_limit_squared = calc->smd->projLimit * calc->smd->projLimit; float *co = calc->vertexCos[i]; @@ -463,9 +468,15 @@ static void shrinkwrap_calc_normal_projection(ShrinkwrapCalcData *calc, bool for .auxData = auxData, .aux_tree = aux_tree, .aux_callback = aux_callback, .proj_axis = proj_axis, .local2aux = &local2aux, }; - BLI_task_parallel_range_ex( - 0, calc->numVerts, &data, &hit, sizeof(hit), shrinkwrap_calc_normal_projection_cb_ex, - calc->numVerts > BKE_MESH_OMP_LIMIT, false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (calc->numVerts > BKE_MESH_OMP_LIMIT); + settings.userdata_chunk = &hit; + settings.userdata_chunk_size = sizeof(hit); + BLI_task_parallel_range(0, calc->numVerts, + &data, + shrinkwrap_calc_normal_projection_cb_ex, + &settings); } /* free data structures */ @@ -495,13 +506,13 @@ static void shrinkwrap_calc_normal_projection(ShrinkwrapCalcData *calc, bool for * NN matches for each vertex */ static void shrinkwrap_calc_nearest_surface_point_cb_ex( - void *userdata, void *userdata_chunk, const int i, const int UNUSED(threadid)) + void *userdata, const int i, const ParallelRangeTLS *tls) { ShrinkwrapCalcCBData *data = userdata; ShrinkwrapCalcData *calc = data->calc; BVHTreeFromMesh *treeData = data->treeData; - BVHTreeNearest *nearest = userdata_chunk; + BVHTreeNearest *nearest = tls->userdata_chunk; float *co = calc->vertexCos[i]; float tmp_co[3]; @@ -583,9 +594,15 @@ static void shrinkwrap_calc_nearest_surface_point(ShrinkwrapCalcData *calc) /* Find the nearest vertex */ ShrinkwrapCalcCBData data = {.calc = calc, .treeData = &treeData}; - BLI_task_parallel_range_ex( - 0, calc->numVerts, &data, &nearest, sizeof(nearest), shrinkwrap_calc_nearest_surface_point_cb_ex, - calc->numVerts > BKE_MESH_OMP_LIMIT, false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (calc->numVerts > BKE_MESH_OMP_LIMIT); + settings.userdata_chunk = &nearest; + settings.userdata_chunk_size = sizeof(nearest); + BLI_task_parallel_range(0, calc->numVerts, + &data, + shrinkwrap_calc_nearest_surface_point_cb_ex, + &settings); free_bvhtree_from_mesh(&treeData); } diff --git a/source/blender/blenkernel/intern/smoke.c b/source/blender/blenkernel/intern/smoke.c index b5eed6c78de..2146b85572c 100644 --- a/source/blender/blenkernel/intern/smoke.c +++ b/source/blender/blenkernel/intern/smoke.c @@ -740,7 +740,7 @@ typedef struct ObstaclesFromDMData { int *num_obstacles; } ObstaclesFromDMData; -static void obstacles_from_derivedmesh_task_cb(void *userdata, const int z) +static void obstacles_from_derivedmesh_task_cb(void *userdata, const int z, const ParallelRangeTLS *UNUSED(tls)) { ObstaclesFromDMData *data = userdata; SmokeDomainSettings *sds = data->sds; @@ -870,8 +870,13 @@ static void obstacles_from_derivedmesh( .velocityX = velocityX, .velocityY = velocityY, .velocityZ = velocityZ, .num_obstacles = num_obstacles }; - BLI_task_parallel_range( - sds->res_min[2], sds->res_max[2], &data, obstacles_from_derivedmesh_task_cb, true); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC; + BLI_task_parallel_range(sds->res_min[2], sds->res_max[2], + &data, + obstacles_from_derivedmesh_task_cb, + &settings); } /* free bvh tree */ free_bvhtree_from_mesh(&treeData); @@ -1186,7 +1191,7 @@ typedef struct EmitFromParticlesData { float hr_smooth; } EmitFromParticlesData; -static void emit_from_particles_task_cb(void *userdata, const int z) +static void emit_from_particles_task_cb(void *userdata, const int z, const ParallelRangeTLS *UNUSED(tls)) { EmitFromParticlesData *data = userdata; SmokeFlowSettings *sfs = data->sfs; @@ -1397,7 +1402,13 @@ static void emit_from_particles( .solid = solid, .smooth = smooth, .hr_smooth = hr_smooth, }; - BLI_task_parallel_range(min[2], max[2], &data, emit_from_particles_task_cb, true); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC; + BLI_task_parallel_range(min[2], max[2], + &data, + emit_from_particles_task_cb, + &settings); } if (sfs->flags & MOD_SMOKE_FLOW_USE_PART_SIZE) { @@ -1569,7 +1580,7 @@ typedef struct EmitFromDMData { int *min, *max, *res; } EmitFromDMData; -static void emit_from_derivedmesh_task_cb(void *userdata, const int z) +static void emit_from_derivedmesh_task_cb(void *userdata, const int z, const ParallelRangeTLS *UNUSED(tls)) { EmitFromDMData *data = userdata; EmissionMap *em = data->em; @@ -1722,7 +1733,13 @@ static void emit_from_derivedmesh(Object *flow_ob, SmokeDomainSettings *sds, Smo .flow_center = flow_center, .min = min, .max = max, .res = res, }; - BLI_task_parallel_range(min[2], max[2], &data, emit_from_derivedmesh_task_cb, true); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC; + BLI_task_parallel_range(min[2], max[2], + &data, + emit_from_derivedmesh_task_cb, + &settings); } /* free bvh tree */ free_bvhtree_from_mesh(&treeData); @@ -2438,7 +2455,7 @@ typedef struct UpdateEffectorsData { unsigned char *obstacle; } UpdateEffectorsData; -static void update_effectors_task_cb(void *userdata, const int x) +static void update_effectors_task_cb(void *userdata, const int x, const ParallelRangeTLS *UNUSED(tls)) { UpdateEffectorsData *data = userdata; SmokeDomainSettings *sds = data->sds; @@ -2512,7 +2529,13 @@ static void update_effectors(const struct EvaluationContext *eval_ctx, Scene *sc data.velocity_z = smoke_get_velocity_z(sds->fluid); data.obstacle = smoke_get_obstacle(sds->fluid); - BLI_task_parallel_range(0, sds->res[0], &data, update_effectors_task_cb, true); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.scheduling_mode = TASK_SCHEDULING_DYNAMIC; + BLI_task_parallel_range(0, sds->res[0], + &data, + update_effectors_task_cb, + &settings); } pdEndEffectors(&effectors); diff --git a/source/blender/blenkernel/intern/tracking_auto.c b/source/blender/blenkernel/intern/tracking_auto.c index 40234bacdfe..0874d645a34 100644 --- a/source/blender/blenkernel/intern/tracking_auto.c +++ b/source/blender/blenkernel/intern/tracking_auto.c @@ -432,7 +432,9 @@ AutoTrackContext *BKE_autotrack_context_new(MovieClip *clip, return context; } -static void autotrack_context_step_cb(void *userdata, int track) +static void autotrack_context_step_cb(void *userdata, + int track, + const ParallelRangeTLS *UNUSED(tls)) { AutoTrackContext *context = userdata; const int frame_delta = context->backwards ? -1 : 1; @@ -510,10 +512,13 @@ bool BKE_autotrack_context_step(AutoTrackContext *context) const int frame_delta = context->backwards ? -1 : 1; context->step_ok = false; + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (context->num_tracks > 1); BLI_task_parallel_range(0, context->num_tracks, context, autotrack_context_step_cb, - context->num_tracks > 1); + &settings); /* Advance the frame. */ BLI_spin_lock(&context->spin_lock); diff --git a/source/blender/blenkernel/intern/tracking_stabilize.c b/source/blender/blenkernel/intern/tracking_stabilize.c index edddeb41cc8..fe850708547 100644 --- a/source/blender/blenkernel/intern/tracking_stabilize.c +++ b/source/blender/blenkernel/intern/tracking_stabilize.c @@ -1503,7 +1503,10 @@ typedef struct TrackingStabilizeFrameInterpolationData { interpolation_func interpolation; } TrackingStabilizeFrameInterpolationData; -static void tracking_stabilize_frame_interpolation_cb(void *userdata, int j) +static void tracking_stabilize_frame_interpolation_cb( + void *userdata, + int j, + const ParallelRangeTLS *UNUSED(tls)) { TrackingStabilizeFrameInterpolationData *data = userdata; ImBuf *ibuf = data->ibuf; @@ -1597,10 +1600,14 @@ ImBuf *BKE_tracking_stabilize_frame(MovieClip *clip, .ibuf = ibuf, .tmpibuf = tmpibuf, .mat = mat, .interpolation = interpolation }; + + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (tmpibuf->y > 128); BLI_task_parallel_range(0, tmpibuf->y, &data, tracking_stabilize_frame_interpolation_cb, - tmpibuf->y > 128); + &settings); if (tmpibuf->rect_float) tmpibuf->userflags |= IB_RECT_INVALID; diff --git a/source/blender/blenlib/BLI_task.h b/source/blender/blenlib/BLI_task.h index ccfa2b6e2e7..52f32c2999f 100644 --- a/source/blender/blenlib/BLI_task.h +++ b/source/blender/blenlib/BLI_task.h @@ -19,7 +19,9 @@ */ #ifndef __BLI_TASK_H__ -#define __BLI_TASK_H__ +#define __BLI_TASK_H__ + +#include <string.h> /* for memset() */ struct Link; struct ListBase; @@ -116,32 +118,76 @@ void BLI_task_pool_delayed_push_begin(TaskPool *pool, int thread_id); void BLI_task_pool_delayed_push_end(TaskPool *pool, int thread_id); /* Parallel for routines */ -typedef void (*TaskParallelRangeFunc)(void *userdata, const int iter); -typedef void (*TaskParallelRangeFuncEx)(void *userdata, void *userdata_chunk, const int iter, const int thread_id); + +typedef enum eTaskSchedulingMode { + /* Task scheduler will divide overall work into equal chunks, scheduling + * even chunks to all worker threads. + * Least run time benefit, ideal for cases when each task requires equal + * amount of compute power. + */ + TASK_SCHEDULING_STATIC, + /* Task scheduler will schedule small amount of work to each worker thread. + * Has more run time overhead, but deals much better with cases when each + * part of the work requires totally different amount of compute power. + */ + TASK_SCHEDULING_DYNAMIC, +} eTaskSchedulingMode; + +/* Per-thread specific data passed to the callback. */ +typedef struct ParallelRangeTLS { + /* Identifier of the thread who this data belongs to. */ + int thread_id; + /* Copy of user-specifier chunk, which is copied from original chunk to all + * worker threads. This is similar to OpenMP's firstprivate. + */ + void *userdata_chunk; +} ParallelRangeTLS; + +typedef void (*TaskParallelRangeFunc)(void *userdata, + const int iter, + const ParallelRangeTLS *tls); typedef void (*TaskParallelRangeFuncFinalize)(void *userdata, void *userdata_chunk); -void BLI_task_parallel_range_ex( - int start, int stop, - void *userdata, - void *userdata_chunk, - const size_t userdata_chunk_size, - TaskParallelRangeFuncEx func_ex, - const bool use_threading, - const bool use_dynamic_scheduling); + +typedef struct ParallelRangeSettings { + /* Whether caller allows to do threading of the particular range. + * Usually set by some equation, which forces threading off when threading + * overhead becomes higher than speed benefit. + * BLI_task_parallel_range() by itself will always use threading when range + * is higher than a chunk size. As in, threading will always be performed. + */ + bool use_threading; + /* Scheduling mode to use for this parallel range invocation. */ + eTaskSchedulingMode scheduling_mode; + /* Each instance of looping chunks will get a copy of this data + * (similar to OpenMP's firstprivate). + */ + void *userdata_chunk; /* Pointer to actual data. */ + size_t userdata_chunk_size; /* Size of that data. */ + /* Function called from calling thread once whole range have been + * processed. + */ + TaskParallelRangeFuncFinalize func_finalize; + /* Minimum allowed number of range iterators to be handled by a single + * thread. This allows to achieve following: + * - Reduce amount of threading overhead. + * - Partially occupy thread pool with ranges which are computationally + * expensive, but which are smaller than amount of available threads. + * For example, it's possible to multi-thread [0 .. 64] range into 4 + * thread which will be doing 16 iterators each. + * This is a preferred way to tell scheduler when to start threading than + * having a global use_threading switch based on just range size. + */ + int min_iter_per_thread; +} ParallelRangeSettings; + +BLI_INLINE void BLI_parallel_range_settings_defaults( + ParallelRangeSettings* settings); + void BLI_task_parallel_range( - int start, int stop, + const int start, const int stop, void *userdata, TaskParallelRangeFunc func, - const bool use_threading); - -void BLI_task_parallel_range_finalize( - int start, int stop, - void *userdata, - void *userdata_chunk, - const size_t userdata_chunk_size, - TaskParallelRangeFuncEx func_ex, - TaskParallelRangeFuncFinalize func_finalize, - const bool use_threading, - const bool use_dynamic_scheduling); + const ParallelRangeSettings *settings); typedef void (*TaskParallelListbaseFunc)(void *userdata, struct Link *iter, @@ -161,6 +207,20 @@ void BLI_task_parallel_mempool( TaskParallelMempoolFunc func, const bool use_threading); +/* TODO(sergey): Think of a better place for this. */ +BLI_INLINE void BLI_parallel_range_settings_defaults( + ParallelRangeSettings* settings) +{ + memset(settings, 0, sizeof(*settings)); + settings->use_threading = true; + settings->scheduling_mode = TASK_SCHEDULING_STATIC; + /* NOTE: Current value mimics old behavior, but it's not ideal by any + * means. Would be cool to find a common value which will work good enough + * for both static and dynamic scheduling. + */ + settings->min_iter_per_thread = 1; +} + #ifdef __cplusplus } #endif diff --git a/source/blender/blenlib/intern/BLI_kdopbvh.c b/source/blender/blenlib/intern/BLI_kdopbvh.c index bd16bc1a9c6..03784e31eee 100644 --- a/source/blender/blenlib/intern/BLI_kdopbvh.c +++ b/source/blender/blenlib/intern/BLI_kdopbvh.c @@ -798,7 +798,7 @@ typedef struct BVHDivNodesData { int first_of_next_level; } BVHDivNodesData; -static void non_recursive_bvh_div_nodes_task_cb(void *userdata, const int j) +static void non_recursive_bvh_div_nodes_task_cb(void *userdata, const int j, const ParallelRangeTLS *UNUSED(tls)) { BVHDivNodesData *data = userdata; @@ -923,14 +923,20 @@ static void non_recursive_bvh_div_nodes( cb_data.depth = depth; if (true) { + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (num_leafs > KDOPBVH_THREAD_LEAF_THRESHOLD); BLI_task_parallel_range( - i, i_stop, &cb_data, non_recursive_bvh_div_nodes_task_cb, - num_leafs > KDOPBVH_THREAD_LEAF_THRESHOLD); + i, i_stop, + &cb_data, + non_recursive_bvh_div_nodes_task_cb, + &settings); } else { /* Less hassle for debugging. */ + ParallelRangeTLS tls = {0}; for (int i_task = i; i_task < i_stop; i_task++) { - non_recursive_bvh_div_nodes_task_cb(&cb_data, i_task); + non_recursive_bvh_div_nodes_task_cb(&cb_data, i_task, &tls); } } } @@ -1276,7 +1282,7 @@ int BLI_bvhtree_overlap_thread_num(const BVHTree *tree) return (int)MIN2(tree->tree_type, tree->nodes[tree->totleaf]->totnode); } -static void bvhtree_overlap_task_cb(void *userdata, const int j) +static void bvhtree_overlap_task_cb(void *userdata, const int j, const ParallelRangeTLS *UNUSED(tls)) { BVHOverlapData_Thread *data = &((BVHOverlapData_Thread *)userdata)[j]; BVHOverlapData_Shared *data_shared = data->shared; @@ -1341,9 +1347,14 @@ BVHTreeOverlap *BLI_bvhtree_overlap( data[j].thread = j; } + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (tree1->totleaf > KDOPBVH_THREAD_LEAF_THRESHOLD); BLI_task_parallel_range( - 0, thread_num, data, bvhtree_overlap_task_cb, - tree1->totleaf > KDOPBVH_THREAD_LEAF_THRESHOLD); + 0, thread_num, + data, + bvhtree_overlap_task_cb, + &settings); for (j = 0; j < thread_num; j++) total += BLI_stack_count(data[j].overlap); diff --git a/source/blender/blenlib/intern/math_statistics.c b/source/blender/blenlib/intern/math_statistics.c index fd7418a8f7b..14e3aaea053 100644 --- a/source/blender/blenlib/intern/math_statistics.c +++ b/source/blender/blenlib/intern/math_statistics.c @@ -46,7 +46,8 @@ typedef struct CovarianceData { int nbr_cos_vn; } CovarianceData; -static void covariance_m_vn_ex_task_cb(void *userdata, const int a) +static void covariance_m_vn_ex_task_cb(void *userdata, const int a, + const ParallelRangeTLS *UNUSED(tls)) { CovarianceData *data = userdata; const float *cos_vn = data->cos_vn; @@ -117,8 +118,14 @@ void BLI_covariance_m_vn_ex( .covfac = covfac, .n = n, .nbr_cos_vn = nbr_cos_vn, }; + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((nbr_cos_vn * n * n) >= 10000); BLI_task_parallel_range( - 0, n * n, &data, covariance_m_vn_ex_task_cb, (nbr_cos_vn * n * n) >= 10000); + 0, n * n, + &data, + covariance_m_vn_ex_task_cb, + &settings); } /** diff --git a/source/blender/blenlib/intern/task.c b/source/blender/blenlib/intern/task.c index afa20e3d766..2c756aa6d65 100644 --- a/source/blender/blenlib/intern/task.c +++ b/source/blender/blenlib/intern/task.c @@ -994,7 +994,6 @@ typedef struct ParallelRangeState { void *userdata; TaskParallelRangeFunc func; - TaskParallelRangeFuncEx func_ex; int iter; int chunk_size; @@ -1015,51 +1014,67 @@ BLI_INLINE bool parallel_range_next_iter_get( static void parallel_range_func( TaskPool * __restrict pool, void *userdata_chunk, - int threadid) + int thread_id) { ParallelRangeState * __restrict state = BLI_task_pool_userdata(pool); + ParallelRangeTLS tls = { + .thread_id = thread_id, + .userdata_chunk = userdata_chunk, + }; int iter, count; - while (parallel_range_next_iter_get(state, &iter, &count)) { - int i; - - if (state->func_ex) { - for (i = 0; i < count; ++i) { - state->func_ex(state->userdata, userdata_chunk, iter + i, threadid); - } - } - else { - for (i = 0; i < count; ++i) { - state->func(state->userdata, iter + i); - } + for (int i = 0; i < count; ++i) { + state->func(state->userdata, iter + i, &tls); } } } +static void palallel_range_single_thread(int start, int stop, + void *userdata, + TaskParallelRangeFunc func, + const ParallelRangeSettings *settings) +{ + void *userdata_chunk = settings->userdata_chunk; + const size_t userdata_chunk_size = settings->userdata_chunk_size; + void *userdata_chunk_local = NULL; + const bool use_userdata_chunk = (userdata_chunk_size != 0) && (userdata_chunk != NULL); + if (use_userdata_chunk) { + userdata_chunk_local = MALLOCA(userdata_chunk_size); + memcpy(userdata_chunk_local, userdata_chunk, userdata_chunk_size); + } + ParallelRangeTLS tls = { + .thread_id = 0, + .userdata_chunk = userdata_chunk_local, + }; + for (int i = start; i < stop; ++i) { + func(userdata, i, &tls); + } + if (settings->func_finalize != NULL) { + settings->func_finalize(userdata, userdata_chunk_local); + } + MALLOCA_FREE(userdata_chunk_local, userdata_chunk_size); +} + /** * This function allows to parallelized for loops in a similar way to OpenMP's 'parallel for' statement. * - * See public API doc for description of parameters. + * See public API doc of ParallelRangeSettings for description of all settings. */ -static void task_parallel_range_ex( - int start, int stop, - void *userdata, - void *userdata_chunk, - const size_t userdata_chunk_size, - TaskParallelRangeFunc func, - TaskParallelRangeFuncEx func_ex, - TaskParallelRangeFuncFinalize func_finalize, - const bool use_threading, - const bool use_dynamic_scheduling) +void BLI_task_parallel_range(const int start, const int stop, + void *userdata, + TaskParallelRangeFunc func, + const ParallelRangeSettings *settings) { TaskScheduler *task_scheduler; TaskPool *task_pool; ParallelRangeState state; int i, num_threads, num_tasks; + void *userdata_chunk = settings->userdata_chunk; + const size_t userdata_chunk_size = settings->userdata_chunk_size; void *userdata_chunk_local = NULL; void *userdata_chunk_array = NULL; - const bool use_userdata_chunk = (func_ex != NULL) && (userdata_chunk_size != 0) && (userdata_chunk != NULL); + const bool use_userdata_chunk = (userdata_chunk_size != 0) && (userdata_chunk != NULL); if (start == stop) { return; @@ -1067,36 +1082,17 @@ static void task_parallel_range_ex( BLI_assert(start < stop); if (userdata_chunk_size != 0) { - BLI_assert(func_ex != NULL && func == NULL); BLI_assert(userdata_chunk != NULL); } /* If it's not enough data to be crunched, don't bother with tasks at all, * do everything from the main thread. */ - if (!use_threading) { - if (func_ex) { - if (use_userdata_chunk) { - userdata_chunk_local = MALLOCA(userdata_chunk_size); - memcpy(userdata_chunk_local, userdata_chunk, userdata_chunk_size); - } - - for (i = start; i < stop; ++i) { - func_ex(userdata, userdata_chunk_local, i, 0); - } - - if (func_finalize) { - func_finalize(userdata, userdata_chunk_local); - } - - MALLOCA_FREE(userdata_chunk_local, userdata_chunk_size); - } - else { - for (i = start; i < stop; ++i) { - func(userdata, i); - } - } - + if (!settings->use_threading) { + palallel_range_single_thread(start, stop, + userdata, + func, + settings); return; } @@ -1108,23 +1104,31 @@ static void task_parallel_range_ex( * and instead have tasks which are evenly distributed across CPU cores and * pull next iter to be crunched using the queue. */ - num_tasks = num_threads * 2; + num_tasks = num_threads + 2; state.start = start; state.stop = stop; state.userdata = userdata; state.func = func; - state.func_ex = func_ex; state.iter = start; - if (use_dynamic_scheduling) { - state.chunk_size = 32; - } - else { - state.chunk_size = max_ii(1, (stop - start) / (num_tasks)); + switch (settings->scheduling_mode) { + case TASK_SCHEDULING_STATIC: + state.chunk_size = max_ii( + settings->min_iter_per_thread, + (stop - start) / (num_tasks)); + break; + case TASK_SCHEDULING_DYNAMIC: + /* TODO(sergey): Make it configurable from min_iter_per_thread. */ + state.chunk_size = 32; + break; } num_tasks = min_ii(num_tasks, (stop - start) / state.chunk_size); + /* TODO(sergey): If number of tasks happened to be 1, use single threaded + * path. + */ + /* NOTE: This way we are adding a memory barrier and ensure all worker * threads can read and modify the value, without any locks. */ atomic_fetch_and_add_int32(&state.iter, 0); @@ -1150,98 +1154,16 @@ static void task_parallel_range_ex( BLI_task_pool_free(task_pool); if (use_userdata_chunk) { - if (func_finalize) { + if (settings->func_finalize != NULL) { for (i = 0; i < num_tasks; i++) { userdata_chunk_local = (char *)userdata_chunk_array + (userdata_chunk_size * i); - func_finalize(userdata, userdata_chunk_local); + settings->func_finalize(userdata, userdata_chunk_local); } } MALLOCA_FREE(userdata_chunk_array, userdata_chunk_size * num_tasks); } } -/** - * This function allows to parallelize for loops in a similar way to OpenMP's 'parallel for' statement. - * - * \param start First index to process. - * \param stop Index to stop looping (excluded). - * \param userdata Common userdata passed to all instances of \a func. - * \param userdata_chunk Optional, each instance of looping chunks will get a copy of this data - * (similar to OpenMP's firstprivate). - * \param userdata_chunk_size Memory size of \a userdata_chunk. - * \param func_ex Callback function (advanced version). - * \param use_threading If \a true, actually split-execute loop in threads, else just do a sequential forloop - * (allows caller to use any kind of test to switch on parallelization or not). - * \param use_dynamic_scheduling If \a true, the whole range is divided in a lot of small chunks (of size 32 currently), - * otherwise whole range is split in a few big chunks (num_threads * 2 chunks currently). - */ -void BLI_task_parallel_range_ex( - int start, int stop, - void *userdata, - void *userdata_chunk, - const size_t userdata_chunk_size, - TaskParallelRangeFuncEx func_ex, - const bool use_threading, - const bool use_dynamic_scheduling) -{ - task_parallel_range_ex( - start, stop, userdata, userdata_chunk, userdata_chunk_size, NULL, func_ex, NULL, - use_threading, use_dynamic_scheduling); -} - -/** - * A simpler version of \a BLI_task_parallel_range_ex, which does not use \a use_dynamic_scheduling, - * and does not handle 'firstprivate'-like \a userdata_chunk. - * - * \param start First index to process. - * \param stop Index to stop looping (excluded). - * \param userdata Common userdata passed to all instances of \a func. - * \param func Callback function (simple version). - * \param use_threading If \a true, actually split-execute loop in threads, else just do a sequential forloop - * (allows caller to use any kind of test to switch on parallelization or not). - */ -void BLI_task_parallel_range( - int start, int stop, - void *userdata, - TaskParallelRangeFunc func, - const bool use_threading) -{ - task_parallel_range_ex(start, stop, userdata, NULL, 0, func, NULL, NULL, use_threading, false); -} - -/** - * This function allows to parallelize for loops in a similar way to OpenMP's 'parallel for' statement, - * with an additional 'finalize' func called from calling thread once whole range have been processed. - * - * \param start First index to process. - * \param stop Index to stop looping (excluded). - * \param userdata Common userdata passed to all instances of \a func. - * \param userdata_chunk Optional, each instance of looping chunks will get a copy of this data - * (similar to OpenMP's firstprivate). - * \param userdata_chunk_size Memory size of \a userdata_chunk. - * \param func_ex Callback function (advanced version). - * \param func_finalize Callback function, called after all workers have finished, - * useful to finalize accumulative tasks. - * \param use_threading If \a true, actually split-execute loop in threads, else just do a sequential forloop - * (allows caller to use any kind of test to switch on parallelization or not). - * \param use_dynamic_scheduling If \a true, the whole range is divided in a lot of small chunks (of size 32 currently), - * otherwise whole range is split in a few big chunks (num_threads * 2 chunks currently). - */ -void BLI_task_parallel_range_finalize( - int start, int stop, - void *userdata, - void *userdata_chunk, - const size_t userdata_chunk_size, - TaskParallelRangeFuncEx func_ex, - TaskParallelRangeFuncFinalize func_finalize, - const bool use_threading, - const bool use_dynamic_scheduling) -{ - task_parallel_range_ex( - start, stop, userdata, userdata_chunk, userdata_chunk_size, NULL, func_ex, func_finalize, - use_threading, use_dynamic_scheduling); -} - #undef MALLOCA #undef MALLOCA_FREE @@ -1335,7 +1257,7 @@ void BLI_task_parallel_listbase( * and instead have tasks which are evenly distributed across CPU cores and * pull next iter to be crunched using the queue. */ - num_tasks = num_threads * 2; + num_tasks = num_threads + 2; state.index = 0; state.link = listbase->first; @@ -1423,7 +1345,7 @@ void BLI_task_parallel_mempool( * and instead have tasks which are evenly distributed across CPU cores and * pull next item to be crunched using the threaded-aware BLI_mempool_iter. */ - num_tasks = num_threads * 2; + num_tasks = num_threads + 2; state.userdata = userdata; state.func = func; diff --git a/source/blender/bmesh/intern/bmesh_interp.c b/source/blender/bmesh/intern/bmesh_interp.c index 96b2eb17c4c..447eb3aa858 100644 --- a/source/blender/bmesh/intern/bmesh_interp.c +++ b/source/blender/bmesh/intern/bmesh_interp.c @@ -419,7 +419,8 @@ typedef struct BMLoopInterpMultiresData { float d; } BMLoopInterpMultiresData; -static void loop_interp_multires_cb(void *userdata, int ix) +static void loop_interp_multires_cb(void *userdata, int ix, + const ParallelRangeTLS *UNUSED(tls)) { BMLoopInterpMultiresData *data = userdata; @@ -507,7 +508,10 @@ void BM_loop_interp_multires_ex( .axis_x = axis_x, .axis_y = axis_y, .v1 = v1, .v4 = v4, .e1 = e1, .e2 = e2, .res = res, .d = 1.0f / (float)(res - 1) }; - BLI_task_parallel_range(0, res, &data, loop_interp_multires_cb, res > 5); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (res > 5); + BLI_task_parallel_range(0, res, &data, loop_interp_multires_cb, &settings); } /** diff --git a/source/blender/depsgraph/intern/eval/deg_eval.cc b/source/blender/depsgraph/intern/eval/deg_eval.cc index ee58a3b02a5..38a7470c0ec 100644 --- a/source/blender/depsgraph/intern/eval/deg_eval.cc +++ b/source/blender/depsgraph/intern/eval/deg_eval.cc @@ -102,7 +102,9 @@ typedef struct CalculatePengindData { Depsgraph *graph; } CalculatePengindData; -static void calculate_pending_func(void *data_v, int i) +static void calculate_pending_func(void *data_v, + int i, + const ParallelRangeTLS * /*tls*/) { CalculatePengindData *data = (CalculatePengindData *)data_v; Depsgraph *graph = data->graph; @@ -129,14 +131,17 @@ static void calculate_pending_func(void *data_v, int i) static void calculate_pending_parents(Depsgraph *graph) { const int num_operations = graph->operations.size(); - const bool do_threads = num_operations > 256; + const bool do_threads = (num_operations > 256); CalculatePengindData data; data.graph = graph; + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = do_threads; BLI_task_parallel_range(0, num_operations, &data, calculate_pending_func, - do_threads); + &settings); } static void initialize_execution(DepsgraphEvalState *state, Depsgraph *graph) diff --git a/source/blender/depsgraph/intern/eval/deg_eval_flush.cc b/source/blender/depsgraph/intern/eval/deg_eval_flush.cc index af1d09ad54a..6dc35bb8aba 100644 --- a/source/blender/depsgraph/intern/eval/deg_eval_flush.cc +++ b/source/blender/depsgraph/intern/eval/deg_eval_flush.cc @@ -71,14 +71,18 @@ typedef std::deque<OperationDepsNode *> FlushQueue; namespace { -void flush_init_operation_node_func(void *data_v, int i) +void flush_init_operation_node_func(void *data_v, + int i, + const ParallelRangeTLS * /*tls*/) { Depsgraph *graph = (Depsgraph *)data_v; OperationDepsNode *node = graph->operations[i]; node->scheduled = false; } -void flush_init_id_node_func(void *data_v, int i) +void flush_init_id_node_func(void *data_v, + int i, + const ParallelRangeTLS * /*tls*/) { Depsgraph *graph = (Depsgraph *)data_v; IDDepsNode *id_node = graph->id_nodes[i]; @@ -90,16 +94,26 @@ void flush_init_id_node_func(void *data_v, int i) BLI_INLINE void flush_prepare(Depsgraph *graph) { - const int num_operations = graph->operations.size(); - BLI_task_parallel_range(0, num_operations, - graph, - flush_init_operation_node_func, - (num_operations > 256)); - const int num_id_nodes = graph->id_nodes.size(); - BLI_task_parallel_range(0, num_id_nodes, - graph, - flush_init_id_node_func, - (num_id_nodes > 256)); + { + const int num_operations = graph->operations.size(); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (num_operations > 256); + BLI_task_parallel_range(0, num_operations, + graph, + flush_init_operation_node_func, + &settings); + } + { + const int num_id_nodes = graph->id_nodes.size(); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (num_id_nodes > 256); + BLI_task_parallel_range(0, num_id_nodes, + graph, + flush_init_id_node_func, + &settings); + } } BLI_INLINE void flush_schedule_entrypoints(Depsgraph *graph, FlushQueue *queue) @@ -278,7 +292,9 @@ void deg_graph_flush_updates(Main *bmain, Depsgraph *graph) flush_editors_id_update(bmain, graph, &update_ctx); } -static void graph_clear_func(void *data_v, int i) +static void graph_clear_func(void *data_v, + int i, + const ParallelRangeTLS * /*tls*/) { Depsgraph *graph = (Depsgraph *)data_v; OperationDepsNode *node = graph->operations[i]; @@ -292,7 +308,13 @@ void deg_graph_clear_tags(Depsgraph *graph) /* Go over all operation nodes, clearing tags. */ const int num_operations = graph->operations.size(); const bool do_threads = num_operations > 256; - BLI_task_parallel_range(0, num_operations, graph, graph_clear_func, do_threads); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = do_threads; + BLI_task_parallel_range(0, num_operations, + graph, + graph_clear_func, + &settings); /* Clear any entry tags which haven't been flushed. */ BLI_gset_clear(graph->entry_tags, NULL); } diff --git a/source/blender/editors/sculpt_paint/paint_cursor.c b/source/blender/editors/sculpt_paint/paint_cursor.c index e1e90506299..24a7137da19 100644 --- a/source/blender/editors/sculpt_paint/paint_cursor.c +++ b/source/blender/editors/sculpt_paint/paint_cursor.c @@ -151,7 +151,7 @@ typedef struct LoadTexData { float radius; } LoadTexData; -static void load_tex_task_cb_ex(void *userdata, void *UNUSED(userdata_chunck), const int j, const int thread_id) +static void load_tex_task_cb_ex(void *userdata, const int j, const ParallelRangeTLS *tls) { LoadTexData *data = userdata; Brush *br = data->br; @@ -212,7 +212,7 @@ static void load_tex_task_cb_ex(void *userdata, void *UNUSED(userdata_chunck), c if (col) { float rgba[4]; - paint_get_tex_pixel_col(mtex, x, y, rgba, pool, thread_id, convert_to_linear, colorspace); + paint_get_tex_pixel_col(mtex, x, y, rgba, pool, tls->thread_id, convert_to_linear, colorspace); buffer[index * 4] = rgba[0] * 255; buffer[index * 4 + 1] = rgba[1] * 255; @@ -220,7 +220,7 @@ static void load_tex_task_cb_ex(void *userdata, void *UNUSED(userdata_chunck), c buffer[index * 4 + 3] = rgba[3] * 255; } else { - float avg = paint_get_tex_pixel(mtex, x, y, pool, thread_id); + float avg = paint_get_tex_pixel(mtex, x, y, pool, tls->thread_id); avg += br->texture_sample_bias; @@ -318,7 +318,9 @@ static int load_tex(Brush *br, ViewContext *vc, float zoom, bool col, bool prima .pool = pool, .size = size, .rotation = rotation, .radius = radius, }; - BLI_task_parallel_range_ex(0, size, &data, NULL, 0, load_tex_task_cb_ex, true, false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + BLI_task_parallel_range(0, size, &data, load_tex_task_cb_ex, &settings); if (mtex->tex && mtex->tex->nodetree) ntreeTexEndExecTree(mtex->tex->nodetree->execdata); @@ -365,7 +367,7 @@ static int load_tex(Brush *br, ViewContext *vc, float zoom, bool col, bool prima return 1; } -static void load_tex_cursor_task_cb(void *userdata, const int j) +static void load_tex_cursor_task_cb(void *userdata, const int j, const ParallelRangeTLS *UNUSED(tls)) { LoadTexData *data = userdata; Brush *br = data->br; @@ -445,7 +447,9 @@ static int load_tex_cursor(Brush *br, ViewContext *vc, float zoom) .br = br, .buffer = buffer, .size = size, }; - BLI_task_parallel_range(0, size, &data, load_tex_cursor_task_cb, true); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + BLI_task_parallel_range(0, size, &data, load_tex_cursor_task_cb, &settings); if (!cursor_snap.overlay_texture) glGenTextures(1, &cursor_snap.overlay_texture); diff --git a/source/blender/editors/sculpt_paint/paint_image_2d.c b/source/blender/editors/sculpt_paint/paint_image_2d.c index 603b8f9a185..8a7bbc90979 100644 --- a/source/blender/editors/sculpt_paint/paint_image_2d.c +++ b/source/blender/editors/sculpt_paint/paint_image_2d.c @@ -1071,7 +1071,8 @@ typedef struct Paint2DForeachData { int tilew; } Paint2DForeachData; -static void paint_2d_op_foreach_do(void *data_v, const int iter) +static void paint_2d_op_foreach_do(void *data_v, const int iter, + const ParallelRangeTLS *UNUSED(tls)) { Paint2DForeachData *data = (Paint2DForeachData *)data_v; paint_2d_do_making_brush(data->s, data->region, data->curveb, @@ -1157,9 +1158,12 @@ static int paint_2d_op(void *state, ImBuf *ibufb, unsigned short *curveb, unsign data.blend = blend; data.tilex = tilex; data.tilew = tilew; + + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); BLI_task_parallel_range(tiley, tileh + 1, &data, paint_2d_op_foreach_do, - true); + &settings); } } diff --git a/source/blender/editors/sculpt_paint/paint_mask.c b/source/blender/editors/sculpt_paint/paint_mask.c index 0fec4c4fc80..5ae59164743 100644 --- a/source/blender/editors/sculpt_paint/paint_mask.c +++ b/source/blender/editors/sculpt_paint/paint_mask.c @@ -104,7 +104,8 @@ typedef struct MaskTaskData { float (*clip_planes_final)[4]; } MaskTaskData; -static void mask_flood_fill_task_cb(void *userdata, const int i) +static void mask_flood_fill_task_cb(void *userdata, const int i, + const ParallelRangeTLS *UNUSED(tls)) { MaskTaskData *data = userdata; @@ -158,9 +159,12 @@ static int mask_flood_fill_exec(bContext *C, wmOperator *op) .mode = mode, .value = value, }; + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); BLI_task_parallel_range( 0, totnode, &data, mask_flood_fill_task_cb, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT)); + &settings); if (multires) multires_mark_as_modified(ob, MULTIRES_COORDS_MODIFIED); @@ -221,7 +225,8 @@ static void flip_plane(float out[4], const float in[4], const char symm) out[3] = in[3]; } -static void mask_box_select_task_cb(void *userdata, const int i) +static void mask_box_select_task_cb(void *userdata, const int i, + const ParallelRangeTLS *UNUSED(tls)) { MaskTaskData *data = userdata; @@ -303,9 +308,12 @@ int ED_sculpt_mask_box_select(struct bContext *C, ViewContext *vc, const rcti *r .mode = mode, .value = value, .clip_planes_final = clip_planes_final, }; + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); BLI_task_parallel_range( 0, totnode, &data, mask_box_select_task_cb, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT)); + &settings); if (nodes) MEM_freeN(nodes); @@ -377,7 +385,8 @@ static void mask_lasso_px_cb(int x, int x_end, int y, void *user_data) } while (++index != index_end); } -static void mask_gesture_lasso_task_cb(void *userdata, const int i) +static void mask_gesture_lasso_task_cb(void *userdata, const int i, + const ParallelRangeTLS *UNUSED(tls)) { LassoMaskData *lasso_data = userdata; MaskTaskData *data = &lasso_data->task_data; @@ -484,9 +493,12 @@ static int paint_mask_gesture_lasso_exec(bContext *C, wmOperator *op) data.task_data.mode = mode; data.task_data.value = value; + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && (totnode > SCULPT_THREADED_LIMIT)); BLI_task_parallel_range( 0, totnode, &data, mask_gesture_lasso_task_cb, - ((sd->flags & SCULPT_USE_OPENMP) && (totnode > SCULPT_THREADED_LIMIT))); + &settings); if (nodes) MEM_freeN(nodes); diff --git a/source/blender/editors/sculpt_paint/paint_vertex.c b/source/blender/editors/sculpt_paint/paint_vertex.c index d9df8c78ba9..f6c486551d8 100644 --- a/source/blender/editors/sculpt_paint/paint_vertex.c +++ b/source/blender/editors/sculpt_paint/paint_vertex.c @@ -1441,7 +1441,7 @@ static float wpaint_get_active_weight(const MDeformVert *dv, const WeightPaintIn } static void do_wpaint_precompute_weight_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int UNUSED(thread_id)) + void *userdata, const int n, const ParallelRangeTLS *UNUSED(tls)) { SculptThreadedTaskData *data = userdata; const MDeformVert *dv = &data->me->dvert[n]; @@ -1460,15 +1460,19 @@ static void precompute_weight_values( .C = C, .ob = ob, .wpd = wpd, .wpi = wpi, .me = me, }; - BLI_task_parallel_range_ex( - 0, me->totvert, &data, NULL, 0, do_wpaint_precompute_weight_cb_ex, - true, false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + BLI_task_parallel_range( + 0, me->totvert, + &data, + do_wpaint_precompute_weight_cb_ex, + &settings); wpd->precomputed_weight_ready = true; } static void do_wpaint_brush_blur_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int UNUSED(thread_id)) + void *userdata, const int n, const ParallelRangeTLS *UNUSED(tls)) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -1557,7 +1561,7 @@ static void do_wpaint_brush_blur_task_cb_ex( } static void do_wpaint_brush_smear_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int UNUSED(thread_id)) + void *userdata, const int n, const ParallelRangeTLS *UNUSED(tls)) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -1664,7 +1668,7 @@ static void do_wpaint_brush_smear_task_cb_ex( } static void do_wpaint_brush_draw_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int UNUSED(thread_id)) + void *userdata, const int n, const ParallelRangeTLS *UNUSED(tls)) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -1734,7 +1738,7 @@ static void do_wpaint_brush_draw_task_cb_ex( } static void do_wpaint_brush_calc_average_weight_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int UNUSED(thread_id)) + void *userdata, const int n, const ParallelRangeTLS *UNUSED(tls)) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -1785,9 +1789,14 @@ static void calculate_average_weight(SculptThreadedTaskData *data, PBVHNode **UN struct WPaintAverageAccum *accum = MEM_mallocN(sizeof(*accum) * totnode, __func__); data->custom_data = accum; - BLI_task_parallel_range_ex( - 0, totnode, data, NULL, 0, do_wpaint_brush_calc_average_weight_cb_ex, - ((data->sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((data->sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + data, + do_wpaint_brush_calc_average_weight_cb_ex, + &settings); uint accum_len = 0; double accum_weight = 0.0; @@ -1819,30 +1828,40 @@ static void wpaint_paint_leaves( /* Use this so average can modify its weight without touching the brush. */ data.strength = BKE_brush_weight_get(scene, brush); - /* current mirroring code cannot be run in parallel */ - bool use_threading = !(me->editflag & ME_EDIT_MIRROR_X); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + /* NOTE: current mirroring code cannot be run in parallel */ + settings.use_threading = !(me->editflag & ME_EDIT_MIRROR_X); switch (brush->vertexpaint_tool) { case PAINT_BLEND_AVERAGE: calculate_average_weight(&data, nodes, totnode); - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, - do_wpaint_brush_draw_task_cb_ex, use_threading, false); + BLI_task_parallel_range( + 0, totnode, + &data, + do_wpaint_brush_draw_task_cb_ex, + &settings); break; case PAINT_BLEND_SMEAR: - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, - do_wpaint_brush_smear_task_cb_ex, use_threading, false); + BLI_task_parallel_range( + 0, totnode, + &data, + do_wpaint_brush_smear_task_cb_ex, + &settings); break; case PAINT_BLEND_BLUR: - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, - do_wpaint_brush_blur_task_cb_ex, use_threading, false); + BLI_task_parallel_range( + 0, totnode, + &data, + do_wpaint_brush_blur_task_cb_ex, + &settings); break; default: - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, - do_wpaint_brush_draw_task_cb_ex, use_threading, false); + BLI_task_parallel_range( + 0, totnode, + &data, + do_wpaint_brush_draw_task_cb_ex, + &settings); break; } } @@ -2398,7 +2417,7 @@ static bool vpaint_stroke_test_start(bContext *C, struct wmOperator *op, const f } static void do_vpaint_brush_calc_average_color_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int UNUSED(thread_id)) + void *userdata, const int n, const ParallelRangeTLS *UNUSED(tls)) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -2460,7 +2479,7 @@ static float tex_color_alpha_ubyte( } static void do_vpaint_brush_draw_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int UNUSED(thread_id)) + void *userdata, const int n, const ParallelRangeTLS *UNUSED(tls)) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -2552,7 +2571,7 @@ static void do_vpaint_brush_draw_task_cb_ex( } static void do_vpaint_brush_blur_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int UNUSED(thread_id)) + void *userdata, const int n, const ParallelRangeTLS *UNUSED(tls)) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -2661,7 +2680,7 @@ static void do_vpaint_brush_blur_task_cb_ex( } static void do_vpaint_brush_smear_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int UNUSED(thread_id)) + void *userdata, const int n, const ParallelRangeTLS *UNUSED(tls)) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -2799,9 +2818,13 @@ static void calculate_average_color(SculptThreadedTaskData *data, PBVHNode **UNU struct VPaintAverageAccum *accum = MEM_mallocN(sizeof(*accum) * totnode, __func__); data->custom_data = accum; - BLI_task_parallel_range_ex( - 0, totnode, data, NULL, 0, do_vpaint_brush_calc_average_color_cb_ex, - true, false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + BLI_task_parallel_range( + 0, totnode, + data, + do_vpaint_brush_calc_average_color_cb_ex, + &settings); uint accum_len = 0; uint accum_value[3] = {0}; @@ -2833,27 +2856,37 @@ static void vpaint_paint_leaves( .sd = sd, .ob = ob, .brush = brush, .nodes = nodes, .vp = vp, .vpd = vpd, .lcol = (uint *)me->mloopcol, .me = me, .C = C, }; + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); switch (brush->vertexpaint_tool) { case PAINT_BLEND_AVERAGE: calculate_average_color(&data, nodes, totnode); - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, - do_vpaint_brush_draw_task_cb_ex, true, false); + BLI_task_parallel_range( + 0, totnode, + &data, + do_vpaint_brush_draw_task_cb_ex, + &settings); break; case PAINT_BLEND_BLUR: - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, - do_vpaint_brush_blur_task_cb_ex, true, false); + BLI_task_parallel_range( + 0, totnode, + &data, + do_vpaint_brush_blur_task_cb_ex, + &settings); break; case PAINT_BLEND_SMEAR: - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, - do_vpaint_brush_smear_task_cb_ex, true, false); + BLI_task_parallel_range( + 0, totnode, + &data, + do_vpaint_brush_smear_task_cb_ex, + &settings); break; default: - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, - do_vpaint_brush_draw_task_cb_ex, true, false); + BLI_task_parallel_range( + 0, totnode, + &data, + do_vpaint_brush_draw_task_cb_ex, + &settings); break; } } diff --git a/source/blender/editors/sculpt_paint/sculpt.c b/source/blender/editors/sculpt_paint/sculpt.c index 6e08f47f60d..1af522ffd8d 100644 --- a/source/blender/editors/sculpt_paint/sculpt.c +++ b/source/blender/editors/sculpt_paint/sculpt.c @@ -377,7 +377,8 @@ static bool sculpt_stroke_is_dynamic_topology( /*** paint mesh ***/ -static void paint_mesh_restore_co_task_cb(void *userdata, const int n) +static void paint_mesh_restore_co_task_cb(void *userdata, const int n, + const ParallelRangeTLS *UNUSED(tls)) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -439,9 +440,14 @@ static void paint_mesh_restore_co(Sculpt *sd, Object *ob) .sd = sd, .ob = ob, .brush = brush, .nodes = nodes, }; + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && !ss->bm && totnode > SCULPT_THREADED_LIMIT); BLI_task_parallel_range( - 0, totnode, &data, paint_mesh_restore_co_task_cb, - ((sd->flags & SCULPT_USE_OPENMP) && !ss->bm && totnode > SCULPT_THREADED_LIMIT)); + 0, totnode, + &data, + paint_mesh_restore_co_task_cb, + &settings); if (nodes) MEM_freeN(nodes); @@ -794,7 +800,8 @@ static float calc_symmetry_feather(Sculpt *sd, StrokeCache *cache) * \note These are all _very_ similar, when changing one, check others. * \{ */ -static void calc_area_normal_and_center_task_cb(void *userdata, const int n) +static void calc_area_normal_and_center_task_cb(void *userdata, const int n, + const ParallelRangeTLS *UNUSED(tls)) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -946,9 +953,14 @@ static void calc_area_center( }; BLI_mutex_init(&data.mutex); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); BLI_task_parallel_range( - 0, totnode, &data, calc_area_normal_and_center_task_cb, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT)); + 0, totnode, + &data, + calc_area_normal_and_center_task_cb, + &settings); BLI_mutex_end(&data.mutex); @@ -996,9 +1008,14 @@ void sculpt_pbvh_calc_area_normal( }; BLI_mutex_init(&data.mutex); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = use_threading; BLI_task_parallel_range( - 0, totnode, &data, calc_area_normal_and_center_task_cb, - use_threading); + 0, totnode, + &data, + calc_area_normal_and_center_task_cb, + &settings); BLI_mutex_end(&data.mutex); @@ -1036,9 +1053,14 @@ static void calc_area_normal_and_center( }; BLI_mutex_init(&data.mutex); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); BLI_task_parallel_range( - 0, totnode, &data, calc_area_normal_and_center_task_cb, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT)); + 0, totnode, + &data, + calc_area_normal_and_center_task_cb, + &settings); BLI_mutex_end(&data.mutex); @@ -1626,7 +1648,7 @@ typedef struct { } SculptFindNearestToRayData; static void do_smooth_brush_mesh_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -1648,7 +1670,7 @@ static void do_smooth_brush_mesh_task_cb_ex( if (sculpt_brush_test_sq_fn(&test, vd.co)) { const float fade = bstrength * tex_strength( ss, brush, vd.co, sqrtf(test.dist), - vd.no, vd.fno, smooth_mask ? 0.0f : (vd.mask ? *vd.mask : 0.0f), thread_id); + vd.no, vd.fno, smooth_mask ? 0.0f : (vd.mask ? *vd.mask : 0.0f), tls->thread_id); if (smooth_mask) { float val = neighbor_average_mask(ss, vd.vert_indices[vd.i]) - *vd.mask; val *= fade * bstrength; @@ -1674,7 +1696,7 @@ static void do_smooth_brush_mesh_task_cb_ex( } static void do_smooth_brush_bmesh_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -1696,7 +1718,7 @@ static void do_smooth_brush_bmesh_task_cb_ex( if (sculpt_brush_test_sq_fn(&test, vd.co)) { const float fade = bstrength * tex_strength( ss, brush, vd.co, sqrtf(test.dist), - vd.no, vd.fno, smooth_mask ? 0.0f : *vd.mask, thread_id); + vd.no, vd.fno, smooth_mask ? 0.0f : *vd.mask, tls->thread_id); if (smooth_mask) { float val = bmesh_neighbor_average_mask(vd.bm_vert, vd.cd_vert_mask_offset) - *vd.mask; val *= fade * bstrength; @@ -1722,10 +1744,10 @@ static void do_smooth_brush_bmesh_task_cb_ex( } static void do_smooth_brush_multires_task_cb_ex( - void *userdata, void *userdata_chunk, const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; - SculptDoBrushSmoothGridDataChunk *data_chunk = userdata_chunk; + SculptDoBrushSmoothGridDataChunk *data_chunk = tls->userdata_chunk; SculptSession *ss = data->ob->sculpt; Sculpt *sd = data->sd; const Brush *brush = data->brush; @@ -1837,7 +1859,7 @@ static void do_smooth_brush_multires_task_cb_ex( const float strength_mask = (smooth_mask ? 0.0f : *mask); const float fade = bstrength * tex_strength( ss, brush, co, sqrtf(test.dist), - NULL, fno, strength_mask, thread_id); + NULL, fno, strength_mask, tls->thread_id); float f = 1.0f / 16.0f; if (x == 0 || x == gridsize - 1) @@ -1895,6 +1917,10 @@ static void smooth( .smooth_mask = smooth_mask, .strength = strength, }; + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + switch (type) { case PBVH_GRIDS: { @@ -1909,22 +1935,30 @@ static void smooth( data_chunk->tmpgrid_size = size; size += sizeof(*data_chunk); - BLI_task_parallel_range_ex( - 0, totnode, &data, data_chunk, size, do_smooth_brush_multires_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + settings.userdata_chunk = data_chunk; + settings.userdata_chunk_size = size; + BLI_task_parallel_range( + 0, totnode, + &data, + do_smooth_brush_multires_task_cb_ex, + &settings); MEM_freeN(data_chunk); break; } case PBVH_FACES: - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_smooth_brush_mesh_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + BLI_task_parallel_range( + 0, totnode, + &data, + do_smooth_brush_mesh_task_cb_ex, + &settings); break; case PBVH_BMESH: - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_smooth_brush_bmesh_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + BLI_task_parallel_range( + 0, totnode, + &data, + do_smooth_brush_bmesh_task_cb_ex, + &settings); break; } @@ -1940,7 +1974,7 @@ static void do_smooth_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnod } static void do_mask_brush_draw_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -1958,7 +1992,7 @@ static void do_mask_brush_draw_task_cb_ex( if (sculpt_brush_test_sq_fn(&test, vd.co)) { const float fade = tex_strength( ss, brush, vd.co, sqrtf(test.dist), - vd.no, vd.fno, 0.0f, thread_id); + vd.no, vd.fno, 0.0f, tls->thread_id); (*vd.mask) += fade * bstrength; CLAMP(*vd.mask, 0, 1); @@ -1979,9 +2013,14 @@ static void do_mask_brush_draw(Sculpt *sd, Object *ob, PBVHNode **nodes, int tot .sd = sd, .ob = ob, .brush = brush, .nodes = nodes, }; - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_mask_brush_draw_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_mask_brush_draw_task_cb_ex, + &settings); } static void do_mask_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode) @@ -2000,7 +2039,7 @@ static void do_mask_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode) } static void do_draw_brush_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -2022,7 +2061,7 @@ static void do_draw_brush_task_cb_ex( /* offset vertex */ const float fade = tex_strength( ss, brush, vd.co, sqrtf(test.dist), - vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, thread_id); + vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, tls->thread_id); mul_v3_v3fl(proxy[vd.i], offset, fade); @@ -2055,16 +2094,21 @@ static void do_draw_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode) .offset = offset, }; - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_draw_brush_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_draw_brush_task_cb_ex, + &settings); } /** * Used for 'SCULPT_TOOL_CREASE' and 'SCULPT_TOOL_BLOB' */ static void do_crease_brush_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -2088,7 +2132,7 @@ static void do_crease_brush_task_cb_ex( /* offset vertex */ const float fade = tex_strength( ss, brush, vd.co, sqrtf(test.dist), - vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, thread_id); + vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, tls->thread_id); float val1[3]; float val2[3]; @@ -2152,13 +2196,18 @@ static void do_crease_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnod .spvc = &spvc, .offset = offset, .flippedbstrength = flippedbstrength, }; - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_crease_brush_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_crease_brush_task_cb_ex, + &settings); } static void do_pinch_brush_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -2179,7 +2228,7 @@ static void do_pinch_brush_task_cb_ex( if (sculpt_brush_test_sq_fn(&test, vd.co)) { const float fade = bstrength * tex_strength( ss, brush, vd.co, sqrtf(test.dist), - vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, thread_id); + vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, tls->thread_id); float val[3]; sub_v3_v3v3(val, test.location, vd.co); @@ -2203,13 +2252,18 @@ static void do_pinch_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode .sd = sd, .ob = ob, .brush = brush, .nodes = nodes, }; - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_pinch_brush_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_pinch_brush_task_cb_ex, + &settings); } static void do_grab_brush_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -2236,7 +2290,7 @@ static void do_grab_brush_task_cb_ex( if (sculpt_brush_test_sq_fn(&test, orig_data.co)) { const float fade = bstrength * tex_strength( ss, brush, orig_data.co, sqrtf(test.dist), - orig_data.no, NULL, vd.mask ? *vd.mask : 0.0f, thread_id); + orig_data.no, NULL, vd.mask ? *vd.mask : 0.0f, tls->thread_id); mul_v3_v3fl(proxy[vd.i], grab_delta, fade); @@ -2264,13 +2318,18 @@ static void do_grab_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode) .grab_delta = grab_delta, }; - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_grab_brush_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_grab_brush_task_cb_ex, + &settings); } static void do_nudge_brush_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -2292,7 +2351,7 @@ static void do_nudge_brush_task_cb_ex( if (sculpt_brush_test_sq_fn(&test, vd.co)) { const float fade = bstrength * tex_strength( ss, brush, vd.co, sqrtf(test.dist), - vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, thread_id); + vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, tls->thread_id); mul_v3_v3fl(proxy[vd.i], cono, fade); @@ -2320,13 +2379,18 @@ static void do_nudge_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode .cono = cono, }; - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_nudge_brush_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_nudge_brush_task_cb_ex, + &settings); } static void do_snake_hook_brush_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -2353,7 +2417,7 @@ static void do_snake_hook_brush_task_cb_ex( if (sculpt_brush_test_sq_fn(&test, vd.co)) { const float fade = bstrength * tex_strength( ss, brush, vd.co, sqrtf(test.dist), - vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, thread_id); + vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, tls->thread_id); mul_v3_v3fl(proxy[vd.i], grab_delta, fade); @@ -2426,13 +2490,18 @@ static void do_snake_hook_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int to .spvc = &spvc, .grab_delta = grab_delta, }; - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_snake_hook_brush_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_snake_hook_brush_task_cb_ex, + &settings); } static void do_thumb_brush_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -2459,7 +2528,7 @@ static void do_thumb_brush_task_cb_ex( if (sculpt_brush_test_sq_fn(&test, orig_data.co)) { const float fade = bstrength * tex_strength( ss, brush, orig_data.co, sqrtf(test.dist), - orig_data.no, NULL, vd.mask ? *vd.mask : 0.0f, thread_id); + orig_data.no, NULL, vd.mask ? *vd.mask : 0.0f, tls->thread_id); mul_v3_v3fl(proxy[vd.i], cono, fade); @@ -2487,13 +2556,18 @@ static void do_thumb_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode .cono = cono, }; - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_thumb_brush_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_thumb_brush_task_cb_ex, + &settings); } static void do_rotate_brush_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -2521,7 +2595,7 @@ static void do_rotate_brush_task_cb_ex( float vec[3], rot[3][3]; const float fade = bstrength * tex_strength( ss, brush, orig_data.co, sqrtf(test.dist), - orig_data.no, NULL, vd.mask ? *vd.mask : 0.0f, thread_id); + orig_data.no, NULL, vd.mask ? *vd.mask : 0.0f, tls->thread_id); sub_v3_v3v3(vec, orig_data.co, ss->cache->location); axis_angle_normalized_to_mat3(rot, ss->cache->sculpt_normal_symm, angle * fade); @@ -2549,13 +2623,18 @@ static void do_rotate_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnod .angle = angle, }; - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_rotate_brush_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_rotate_brush_task_cb_ex, + &settings); } static void do_layer_brush_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -2589,7 +2668,7 @@ static void do_layer_brush_task_cb_ex( if (sculpt_brush_test_sq_fn(&test, orig_data.co)) { const float fade = bstrength * tex_strength( ss, brush, vd.co, sqrtf(test.dist), - vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, thread_id); + vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, tls->thread_id); float *disp = &layer_disp[vd.i]; float val[3]; @@ -2634,15 +2713,20 @@ static void do_layer_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode }; BLI_mutex_init(&data.mutex); - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_layer_brush_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_layer_brush_task_cb_ex, + &settings); BLI_mutex_end(&data.mutex); } static void do_inflate_brush_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -2663,7 +2747,7 @@ static void do_inflate_brush_task_cb_ex( if (sculpt_brush_test_sq_fn(&test, vd.co)) { const float fade = bstrength * tex_strength( ss, brush, vd.co, sqrtf(test.dist), - vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, thread_id); + vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, tls->thread_id); float val[3]; if (vd.fno) @@ -2689,9 +2773,14 @@ static void do_inflate_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totno .sd = sd, .ob = ob, .brush = brush, .nodes = nodes, }; - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_inflate_brush_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_inflate_brush_task_cb_ex, + &settings); } static void calc_sculpt_plane( @@ -2806,7 +2895,7 @@ static float get_offset(Sculpt *sd, SculptSession *ss) } static void do_flatten_brush_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -2839,7 +2928,7 @@ static void do_flatten_brush_task_cb_ex( if (plane_trim(ss->cache, brush, val)) { const float fade = bstrength * tex_strength( ss, brush, vd.co, sqrtf(test.dist), - vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, thread_id); + vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, tls->thread_id); mul_v3_v3fl(proxy[vd.i], val, fade); @@ -2878,13 +2967,18 @@ static void do_flatten_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totno .area_no = area_no, .area_co = area_co, }; - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_flatten_brush_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_flatten_brush_task_cb_ex, + &settings); } static void do_clay_brush_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -2921,7 +3015,7 @@ static void do_clay_brush_task_cb_ex( * causes glitch with planes, see: T44390 */ const float fade = bstrength * tex_strength( ss, brush, vd.co, sqrtf(test.dist), - vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, thread_id); + vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, tls->thread_id); mul_v3_v3fl(proxy[vd.i], val, fade); @@ -2964,13 +3058,18 @@ static void do_clay_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode) .area_no = area_no, .area_co = area_co, }; - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_clay_brush_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_clay_brush_task_cb_ex, + &settings); } static void do_clay_strips_brush_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -3006,7 +3105,7 @@ static void do_clay_strips_brush_task_cb_ex( * causes glitch with planes, see: T44390 */ const float fade = bstrength * tex_strength( ss, brush, vd.co, ss->cache->radius * test.dist, - vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, thread_id); + vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, tls->thread_id); mul_v3_v3fl(proxy[vd.i], val, fade); @@ -3074,13 +3173,18 @@ static void do_clay_strips_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int t .area_no_sp = area_no_sp, .area_co = area_co, .mat = mat, }; - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_clay_strips_brush_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_clay_strips_brush_task_cb_ex, + &settings); } static void do_fill_brush_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -3114,7 +3218,7 @@ static void do_fill_brush_task_cb_ex( if (plane_trim(ss->cache, brush, val)) { const float fade = bstrength * tex_strength( ss, brush, vd.co, sqrtf(test.dist), - vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, thread_id); + vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, tls->thread_id); mul_v3_v3fl(proxy[vd.i], val, fade); @@ -3155,13 +3259,18 @@ static void do_fill_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode) .area_no = area_no, .area_co = area_co, }; - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_fill_brush_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_fill_brush_task_cb_ex, + &settings); } static void do_scrape_brush_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -3194,7 +3303,7 @@ static void do_scrape_brush_task_cb_ex( if (plane_trim(ss->cache, brush, val)) { const float fade = bstrength * tex_strength( ss, brush, vd.co, sqrtf(test.dist), - vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, thread_id); + vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, tls->thread_id); mul_v3_v3fl(proxy[vd.i], val, fade); @@ -3235,13 +3344,18 @@ static void do_scrape_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnod .area_no = area_no, .area_co = area_co, }; - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_scrape_brush_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_scrape_brush_task_cb_ex, + &settings); } static void do_gravity_task_cb_ex( - void *userdata, void *UNUSED(userdata_chunk), const int n, const int thread_id) + void *userdata, const int n, const ParallelRangeTLS *tls) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -3261,7 +3375,7 @@ static void do_gravity_task_cb_ex( if (sculpt_brush_test_sq_fn(&test, vd.co)) { const float fade = tex_strength( ss, brush, vd.co, sqrtf(test.dist), - vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, thread_id); + vd.no, vd.fno, vd.mask ? *vd.mask : 0.0f, tls->thread_id); mul_v3_v3fl(proxy[vd.i], offset, fade); @@ -3292,9 +3406,14 @@ static void do_gravity(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode, fl .offset = offset, }; - BLI_task_parallel_range_ex( - 0, totnode, &data, NULL, 0, do_gravity_task_cb_ex, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT), false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); + BLI_task_parallel_range( + 0, totnode, + &data, + do_gravity_task_cb_ex, + &settings); } @@ -3396,7 +3515,8 @@ static void sculpt_topology_update(Sculpt *sd, Object *ob, Brush *brush, Unified } } -static void do_brush_action_task_cb(void *userdata, const int n) +static void do_brush_action_task_cb(void *userdata, const int n, + const ParallelRangeTLS *UNUSED(tls)) { SculptThreadedTaskData *data = userdata; @@ -3423,9 +3543,14 @@ static void do_brush_action(Sculpt *sd, Object *ob, Brush *brush, UnifiedPaintSe .sd = sd, .ob = ob, .brush = brush, .nodes = nodes, }; + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); BLI_task_parallel_range( - 0, totnode, &task_data, do_brush_action_task_cb, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT)); + 0, totnode, + &task_data, + do_brush_action_task_cb, + &settings); if (sculpt_brush_needs_normal(brush, ss->cache->normal_weight)) update_sculpt_normal(sd, ob, nodes, totnode); @@ -3537,7 +3662,8 @@ static void sculpt_flush_pbvhvert_deform(Object *ob, PBVHVertexIter *vd) copy_v3_v3(me->mvert[index].co, newco); } -static void sculpt_combine_proxies_task_cb(void *userdata, const int n) +static void sculpt_combine_proxies_task_cb(void *userdata, const int n, + const ParallelRangeTLS *UNUSED(tls)) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -3604,9 +3730,14 @@ static void sculpt_combine_proxies(Sculpt *sd, Object *ob) .sd = sd, .ob = ob, .brush = brush, .nodes = nodes, }; + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); BLI_task_parallel_range( - 0, totnode, &data, sculpt_combine_proxies_task_cb, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT)); + 0, totnode, + &data, + sculpt_combine_proxies_task_cb, + &settings); } if (nodes) @@ -3632,7 +3763,8 @@ static void sculpt_update_keyblock(Object *ob) } } -static void sculpt_flush_stroke_deform_task_cb(void *userdata, const int n) +static void sculpt_flush_stroke_deform_task_cb(void *userdata, const int n, + const ParallelRangeTLS *UNUSED(tls)) { SculptThreadedTaskData *data = userdata; SculptSession *ss = data->ob->sculpt; @@ -3685,9 +3817,14 @@ static void sculpt_flush_stroke_deform(Sculpt *sd, Object *ob) .vertCos = vertCos, }; + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); BLI_task_parallel_range( - 0, totnode, &data, sculpt_flush_stroke_deform_task_cb, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT)); + 0, totnode, + &data, + sculpt_flush_stroke_deform_task_cb, + &settings); if (vertCos) { sculpt_vertcos_to_key(ob, ss->kb, vertCos); diff --git a/source/blender/editors/sculpt_paint/sculpt_undo.c b/source/blender/editors/sculpt_paint/sculpt_undo.c index a10c7477dc6..73aaedc856e 100644 --- a/source/blender/editors/sculpt_paint/sculpt_undo.c +++ b/source/blender/editors/sculpt_paint/sculpt_undo.c @@ -319,7 +319,8 @@ static bool sculpt_undo_restore_mask(bContext *C, DerivedMesh *dm, SculptUndoNod return 1; } -static void sculpt_undo_bmesh_restore_generic_task_cb(void *userdata, const int n) +static void sculpt_undo_bmesh_restore_generic_task_cb(void *userdata, const int n, + const ParallelRangeTLS *UNUSED(tls)) { PBVHNode **nodes = userdata; @@ -347,9 +348,14 @@ static void sculpt_undo_bmesh_restore_generic(bContext *C, BKE_pbvh_search_gather(ss->pbvh, NULL, NULL, &nodes, &totnode); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT); BLI_task_parallel_range( - 0, totnode, nodes, sculpt_undo_bmesh_restore_generic_task_cb, - ((sd->flags & SCULPT_USE_OPENMP) && totnode > SCULPT_THREADED_LIMIT)); + 0, totnode, + nodes, + sculpt_undo_bmesh_restore_generic_task_cb, + &settings); if (nodes) MEM_freeN(nodes); diff --git a/source/blender/editors/space_sequencer/sequencer_scopes.c b/source/blender/editors/space_sequencer/sequencer_scopes.c index 80cb42c0b3d..25ee4041d8e 100644 --- a/source/blender/editors/space_sequencer/sequencer_scopes.c +++ b/source/blender/editors/space_sequencer/sequencer_scopes.c @@ -459,13 +459,13 @@ typedef struct MakeHistogramViewData { } MakeHistogramViewData; static void make_histogram_view_from_ibuf_byte_cb_ex( - void *userdata, void *userdata_chunk, const int y, const int UNUSED(threadid)) + void *userdata, const int y, const ParallelRangeTLS *tls) { MakeHistogramViewData *data = userdata; const ImBuf *ibuf = data->ibuf; const unsigned char *src = (unsigned char *)ibuf->rect; - uint32_t (*cur_bins)[HIS_STEPS] = userdata_chunk; + uint32_t (*cur_bins)[HIS_STEPS] = tls->userdata_chunk; for (int x = 0; x < ibuf->x; x++) { const unsigned char *pixel = src + (y * ibuf->x + x) * 4; @@ -501,9 +501,17 @@ static ImBuf *make_histogram_view_from_ibuf_byte(ImBuf *ibuf) memset(bins, 0, sizeof(bins)); MakeHistogramViewData data = {.ibuf = ibuf, .bins = bins}; - BLI_task_parallel_range_finalize( - 0, ibuf->y, &data, bins, sizeof(bins), make_histogram_view_from_ibuf_byte_cb_ex, - make_histogram_view_from_ibuf_finalize, ibuf->y >= 256, false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (ibuf->y >= 256); + settings.userdata_chunk = bins; + settings.userdata_chunk_size = sizeof(bins); + settings.func_finalize = make_histogram_view_from_ibuf_finalize; + BLI_task_parallel_range( + 0, ibuf->y, + &data, + make_histogram_view_from_ibuf_byte_cb_ex, + &settings); nr = nb = ng = 0; for (x = 0; x < HIS_STEPS; x++) { @@ -548,13 +556,13 @@ BLI_INLINE int get_bin_float(float f) } static void make_histogram_view_from_ibuf_float_cb_ex( - void *userdata, void *userdata_chunk, const int y, const int UNUSED(threadid)) + void *userdata, const int y, const ParallelRangeTLS *tls) { const MakeHistogramViewData *data = userdata; const ImBuf *ibuf = data->ibuf; const float *src = ibuf->rect_float; - uint32_t (*cur_bins)[HIS_STEPS] = userdata_chunk; + uint32_t (*cur_bins)[HIS_STEPS] = tls->userdata_chunk; for (int x = 0; x < ibuf->x; x++) { const float *pixel = src + (y * ibuf->x + x) * 4; @@ -576,9 +584,17 @@ static ImBuf *make_histogram_view_from_ibuf_float(ImBuf *ibuf) memset(bins, 0, sizeof(bins)); MakeHistogramViewData data = {.ibuf = ibuf, .bins = bins}; - BLI_task_parallel_range_finalize( - 0, ibuf->y, &data, bins, sizeof(bins), make_histogram_view_from_ibuf_float_cb_ex, - make_histogram_view_from_ibuf_finalize, ibuf->y >= 256, false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (ibuf->y >= 256); + settings.userdata_chunk = bins; + settings.userdata_chunk_size = sizeof(bins); + settings.func_finalize = make_histogram_view_from_ibuf_finalize; + BLI_task_parallel_range( + 0, ibuf->y, + &data, + make_histogram_view_from_ibuf_float_cb_ex, + &settings); nr = nb = ng = 0; for (x = 0; x < HIS_STEPS; x++) { diff --git a/source/blender/modifiers/intern/MOD_displace.c b/source/blender/modifiers/intern/MOD_displace.c index c9ccdc3b8c2..9818b8a322e 100644 --- a/source/blender/modifiers/intern/MOD_displace.c +++ b/source/blender/modifiers/intern/MOD_displace.c @@ -187,7 +187,7 @@ typedef struct DisplaceUserdata { float (*vert_clnors)[3]; } DisplaceUserdata; -static void displaceModifier_do_task(void *userdata, const int iter) +static void displaceModifier_do_task(void *userdata, const int iter, const ParallelRangeTLS *UNUSED(tls)) { DisplaceUserdata *data = (DisplaceUserdata *)userdata; DisplaceModifierData *dmd = data->dmd; @@ -356,7 +356,13 @@ static void displaceModifier_do( data.pool = BKE_image_pool_new(); BKE_texture_fetch_images_for_pool(dmd->texture, data.pool); } - BLI_task_parallel_range(0, numVerts, &data, displaceModifier_do_task, numVerts > 512); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (numVerts > 512); + BLI_task_parallel_range(0, numVerts, + &data, + displaceModifier_do_task, + &settings); if (data.pool != NULL) { BKE_image_pool_free(data.pool); diff --git a/source/blender/modifiers/intern/MOD_meshdeform.c b/source/blender/modifiers/intern/MOD_meshdeform.c index ab43204365d..e39328fdea4 100644 --- a/source/blender/modifiers/intern/MOD_meshdeform.c +++ b/source/blender/modifiers/intern/MOD_meshdeform.c @@ -215,7 +215,8 @@ typedef struct MeshdeformUserdata { float (*icagemat)[3]; } MeshdeformUserdata; -static void meshdeform_vert_task(void *userdata, const int iter) +static void meshdeform_vert_task(void *userdata, const int iter, + const ParallelRangeTLS *UNUSED(tls)) { MeshdeformUserdata *data = userdata; /*const*/ MeshDeformModifierData *mmd = data->mmd; @@ -394,7 +395,13 @@ static void meshdeformModifier_do( data.icagemat = icagemat; /* Do deformation. */ - BLI_task_parallel_range(0, totvert, &data, meshdeform_vert_task, totvert > 1000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (totvert > 1000); + BLI_task_parallel_range(0, totvert, + &data, + meshdeform_vert_task, + &settings); /* release cage derivedmesh */ MEM_freeN(dco); diff --git a/source/blender/modifiers/intern/MOD_ocean.c b/source/blender/modifiers/intern/MOD_ocean.c index 189cfb8553e..fcf1874b407 100644 --- a/source/blender/modifiers/intern/MOD_ocean.c +++ b/source/blender/modifiers/intern/MOD_ocean.c @@ -261,7 +261,8 @@ typedef struct GenerateOceanGeometryData { float ix, iy; } GenerateOceanGeometryData; -static void generate_ocean_geometry_vertices(void *userdata, const int y) +static void generate_ocean_geometry_vertices(void *userdata, const int y, + const ParallelRangeTLS *UNUSED(tls)) { GenerateOceanGeometryData *gogd = userdata; int x; @@ -275,7 +276,8 @@ static void generate_ocean_geometry_vertices(void *userdata, const int y) } } -static void generate_ocean_geometry_polygons(void *userdata, const int y) +static void generate_ocean_geometry_polygons(void *userdata, const int y, + const ParallelRangeTLS *UNUSED(tls)) { GenerateOceanGeometryData *gogd = userdata; int x; @@ -305,7 +307,8 @@ static void generate_ocean_geometry_polygons(void *userdata, const int y) } } -static void generate_ocean_geometry_uvs(void *userdata, const int y) +static void generate_ocean_geometry_uvs(void *userdata, const int y, + const ParallelRangeTLS *UNUSED(tls)) { GenerateOceanGeometryData *gogd = userdata; int x; @@ -367,11 +370,15 @@ static DerivedMesh *generate_ocean_geometry(OceanModifierData *omd) gogd.origindex = CustomData_get_layer(&result->polyData, CD_ORIGINDEX); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = use_threading; + /* create vertices */ - BLI_task_parallel_range(0, gogd.res_y + 1, &gogd, generate_ocean_geometry_vertices, use_threading); + BLI_task_parallel_range(0, gogd.res_y + 1, &gogd, generate_ocean_geometry_vertices, &settings); /* create faces */ - BLI_task_parallel_range(0, gogd.res_y, &gogd, generate_ocean_geometry_polygons, use_threading); + BLI_task_parallel_range(0, gogd.res_y, &gogd, generate_ocean_geometry_polygons, &settings); CDDM_calc_edges(result); @@ -383,7 +390,7 @@ static DerivedMesh *generate_ocean_geometry(OceanModifierData *omd) gogd.ix = 1.0 / gogd.rx; gogd.iy = 1.0 / gogd.ry; - BLI_task_parallel_range(0, gogd.res_y, &gogd, generate_ocean_geometry_uvs, use_threading); + BLI_task_parallel_range(0, gogd.res_y, &gogd, generate_ocean_geometry_uvs, &settings); } } diff --git a/source/blender/modifiers/intern/MOD_surfacedeform.c b/source/blender/modifiers/intern/MOD_surfacedeform.c index 2919f169d00..ab1f48213ee 100644 --- a/source/blender/modifiers/intern/MOD_surfacedeform.c +++ b/source/blender/modifiers/intern/MOD_surfacedeform.c @@ -736,7 +736,7 @@ BLI_INLINE float computeNormalDisplacement(const float point_co[3], const float return normal_dist; } -static void bindVert(void *userdata, void *UNUSED(userdata_chunk), const int index, const int UNUSED(threadid)) +static void bindVert(void *userdata, const int index, const ParallelRangeTLS *UNUSED(tls)) { SDefBindCalcData * const data = (SDefBindCalcData *)userdata; float point_co[3]; @@ -996,8 +996,13 @@ static bool surfacedeformBind(SurfaceDeformModifierData *smd, float (*vertexCos) mul_v3_m4v3(data.targetCos[i], smd->mat, mvert[i].co); } - BLI_task_parallel_range_ex(0, numverts, &data, NULL, 0, bindVert, - numverts > 10000, false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (numverts > 10000); + BLI_task_parallel_range(0, numverts, + &data, + bindVert, + &settings); MEM_freeN(data.targetCos); @@ -1032,7 +1037,7 @@ static bool surfacedeformBind(SurfaceDeformModifierData *smd, float (*vertexCos) return data.success == 1; } -static void deformVert(void *userdata, void *UNUSED(userdata_chunk), const int index, const int UNUSED(threadid)) +static void deformVert(void *userdata, const int index, const ParallelRangeTLS *UNUSED(tls)) { const SDefDeformData * const data = (SDefDeformData *)userdata; const SDefBind *sdbind = data->bind_verts[index].binds; @@ -1153,8 +1158,13 @@ static void surfacedeformModifier_do(ModifierData *md, float (*vertexCos)[3], un mul_v3_m4v3(data.targetCos[i], smd->mat, mvert[i].co); } - BLI_task_parallel_range_ex(0, numverts, &data, NULL, 0, deformVert, - numverts > 10000, false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (numverts > 10000); + BLI_task_parallel_range(0, numverts, + &data, + deformVert, + &settings); if (tdm_vert_alloc) { MEM_freeN((void *)mvert); diff --git a/source/blender/modifiers/intern/MOD_uvwarp.c b/source/blender/modifiers/intern/MOD_uvwarp.c index 3773eed26dc..2980c1cd07a 100644 --- a/source/blender/modifiers/intern/MOD_uvwarp.c +++ b/source/blender/modifiers/intern/MOD_uvwarp.c @@ -110,7 +110,7 @@ typedef struct UVWarpData { int axis_v; } UVWarpData; -static void uv_warp_compute(void *userdata, const int i) +static void uv_warp_compute(void *userdata, const int i, const ParallelRangeTLS *UNUSED(tls)) { const UVWarpData *data = userdata; @@ -208,7 +208,13 @@ static DerivedMesh *applyModifier(ModifierData *md, const struct EvaluationConte UVWarpData data = {.mpoly = mpoly, .mloop = mloop, .mloopuv = mloopuv, .dvert = dvert, .defgrp_index = defgrp_index, .warp_mat = warp_mat, .axis_u = axis_u, .axis_v = axis_v}; - BLI_task_parallel_range(0, numPolys, &data, uv_warp_compute, numPolys > 1000); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (numPolys > 1000); + BLI_task_parallel_range(0, numPolys, + &data, + uv_warp_compute, + &settings); dm->dirty |= DM_DIRTY_TESS_CDLAYERS; diff --git a/source/blender/modifiers/intern/MOD_weightvgproximity.c b/source/blender/modifiers/intern/MOD_weightvgproximity.c index c8bbbfe44b2..82f44f36545 100644 --- a/source/blender/modifiers/intern/MOD_weightvgproximity.c +++ b/source/blender/modifiers/intern/MOD_weightvgproximity.c @@ -90,10 +90,10 @@ typedef struct Vert2GeomDataChunk { /** * Callback used by BLI_task 'for loop' helper. */ -static void vert2geom_task_cb_ex(void *userdata, void *userdata_chunk, const int iter, const int UNUSED(thread_id)) +static void vert2geom_task_cb_ex(void *userdata, const int iter, const ParallelRangeTLS *tls) { Vert2GeomData *data = userdata; - Vert2GeomDataChunk *data_chunk = userdata_chunk; + Vert2GeomDataChunk *data_chunk = tls->userdata_chunk; float tmp_co[3]; int i; @@ -177,9 +177,16 @@ static void get_vert2geom_distance(int numVerts, float (*v_cos)[3], data.dist[1] = dist_e; data.dist[2] = dist_f; - BLI_task_parallel_range_ex( - 0, numVerts, &data, &data_chunk, sizeof(data_chunk), vert2geom_task_cb_ex, - numVerts > 10000, false); + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (numVerts > 10000); + settings.userdata_chunk = &data_chunk; + settings.userdata_chunk_size = sizeof(data_chunk); + BLI_task_parallel_range( + 0, numVerts, + &data, + vert2geom_task_cb_ex, + &settings); if (dist_v) free_bvhtree_from_mesh(&treeData_v); diff --git a/source/blender/render/intern/source/pointdensity.c b/source/blender/render/intern/source/pointdensity.c index 28d62f36ddd..da5c26f568d 100644 --- a/source/blender/render/intern/source/pointdensity.c +++ b/source/blender/render/intern/source/pointdensity.c @@ -1033,7 +1033,8 @@ typedef struct SampleCallbackData { float *values; } SampleCallbackData; -static void point_density_sample_func(void *data_v, const int iter) +static void point_density_sample_func(void *data_v, const int iter, + const ParallelRangeTLS *UNUSED(tls)) { SampleCallbackData *data = (SampleCallbackData *)data_v; @@ -1108,11 +1109,14 @@ void RE_point_density_sample( data.min = min; data.dim = dim; data.values = values; + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.use_threading = (resolution > 32); BLI_task_parallel_range(0, resolution, &data, point_density_sample_func, - resolution > 32); + &settings); free_pointdensity(pd); } |