From 90cd856ac34011496031eeae5a5e3a5bf2da1107 Mon Sep 17 00:00:00 2001 From: Bastien Montagne Date: Thu, 11 Jan 2018 19:39:24 +0100 Subject: Nuke OMP usage in multires.c. New code is over three times quicker than old one here (e.g. Suzanne subdiv level 4, 250k tris, threaded part is now 1.4ms instead of 4.5ms with OMP). --- source/blender/blenkernel/intern/multires.c | 216 +++++++++++++++++----------- 1 file changed, 129 insertions(+), 87 deletions(-) (limited to 'source/blender/blenkernel/intern/multires.c') diff --git a/source/blender/blenkernel/intern/multires.c b/source/blender/blenkernel/intern/multires.c index 4eb550a9f4c..6bc4e359bbd 100644 --- a/source/blender/blenkernel/intern/multires.c +++ b/source/blender/blenkernel/intern/multires.c @@ -1003,6 +1003,115 @@ static void grid_tangent_matrix(float mat[3][3], const CCGKey *key, copy_v3_v3(mat[2], CCG_grid_elem_no(key, grid, x, y)); } + /* Arguments bundled for multires_disp_run_cb(): the operation to run plus the grid, polygon, displacement and paint-mask arrays the callback indexes. */ +typedef struct MultiresDispRunData { + DispOp op; /* selects the branch taken in both switch statements of the callback */ + CCGElem **gridData, **subGridData; /* both indexed by grid index */ + CCGKey *key; + MPoly *mpoly; /* indexed by polygon index */ + MDisps *mdisps; /* indexed by loop index (loopstart + S) */ + GridPaintMask *grid_paint_mask; /* may be NULL; otherwise indexed by grid index */ + int *gridOffset; /* per polygon: index of its first grid */ + int gridSize, dGridSize, dSkip; /* x/y loop bound of a grid; row length and sampling stride used to index MDisps.disps */ +} MultiresDispRunData; + /* Callback handling the polygon at index pidx: walks one grid per loop of that polygon and, for every grid element, applies, calculates or adds displacements according to op; when a paint mask is present the matching mask transfer is done as well. userdata must point to a MultiresDispRunData; tls is unused. */ +static void multires_disp_run_cb( + void *__restrict userdata, + const int pidx, + const ParallelRangeTLS *__restrict UNUSED(tls)) +{ + MultiresDispRunData *tdata = userdata; + + DispOp op = tdata->op; + CCGElem **gridData = tdata->gridData; + CCGElem **subGridData = tdata->subGridData; + CCGKey *key = tdata->key; + MPoly *mpoly = tdata->mpoly; + MDisps *mdisps = tdata->mdisps; + GridPaintMask *grid_paint_mask = tdata->grid_paint_mask; + int *gridOffset = tdata->gridOffset; + int gridSize = tdata->gridSize; + int dGridSize = tdata->dGridSize; + int dSkip = tdata->dSkip; + + const int numVerts = mpoly[pidx].totloop; + int S, x, y, gIndex = gridOffset[pidx]; /* gIndex starts at the polygon's first grid and advances once per loop */ + + for (S = 0; S < numVerts; ++S, ++gIndex) { + GridPaintMask *gpm = grid_paint_mask ? 
&grid_paint_mask[gIndex] : NULL; + MDisps *mdisp = &mdisps[mpoly[pidx].loopstart + S]; + CCGElem *grid = gridData[gIndex]; + CCGElem *subgrid = subGridData[gIndex]; + float (*dispgrid)[3] = NULL; + + dispgrid = mdisp->disps; + + /* if needed, reallocate multires paint mask; the old buffer is freed without copying its values. NOTE(review): this allocates from inside the callback - confirm MEM_freeN and MEM_callocN may be called concurrently here */ + if (gpm && gpm->level < key->level) { + gpm->level = key->level; + if (gpm->data) { + MEM_freeN(gpm->data); + } + gpm->data = MEM_callocN(sizeof(float) * key->grid_area, "gpm.data"); + } + + for (y = 0; y < gridSize; y++) { + for (x = 0; x < gridSize; x++) { + float *co = CCG_grid_elem_co(key, grid, x, y); + float *sco = CCG_grid_elem_co(key, subgrid, x, y); + float *data = dispgrid[dGridSize * y * dSkip + x * dSkip]; /* entry (x * dSkip, y * dSkip) of an array whose rows hold dGridSize entries */ + float mat[3][3], disp[3], d[3], mask; + + /* construct tangent space matrix */ + grid_tangent_matrix(mat, key, x, y, subgrid); + + switch (op) { + case APPLY_DISPLACEMENTS: + /* Convert displacement to object space + * and add to grid points */ + mul_v3_m3v3(disp, mat, data); + add_v3_v3v3(co, sco, disp); + break; + case CALC_DISPLACEMENTS: + /* Calculate displacement between new and old + * grid points and convert to tangent space */ + sub_v3_v3v3(disp, co, sco); + invert_m3(mat); + mul_v3_m3v3(data, mat, disp); + break; + case ADD_DISPLACEMENTS: + /* Convert subdivided displacements to tangent + * space and add to the original displacements */ + invert_m3(mat); + mul_v3_m3v3(d, mat, co); + add_v3_v3(data, d); + break; + } + + if (gpm) { + switch (op) { + case APPLY_DISPLACEMENTS: + /* Copy mask from gpm to DM */ + *CCG_grid_elem_mask(key, grid, x, y) = + paint_grid_paint_mask(gpm, key->level, x, y); + break; + case CALC_DISPLACEMENTS: + /* Copy mask from DM to gpm */ + mask = *CCG_grid_elem_mask(key, grid, x, y); + gpm->data[y * gridSize + x] = CLAMPIS(mask, 0, 1); + break; + case ADD_DISPLACEMENTS: + /* Add mask displacement to gpm */ + gpm->data[y * gridSize + x] += + *CCG_grid_elem_mask(key, grid, x, y); + break; + } + } + } + } + } +} + /* XXX WARNING: subsurf elements 
from dm and oldGridData *must* be of the same format (size), * because this code uses CCGKey's info from dm to access oldGridData's normals * (through the call to grid_tangent_matrix())! */ @@ -1015,7 +1124,7 @@ static void multiresModifier_disp_run(DerivedMesh *dm, Mesh *me, DerivedMesh *dm MDisps *mdisps = CustomData_get_layer(&me->ldata, CD_MDISPS); GridPaintMask *grid_paint_mask = NULL; int *gridOffset; - int i, k, /*numGrids, */ gridSize, dGridSize, dSkip; + int i, gridSize, dGridSize, dSkip; int totloop, totpoly; /* this happens in the dm made by bmesh_mdisps_space_set */ @@ -1051,8 +1160,6 @@ static void multiresModifier_disp_run(DerivedMesh *dm, Mesh *me, DerivedMesh *dm if (key.has_mask) grid_paint_mask = CustomData_get_layer(&me->ldata, CD_GRID_PAINT_MASK); - k = 0; /*current loop/mdisp index within the mloop array*/ - /* when adding new faces in edit mode, need to allocate disps */ for (i = 0; i < totloop; ++i) { if (mdisps[i].disps == NULL) { @@ -1061,90 +1168,25 @@ static void multiresModifier_disp_run(DerivedMesh *dm, Mesh *me, DerivedMesh *dm } } - BLI_begin_threaded_malloc(); - -#pragma omp parallel for private(i) if (totloop * gridSize * gridSize >= CCG_OMP_LIMIT) - - for (i = 0; i < totpoly; ++i) { - const int numVerts = mpoly[i].totloop; - int S, x, y, gIndex = gridOffset[i]; - - for (S = 0; S < numVerts; ++S, ++gIndex, ++k) { - GridPaintMask *gpm = grid_paint_mask ? 
&grid_paint_mask[gIndex] : NULL; - MDisps *mdisp = &mdisps[mpoly[i].loopstart + S]; - CCGElem *grid = gridData[gIndex]; - CCGElem *subgrid = subGridData[gIndex]; - float (*dispgrid)[3] = NULL; - - dispgrid = mdisp->disps; - - /* if needed, reallocate multires paint mask */ - if (gpm && gpm->level < key.level) { - gpm->level = key.level; - if (gpm->data) { - MEM_freeN(gpm->data); - } - gpm->data = MEM_callocN(sizeof(float) * key.grid_area, "gpm.data"); - } - - for (y = 0; y < gridSize; y++) { - for (x = 0; x < gridSize; x++) { - float *co = CCG_grid_elem_co(&key, grid, x, y); - float *sco = CCG_grid_elem_co(&key, subgrid, x, y); - float *data = dispgrid[dGridSize * y * dSkip + x * dSkip]; - float mat[3][3], disp[3], d[3], mask; - - /* construct tangent space matrix */ - grid_tangent_matrix(mat, &key, x, y, subgrid); - - switch (op) { - case APPLY_DISPLACEMENTS: - /* Convert displacement to object space - * and add to grid points */ - mul_v3_m3v3(disp, mat, data); - add_v3_v3v3(co, sco, disp); - break; - case CALC_DISPLACEMENTS: - /* Calculate displacement between new and old - * grid points and convert to tangent space */ - sub_v3_v3v3(disp, co, sco); - invert_m3(mat); - mul_v3_m3v3(data, mat, disp); - break; - case ADD_DISPLACEMENTS: - /* Convert subdivided displacements to tangent - * space and add to the original displacements */ - invert_m3(mat); - mul_v3_m3v3(d, mat, co); - add_v3_v3(data, d); - break; - } - - if (gpm) { - switch (op) { - case APPLY_DISPLACEMENTS: - /* Copy mask from gpm to DM */ - *CCG_grid_elem_mask(&key, grid, x, y) = - paint_grid_paint_mask(gpm, key.level, x, y); - break; - case CALC_DISPLACEMENTS: - /* Copy mask from DM to gpm */ - mask = *CCG_grid_elem_mask(&key, grid, x, y); - gpm->data[y * gridSize + x] = CLAMPIS(mask, 0, 1); - break; - case ADD_DISPLACEMENTS: - /* Add mask displacement to gpm */ - gpm->data[y * gridSize + x] += - *CCG_grid_elem_mask(&key, grid, x, y); - break; - } - } - } - } - } - } - - BLI_end_threaded_malloc(); + 
ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.min_iter_per_thread = CCG_TASK_LIMIT; + + MultiresDispRunData data = { + .op = op, + .gridData = gridData, + .subGridData = subGridData, + .key = &key, + .mpoly = mpoly, + .mdisps = mdisps, + .grid_paint_mask = grid_paint_mask, + .gridOffset = gridOffset, + .gridSize = gridSize, + .dGridSize = dGridSize, + .dSkip = dSkip + }; + + BLI_task_parallel_range(0, totpoly, &data, multires_disp_run_cb, &settings); if (op == APPLY_DISPLACEMENTS) { ccgSubSurf_stitchFaces(ccgdm->ss, 0, NULL, 0); -- cgit v1.2.3