From a055395487a338ab5e4fe2b749df80cc87da97c5 Mon Sep 17 00:00:00 2001 From: Bastien Montagne Date: Fri, 20 May 2016 23:16:52 +0200 Subject: Dynapaint: Serious cleanup and optimization of dynamicPaint_createUVSurface (mostly). Gives 3-4% speedup in pre-bake step (from 112 to 108 seconds with own heavy test file). Note that here we have a huge potential performance boost if we replace the flat `Bounds2D *faceBB` array of UV tris bounding boxes by a real 2D AABB tree (right now, we can loop over all UV triangles times the number of pixels of the surface times 5...). --- source/blender/blenkernel/intern/dynamicpaint.c | 278 ++++++++++++------------ 1 file changed, 134 insertions(+), 144 deletions(-) diff --git a/source/blender/blenkernel/intern/dynamicpaint.c b/source/blender/blenkernel/intern/dynamicpaint.c index 9e384ebbc17..daf1e866e16 100644 --- a/source/blender/blenkernel/intern/dynamicpaint.c +++ b/source/blender/blenkernel/intern/dynamicpaint.c @@ -529,7 +529,7 @@ static int surface_getBrushFlags(DynamicPaintSurface *surface, const Scene *scen return flags; } -static int brush_usesMaterial(DynamicPaintBrushSettings *brush, Scene *scene) +static int brush_usesMaterial(const DynamicPaintBrushSettings *brush, const Scene *scene) { return ((brush->flags & MOD_DPAINT_USE_MATERIAL) && (!BKE_scene_use_new_shading_nodes(scene))); } @@ -2174,11 +2174,9 @@ int dynamicPaint_createUVSurface(Scene *scene, DynamicPaintSurface *surface) 0.4f, -0.2f, -0.4f, 0.3f, }; - int ty; - int w, h; - int tottri; + const int aa_samples = (surface->flags & MOD_DPAINT_ANTIALIAS) ? 
5 : 1; char uvname[MAX_CUSTOMDATA_LAYER_NAME]; - int active_points = 0; + uint32_t active_points = 0; bool error = false; PaintSurfaceData *sData; @@ -2193,7 +2191,6 @@ int dynamicPaint_createUVSurface(Scene *scene, DynamicPaintSurface *surface) Bounds2D *faceBB = NULL; int *final_index; - int aa_samples; if (!dm) return setError(canvas, N_("Canvas mesh not updated")); @@ -2202,7 +2199,7 @@ int dynamicPaint_createUVSurface(Scene *scene, DynamicPaintSurface *surface) mloop = dm->getLoopArray(dm); mlooptri = dm->getLoopTriArray(dm); - tottri = dm->getNumLoopTri(dm); + const int tottri = dm->getNumLoopTri(dm); /* get uv map */ if (CustomData_has_layer(&dm->loopData, CD_MLOOPUV)) { @@ -2216,7 +2213,8 @@ int dynamicPaint_createUVSurface(Scene *scene, DynamicPaintSurface *surface) if (surface->image_resolution < 16 || surface->image_resolution > 8192) return setError(canvas, N_("Invalid resolution")); - w = h = surface->image_resolution; + const int w = surface->image_resolution; + const int h = w; /* * Start generating the surface @@ -2230,16 +2228,15 @@ int dynamicPaint_createUVSurface(Scene *scene, DynamicPaintSurface *surface) if (!surface->data) return setError(canvas, N_("Not enough free memory")); - aa_samples = (surface->flags & MOD_DPAINT_ANTIALIAS) ? 
5 : 1; - tempPoints = (struct PaintUVPoint *) MEM_callocN(w * h * sizeof(struct PaintUVPoint), "Temp PaintUVPoint"); + tempPoints = MEM_callocN(w * h * sizeof(*tempPoints), "Temp PaintUVPoint"); if (!tempPoints) error = true; - final_index = (int *) MEM_callocN(w * h * sizeof(int), "Temp UV Final Indexes"); + final_index = MEM_callocN(w * h * sizeof(*final_index), "Temp UV Final Indexes"); if (!final_index) error = true; - tempWeights = (struct Vec3f *) MEM_mallocN(w * h * aa_samples * sizeof(struct Vec3f), "Temp bWeights"); + tempWeights = MEM_mallocN(w * h * aa_samples * sizeof(*tempWeights), "Temp bWeights"); if (!tempWeights) error = true; @@ -2248,31 +2245,27 @@ int dynamicPaint_createUVSurface(Scene *scene, DynamicPaintSurface *surface) * the pixel-inside-a-face search. */ if (!error) { - faceBB = (struct Bounds2D *) MEM_mallocN(tottri * sizeof(struct Bounds2D), "MPCanvasFaceBB"); + faceBB = MEM_mallocN(tottri * sizeof(*faceBB), "MPCanvasFaceBB"); if (!faceBB) error = true; } if (!error) { - for (ty = 0; ty < tottri; ty++) { - int i; - - copy_v2_v2(faceBB[ty].min, mloopuv[mlooptri[ty].tri[0]].uv); - copy_v2_v2(faceBB[ty].max, mloopuv[mlooptri[ty].tri[0]].uv); + for (int i = 0; i < tottri; i++) { + copy_v2_v2(faceBB[i].min, mloopuv[mlooptri[i].tri[0]].uv); + copy_v2_v2(faceBB[i].max, mloopuv[mlooptri[i].tri[0]].uv); - for (i = 1; i < 3; i++) { - minmax_v2v2_v2(faceBB[ty].min, faceBB[ty].max, mloopuv[mlooptri[ty].tri[i]].uv); + for (int j = 1; j < 3; j++) { + minmax_v2v2_v2(faceBB[i].min, faceBB[i].max, mloopuv[mlooptri[i].tri[j]].uv); } } /* Loop through every pixel and check if pixel is uv-mapped on a canvas face. 
*/ #pragma omp parallel for schedule(static) - for (ty = 0; ty < h; ty++) { - int tx; - for (tx = 0; tx < w; tx++) { - int i, sample; - int index = tx + w * ty; - PaintUVPoint *tPoint = (&tempPoints[index]); + for (int ty = 0; ty < h; ty++) { + for (int tx = 0; tx < w; tx++) { + const int index = tx + w * ty; + PaintUVPoint *tPoint = &tempPoints[index]; float point[5][2]; /* Init per pixel settings */ @@ -2302,14 +2295,15 @@ int dynamicPaint_createUVSurface(Scene *scene, DynamicPaintSurface *surface) /* Loop through samples, starting from middle point */ - for (sample = 0; sample < 5; sample++) { + for (int sample = 0; sample < 5; sample++) { /* Loop through every face in the mesh */ - for (i = 0; i < tottri; i++) { + /* XXX TODO This is *horrible* with big meshes, should use a 2D BVHTree over UV tris here! */ + for (int i = 0; i < tottri; i++) { /* Check uv bb */ - if ((faceBB[i].min[0] > (point[sample][0])) || - (faceBB[i].min[1] > (point[sample][1])) || - (faceBB[i].max[0] < (point[sample][0])) || - (faceBB[i].max[1] < (point[sample][1]))) + if ((faceBB[i].min[0] > point[sample][0]) || + (faceBB[i].min[1] > point[sample][1]) || + (faceBB[i].max[0] < point[sample][0]) || + (faceBB[i].max[1] < point[sample][1])) { continue; } @@ -2353,42 +2347,51 @@ int dynamicPaint_createUVSurface(Scene *scene, DynamicPaintSurface *surface) * (To avoid seams on uv island edges) */ #pragma omp parallel for schedule(static) - for (ty = 0; ty < h; ty++) { - int tx; - for (tx = 0; tx < w; tx++) { - int index = tx + w * ty; - PaintUVPoint *tPoint = (&tempPoints[index]); + for (int ty = 0; ty < h; ty++) { + for (int tx = 0; tx < w; tx++) { + const int index = tx + w * ty; + PaintUVPoint *tPoint = &tempPoints[index]; - /* If point isn't't on canvas mesh */ + /* If point isn't on canvas mesh */ if (tPoint->tri_index == -1) { - int u_min, u_max, v_min, v_max; - int u, v, ind; float point[2]; /* get loop area */ - u_min = (tx > 0) ? -1 : 0; - u_max = (tx < (w - 1)) ? 
1 : 0; - v_min = (ty > 0) ? -1 : 0; - v_max = (ty < (h - 1)) ? 1 : 0; + const int u_min = (tx > 0) ? -1 : 0; + const int u_max = (tx < (w - 1)) ? 1 : 0; + const int v_min = (ty > 0) ? -1 : 0; + const int v_max = (ty < (h - 1)) ? 1 : 0; point[0] = ((float)tx + 0.5f) / w; point[1] = ((float)ty + 0.5f) / h; /* search through defined area for neighbor */ - for (u = u_min; u <= u_max; u++) - for (v = v_min; v <= v_max; v++) { + for (int u = u_min; u <= u_max; u++) { + for (int v = v_min; v <= v_max; v++) { /* if not this pixel itself */ if (u != 0 || v != 0) { - ind = (tx + u) + w * (ty + v); + const int ind = (tx + u) + w * (ty + v); - /* if neighbor has index */ - if (tempPoints[ind].tri_index != -1) { + /* if neighbor has index */ + if (tempPoints[ind].neighbour_pixel == -1 && tempPoints[ind].tri_index != -1) { float uv[2]; const int i = tempPoints[ind].tri_index; const float *uv1 = mloopuv[mlooptri[i].tri[0]].uv; const float *uv2 = mloopuv[mlooptri[i].tri[1]].uv; const float *uv3 = mloopuv[mlooptri[i].tri[2]].uv; + /* tri index */ + /* There is a low possibility of actually having a neighbor point which tri is + * already set from another neighbor in a separate thread here. + * Checking for both tri_index and neighbour_pixel above reduces that probability + * but it remains possible. + * That atomic op (and its memory fence) ensures tPoint->neighbour_pixel is set + * to a value other than -1 *before* its tri_index is set (i.e. that it cannot be used as a neighbour). 
+ */ + tPoint->neighbour_pixel = ind - 1; + atomic_add_uint32(&tPoint->neighbour_pixel, 1); + tPoint->tri_index = i; + /* Now calculate pixel data for this pixel as it was on polygon surface */ /* Add b-weights per anti-aliasing sample */ for (int j = 0; j < aa_samples; j++) { @@ -2397,9 +2400,6 @@ int dynamicPaint_createUVSurface(Scene *scene, DynamicPaintSurface *surface) barycentric_weights_v2(uv1, uv2, uv3, uv, tempWeights[index * aa_samples + j].v); } - /* Set values */ - tPoint->neighbour_pixel = ind; /* tri index */ - /* save vertex indexes */ tPoint->v1 = mloop[mlooptri[i].tri[0]].v; tPoint->v2 = mloop[mlooptri[i].tri[1]].v; @@ -2410,24 +2410,12 @@ int dynamicPaint_createUVSurface(Scene *scene, DynamicPaintSurface *surface) } } } + } } - } - } - /* - * When base loop is over convert found neighbor indexes to real ones - * Also count the final number of active surface points - */ - for (ty = 0; ty < h; ty++) { - int tx; - for (tx = 0; tx < w; tx++) { - int index = tx + w * ty; - PaintUVPoint *tPoint = &tempPoints[index]; - - if (tPoint->tri_index == -1 && tPoint->neighbour_pixel != -1) - tPoint->tri_index = tempPoints[tPoint->neighbour_pixel].tri_index; + /* Increase the final number of active surface points if relevant. 
*/ if (tPoint->tri_index != -1) - active_points++; + atomic_add_uint32(&active_points, 1); } } @@ -2450,20 +2438,17 @@ int dynamicPaint_createUVSurface(Scene *scene, DynamicPaintSurface *surface) if (sData->adj_data) { PaintAdjData *ed = sData->adj_data; unsigned int n_pos = 0; - for (ty = 0; ty < h; ty++) { - int tx; - for (tx = 0; tx < w; tx++) { - int index = tx + w * ty; + for (int ty = 0; ty < h; ty++) { + for (int tx = 0; tx < w; tx++) { + const int index = tx + w * ty; if (tempPoints[index].tri_index != -1) { ed->n_index[final_index[index]] = n_pos; ed->n_num[final_index[index]] = 0; for (int i = 0; i < 8; i++) { - - /* Try to find a neighboring pixel in defined direction - * If not found, -1 is returned */ - int n_target = dynamicPaint_findNeighbourPixel(tempPoints, dm, uvname, w, h, tx, ty, i); + /* Try to find a neighboring pixel in defined direction. If not found, -1 is returned */ + const int n_target = dynamicPaint_findNeighbourPixel(tempPoints, dm, uvname, w, h, tx, ty, i); if (n_target >= 0) { ed->n_target[n_pos] = final_index[n_target]; @@ -2481,43 +2466,40 @@ int dynamicPaint_createUVSurface(Scene *scene, DynamicPaintSurface *surface) } /* Create final surface data without inactive points */ - { - ImgSeqFormatData *f_data = MEM_callocN(sizeof(struct ImgSeqFormatData), "ImgSeqFormatData"); - if (f_data) { - f_data->uv_p = MEM_callocN(active_points * sizeof(struct PaintUVPoint), "PaintUVPoint"); - f_data->barycentricWeights = MEM_callocN(active_points * aa_samples * sizeof(struct Vec3f), "PaintUVPoint"); + ImgSeqFormatData *f_data = MEM_callocN(sizeof(*f_data), "ImgSeqFormatData"); + if (f_data) { + f_data->uv_p = MEM_callocN(active_points * sizeof(*f_data->uv_p), "PaintUVPoint"); + f_data->barycentricWeights = MEM_callocN(active_points * aa_samples * sizeof(*f_data->barycentricWeights), + "PaintUVPoint"); - if (!f_data->uv_p || !f_data->barycentricWeights) - error = 1; - } - else { + if (!f_data->uv_p || !f_data->barycentricWeights) error = 1; - } + 
} + else { + error = 1; + } - sData->total_points = active_points; - - /* in case of allocation error, free everything */ - if (error) { - if (f_data) { - if (f_data->uv_p) - MEM_freeN(f_data->uv_p); - if (f_data->barycentricWeights) - MEM_freeN(f_data->barycentricWeights); - MEM_freeN(f_data); - } + /* in case of allocation error, free everything */ + if (error) { + if (f_data) { + if (f_data->uv_p) + MEM_freeN(f_data->uv_p); + if (f_data->barycentricWeights) + MEM_freeN(f_data->barycentricWeights); + MEM_freeN(f_data); } - else { - int index, cursor = 0; - sData->total_points = active_points; - sData->format_data = f_data; - - for (index = 0; index < (w * h); index++) { - if (tempPoints[index].tri_index != -1) { - memcpy(&f_data->uv_p[cursor], &tempPoints[index], sizeof(PaintUVPoint)); - memcpy(&f_data->barycentricWeights[cursor * aa_samples], &tempWeights[index * aa_samples], - sizeof(Vec3f) * aa_samples); - cursor++; - } + sData->total_points = 0; + } + else { + sData->total_points = (int)active_points; + sData->format_data = f_data; + + for (int index = 0, cursor = 0; index < (w * h); index++) { + if (tempPoints[index].tri_index != -1) { + memcpy(&f_data->uv_p[cursor], &tempPoints[index], sizeof(PaintUVPoint)); + memcpy(&f_data->barycentricWeights[cursor * aa_samples], &tempWeights[index * aa_samples], + sizeof(*tempWeights) * aa_samples); + cursor++; } } } @@ -2542,7 +2524,6 @@ int dynamicPaint_createUVSurface(Scene *scene, DynamicPaintSurface *surface) /* ----------------------------------------------------------------- * For debug, output pixel statuses to the color map * -----------------------------------------------------------------*/ -#pragma omp parallel for schedule(static) for (index = 0; index < sData->total_points; index++) { ImgSeqFormatData *f_data = (ImgSeqFormatData *)sData->format_data; PaintUVPoint *uvPoint = &((PaintUVPoint *)f_data->uv_p)[index]; @@ -2553,14 +2534,14 @@ int dynamicPaint_createUVSurface(Scene *scene, DynamicPaintSurface 
*surface) if (uvPoint->neighbour_pixel != -1) pPoint->color[2] = 1.0f; /* and every pixel that finally got an polygon gets red color */ - if (uvPoint->tri_index != -1) - pPoint->color[0] = 1.0f; /* green color shows pixel face index hash */ - if (uvPoint->tri_index != -1) + if (uvPoint->tri_index != -1) { + pPoint->color[0] = 1.0f; pPoint->color[1] = (float)(uvPoint->tri_index % 255) / 256.0f; + } } - #endif + dynamicPaint_setInitialColor(scene, surface); } @@ -2736,7 +2717,7 @@ static void dynamicPaint_freeBrushMaterials(BrushMaterials *bMats) * Get material diffuse color and alpha (including linked textures) in given coordinates */ static void dynamicPaint_doMaterialTex( - BrushMaterials *bMats, float color[3], float *alpha, Object *brushOb, + const BrushMaterials *bMats, float color[3], float *alpha, Object *brushOb, const float volume_co[3], const float surface_co[3], int triIndex, DerivedMesh *orcoDm) { @@ -2837,15 +2818,15 @@ static void mesh_tris_nearest_point_dp(void *userdata, int index, const float co * operations when using substeps */ static void dynamicPaint_mixPaintColors( - DynamicPaintSurface *surface, int index, int paintFlags, - const float paintColor[3], float *paintAlpha, float *paintWetness, float *timescale) + const DynamicPaintSurface *surface, const int index, const int paintFlags, + const float paintColor[3], const float paintAlpha, const float paintWetness, const float timescale) { PaintPoint *pPoint = &((PaintPoint *)surface->data->type_data)[index]; /* Add paint */ if (!(paintFlags & MOD_DPAINT_ERASE)) { float mix[4]; - float temp_alpha = (*paintAlpha) * ((paintFlags & MOD_DPAINT_ABS_ALPHA) ? 1.0f : (*timescale)); + float temp_alpha = paintAlpha * ((paintFlags & MOD_DPAINT_ABS_ALPHA) ? 
1.0f : timescale); /* mix brush color with wet layer color */ blendColors(pPoint->e_color, pPoint->e_color[3], paintColor, temp_alpha, mix); @@ -2854,11 +2835,11 @@ static void dynamicPaint_mixPaintColors( /* mix wetness and alpha depending on selected alpha mode */ if (paintFlags & MOD_DPAINT_ABS_ALPHA) { /* update values to the brush level unless theyre higher already */ - CLAMP_MIN(pPoint->e_color[3], *paintAlpha); - CLAMP_MIN(pPoint->wetness, *paintWetness); + CLAMP_MIN(pPoint->e_color[3], paintAlpha); + CLAMP_MIN(pPoint->wetness, paintWetness); } else { - float wetness = (*paintWetness); + float wetness = paintWetness; CLAMP(wetness, 0.0f, 1.0f); pPoint->e_color[3] = mix[3]; pPoint->wetness = pPoint->wetness * (1.0f - wetness) + wetness; @@ -2872,7 +2853,7 @@ static void dynamicPaint_mixPaintColors( else { float a_ratio, a_highest; float wetness; - float invFact = 1.0f - (*paintAlpha); + float invFact = 1.0f - paintAlpha; /* * Make highest alpha to match erased value @@ -2888,48 +2869,57 @@ static void dynamicPaint_mixPaintColors( } } else { - pPoint->e_color[3] -= (*paintAlpha) * (*timescale); + pPoint->e_color[3] -= paintAlpha * timescale; CLAMP_MIN(pPoint->e_color[3], 0.0f); - pPoint->color[3] -= (*paintAlpha) * (*timescale); + pPoint->color[3] -= paintAlpha * timescale; CLAMP_MIN(pPoint->color[3], 0.0f); } - wetness = (1.0f - (*paintWetness)) * pPoint->e_color[3]; + wetness = (1.0f - paintWetness) * pPoint->e_color[3]; CLAMP_MAX(pPoint->wetness, wetness); } } /* applies given brush intersection value for wave surface */ -static void dynamicPaint_mixWaveHeight(PaintWavePoint *wPoint, DynamicPaintBrushSettings *brush, float isect_height) +static void dynamicPaint_mixWaveHeight( + PaintWavePoint *wPoint, const DynamicPaintBrushSettings *brush, float isect_height) { - float isect_change = isect_height - wPoint->brush_isect; + const float isect_change = isect_height - wPoint->brush_isect; + const float wave_factor = brush->wave_factor; bool hit = false; /* 
intersection marked regardless of brush type or hit */ wPoint->brush_isect = isect_height; wPoint->state = DPAINT_WAVE_ISECT_CHANGED; - isect_height *= brush->wave_factor; + isect_height *= wave_factor; /* determine hit depending on wave_factor */ - if (brush->wave_factor > 0.0f && wPoint->height > isect_height) + if (wave_factor > 0.0f && wPoint->height > isect_height) hit = true; - else if (brush->wave_factor < 0.0f && wPoint->height < isect_height) + else if (wave_factor < 0.0f && wPoint->height < isect_height) hit = true; if (hit) { - if (brush->wave_type == MOD_DPAINT_WAVEB_DEPTH) { - wPoint->height = isect_height; - wPoint->state = DPAINT_WAVE_OBSTACLE; - wPoint->velocity = 0.0f; - } - else if (brush->wave_type == MOD_DPAINT_WAVEB_FORCE) - wPoint->velocity = isect_height; - else if (brush->wave_type == MOD_DPAINT_WAVEB_REFLECT) - wPoint->state = DPAINT_WAVE_REFLECT_ONLY; - else if (brush->wave_type == MOD_DPAINT_WAVEB_CHANGE) { - if (isect_change < 0.0f) - wPoint->height += isect_change * brush->wave_factor; + switch (brush->wave_type) { + case MOD_DPAINT_WAVEB_DEPTH: + wPoint->height = isect_height; + wPoint->state = DPAINT_WAVE_OBSTACLE; + wPoint->velocity = 0.0f; + break; + case MOD_DPAINT_WAVEB_FORCE: + wPoint->velocity = isect_height; + break; + case MOD_DPAINT_WAVEB_REFLECT: + wPoint->state = DPAINT_WAVE_REFLECT_ONLY; + break; + case MOD_DPAINT_WAVEB_CHANGE: + if (isect_change < 0.0f) + wPoint->height += isect_change * wave_factor; + break; + default: + BLI_assert(0); + break; } } } @@ -2938,8 +2928,8 @@ static void dynamicPaint_mixWaveHeight(PaintWavePoint *wPoint, DynamicPaintBrush * add brush results to the surface data depending on surface type */ static void dynamicPaint_updatePointData( - DynamicPaintSurface *surface, unsigned int index, DynamicPaintBrushSettings *brush, - float paint[3], float influence, float depth, float vel_factor, float timescale) + const DynamicPaintSurface *surface, const int index, const DynamicPaintBrushSettings *brush, + 
float paint[3], float influence, float depth, float vel_factor, const float timescale) { PaintSurfaceData *sData = surface->data; float strength; @@ -2973,7 +2963,7 @@ static void dynamicPaint_updatePointData( float paintWetness = brush->wetness * strength; float paintAlpha = strength; - dynamicPaint_mixPaintColors(surface, index, brush->flags, paint, &paintAlpha, &paintWetness, &timescale); + dynamicPaint_mixPaintColors(surface, index, brush->flags, paint, paintAlpha, paintWetness, timescale); } /* displace surface */ else if (surface->type == MOD_DPAINT_SURFACE_T_DISPLACE) { -- cgit v1.2.3