diff options
author | Nicholas Bishop <nicholasbishop@gmail.com> | 2012-06-12 15:22:10 +0400 |
---|---|---|
committer | Nicholas Bishop <nicholasbishop@gmail.com> | 2012-06-12 15:22:10 +0400 |
commit | 2062ca6b6ce8393a85089df3a2b1fd51333450bb (patch) | |
tree | 2865b5305ee43f3274af0acbdfcd927fc131eb16 /source/blender/editors/sculpt_paint | |
parent | 552887251fa984cc7ff8e15aed59e82227d47c22 (diff) |
Decrease frequency of mallocs during multires sculpt smoothing
Patch from Jason Wilkins.
Creates a pool of allocations (one for each OpenMP thread) rather than
allocating every time do_multires_smooth_brush() is called.
Diffstat (limited to 'source/blender/editors/sculpt_paint')
-rw-r--r-- | source/blender/editors/sculpt_paint/sculpt.c | 118 |
1 files changed, 82 insertions, 36 deletions
diff --git a/source/blender/editors/sculpt_paint/sculpt.c b/source/blender/editors/sculpt_paint/sculpt.c index 98735139136..89dbe14f62a 100644 --- a/source/blender/editors/sculpt_paint/sculpt.c +++ b/source/blender/editors/sculpt_paint/sculpt.c @@ -212,6 +212,11 @@ typedef struct StrokeCache { float clip_tolerance[3]; float initial_mouse[2]; + /* Pre-allocated temporary storage used during smoothing */ + int num_threads; + float (**tmpgrid_co)[3], (**tmprow_co)[3]; + float **tmpgrid_mask, **tmprow_mask; + /* Variants */ float radius; float radius_squared; @@ -1257,6 +1262,7 @@ static void do_multires_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *no float (*tmpgrid_co)[3], (*tmprow_co)[3]; float *tmpgrid_mask, *tmprow_mask; int v1, v2, v3, v4; + int thread_num; int *grid_indices, totgrid, gridsize, i, x, y; sculpt_brush_test_init(ss, &test); @@ -1267,17 +1273,15 @@ static void do_multires_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *no NULL, &gridsize, &griddata, &gridadj); BLI_pbvh_get_grid_key(ss->pbvh, &key); - #pragma omp critical - { - if (smooth_mask) { - tmpgrid_mask = MEM_mallocN(sizeof(float) * gridsize * gridsize, "tmpgrid_mask"); - tmprow_mask = MEM_mallocN(sizeof(float) * gridsize, "tmprow_mask"); - } - else { - tmpgrid_co = MEM_mallocN(sizeof(float) * 3 * gridsize * gridsize, "tmpgrid_co"); - tmprow_co = MEM_mallocN(sizeof(float) * 3 * gridsize, "tmprow_co"); - } - } + thread_num = 0; +#ifdef _OPENMP + if (sd->flags & SCULPT_USE_OPENMP) + thread_num = omp_get_thread_num(); +#endif + tmpgrid_co = ss->cache->tmpgrid_co[thread_num]; + tmprow_co = ss->cache->tmprow_co[thread_num]; + tmpgrid_mask = ss->cache->tmpgrid_mask[thread_num]; + tmprow_mask = ss->cache->tmprow_mask[thread_num]; for (i = 0; i < totgrid; ++i) { data = griddata[grid_indices[i]]; @@ -1393,18 +1397,6 @@ static void do_multires_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *no } } } - - #pragma omp critical - { - if (smooth_mask) { - MEM_freeN(tmpgrid_mask); - MEM_freeN(tmprow_mask); - } - else { - MEM_freeN(tmpgrid_co); - MEM_freeN(tmprow_co); - } - } } static void smooth(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode, @@ -3233,6 +3225,69 @@ static void sculpt_init_mirror_clipping(Object *ob, SculptSession *ss) } } +static void sculpt_omp_start(Sculpt *sd, SculptSession *ss) +{ + StrokeCache *cache = ss->cache; + +#ifdef _OPENMP + /* If using OpenMP then create a number of threads two times the + * number of processor cores. + * Justification: Empirically I've found that two threads per + * processor gives higher throughput. */ + if (sd->flags & SCULPT_USE_OPENMP) { + cache->num_threads = 2 * omp_get_num_procs(); + omp_set_num_threads(cache->num_threads); + } + else +#endif + { + (void)sd; + cache->num_threads = 1; + } + + if (ss->multires) { + int i, gridsize, array_mem_size; + BLI_pbvh_node_get_grids(ss->pbvh, NULL, NULL, NULL, NULL, + &gridsize, NULL, NULL); + + array_mem_size = cache->num_threads * sizeof(void*); + + cache->tmpgrid_co = MEM_mallocN(array_mem_size, "tmpgrid_co array"); + cache->tmprow_co = MEM_mallocN(array_mem_size, "tmprow_co array"); + cache->tmpgrid_mask = MEM_mallocN(array_mem_size, "tmpgrid_mask array"); + cache->tmprow_mask = MEM_mallocN(array_mem_size, "tmprow_mask array"); + + for (i = 0; i < cache->num_threads; i++) { + const size_t row_size = sizeof(float) * gridsize; + const size_t co_row_size = 3 * row_size; + + cache->tmprow_co[i] = MEM_mallocN(co_row_size, "tmprow_co"); + cache->tmpgrid_co[i] = MEM_mallocN(co_row_size * gridsize, "tmpgrid_co"); + cache->tmprow_mask[i] = MEM_mallocN(row_size, "tmprow_mask"); + cache->tmpgrid_mask[i] = MEM_mallocN(row_size * gridsize, "tmpgrid_mask"); + } + } +} + +static void sculpt_omp_done(SculptSession *ss) +{ + if (ss->multires) { + int i; + + for (i = 0; i < ss->cache->num_threads; i++) { + MEM_freeN(ss->cache->tmpgrid_co[i]); + MEM_freeN(ss->cache->tmprow_co[i]); + MEM_freeN(ss->cache->tmpgrid_mask[i]); + MEM_freeN(ss->cache->tmprow_mask[i]); + } + + MEM_freeN(ss->cache->tmpgrid_co); + MEM_freeN(ss->cache->tmprow_co); + MEM_freeN(ss->cache->tmpgrid_mask); + MEM_freeN(ss->cache->tmprow_mask); + } +} + /* Initialize the stroke cache invariants from operator properties */ static void sculpt_update_cache_invariants(bContext *C, Sculpt *sd, SculptSession *ss, wmOperator *op, const float mouse[2]) { @@ -3346,6 +3401,8 @@ static void sculpt_update_cache_invariants(bContext *C, Sculpt *sd, SculptSessio cache->first_time = 1; cache->vertex_rotation = 0; + + sculpt_omp_start(sd, ss); } static void sculpt_update_brush_delta(Sculpt *sd, Object *ob, Brush *brush) @@ -3798,19 +3855,6 @@ static int sculpt_stroke_test_start(bContext *C, struct wmOperator *op, sculpt_undo_push_begin(sculpt_tool_name(sd)); -#ifdef _OPENMP - /* If using OpenMP then create a number of threads two times the - * number of processor cores. - * Justification: Empirically I've found that two threads per - * processor gives higher throughput. */ - if (sd->flags & SCULPT_USE_OPENMP) { - int num_procs; - - num_procs = omp_get_num_procs(); - omp_set_num_threads(2 * num_procs); - } -#endif - return 1; } else @@ -3847,6 +3891,8 @@ static void sculpt_stroke_done(const bContext *C, struct PaintStroke *UNUSED(str SculptSession *ss = ob->sculpt; Sculpt *sd = CTX_data_tool_settings(C)->sculpt; + sculpt_omp_done(ss); + /* reset values used to draw brush after completing the stroke */ sd->draw_anchored = 0; sd->draw_pressure = 0; |