diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2018-01-11 17:14:30 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2018-01-11 17:14:30 +0300 |
commit | 656b9a46b356d304a688d8661c1958c57b454828 (patch) | |
tree | b1a04b0df8675c45d0133d178b995ebc99a970b3 /source/blender | |
parent | c36f4a7f7f370ebb6e944d9893d2677232d3b7f8 (diff) | |
parent | 38d480fb54dbac5bb87b002e4ee22c1d3df89f90 (diff) | |
Merge branch 'master' into blender2.8
Diffstat (limited to 'source/blender')
-rw-r--r-- | source/blender/blenkernel/BKE_subsurf.h | 2 | ||||
-rw-r--r-- | source/blender/blenkernel/intern/CCGSubSurf_legacy.c | 15 | ||||
-rw-r--r-- | source/blender/blenkernel/intern/mesh_evaluate.c | 3 | ||||
-rw-r--r-- | source/blender/blenkernel/intern/subsurf_ccg.c | 131 | ||||
-rw-r--r-- | source/blender/depsgraph/intern/eval/deg_eval.cc | 3 | ||||
-rw-r--r-- | source/blender/depsgraph/intern/eval/deg_eval_flush.cc | 7 | ||||
-rw-r--r-- | source/blender/modifiers/intern/MOD_meshdeform.c | 2 |
7 files changed, 96 insertions, 67 deletions
diff --git a/source/blender/blenkernel/BKE_subsurf.h b/source/blender/blenkernel/BKE_subsurf.h index d7b9d20d7b0..96320415b16 100644 --- a/source/blender/blenkernel/BKE_subsurf.h +++ b/source/blender/blenkernel/BKE_subsurf.h @@ -144,7 +144,7 @@ typedef struct CCGDerivedMesh { struct EdgeHash *ehash; - ThreadRWMutex loops_cache_rwlock; + ThreadMutex loops_cache_lock; ThreadRWMutex origindex_cache_rwlock; } CCGDerivedMesh; diff --git a/source/blender/blenkernel/intern/CCGSubSurf_legacy.c b/source/blender/blenkernel/intern/CCGSubSurf_legacy.c index d567b50af56..2b331eae950 100644 --- a/source/blender/blenkernel/intern/CCGSubSurf_legacy.c +++ b/source/blender/blenkernel/intern/CCGSubSurf_legacy.c @@ -34,6 +34,9 @@ #define FACE_calcIFNo(f, lvl, S, x, y, no) _face_calcIFNo(f, lvl, S, x, y, no, subdivLevels, vertDataSize) +/* TODO(sergey): This actually depends on subsurf level as well. */ +#define CCG_TASK_LIMIT 16 + /* TODO(sergey): Deduplicate the following functions/ */ static void *_edge_getCoVert(CCGEdge *e, CCGVert *v, int lvl, int x, int dataSize) { @@ -340,7 +343,7 @@ static void ccgSubSurf__calcVertNormals(CCGSubSurf *ss, { ParallelRangeSettings settings; BLI_parallel_range_settings_defaults(&settings); - settings.use_threading = (numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT); + settings.min_iter_per_thread = CCG_TASK_LIMIT; BLI_task_parallel_range(0, numEffectedF, &data, ccgSubSurf__calcVertNormals_faces_accumulate_cb, @@ -374,7 +377,7 @@ static void ccgSubSurf__calcVertNormals(CCGSubSurf *ss, { ParallelRangeSettings settings; BLI_parallel_range_settings_defaults(&settings); - settings.use_threading = (numEffectedE * edgeSize * 4 >= CCG_OMP_LIMIT); + settings.min_iter_per_thread = CCG_TASK_LIMIT; BLI_task_parallel_range(0, numEffectedE, &data, ccgSubSurf__calcVertNormals_edges_accumulate_cb, @@ -384,7 +387,7 @@ static void ccgSubSurf__calcVertNormals(CCGSubSurf *ss, { ParallelRangeSettings settings; BLI_parallel_range_settings_defaults(&settings); - 
settings.use_threading = (numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT); + settings.min_iter_per_thread = CCG_TASK_LIMIT; BLI_task_parallel_range(0, numEffectedF, &data, ccgSubSurf__calcVertNormals_faces_finalize_cb, @@ -683,7 +686,7 @@ static void ccgSubSurf__calcSubdivLevel( { ParallelRangeSettings settings; BLI_parallel_range_settings_defaults(&settings); - settings.use_threading = (numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT); + settings.min_iter_per_thread = CCG_TASK_LIMIT; BLI_task_parallel_range(0, numEffectedF, &data, ccgSubSurf__calcSubdivLevel_interior_faces_edges_midpoints_cb, @@ -966,7 +969,7 @@ static void ccgSubSurf__calcSubdivLevel( { ParallelRangeSettings settings; BLI_parallel_range_settings_defaults(&settings); - settings.use_threading = (numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT); + settings.min_iter_per_thread = CCG_TASK_LIMIT; BLI_task_parallel_range(0, numEffectedF, &data, ccgSubSurf__calcSubdivLevel_interior_faces_edges_centerpoints_shift_cb, @@ -986,7 +989,7 @@ static void ccgSubSurf__calcSubdivLevel( { ParallelRangeSettings settings; BLI_parallel_range_settings_defaults(&settings); - settings.use_threading = (numEffectedF * edgeSize * edgeSize * 4 >= CCG_OMP_LIMIT); + settings.min_iter_per_thread = CCG_TASK_LIMIT; BLI_task_parallel_range(0, numEffectedF, &data, ccgSubSurf__calcSubdivLevel_verts_copydata_cb, diff --git a/source/blender/blenkernel/intern/mesh_evaluate.c b/source/blender/blenkernel/intern/mesh_evaluate.c index f25b6ed41c7..4e03155ea6c 100644 --- a/source/blender/blenkernel/intern/mesh_evaluate.c +++ b/source/blender/blenkernel/intern/mesh_evaluate.c @@ -287,12 +287,11 @@ void BKE_mesh_calc_normals_poly( int numLoops, int numPolys, float (*r_polynors)[3], const bool only_face_normals) { - const bool do_threaded = (numPolys > BKE_MESH_OMP_LIMIT); float (*pnors)[3] = r_polynors; ParallelRangeSettings settings; BLI_parallel_range_settings_defaults(&settings); - settings.use_threading = 
do_threaded; + settings.min_iter_per_thread = 1024; if (only_face_normals) { BLI_assert((pnors != NULL) || (numPolys == 0)); diff --git a/source/blender/blenkernel/intern/subsurf_ccg.c b/source/blender/blenkernel/intern/subsurf_ccg.c index 24bcd4c8476..84b82ed53bb 100644 --- a/source/blender/blenkernel/intern/subsurf_ccg.c +++ b/source/blender/blenkernel/intern/subsurf_ccg.c @@ -58,6 +58,7 @@ #include "BLI_edgehash.h" #include "BLI_math.h" #include "BLI_memarena.h" +#include "BLI_task.h" #include "BLI_threads.h" #include "BKE_pbvh.h" @@ -1476,19 +1477,70 @@ static void ccgDM_copyFinalFaceArray(DerivedMesh *dm, MFace *mface) } } +typedef struct CopyFinalLoopArrayData { + CCGDerivedMesh *ccgdm; + MLoop *mloop; + int grid_size; + int *grid_offset; + int edge_size; + size_t mloop_index; +} CopyFinalLoopArrayData; + +static void copyFinalLoopArray_task_cb( + void *__restrict userdata, + const int iter, + const ParallelRangeTLS *__restrict UNUSED(tls)) +{ + CopyFinalLoopArrayData *data = userdata; + CCGDerivedMesh *ccgdm = data->ccgdm; + CCGSubSurf *ss = ccgdm->ss; + const int grid_size = data->grid_size; + const int edge_size = data->edge_size; + CCGFace *f = ccgdm->faceMap[iter].face; + const int num_verts = ccgSubSurf_getFaceNumVerts(f); + const int grid_index = data->grid_offset[iter]; + const size_t loop_index = 4 * (size_t)grid_index * (grid_size - 1) * (grid_size - 1); + MLoop *ml = &data->mloop[loop_index]; + for (int S = 0; S < num_verts; S++) { + for (int y = 0; y < grid_size - 1; y++) { + for (int x = 0; x < grid_size - 1; x++) { + + uint v1 = getFaceIndex(ss, f, S, x + 0, y + 0, + edge_size, grid_size); + uint v2 = getFaceIndex(ss, f, S, x + 0, y + 1, + edge_size, grid_size); + uint v3 = getFaceIndex(ss, f, S, x + 1, y + 1, + edge_size, grid_size); + uint v4 = getFaceIndex(ss, f, S, x + 1, y + 0, + edge_size, grid_size); + + ml->v = v1; + ml->e = GET_UINT_FROM_POINTER(BLI_edgehash_lookup(ccgdm->ehash, v1, v2)); + ml++; + + ml->v = v2; + ml->e = 
GET_UINT_FROM_POINTER(BLI_edgehash_lookup(ccgdm->ehash, v2, v3)); + ml++; + + ml->v = v3; + ml->e = GET_UINT_FROM_POINTER(BLI_edgehash_lookup(ccgdm->ehash, v3, v4)); + ml++; + + ml->v = v4; + ml->e = GET_UINT_FROM_POINTER(BLI_edgehash_lookup(ccgdm->ehash, v4, v1)); + ml++; + } + } + } +} + static void ccgDM_copyFinalLoopArray(DerivedMesh *dm, MLoop *mloop) { CCGDerivedMesh *ccgdm = (CCGDerivedMesh *) dm; CCGSubSurf *ss = ccgdm->ss; - int index; - int totface; - int gridSize = ccgSubSurf_getGridSize(ss); - int edgeSize = ccgSubSurf_getEdgeSize(ss); - MLoop *ml; - /* DMFlagMat *faceFlags = ccgdm->faceFlags; */ /* UNUSED */ if (!ccgdm->ehash) { - BLI_rw_mutex_lock(&ccgdm->loops_cache_rwlock, THREAD_LOCK_WRITE); + BLI_mutex_lock(&ccgdm->loops_cache_lock); if (!ccgdm->ehash) { MEdge *medge; EdgeHash *ehash; @@ -1502,53 +1554,30 @@ static void ccgDM_copyFinalLoopArray(DerivedMesh *dm, MLoop *mloop) atomic_cas_ptr((void**)&ccgdm->ehash, ccgdm->ehash, ehash); } - BLI_rw_mutex_unlock(&ccgdm->loops_cache_rwlock); + BLI_mutex_unlock(&ccgdm->loops_cache_lock); } - BLI_rw_mutex_lock(&ccgdm->loops_cache_rwlock, THREAD_LOCK_READ); - totface = ccgSubSurf_getNumFaces(ss); - ml = mloop; - for (index = 0; index < totface; index++) { - CCGFace *f = ccgdm->faceMap[index].face; - int x, y, S, numVerts = ccgSubSurf_getFaceNumVerts(f); - /* int flag = (faceFlags) ? faceFlags[index * 2]: ME_SMOOTH; */ /* UNUSED */ - /* int mat_nr = (faceFlags) ? 
faceFlags[index * 2 + 1]: 0; */ /* UNUSED */ - - for (S = 0; S < numVerts; S++) { - for (y = 0; y < gridSize - 1; y++) { - for (x = 0; x < gridSize - 1; x++) { - unsigned int v1, v2, v3, v4; - - v1 = getFaceIndex(ss, f, S, x + 0, y + 0, - edgeSize, gridSize); - - v2 = getFaceIndex(ss, f, S, x + 0, y + 1, - edgeSize, gridSize); - v3 = getFaceIndex(ss, f, S, x + 1, y + 1, - edgeSize, gridSize); - v4 = getFaceIndex(ss, f, S, x + 1, y + 0, - edgeSize, gridSize); + CopyFinalLoopArrayData data; + data.ccgdm = ccgdm; + data.mloop = mloop; + data.grid_size = ccgSubSurf_getGridSize(ss); + data.grid_offset = dm->getGridOffset(dm); + data.edge_size = ccgSubSurf_getEdgeSize(ss); - ml->v = v1; - ml->e = GET_UINT_FROM_POINTER(BLI_edgehash_lookup(ccgdm->ehash, v1, v2)); - ml++; - - ml->v = v2; - ml->e = GET_UINT_FROM_POINTER(BLI_edgehash_lookup(ccgdm->ehash, v2, v3)); - ml++; + /* NOTE: For a dense subdivision we've got enough work for each face and + * hence can dedicate whole thread to single face. For less dense + * subdivision we handle multiple faces per thread. + */ + data.mloop_index = data.grid_size >= 5 ? 
1 : 8; - ml->v = v3; - ml->e = GET_UINT_FROM_POINTER(BLI_edgehash_lookup(ccgdm->ehash, v3, v4)); - ml++; + ParallelRangeSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.min_iter_per_thread = 1; - ml->v = v4; - ml->e = GET_UINT_FROM_POINTER(BLI_edgehash_lookup(ccgdm->ehash, v4, v1)); - ml++; - } - } - } - } - BLI_rw_mutex_unlock(&ccgdm->loops_cache_rwlock); + BLI_task_parallel_range(0, ccgSubSurf_getNumFaces(ss), + &data, + copyFinalLoopArray_task_cb, + &settings); } static void ccgDM_copyFinalPolyArray(DerivedMesh *dm, MPoly *mpoly) @@ -3796,7 +3825,7 @@ static void ccgDM_release(DerivedMesh *dm) MEM_freeN(ccgdm->faceMap); } - BLI_rw_mutex_end(&ccgdm->loops_cache_rwlock); + BLI_mutex_end(&ccgdm->loops_cache_lock); BLI_rw_mutex_end(&ccgdm->origindex_cache_rwlock); MEM_freeN(ccgdm); @@ -4787,7 +4816,7 @@ static CCGDerivedMesh *getCCGDerivedMesh(CCGSubSurf *ss, ccgdm->dm.numLoopData = ccgdm->dm.numPolyData * 4; ccgdm->dm.numTessFaceData = 0; - BLI_rw_mutex_init(&ccgdm->loops_cache_rwlock); + BLI_mutex_init(&ccgdm->loops_cache_lock); BLI_rw_mutex_init(&ccgdm->origindex_cache_rwlock); return ccgdm; diff --git a/source/blender/depsgraph/intern/eval/deg_eval.cc b/source/blender/depsgraph/intern/eval/deg_eval.cc index 116f853ebdf..a6c6a16a528 100644 --- a/source/blender/depsgraph/intern/eval/deg_eval.cc +++ b/source/blender/depsgraph/intern/eval/deg_eval.cc @@ -132,12 +132,11 @@ static void calculate_pending_func( static void calculate_pending_parents(Depsgraph *graph) { const int num_operations = graph->operations.size(); - const bool do_threads = (num_operations > 256); CalculatePengindData data; data.graph = graph; ParallelRangeSettings settings; BLI_parallel_range_settings_defaults(&settings); - settings.use_threading = do_threads; + settings.min_iter_per_thread = 1024; BLI_task_parallel_range(0, num_operations, &data, diff --git a/source/blender/depsgraph/intern/eval/deg_eval_flush.cc 
b/source/blender/depsgraph/intern/eval/deg_eval_flush.cc index c3b1f56a71b..74c3cd28455 100644 --- a/source/blender/depsgraph/intern/eval/deg_eval_flush.cc +++ b/source/blender/depsgraph/intern/eval/deg_eval_flush.cc @@ -100,7 +100,7 @@ BLI_INLINE void flush_prepare(Depsgraph *graph) const int num_operations = graph->operations.size(); ParallelRangeSettings settings; BLI_parallel_range_settings_defaults(&settings); - settings.use_threading = (num_operations > 256); + settings.min_iter_per_thread = 1024; BLI_task_parallel_range(0, num_operations, graph, flush_init_operation_node_func, @@ -110,7 +110,7 @@ BLI_INLINE void flush_prepare(Depsgraph *graph) const int num_id_nodes = graph->id_nodes.size(); ParallelRangeSettings settings; BLI_parallel_range_settings_defaults(&settings); - settings.use_threading = (num_id_nodes > 256); + settings.min_iter_per_thread = 1024; BLI_task_parallel_range(0, num_id_nodes, graph, flush_init_id_node_func, @@ -310,10 +310,9 @@ void deg_graph_clear_tags(Depsgraph *graph) { /* Go over all operation nodes, clearing tags. */ const int num_operations = graph->operations.size(); - const bool do_threads = num_operations > 256; ParallelRangeSettings settings; BLI_parallel_range_settings_defaults(&settings); - settings.use_threading = do_threads; + settings.min_iter_per_thread = 1024; BLI_task_parallel_range(0, num_operations, graph, graph_clear_func, diff --git a/source/blender/modifiers/intern/MOD_meshdeform.c b/source/blender/modifiers/intern/MOD_meshdeform.c index 3976433db93..47b51ac170b 100644 --- a/source/blender/modifiers/intern/MOD_meshdeform.c +++ b/source/blender/modifiers/intern/MOD_meshdeform.c @@ -399,7 +399,7 @@ static void meshdeformModifier_do( /* Do deformation. */ ParallelRangeSettings settings; BLI_parallel_range_settings_defaults(&settings); - settings.use_threading = (totvert > 1000); + settings.min_iter_per_thread = 16; BLI_task_parallel_range(0, totvert, &data, meshdeform_vert_task, |