diff options
author | Jeroen Bakker <jbakker> | 2021-06-18 17:09:35 +0300 |
---|---|---|
committer | Jeroen Bakker <jeroen@blender.org> | 2021-06-18 17:10:28 +0300 |
commit | e0f2f07d1e8bff3410edddc235b9c4d61f62c18f (patch) | |
tree | a8a20a0f983235bbed0ec570a7751490177bf221 /source/blender/draw | |
parent | 6d73d98fb62df19c03fb665cd37ff214458d7a70 (diff) |
DrawManager: Multithreaded counting of material buckets.
When a mesh has multiple materials, its triangles are sorted based on
material index. This sorting is done single-threaded, but needs two
loops over the data: one to count the bucket sizes and a second one to
add the triangles to the right position in the buckets.
This patch does the counting multithreaded in order to speed up the
cache creation. Measurements showed that this part is the most
blocking part of the cache creation.
Reviewed By: mano-wii
Differential Revision: https://developer.blender.org/D11615
Diffstat (limited to 'source/blender/draw')
3 files changed, 83 insertions, 20 deletions
diff --git a/source/blender/draw/intern/draw_cache_extract_mesh.cc b/source/blender/draw/intern/draw_cache_extract_mesh.cc index 4abe74bc190..e577069f000 100644 --- a/source/blender/draw/intern/draw_cache_extract_mesh.cc +++ b/source/blender/draw/intern/draw_cache_extract_mesh.cc @@ -50,8 +50,6 @@ # include "PIL_time_utildefines.h" #endif -#define MIM_RANGE_LEN 1024 - namespace blender::draw { /* ---------------------------------------------------------------------- */ @@ -448,7 +446,7 @@ static void extract_task_range_run(void *__restrict taskdata) settings.userdata_chunk = userdata_chunk; settings.userdata_chunk_size = userdata_chunk_size; settings.func_reduce = extract_task_reduce; - settings.min_iter_per_thread = MIM_RANGE_LEN; + settings.min_iter_per_thread = MIN_RANGE_LEN; extract_init(data->mr, data->cache, *data->extractors, data->mbc, userdata_chunk); @@ -711,7 +709,7 @@ static void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph, task_graph, mr, extraction_cache, iter_type, data_flag); /* Simple heuristic. */ - const bool use_thread = (mr->loop_len + mr->loop_loose_len) > MIM_RANGE_LEN; + const bool use_thread = (mr->loop_len + mr->loop_loose_len) > MIN_RANGE_LEN; if (use_thread) { /* First run the requested extractors that do not support asynchronous ranges. 
*/ diff --git a/source/blender/draw/intern/draw_cache_extract_mesh_private.h b/source/blender/draw/intern/draw_cache_extract_mesh_private.h index 8f2b9c26577..a258967564b 100644 --- a/source/blender/draw/intern/draw_cache_extract_mesh_private.h +++ b/source/blender/draw/intern/draw_cache_extract_mesh_private.h @@ -38,6 +38,8 @@ extern "C" { #endif +#define MIN_RANGE_LEN 1024 + /* ---------------------------------------------------------------------- */ /** \name Dependencies between buffer and batch * \{ */ diff --git a/source/blender/draw/intern/draw_cache_extract_mesh_render_data.c b/source/blender/draw/intern/draw_cache_extract_mesh_render_data.c index 494217ee908..44026c0167b 100644 --- a/source/blender/draw/intern/draw_cache_extract_mesh_render_data.c +++ b/source/blender/draw/intern/draw_cache_extract_mesh_render_data.c @@ -27,6 +27,7 @@ #include "BLI_bitmap.h" #include "BLI_math.h" +#include "BLI_task.h" #include "BKE_editmesh.h" #include "BKE_editmesh_cache.h" @@ -227,31 +228,93 @@ static void mesh_render_data_mat_offset_build(MeshRenderData *mr, MeshBufferExtr mesh_render_data_mat_offset_apply_offset(mr, cache); } +typedef struct MatOffsetUserData { + MeshRenderData *mr; + /* struct is extended during allocation to hold mat_tri_len for each material. */ + int mat_tri_len[0]; +} MatOffsetUserData; + +static void mesh_render_data_mat_offset_reduce(const void *__restrict UNUSED(userdata), + void *__restrict chunk_join, + void *__restrict chunk) +{ + MatOffsetUserData *dst = chunk_join; + MatOffsetUserData *src = chunk; + int *dst_mat_len = dst->mat_tri_len; + int *src_mat_len = src->mat_tri_len; + for (int i = 0; i < dst->mr->mat_len; i++) { + dst_mat_len[i] += src_mat_len[i]; + } +} + +static void mesh_render_data_mat_offset_build_threaded(MeshRenderData *mr, + MeshBufferExtractionCache *cache, + int face_len, + TaskParallelRangeFunc range_func) +{ + /* Extending the MatOffsetUserData with an int per material slot. 
*/ + size_t userdata_size = sizeof(MatOffsetUserData) + + (mr->mat_len) * sizeof(*cache->mat_offsets.tri); + MatOffsetUserData *userdata = MEM_callocN(userdata_size, __func__); + userdata->mr = mr; + TaskParallelSettings settings; + BLI_parallel_range_settings_defaults(&settings); + settings.userdata_chunk = userdata; + settings.userdata_chunk_size = userdata_size; + settings.min_iter_per_thread = MIN_RANGE_LEN; + settings.func_reduce = mesh_render_data_mat_offset_reduce; + BLI_task_parallel_range(0, face_len, NULL, range_func, &settings); + + memcpy(cache->mat_offsets.tri, + &userdata->mat_tri_len, + (mr->mat_len) * sizeof(*cache->mat_offsets.tri)); + MEM_freeN(userdata); +} + +static void mesh_render_data_mat_offset_bm_range(void *__restrict UNUSED(userdata), + const int iter, + const TaskParallelTLS *__restrict tls) +{ + MatOffsetUserData *mat_offset_userdata = tls->userdata_chunk; + MeshRenderData *mr = mat_offset_userdata->mr; + int *mat_tri_len = mat_offset_userdata->mat_tri_len; + + BMesh *bm = mr->bm; + BMFace *efa = BM_face_at_index(bm, iter); + if (!BM_elem_flag_test(efa, BM_ELEM_HIDDEN)) { + int mat = min_ii(efa->mat_nr, mr->mat_len - 1); + mat_tri_len[mat] += efa->len - 2; + } +} + static void mesh_render_data_mat_offset_build_bm(MeshRenderData *mr, MeshBufferExtractionCache *cache) { - int *mat_tri_len = cache->mat_offsets.tri; - BMIter iter; - BMFace *efa; - BM_ITER_MESH (efa, &iter, mr->bm, BM_FACES_OF_MESH) { - if (!BM_elem_flag_test(efa, BM_ELEM_HIDDEN)) { - int mat = min_ii(efa->mat_nr, mr->mat_len - 1); - mat_tri_len[mat] += efa->len - 2; - } + BMesh *bm = mr->bm; + mesh_render_data_mat_offset_build_threaded( + mr, cache, bm->totface, mesh_render_data_mat_offset_bm_range); +} + +static void mesh_render_data_mat_offset_mesh_range(void *__restrict UNUSED(userdata), + const int iter, + const TaskParallelTLS *__restrict tls) +{ + MatOffsetUserData *mat_offset_userdata = tls->userdata_chunk; + const MeshRenderData *mr = mat_offset_userdata->mr; + int 
*mat_tri_len = mat_offset_userdata->mat_tri_len; + + const MPoly *mp = &mr->mpoly[iter]; + if (!(mr->use_hide && (mp->flag & ME_HIDE))) { + int mat = min_ii(mp->mat_nr, mr->mat_len - 1); + mat_tri_len[mat] += mp->totloop - 2; } } static void mesh_render_data_mat_offset_build_mesh(MeshRenderData *mr, MeshBufferExtractionCache *cache) { - int *mat_tri_len = cache->mat_offsets.tri; - const MPoly *mp = mr->mpoly; - for (int mp_index = 0; mp_index < mr->poly_len; mp_index++, mp++) { - if (!(mr->use_hide && (mp->flag & ME_HIDE))) { - int mat = min_ii(mp->mat_nr, mr->mat_len - 1); - mat_tri_len[mat] += mp->totloop - 2; - } - } + mesh_render_data_mat_offset_build_threaded( + mr, cache, mr->poly_len, mesh_render_data_mat_offset_mesh_range); } static void mesh_render_data_mat_offset_apply_offset(MeshRenderData *mr, |