From bf34b0c8f4b8c64bcc4ec0f3371d343e9c2fe029 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Tue, 2 Jun 2020 15:07:17 +0200 Subject: DrawManager: Graph Task Scheduling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch uses a graph flow scheduler for creating all mesh batches. On a Ryzen 1700 the framerate of Spring/020_02A.anim.blend went from 10 fps to 11.5 fps. For each mesh where batches needs to be updated a sub-graph will be added to the task_graph. This sub-graph starts with an extract_render_data_node. This fills/converts the required data from Mesh. Small extractions and extractions that can't be multi-threaded are grouped in a single `extract_single_threaded_task_node`. Other extractions will create a node for each loop exceeding 4096 items. these nodes are linked to the `user_data_init_task_node`. the `user_data_init_task_node` prepares the userdata needed for the extraction based on the data extracted from the mesh. Note: If the `lines` and `lines_loose` are requested, the `lines_loose` sub-buffer is created as part of the lines extraction. When the lines_loose is only requested the sub-buffer is created from the existing `lines` buffer. It is assumed that the lines buffer is always requested before or together with the lines_loose what is always the case (see `DRW_batch_requested(cache->batch.loose_edges, GPU_PRIM_LINES)` in `draw_cache_impl_mesh.c`). Reviewed By: Clément Foucault Differential Revision: https://developer.blender.org/D7618 --- source/blender/draw/intern/DRW_render.h | 2 + source/blender/draw/intern/draw_cache.c | 6 +- source/blender/draw/intern/draw_cache_extract.h | 5 +- .../blender/draw/intern/draw_cache_extract_mesh.c | 465 +++++++++++++++------ source/blender/draw/intern/draw_cache_impl.h | 4 +- source/blender/draw/intern/draw_cache_impl_mesh.c | 25 +- source/blender/draw/intern/draw_manager.c | 33 +- source/blender/draw/intern/draw_manager.h | 3 + source/blender/editors/uvedit/uvedit_draw.c | 18 +- 9 files changed, 424 insertions(+), 137 deletions(-) (limited to 'source/blender') diff --git a/source/blender/draw/intern/DRW_render.h b/source/blender/draw/intern/DRW_render.h index e6e66eea32f..e4088b5654f 100644 --- a/source/blender/draw/intern/DRW_render.h +++ b/source/blender/draw/intern/DRW_render.h @@ -696,6 +696,8 @@ typedef struct DRWContextState { struct Depsgraph *depsgraph; + struct TaskGraph *task_graph; + eObjectMode object_mode; eGPUShaderConfig sh_cfg; diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c index 23d0d74534d..c23ea3d7c82 100644 --- a/source/blender/draw/intern/draw_cache.c +++ b/source/blender/draw/intern/draw_cache.c @@ -3534,13 +3534,15 @@ void drw_batch_cache_generate_requested(Object *ob) struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob); switch (ob->type) { case OB_MESH: - DRW_mesh_batch_cache_create_requested(ob, (Mesh *)ob->data, scene, is_paint_mode, use_hide); + DRW_mesh_batch_cache_create_requested( + DST.task_graph, ob, (Mesh *)ob->data, scene, is_paint_mode, use_hide); break; case OB_CURVE: case OB_FONT: case OB_SURF: if (mesh_eval) { - DRW_mesh_batch_cache_create_requested(ob, mesh_eval, scene, is_paint_mode, use_hide); + DRW_mesh_batch_cache_create_requested( + DST.task_graph, ob, mesh_eval, scene, is_paint_mode, use_hide); } DRW_curve_batch_cache_create_requested(ob); break; diff --git a/source/blender/draw/intern/draw_cache_extract.h b/source/blender/draw/intern/draw_cache_extract.h index b2fea957227..f203c2ff1ea 100644 --- a/source/blender/draw/intern/draw_cache_extract.h +++ b/source/blender/draw/intern/draw_cache_extract.h @@ -23,6 +23,8 @@ #ifndef __DRAW_CACHE_EXTRACT_H__ #define __DRAW_CACHE_EXTRACT_H__ +struct TaskGraph; + /* Vertex Group Selection and display options */ typedef struct DRW_MeshWeightState { int defgroup_active; @@ -249,7 +251,8 @@ typedef struct MeshBatchCache { bool no_loose_wire; } MeshBatchCache; -void mesh_buffer_cache_create_requested(MeshBatchCache *cache, +void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph, + MeshBatchCache *cache, MeshBufferCache mbc, Mesh *me, const bool is_editmode, diff --git a/source/blender/draw/intern/draw_cache_extract_mesh.c b/source/blender/draw/intern/draw_cache_extract_mesh.c index cbb5f5f06ff..916ee9fe576 100644 --- a/source/blender/draw/intern/draw_cache_extract_mesh.c +++ b/source/blender/draw/intern/draw_cache_extract_mesh.c @@ -134,15 +134,12 @@ typedef struct MeshRenderData { float (*poly_normals)[3]; int *lverts, *ledges; } MeshRenderData; - static MeshRenderData *mesh_render_data_create(Mesh *me, const bool is_editmode, const bool is_paint_mode, const float obmat[4][4], const bool do_final, const bool do_uvedit, - const eMRIterType iter_type, - const eMRDataType data_flag, const DRW_MeshCDMask *UNUSED(cd_used), const ToolSettings *ts) { @@ -152,9 +149,6 @@ static MeshRenderData *mesh_render_data_create(Mesh *me, copy_m4_m4(mr->obmat, obmat); - const bool is_auto_smooth = (me->flag & ME_AUTOSMOOTH) != 0; - const float split_angle = is_auto_smooth ? me->smoothresh : (float)M_PI; - if (is_editmode) { BLI_assert(me->edit_mesh->mesh_eval_cage && me->edit_mesh->mesh_eval_final); mr->bm = me->edit_mesh->bm; @@ -244,7 +238,32 @@ static MeshRenderData *mesh_render_data_create(Mesh *me, mr->v_origindex = CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX); mr->e_origindex = CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX); mr->p_origindex = CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX); + } + else { + /* BMesh */ + BMesh *bm = mr->bm; + + mr->vert_len = bm->totvert; + mr->edge_len = bm->totedge; + mr->loop_len = bm->totloop; + mr->poly_len = bm->totface; + mr->tri_len = poly_to_tri_count(mr->poly_len, mr->loop_len); + } + + return mr; +} +/* Part of the creation of the MeshRenderData that happens in a thread. */ +static void mesh_render_data_update(MeshRenderData *mr, + const eMRIterType iter_type, + const eMRDataType data_flag) +{ + Mesh *me = mr->me; + const bool is_auto_smooth = (me->flag & ME_AUTOSMOOTH) != 0; + const float split_angle = is_auto_smooth ? me->smoothresh : (float)M_PI; + + if (mr->extract_type != MR_EXTRACT_BMESH) { + /* Mesh */ if (data_flag & (MR_DATA_POLY_NOR | MR_DATA_LOOP_NOR | MR_DATA_TAN_LOOP_NOR)) { mr->poly_normals = MEM_mallocN(sizeof(*mr->poly_normals) * mr->poly_len, __func__); BKE_mesh_calc_normals_poly((MVert *)mr->mvert, @@ -323,13 +342,6 @@ static MeshRenderData *mesh_render_data_create(Mesh *me, else { /* BMesh */ BMesh *bm = mr->bm; - - mr->vert_len = bm->totvert; - mr->edge_len = bm->totedge; - mr->loop_len = bm->totloop; - mr->poly_len = bm->totface; - mr->tri_len = poly_to_tri_count(mr->poly_len, mr->loop_len); - if (data_flag & MR_DATA_POLY_NOR) { /* Use bmface->no instead. */ } @@ -394,7 +406,6 @@ static MeshRenderData *mesh_render_data_create(Mesh *me, mr->loop_loose_len = mr->vert_loose_len + mr->edge_loose_len * 2; } } - return mr; } static void mesh_render_data_free(MeshRenderData *mr) @@ -742,46 +753,15 @@ static const MeshExtract extract_lines = { 0, false, }; - /** \} */ /* ---------------------------------------------------------------------- */ -/** \name Extract Loose Edges Indices +/** \name Extract Loose Edges Sub Buffer * \{ */ -static void *extract_lines_loose_init(const MeshRenderData *UNUSED(mr), void *UNUSED(buf)) -{ - return NULL; -} - -static void extract_lines_loose_ledge_mesh(const MeshRenderData *UNUSED(mr), - int UNUSED(e), - const MEdge *UNUSED(medge), - void *UNUSED(elb)) -{ - /* This function is intentionally empty. The existence of this functions ensures that - * `iter_type` `MR_ITER_LVERT` is set when initializing the `MeshRenderData` (See - * `mesh_extract_iter_type`). This flag ensures that `mr->edge_loose_len` field is filled. This - * field we use in the `extract_lines_loose_finish` function to create a subrange from the - * `ibo.lines`. */ -} - -static void extract_lines_loose_ledge_bmesh(const MeshRenderData *UNUSED(mr), - int UNUSED(e), - BMEdge *UNUSED(eed), - void *UNUSED(elb)) -{ - /* This function is intentionally empty. The existence of this functions ensures that - * `iter_type` `MR_ITER_LVERT` is set when initializing the `MeshRenderData` (See - * `mesh_extract_iter_type`). This flag ensures that `mr->edge_loose_len` field is filled. This - * field we use in the `extract_lines_loose_finish` function to create a subrange from the - * `ibo.lines`. */ -} - -static void extract_lines_loose_finish(const MeshRenderData *mr, - void *UNUSED(ibo), - void *UNUSED(elb)) +static void extract_lines_loose_subbuffer(const MeshRenderData *mr) { + BLI_assert(mr->cache->final.ibo.lines); /* Multiply by 2 because these are edges indices. */ const int start = mr->edge_len * 2; const int len = mr->edge_loose_len * 2; @@ -790,17 +770,24 @@ static void extract_lines_loose_finish(const MeshRenderData *mr, mr->cache->no_loose_wire = (len == 0); } -static const MeshExtract extract_lines_loose = { - extract_lines_loose_init, - NULL, - NULL, +static void extract_lines_with_lines_loose_finish(const MeshRenderData *mr, void *ibo, void *elb) +{ + GPU_indexbuf_build_in_place(elb, ibo); + extract_lines_loose_subbuffer(mr); + MEM_freeN(elb); +} + +static const MeshExtract extract_lines_with_lines_loose = { + extract_lines_init, NULL, NULL, - extract_lines_loose_ledge_bmesh, - extract_lines_loose_ledge_mesh, + extract_lines_loop_bmesh, + extract_lines_loop_mesh, + extract_lines_ledge_bmesh, + extract_lines_ledge_mesh, NULL, NULL, - extract_lines_loose_finish, + extract_lines_with_lines_loose_finish, 0, false, }; @@ -1716,8 +1703,8 @@ static void extract_lnor_hq_loop_mesh( } /* Flag for paint mode overlay. - * Only use MR_EXTRACT_MAPPED in edit mode where it is used to display the edge-normals. In paint - * mode it will use the unmapped data to draw the wireframe. */ + * Only use MR_EXTRACT_MAPPED in edit mode where it is used to display the edge-normals. In + * paint mode it will use the unmapped data to draw the wireframe. */ if (mpoly->flag & ME_HIDE || (mr->edit_bmesh && mr->extract_type == MR_EXTRACT_MAPPED && (mr->v_origindex) && mr->v_origindex[mloop->v] == ORIGINDEX_NONE)) { @@ -1795,8 +1782,8 @@ static void extract_lnor_loop_mesh( } /* Flag for paint mode overlay. - * Only use MR_EXTRACT_MAPPED in edit mode where it is used to display the edge-normals. In paint - * mode it will use the unmapped data to draw the wireframe. */ + * Only use MR_EXTRACT_MAPPED in edit mode where it is used to display the edge-normals. In + * paint mode it will use the unmapped data to draw the wireframe. */ if (mpoly->flag & ME_HIDE || (mr->edit_bmesh && mr->extract_type == MR_EXTRACT_MAPPED && (mr->v_origindex) && mr->v_origindex[mloop->v] == ORIGINDEX_NONE)) { @@ -4522,10 +4509,14 @@ static const MeshExtract extract_fdot_idx = { /** \} */ /* ---------------------------------------------------------------------- */ -/** \name Extract Loop +/** \name ExtractTaskData * \{ */ +typedef struct ExtractUserData { + void *user_data; +} ExtractUserData; typedef struct ExtractTaskData { + void *next, *prev; const MeshRenderData *mr; const MeshExtract *extract; eMRIterType iter_type; @@ -4533,9 +4524,16 @@ typedef struct ExtractTaskData { /** Decremented each time a task is finished. */ int32_t *task_counter; void *buf; - void *user_data; + ExtractUserData *user_data; } ExtractTaskData; +static void extract_task_data_free(void *data) +{ + ExtractTaskData *task_data = data; + MEM_SAFE_FREE(task_data->user_data); + MEM_freeN(task_data); +} + BLI_INLINE void mesh_extract_iter(const MeshRenderData *mr, const eMRIterType iter_type, int start, @@ -4612,31 +4610,184 @@ BLI_INLINE void mesh_extract_iter(const MeshRenderData *mr, } } -static void extract_run(TaskPool *__restrict UNUSED(pool), void *taskdata) +static void extract_init(ExtractTaskData *data) +{ + data->user_data->user_data = data->extract->init(data->mr, data->buf); +} + +static void extract_run(void *__restrict taskdata) { - ExtractTaskData *data = taskdata; - mesh_extract_iter( - data->mr, data->iter_type, data->start, data->end, data->extract, data->user_data); + ExtractTaskData *data = (ExtractTaskData *)taskdata; + mesh_extract_iter(data->mr, + data->iter_type, + data->start, + data->end, + data->extract, + data->user_data->user_data); /* If this is the last task, we do the finish function. */ int remainin_tasks = atomic_sub_and_fetch_int32(data->task_counter, 1); if (remainin_tasks == 0 && data->extract->finish != NULL) { - data->extract->finish(data->mr, data->buf, data->user_data); + data->extract->finish(data->mr, data->buf, data->user_data->user_data); + } +} + +static void extract_init_and_run(void *__restrict taskdata) +{ + extract_init((ExtractTaskData *)taskdata); + extract_run(taskdata); +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Task Node - Update Mesh Render Data + * \{ */ +typedef struct MeshRenderDataUpdateTaskData { + MeshRenderData *mr; + eMRIterType iter_type; + eMRDataType data_flag; +} MeshRenderDataUpdateTaskData; + +static void mesh_render_data_update_task_data_free(MeshRenderDataUpdateTaskData *taskdata) +{ + BLI_assert(taskdata); + mesh_render_data_free(taskdata->mr); + MEM_freeN(taskdata); +} + +static void mesh_extract_render_data_node_exec(void *__restrict task_data) +{ + MeshRenderDataUpdateTaskData *update_task_data = task_data; + mesh_render_data_update( + update_task_data->mr, update_task_data->iter_type, update_task_data->data_flag); +} + +static struct TaskNode *mesh_extract_render_data_node_create(struct TaskGraph *task_graph, + MeshRenderData *mr, + const eMRIterType iter_type, + const eMRDataType data_flag) +{ + MeshRenderDataUpdateTaskData *task_data = MEM_mallocN(sizeof(MeshRenderDataUpdateTaskData), + __func__); + task_data->mr = mr; + task_data->iter_type = iter_type; + task_data->data_flag = data_flag; + + struct TaskNode *task_node = BLI_task_graph_node_create( + task_graph, + mesh_extract_render_data_node_exec, + task_data, + (TaskGraphNodeFreeFunction)mesh_render_data_update_task_data_free); + return task_node; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Task Node - Extract Single Threaded + * \{ */ +typedef struct ExtractSingleThreadedTaskData { + ListBase task_datas; +} ExtractSingleThreadedTaskData; + +static void extract_single_threaded_task_data_free(ExtractSingleThreadedTaskData *taskdata) +{ + BLI_assert(taskdata); + LISTBASE_FOREACH_MUTABLE (ExtractTaskData *, td, &taskdata->task_datas) { + extract_task_data_free(td); + } + BLI_listbase_clear(&taskdata->task_datas); + MEM_freeN(taskdata); +} + +static void extract_single_threaded_task_node_exec(void *__restrict task_data) +{ + ExtractSingleThreadedTaskData *extract_task_data = task_data; + LISTBASE_FOREACH (ExtractTaskData *, td, &extract_task_data->task_datas) { + extract_init_and_run(td); + } +} + +static struct TaskNode *extract_single_threaded_task_node_create( + struct TaskGraph *task_graph, ExtractSingleThreadedTaskData *task_data) +{ + struct TaskNode *task_node = BLI_task_graph_node_create( + task_graph, + extract_single_threaded_task_node_exec, + task_data, + (TaskGraphNodeFreeFunction)extract_single_threaded_task_data_free); + return task_node; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Task Node - UserData Initializer + * \{ */ +typedef struct UserDataInitTaskData { + ListBase task_datas; + int32_t *task_counters; + +} UserDataInitTaskData; + +static void user_data_init_task_data_free(UserDataInitTaskData *taskdata) +{ + BLI_assert(taskdata); + LISTBASE_FOREACH_MUTABLE (ExtractTaskData *, td, &taskdata->task_datas) { + extract_task_data_free(td); + } + BLI_listbase_clear(&taskdata->task_datas); + MEM_SAFE_FREE(taskdata->task_counters); + MEM_freeN(taskdata); +} + +static void user_data_init_task_data_exec(void *__restrict task_data) +{ + UserDataInitTaskData *extract_task_data = task_data; + LISTBASE_FOREACH (ExtractTaskData *, td, &extract_task_data->task_datas) { + extract_init(td); } } -static void extract_range_task_create( - TaskPool *task_pool, ExtractTaskData *taskdata, const eMRIterType type, int start, int length) +static struct TaskNode *user_data_init_task_node_create(struct TaskGraph *task_graph, + UserDataInitTaskData *task_data) +{ + struct TaskNode *task_node = BLI_task_graph_node_create( + task_graph, + user_data_init_task_data_exec, + task_data, + (TaskGraphNodeFreeFunction)user_data_init_task_data_free); + return task_node; +} + +/** \} */ +/* ---------------------------------------------------------------------- */ +/** \name Extract Loop + * \{ */ + +static void extract_range_task_create(struct TaskGraph *task_graph, + struct TaskNode *task_node_user_data_init, + ExtractTaskData *taskdata, + const eMRIterType type, + int start, + int length) { taskdata = MEM_dupallocN(taskdata); atomic_add_and_fetch_int32(taskdata->task_counter, 1); taskdata->iter_type = type; taskdata->start = start; taskdata->end = start + length; - BLI_task_pool_push(task_pool, extract_run, taskdata, true, NULL); + struct TaskNode *task_node = BLI_task_graph_node_create( + task_graph, extract_run, taskdata, MEM_freeN); + BLI_task_graph_edge_create(task_node_user_data_init, task_node); } -static void extract_task_create(TaskPool *task_pool, +static void extract_task_create(struct TaskGraph *task_graph, + struct TaskNode *task_node_mesh_render_data, + struct TaskNode *task_node_user_data_init, + ListBase *single_threaded_task_datas, + ListBase *user_data_init_task_datas, const Scene *scene, const MeshRenderData *mr, const MeshExtract *extract, @@ -4654,58 +4805,75 @@ static void extract_task_create(TaskPool *task_pool, /* Divide extraction of the VBO/IBO into sensible chunks of works. */ ExtractTaskData *taskdata = MEM_mallocN(sizeof(*taskdata), "ExtractTaskData"); + taskdata->next = NULL; + taskdata->prev = NULL; taskdata->mr = mr; taskdata->extract = extract; taskdata->buf = buf; - taskdata->user_data = extract->init(mr, buf); + + /* ExtractUserData is shared between the iterations as it holds counters to detect if the + * extraction is finished. To make sure the duplication of the userdata does not create a new + * instance of the counters we allocate the userdata in its own container. + * + * This structure makes sure that when extract_init is called, that the user data of all + * iterations are updated. */ + taskdata->user_data = MEM_callocN(sizeof(ExtractUserData), __func__); taskdata->iter_type = mesh_extract_iter_type(extract); taskdata->task_counter = task_counter; taskdata->start = 0; taskdata->end = INT_MAX; /* Simple heuristic. */ - const bool use_thread = (mr->loop_len + mr->loop_loose_len) > 8192; + const int chunk_size = 8192; + const bool use_thread = (mr->loop_len + mr->loop_loose_len) > chunk_size; if (use_thread && extract->use_threading) { + /* Divide task into sensible chunks. */ - const int chunk_size = 8192; if (taskdata->iter_type & MR_ITER_LOOPTRI) { for (int i = 0; i < mr->tri_len; i += chunk_size) { - extract_range_task_create(task_pool, taskdata, MR_ITER_LOOPTRI, i, chunk_size); + extract_range_task_create( + task_graph, task_node_user_data_init, taskdata, MR_ITER_LOOPTRI, i, chunk_size); } } if (taskdata->iter_type & MR_ITER_LOOP) { for (int i = 0; i < mr->poly_len; i += chunk_size) { - extract_range_task_create(task_pool, taskdata, MR_ITER_LOOP, i, chunk_size); + extract_range_task_create( + task_graph, task_node_user_data_init, taskdata, MR_ITER_LOOP, i, chunk_size); } } if (taskdata->iter_type & MR_ITER_LEDGE) { for (int i = 0; i < mr->edge_loose_len; i += chunk_size) { - extract_range_task_create(task_pool, taskdata, MR_ITER_LEDGE, i, chunk_size); + extract_range_task_create( + task_graph, task_node_user_data_init, taskdata, MR_ITER_LEDGE, i, chunk_size); } } if (taskdata->iter_type & MR_ITER_LVERT) { for (int i = 0; i < mr->vert_loose_len; i += chunk_size) { - extract_range_task_create(task_pool, taskdata, MR_ITER_LVERT, i, chunk_size); + extract_range_task_create( + task_graph, task_node_user_data_init, taskdata, MR_ITER_LVERT, i, chunk_size); } } - MEM_freeN(taskdata); + BLI_addtail(user_data_init_task_datas, taskdata); } else if (use_thread) { /* One task for the whole VBO. */ (*task_counter)++; - BLI_task_pool_push(task_pool, extract_run, taskdata, true, NULL); + struct TaskNode *one_task = BLI_task_graph_node_create( + task_graph, extract_init_and_run, taskdata, extract_task_data_free); + BLI_task_graph_edge_create(task_node_mesh_render_data, one_task); } else { /* Single threaded extraction. */ (*task_counter)++; - extract_run(NULL, taskdata); - MEM_freeN(taskdata); + BLI_addtail(single_threaded_task_datas, taskdata); } } -void mesh_buffer_cache_create_requested(MeshBatchCache *cache, +void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph, + MeshBatchCache *cache, MeshBufferCache mbc, Mesh *me, + const bool is_editmode, const bool is_paint_mode, const float obmat[4][4], @@ -4717,9 +4885,41 @@ void mesh_buffer_cache_create_requested(MeshBatchCache *cache, const ToolSettings *ts, const bool use_hide) { + /* For each mesh where batches needs to be updated a sub-graph will be added to the task_graph. + * This sub-graph starts with an extract_render_data_node. This fills/converts the required data + * from Mesh. + * + * Small extractions and extractions that can't be multi-threaded are grouped in a single + * `extract_single_threaded_task_node`. + * + * Other extractions will create a node for each loop exceeding 8192 items. these nodes are + * linked to the `user_data_init_task_node`. the `user_data_init_task_node` prepares the userdata + * needed for the extraction based on the data extracted from the mesh. counters are used to + * check if the finalize of a task has to be called. + * + * Mesh extraction sub graph + * + * +----------------------+ + * +-----> | extract_task1_loop_1 | + * | +----------------------+ + * +------------------+ +----------------------+ +----------------------+ + * | mesh_render_data | --> | | --> | extract_task1_loop_2 | + * +------------------+ | | +----------------------+ + * | | | +----------------------+ + * | | user_data_init | --> | extract_task2_loop_1 | + * v | | +----------------------+ + * +------------------+ | | +----------------------+ + * | single_threaded | | | --> | extract_task2_loop_2 | + * +------------------+ +----------------------+ +----------------------+ + * | +----------------------+ + * +-----> | extract_task2_loop_3 | + * +----------------------+ + */ eMRIterType iter_flag = 0; eMRDataType data_flag = 0; + const bool do_lines_loose_subbuffer = mbc.ibo.lines_loose != NULL; + #define TEST_ASSIGN(type, type_lowercase, name) \ do { \ if (DRW_TEST_ASSIGN_##type(mbc.type_lowercase.name)) { \ @@ -4757,7 +4957,6 @@ void mesh_buffer_cache_create_requested(MeshBatchCache *cache, TEST_ASSIGN(IBO, ibo, fdots); TEST_ASSIGN(IBO, ibo, lines_paint_mask); TEST_ASSIGN(IBO, ibo, lines_adjacency); - TEST_ASSIGN(IBO, ibo, lines_loose); TEST_ASSIGN(IBO, ibo, edituv_tris); TEST_ASSIGN(IBO, ibo, edituv_lines); TEST_ASSIGN(IBO, ibo, edituv_points); @@ -4769,16 +4968,8 @@ void mesh_buffer_cache_create_requested(MeshBatchCache *cache, double rdata_start = PIL_check_seconds_timer(); #endif - MeshRenderData *mr = mesh_render_data_create(me, - is_editmode, - is_paint_mode, - obmat, - do_final, - do_uvedit, - iter_flag, - data_flag, - cd_layer_used, - ts); + MeshRenderData *mr = mesh_render_data_create( + me, is_editmode, is_paint_mode, obmat, do_final, do_uvedit, cd_layer_used, ts); mr->cache = cache; /* HACK */ mr->use_hide = use_hide; mr->use_subsurf_fdots = use_subsurf_fdots; @@ -4788,20 +4979,32 @@ void mesh_buffer_cache_create_requested(MeshBatchCache *cache, double rdata_end = PIL_check_seconds_timer(); #endif - TaskPool *task_pool; - - /* Create a suspended pool as the finalize method could be called too early. - * See `extract_run`. */ - task_pool = BLI_task_pool_create_suspended(NULL, TASK_PRIORITY_HIGH); - size_t counters_size = (sizeof(mbc) / sizeof(void *)) * sizeof(int32_t); int32_t *task_counters = MEM_callocN(counters_size, __func__); int counter_used = 0; + struct TaskNode *task_node_mesh_render_data = mesh_extract_render_data_node_create( + task_graph, mr, iter_flag, data_flag); + ExtractSingleThreadedTaskData *single_threaded_task_data = MEM_callocN( + sizeof(ExtractSingleThreadedTaskData), __func__); + UserDataInitTaskData *user_data_init_task_data = MEM_callocN(sizeof(UserDataInitTaskData), + __func__); + user_data_init_task_data->task_counters = task_counters; + struct TaskNode *task_node_user_data_init = user_data_init_task_node_create( + task_graph, user_data_init_task_data); + #define EXTRACT(buf, name) \ if (mbc.buf.name) { \ - extract_task_create( \ - task_pool, scene, mr, &extract_##name, mbc.buf.name, &task_counters[counter_used++]); \ + extract_task_create(task_graph, \ + task_node_mesh_render_data, \ + task_node_user_data_init, \ + &single_threaded_task_data->task_datas, \ + &user_data_init_task_data->task_datas, \ + scene, \ + mr, \ + &extract_##name, \ + mbc.buf.name, \ + &task_counters[counter_used++]); \ } \ ((void)0) @@ -4829,7 +5032,33 @@ void mesh_buffer_cache_create_requested(MeshBatchCache *cache, EXTRACT(vbo, skin_roots); EXTRACT(ibo, tris); - EXTRACT(ibo, lines); + if (mbc.ibo.lines) { + /* When `lines` and `lines_loose` are requested, schedule lines extraction that also creates + * the `lines_loose` sub-buffer. */ + const MeshExtract *lines_extractor = do_lines_loose_subbuffer ? + &extract_lines_with_lines_loose : + &extract_lines; + extract_task_create(task_graph, + task_node_mesh_render_data, + task_node_user_data_init, + &single_threaded_task_data->task_datas, + &user_data_init_task_data->task_datas, + scene, + mr, + lines_extractor, + mbc.ibo.lines, + &task_counters[counter_used++]); + } + else { + if (do_lines_loose_subbuffer) { + /* When `lines_loose` is requested without `lines` we can create the sub-buffer on the fly as + * the `lines` buffer should then already be up-to-date. + * (see `DRW_batch_requested(cache->batch.loose_edges, GPU_PRIM_LINES)` in + * `DRW_mesh_batch_cache_create_requested`). + */ + extract_lines_loose_subbuffer(mr); + } + } EXTRACT(ibo, points); EXTRACT(ibo, fdots); EXTRACT(ibo, lines_paint_mask); @@ -4839,27 +5068,29 @@ void mesh_buffer_cache_create_requested(MeshBatchCache *cache, EXTRACT(ibo, edituv_points); EXTRACT(ibo, edituv_fdots); - /* TODO(fclem) Ideally, we should have one global pool for all - * objects and wait for finish only before drawing when buffers - * need to be ready. */ - BLI_task_pool_work_and_wait(task_pool); - - /* The next task(s) rely on the result of the tasks above. */ + /* Only create the edge when there are user datas that needs to be inited. + * The task is still part of the graph so the task_data will be freed when the graph is freed. + */ + if (!BLI_listbase_is_empty(&user_data_init_task_data->task_datas)) { + BLI_task_graph_edge_create(task_node_mesh_render_data, task_node_user_data_init); + } - /* The `lines_loose` is a sub buffer from `ibo.lines`. - * We schedule it here due to potential synchronization issues.*/ - EXTRACT(ibo, lines_loose); + if (!BLI_listbase_is_empty(&single_threaded_task_data->task_datas)) { + struct TaskNode *task_node = extract_single_threaded_task_node_create( + task_graph, single_threaded_task_data); + BLI_task_graph_edge_create(task_node_mesh_render_data, task_node); + } + else { + extract_single_threaded_task_data_free(single_threaded_task_data); + } - BLI_task_pool_work_and_wait(task_pool); + /* Trigger the sub-graph for this mesh. */ + BLI_task_graph_node_push_work(task_node_mesh_render_data); #undef EXTRACT - BLI_task_pool_free(task_pool); - MEM_freeN(task_counters); - - mesh_render_data_free(mr); - #ifdef DEBUG_TIME + BLI_task_graph_work_and_wait(task_graph); double end = PIL_check_seconds_timer(); static double avg = 0; diff --git a/source/blender/draw/intern/draw_cache_impl.h b/source/blender/draw/intern/draw_cache_impl.h index daa46252331..80649143537 100644 --- a/source/blender/draw/intern/draw_cache_impl.h +++ b/source/blender/draw/intern/draw_cache_impl.h @@ -31,6 +31,7 @@ struct ListBase; struct ModifierData; struct PTCacheEdit; struct ParticleSystem; +struct TaskGraph; struct Curve; struct Hair; @@ -150,7 +151,8 @@ int DRW_volume_material_count_get(struct Volume *volume); struct GPUBatch *DRW_volume_batch_cache_get_wireframes_face(struct Volume *volume); /* Mesh */ -void DRW_mesh_batch_cache_create_requested(struct Object *ob, +void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, + struct Object *ob, struct Mesh *me, const struct Scene *scene, const bool is_paint_mode, diff --git a/source/blender/draw/intern/draw_cache_impl_mesh.c b/source/blender/draw/intern/draw_cache_impl_mesh.c index 237c7bddfad..99e285a18f1 100644 --- a/source/blender/draw/intern/draw_cache_impl_mesh.c +++ b/source/blender/draw/intern/draw_cache_impl_mesh.c @@ -33,6 +33,7 @@ #include "BLI_math_bits.h" #include "BLI_math_vector.h" #include "BLI_string.h" +#include "BLI_task.h" #include "BLI_utildefines.h" #include "DNA_mesh_types.h" @@ -1019,9 +1020,14 @@ void DRW_mesh_batch_cache_free_old(Mesh *me, int ctime) } /* Can be called for any surface type. Mesh *me is the final mesh. */ -void DRW_mesh_batch_cache_create_requested( - Object *ob, Mesh *me, const Scene *scene, const bool is_paint_mode, const bool use_hide) -{ +void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, + Object *ob, + Mesh *me, + const Scene *scene, + const bool is_paint_mode, + const bool use_hide) +{ + BLI_assert(task_graph); GPUIndexBuf **saved_elem_ranges = NULL; const ToolSettings *ts = NULL; if (scene) { @@ -1378,7 +1384,8 @@ void DRW_mesh_batch_cache_create_requested( false; if (do_uvcage) { - mesh_buffer_cache_create_requested(cache, + mesh_buffer_cache_create_requested(task_graph, + cache, cache->uv_cage, me, is_editmode, @@ -1394,7 +1401,8 @@ void DRW_mesh_batch_cache_create_requested( } if (do_cage) { - mesh_buffer_cache_create_requested(cache, + mesh_buffer_cache_create_requested(task_graph, + cache, cache->cage, me, is_editmode, @@ -1409,7 +1417,8 @@ void DRW_mesh_batch_cache_create_requested( true); } - mesh_buffer_cache_create_requested(cache, + mesh_buffer_cache_create_requested(task_graph, + cache, cache->final, me, is_editmode, @@ -1422,10 +1431,12 @@ void DRW_mesh_batch_cache_create_requested( scene, ts, use_hide); - #ifdef DEBUG check: /* Make sure all requested batches have been setup. */ + /* TODO(jbakker): we should move this to the draw_manager but that needs refactoring and + * additional looping.*/ + BLI_task_graph_work_and_wait(task_graph); for (int i = 0; i < sizeof(cache->batch) / sizeof(void *); i++) { BLI_assert(!DRW_batch_requested(((GPUBatch **)&cache->batch)[i], 0)); } diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c index 970578c7438..74bb596454b 100644 --- a/source/blender/draw/intern/draw_manager.c +++ b/source/blender/draw/intern/draw_manager.c @@ -27,6 +27,7 @@ #include "BLI_memblock.h" #include "BLI_rect.h" #include "BLI_string.h" +#include "BLI_task.h" #include "BLI_threads.h" #include "BLF_api.h" @@ -135,6 +136,23 @@ static void drw_state_ensure_not_reused(DRWManager *dst) } #endif +/* -------------------------------------------------------------------- */ +/** \name Threading + * \{ */ +static void drw_task_graph_init(void) +{ + BLI_assert(DST.task_graph == NULL); + DST.task_graph = BLI_task_graph_create(); +} + +static void drw_task_graph_deinit(void) +{ + BLI_task_graph_work_and_wait(DST.task_graph); + BLI_task_graph_free(DST.task_graph); + DST.task_graph = NULL; +} +/* \} */ + /* -------------------------------------------------------------------- */ /** \name Settings * \{ */ @@ -1424,6 +1442,7 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph, /* reuse if caller sets */ .evil_C = DST.draw_ctx.evil_C, }; + drw_task_graph_init(); drw_context_state_init(); drw_viewport_var_init(); @@ -1488,6 +1507,7 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph, #endif } + drw_task_graph_deinit(); DRW_stats_begin(); GPU_framebuffer_bind(DST.default_framebuffer); @@ -1768,7 +1788,6 @@ void DRW_render_to_image(RenderEngine *engine, struct Depsgraph *depsgraph) DST.options.is_image_render = true; DST.options.is_scene_render = true; DST.options.draw_background = scene->r.alphamode == R_ADDSKY; - DST.draw_ctx = (DRWContextState){ .scene = scene, .view_layer = view_layer, @@ -1853,9 +1872,9 @@ void DRW_render_object_iter( void (*callback)(void *vedata, Object *ob, RenderEngine *engine, struct Depsgraph *depsgraph)) { const DRWContextState *draw_ctx = DRW_context_state_get(); - DRW_hair_init(); + drw_task_graph_init(); const int object_type_exclude_viewport = draw_ctx->v3d ? draw_ctx->v3d->object_type_exclude_viewport : 0; @@ -1878,6 +1897,7 @@ void DRW_render_object_iter( DEG_OBJECT_ITER_FOR_RENDER_ENGINE_END; drw_duplidata_free(); + drw_task_graph_deinit(); } /* Assume a valid gl context is bound (and that the gl_context_mutex has been acquired). @@ -2049,7 +2069,7 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph, DST.viewport = viewport; DST.options.is_select = true; - + drw_task_graph_init(); /* Get list of enabled engines */ if (use_obedit) { drw_engines_enable_overlays(); @@ -2159,6 +2179,7 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph, } drw_duplidata_free(); + drw_task_graph_deinit(); drw_engines_cache_finish(); DRW_render_instance_buffer_finish(); @@ -2470,8 +2491,10 @@ void DRW_draw_depth_object( else { batch = DRW_mesh_batch_cache_get_surface(me); } - - DRW_mesh_batch_cache_create_requested(object, me, scene, false, true); + struct TaskGraph *task_graph = BLI_task_graph_create(); + DRW_mesh_batch_cache_create_requested(task_graph, object, me, scene, false, true); + BLI_task_graph_work_and_wait(task_graph); + BLI_task_graph_free(task_graph); const eGPUShaderConfig sh_cfg = world_clip_planes ? GPU_SHADER_CFG_CLIPPED : GPU_SHADER_CFG_DEFAULT; diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h index f6d8179b193..a27af84fa02 100644 --- a/source/blender/draw/intern/draw_manager.h +++ b/source/blender/draw/intern/draw_manager.h @@ -31,6 +31,7 @@ #include "BLI_assert.h" #include "BLI_linklist.h" #include "BLI_memblock.h" +#include "BLI_task.h" #include "BLI_threads.h" #include "GPU_batch.h" @@ -525,6 +526,8 @@ typedef struct DRWManager { uint select_id; #endif + struct TaskGraph *task_graph; + /* ---------- Nothing after this point is cleared after use ----------- */ /* gl_context serves as the offset for clearing only diff --git a/source/blender/editors/uvedit/uvedit_draw.c b/source/blender/editors/uvedit/uvedit_draw.c index f8cef95c776..897e2f13774 100644 --- a/source/blender/editors/uvedit/uvedit_draw.c +++ b/source/blender/editors/uvedit/uvedit_draw.c @@ -39,6 +39,7 @@ #include "../../draw/intern/draw_cache_impl.h" #include "BLI_math.h" +#include "BLI_task.h" #include "BLI_utildefines.h" #include "BKE_deform.h" @@ -206,8 +207,10 @@ static void uvedit_get_batches(Object *ob, else { batches->faces = NULL; } - - DRW_mesh_batch_cache_create_requested(ob, ob->data, scene, false, false); + struct TaskGraph *task_graph = BLI_task_graph_create(); + DRW_mesh_batch_cache_create_requested(task_graph, ob, ob->data, scene, false, false); + BLI_task_graph_work_and_wait(task_graph); + BLI_task_graph_free(task_graph); if (draw_stretch && (sima->dt_uvstretch == SI_UVDT_STRETCH_AREA)) { /* after create_requested we can load the actual areas */ @@ -229,7 +232,11 @@ static void draw_uvs_shadow(SpaceImage *sima, DRW_mesh_batch_cache_validate(me); GPUBatch *edges = DRW_mesh_batch_cache_get_uv_edges(me); - DRW_mesh_batch_cache_create_requested(ob_eval, me, scene, false, false); + + struct TaskGraph *task_graph = BLI_task_graph_create(); + DRW_mesh_batch_cache_create_requested(task_graph, ob_eval, me, scene, false, false); + BLI_task_graph_work_and_wait(task_graph); + BLI_task_graph_free(task_graph); if (edges) { if (sima->flag & SI_SMOOTH_UV) { @@ -269,7 +276,10 @@ static void draw_uvs_texpaint(const Scene *scene, Object *ob, Depsgraph *depsgra DRW_mesh_batch_cache_validate(me); GPUBatch *geom = DRW_mesh_batch_cache_get_uv_edges(me); - DRW_mesh_batch_cache_create_requested(ob_eval, me, scene, false, false); + struct TaskGraph *task_graph = BLI_task_graph_create(); + DRW_mesh_batch_cache_create_requested(task_graph, ob_eval, me, scene, false, false); + BLI_task_graph_work_and_wait(task_graph); + BLI_task_graph_free(task_graph); GPU_batch_program_set_builtin(geom, GPU_SHADER_2D_UV_UNIFORM_COLOR); GPU_batch_uniform_4fv(geom, "color", col); -- cgit v1.2.3