diff options
author | Clément Foucault <foucault.clem@gmail.com> | 2019-05-31 02:45:41 +0300 |
---|---|---|
committer | Clément Foucault <foucault.clem@gmail.com> | 2019-09-17 16:16:43 +0300 |
commit | 3a08153d7a842b7ab1e40a9048730e1a3ddab5f7 (patch) | |
tree | 37e7c902ceb35c5626644ccbbc7e0376e169e56c /source/blender/draw/intern | |
parent | 41299bce936afb5e7da9c332d1140c5a77d49317 (diff) |
DRW: Refactor to support draw call batching
Reviewers: brecht
Differential Revision: D4997
Diffstat (limited to 'source/blender/draw/intern')
-rw-r--r-- | source/blender/draw/intern/DRW_render.h | 44 | ||||
-rw-r--r-- | source/blender/draw/intern/draw_anim_viz.c | 4 | ||||
-rw-r--r-- | source/blender/draw/intern/draw_common.c | 2 | ||||
-rw-r--r-- | source/blender/draw/intern/draw_hair.c | 41 | ||||
-rw-r--r-- | source/blender/draw/intern/draw_instance_data.c | 4 | ||||
-rw-r--r-- | source/blender/draw/intern/draw_instance_data.h | 2 | ||||
-rw-r--r-- | source/blender/draw/intern/draw_manager.c | 93 | ||||
-rw-r--r-- | source/blender/draw/intern/draw_manager.h | 320 | ||||
-rw-r--r-- | source/blender/draw/intern/draw_manager_data.c | 806 | ||||
-rw-r--r-- | source/blender/draw/intern/draw_manager_exec.c | 689 |
10 files changed, 1408 insertions, 597 deletions
diff --git a/source/blender/draw/intern/DRW_render.h b/source/blender/draw/intern/DRW_render.h index e3713fdee8f..935920310dd 100644 --- a/source/blender/draw/intern/DRW_render.h +++ b/source/blender/draw/intern/DRW_render.h @@ -80,8 +80,6 @@ typedef struct DRWPass DRWPass; typedef struct DRWShadingGroup DRWShadingGroup; typedef struct DRWUniform DRWUniform; typedef struct DRWView DRWView; - -/* Opaque type to avoid usage as a DRWCall but it is exactly the same thing. */ typedef struct DRWCallBuffer DRWCallBuffer; /* TODO Put it somewhere else? */ @@ -404,35 +402,29 @@ void DRW_shgroup_call_ex(DRWShadingGroup *shgroup, Object *ob, float (*obmat)[4], struct GPUBatch *geom, - uint v_sta, - uint v_ct, bool bypass_culling, void *user_data); /* If ob is NULL, unit modelmatrix is assumed and culling is bypassed. */ -#define DRW_shgroup_call(shgrp, geom, ob) \ - DRW_shgroup_call_ex(shgrp, ob, NULL, geom, 0, 0, false, NULL) +#define DRW_shgroup_call(shgrp, geom, ob) DRW_shgroup_call_ex(shgrp, ob, NULL, geom, false, NULL) /* Same as DRW_shgroup_call but override the obmat. Not culled. */ #define DRW_shgroup_call_obmat(shgrp, geom, obmat) \ - DRW_shgroup_call_ex(shgrp, NULL, obmat, geom, 0, 0, false, NULL) + DRW_shgroup_call_ex(shgrp, NULL, obmat, geom, false, NULL) /* TODO(fclem) remove this when we have DRWView */ /* user_data is used by DRWCallVisibilityFn defined in DRWView. */ #define DRW_shgroup_call_with_callback(shgrp, geom, ob, user_data) \ - DRW_shgroup_call_ex(shgrp, ob, NULL, geom, 0, 0, false, user_data) + DRW_shgroup_call_ex(shgrp, ob, NULL, geom, false, user_data) /* Same as DRW_shgroup_call but bypass culling even if ob is not NULL. */ #define DRW_shgroup_call_no_cull(shgrp, geom, ob) \ - DRW_shgroup_call_ex(shgrp, ob, NULL, geom, 0, 0, true, NULL) - -/* Only draw a certain range of geom. 
*/ -#define DRW_shgroup_call_range(shgrp, geom, ob, v_sta, v_ct) \ - DRW_shgroup_call_ex(shgrp, ob, NULL, geom, v_sta, v_ct, false, NULL) + DRW_shgroup_call_ex(shgrp, ob, NULL, geom, true, NULL) -/* Same as DRW_shgroup_call_range but override the obmat. Special for gpencil. */ -#define DRW_shgroup_call_range_obmat(shgrp, geom, obmat, v_sta, v_ct) \ - DRW_shgroup_call_ex(shgrp, NULL, obmat, geom, v_sta, v_ct, false, NULL) +void DRW_shgroup_call_range(DRWShadingGroup *shgroup, + struct GPUBatch *geom, + uint v_sta, + uint v_ct); void DRW_shgroup_call_procedural_points(DRWShadingGroup *sh, Object *ob, uint point_ct); void DRW_shgroup_call_procedural_lines(DRWShadingGroup *sh, Object *ob, uint line_ct); @@ -469,6 +461,16 @@ void DRW_shgroup_state_enable(DRWShadingGroup *shgroup, DRWState state); void DRW_shgroup_state_disable(DRWShadingGroup *shgroup, DRWState state); void DRW_shgroup_stencil_mask(DRWShadingGroup *shgroup, uint mask); +/* Issue a clear command. */ +void DRW_shgroup_clear_framebuffer(DRWShadingGroup *shgroup, + eGPUFrameBufferBits channels, + uchar r, + uchar g, + uchar b, + uchar a, + float depth, + uchar stencil); + void DRW_shgroup_uniform_texture(DRWShadingGroup *shgroup, const char *name, const struct GPUTexture *tex); @@ -525,17 +527,17 @@ void DRW_shgroup_uniform_mat3(DRWShadingGroup *shgroup, const char *name, const void DRW_shgroup_uniform_mat4(DRWShadingGroup *shgroup, const char *name, const float (*value)[4]); /* Store value instead of referencing it. 
*/ void DRW_shgroup_uniform_int_copy(DRWShadingGroup *shgroup, const char *name, const int value); +void DRW_shgroup_uniform_ivec2_copy(DRWShadingGroup *shgrp, const char *name, const int *value); +void DRW_shgroup_uniform_ivec3_copy(DRWShadingGroup *shgrp, const char *name, const int *value); +void DRW_shgroup_uniform_ivec4_copy(DRWShadingGroup *shgrp, const char *name, const int *value); void DRW_shgroup_uniform_bool_copy(DRWShadingGroup *shgroup, const char *name, const bool value); void DRW_shgroup_uniform_float_copy(DRWShadingGroup *shgroup, const char *name, const float value); void DRW_shgroup_uniform_vec2_copy(DRWShadingGroup *shgroup, const char *name, const float *value); +void DRW_shgroup_uniform_vec3_copy(DRWShadingGroup *shgroup, const char *name, const float *value); +void DRW_shgroup_uniform_vec4_copy(DRWShadingGroup *shgroup, const char *name, const float *value); bool DRW_shgroup_is_empty(DRWShadingGroup *shgroup); -/* TODO: workaround functions waiting for the clearing operation to be available inside the - * shgroups. */ -DRWShadingGroup *DRW_shgroup_get_next(DRWShadingGroup *shgroup); -uint DRW_shgroup_stencil_mask_get(DRWShadingGroup *shgroup); - /* Passes */ DRWPass *DRW_pass_create(const char *name, DRWState state); /* TODO Replace with passes inheritance. */ diff --git a/source/blender/draw/intern/draw_anim_viz.c b/source/blender/draw/intern/draw_anim_viz.c index 72459309133..2d71fdf0782 100644 --- a/source/blender/draw/intern/draw_anim_viz.c +++ b/source/blender/draw/intern/draw_anim_viz.c @@ -215,7 +215,7 @@ static void MPATH_cache_motion_path(MPATH_PassList *psl, DRW_shgroup_uniform_vec3(shgrp, "customColor", mpath->color, 1); } /* Only draw the required range. */ - DRW_shgroup_call_range(shgrp, mpath_batch_line_get(mpath), NULL, start_index, len); + DRW_shgroup_call_range(shgrp, mpath_batch_line_get(mpath), start_index, len); } /* Draw points. 
*/ @@ -231,7 +231,7 @@ static void MPATH_cache_motion_path(MPATH_PassList *psl, DRW_shgroup_uniform_vec3(shgrp, "customColor", mpath->color, 1); } /* Only draw the required range. */ - DRW_shgroup_call_range(shgrp, mpath_batch_points_get(mpath), NULL, start_index, len); + DRW_shgroup_call_range(shgrp, mpath_batch_points_get(mpath), start_index, len); /* Draw frame numbers at each framestep value */ bool show_kf_no = (avs->path_viewflag & MOTIONPATH_VIEW_KFNOS) != 0; diff --git a/source/blender/draw/intern/draw_common.c b/source/blender/draw/intern/draw_common.c index 8e0f713add6..7f679dd5581 100644 --- a/source/blender/draw/intern/draw_common.c +++ b/source/blender/draw/intern/draw_common.c @@ -1095,7 +1095,7 @@ struct GPUShader *volume_velocity_shader_get(bool use_needle) NULL, datatoc_gpu_shader_flat_color_frag_glsl, datatoc_common_view_lib_glsl, - "#define USE_NEEDLE"); + "#define USE_NEEDLE\n"); } return sh_data->volume_velocity_needle_sh; } diff --git a/source/blender/draw/intern/draw_hair.c b/source/blender/draw/intern/draw_hair.c index f77243ca9f1..58085cf08c6 100644 --- a/source/blender/draw/intern/draw_hair.c +++ b/source/blender/draw/intern/draw_hair.c @@ -201,30 +201,33 @@ static DRWShadingGroup *drw_shgroup_create_hair_procedural_ex(Object *object, /* Transform Feedback subdiv. 
*/ if (need_ft_update) { int final_points_len = hair_cache->final[subdiv].strands_res * hair_cache->strands_len; - GPUShader *tf_shader = hair_refine_shader_get(PART_REFINE_CATMULL_ROM); + if (final_points_len) { + GPUShader *tf_shader = hair_refine_shader_get(PART_REFINE_CATMULL_ROM); #ifdef USE_TRANSFORM_FEEDBACK - DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create( - tf_shader, g_tf_pass, hair_cache->final[subdiv].proc_buf); + DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create( + tf_shader, g_tf_pass, hair_cache->final[subdiv].proc_buf); #else - DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); - - ParticleRefineCall *pr_call = MEM_mallocN(sizeof(*pr_call), __func__); - pr_call->next = g_tf_calls; - pr_call->vbo = hair_cache->final[subdiv].proc_buf; - pr_call->shgrp = tf_shgrp; - pr_call->vert_len = final_points_len; - g_tf_calls = pr_call; - DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); - DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); - DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); + DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); + + ParticleRefineCall *pr_call = MEM_mallocN(sizeof(*pr_call), __func__); + pr_call->next = g_tf_calls; + pr_call->vbo = hair_cache->final[subdiv].proc_buf; + pr_call->shgrp = tf_shgrp; + pr_call->vert_len = final_points_len; + g_tf_calls = pr_call; + DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); + DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); + DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); #endif - DRW_shgroup_uniform_texture(tf_shgrp, "hairPointBuffer", hair_cache->point_tex); - DRW_shgroup_uniform_texture(tf_shgrp, "hairStrandBuffer", hair_cache->strand_tex); - DRW_shgroup_uniform_texture(tf_shgrp, "hairStrandSegBuffer", hair_cache->strand_seg_tex); - DRW_shgroup_uniform_int(tf_shgrp, "hairStrandsRes", 
&hair_cache->final[subdiv].strands_res, 1); - DRW_shgroup_call_procedural_points(tf_shgrp, NULL, final_points_len); + DRW_shgroup_uniform_texture(tf_shgrp, "hairPointBuffer", hair_cache->point_tex); + DRW_shgroup_uniform_texture(tf_shgrp, "hairStrandBuffer", hair_cache->strand_tex); + DRW_shgroup_uniform_texture(tf_shgrp, "hairStrandSegBuffer", hair_cache->strand_seg_tex); + DRW_shgroup_uniform_int( + tf_shgrp, "hairStrandsRes", &hair_cache->final[subdiv].strands_res, 1); + DRW_shgroup_call_procedural_points(tf_shgrp, NULL, final_points_len); + } } return shgrp; diff --git a/source/blender/draw/intern/draw_instance_data.c b/source/blender/draw/intern/draw_instance_data.c index 69756203d66..81b10e095c3 100644 --- a/source/blender/draw/intern/draw_instance_data.c +++ b/source/blender/draw/intern/draw_instance_data.c @@ -64,7 +64,7 @@ typedef struct DRWTempBufferHandle { /** Format pointer for reuse. */ GPUVertFormat *format; /** Touched vertex length for resize. */ - uint *vert_len; + int *vert_len; } DRWTempBufferHandle; static ListBase g_idatalists = {NULL, NULL}; @@ -112,7 +112,7 @@ static void instance_batch_free(GPUBatch *geom, void *UNUSED(user_data)) */ GPUVertBuf *DRW_temp_buffer_request(DRWInstanceDataList *idatalist, GPUVertFormat *format, - uint *vert_len) + int *vert_len) { BLI_assert(format != NULL); BLI_assert(vert_len != NULL); diff --git a/source/blender/draw/intern/draw_instance_data.h b/source/blender/draw/intern/draw_instance_data.h index 2ede68e16d8..524c4cd96d8 100644 --- a/source/blender/draw/intern/draw_instance_data.h +++ b/source/blender/draw/intern/draw_instance_data.h @@ -40,7 +40,7 @@ DRWInstanceData *DRW_instance_data_request(DRWInstanceDataList *idatalist, uint GPUVertBuf *DRW_temp_buffer_request(DRWInstanceDataList *idatalist, GPUVertFormat *format, - uint *vert_len); + int *vert_len); GPUBatch *DRW_temp_batch_instance_request(DRWInstanceDataList *idatalist, GPUVertBuf *buf, GPUBatch *geom); diff --git 
a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c index fde7fdfa222..66e3905b212 100644 --- a/source/blender/draw/intern/draw_manager.c +++ b/source/blender/draw/intern/draw_manager.c @@ -537,8 +537,11 @@ static void drw_viewport_cache_resize(void) GPU_texture_free(*tex); } - BLI_memblock_clear(DST.vmempool->calls, NULL); - BLI_memblock_clear(DST.vmempool->states, NULL); + BLI_memblock_clear(DST.vmempool->commands, NULL); + BLI_memblock_clear(DST.vmempool->commands_small, NULL); + BLI_memblock_clear(DST.vmempool->callbuffers, NULL); + BLI_memblock_clear(DST.vmempool->obmats, NULL); + BLI_memblock_clear(DST.vmempool->obinfos, NULL); BLI_memblock_clear(DST.vmempool->cullstates, NULL); BLI_memblock_clear(DST.vmempool->shgroups, NULL); BLI_memblock_clear(DST.vmempool->uniforms, NULL); @@ -586,28 +589,28 @@ static void drw_context_state_init(void) } } -static DRWCallState *draw_unit_state_create(void) +static void draw_unit_state_create(void) { - DRWCallState *state = BLI_memblock_alloc(DST.vmempool->states); - state->flag = 0; - state->matflag = 0; + DRWObjectInfos *infos = BLI_memblock_alloc(DST.vmempool->obinfos); + DRWObjectMatrix *mats = BLI_memblock_alloc(DST.vmempool->obmats); + DRWCullingState *culling = BLI_memblock_alloc(DST.vmempool->cullstates); - unit_m4(state->model); - unit_m4(state->modelinverse); + unit_m4(mats->model); + unit_m4(mats->modelinverse); - copy_v3_fl(state->orcotexfac[0], 0.0f); - copy_v3_fl(state->orcotexfac[1], 1.0f); + copy_v3_fl(infos->orcotexfac[0], 0.0f); + copy_v3_fl(infos->orcotexfac[1], 1.0f); - state->ob_index = 0; - state->ob_random = 0.0f; - copy_v3_fl(state->ob_color, 1.0f); + infos->ob_index = 0; + infos->ob_random = 0.0f; + infos->ob_neg_scale = 1.0f; + copy_v3_fl(infos->ob_color, 1.0f); /* TODO(fclem) get rid of this. 
*/ - state->culling = BLI_memblock_alloc(DST.vmempool->cullstates); - state->culling->bsphere.radius = -1.0f; - state->culling->user_data = NULL; + culling->bsphere.radius = -1.0f; + culling->user_data = NULL; - return state; + DRW_handle_increment(&DST.resource_handle); } /* It also stores viewport variable to an immutable place: DST @@ -632,33 +635,48 @@ static void drw_viewport_var_init(void) DST.vmempool = GPU_viewport_mempool_get(DST.viewport); - if (DST.vmempool->calls == NULL) { - DST.vmempool->calls = BLI_memblock_create(sizeof(DRWCall)); + if (DST.vmempool->commands == NULL) { + DST.vmempool->commands = BLI_memblock_create(sizeof(DRWCommandChunk)); } - if (DST.vmempool->states == NULL) { - DST.vmempool->states = BLI_memblock_create(sizeof(DRWCallState)); + if (DST.vmempool->commands_small == NULL) { + DST.vmempool->commands_small = BLI_memblock_create(sizeof(DRWCommandSmallChunk)); + } + if (DST.vmempool->callbuffers == NULL) { + DST.vmempool->callbuffers = BLI_memblock_create(sizeof(DRWCallBuffer)); + } + if (DST.vmempool->obmats == NULL) { + uint chunk_len = sizeof(DRWObjectMatrix) * DRW_RESOURCE_CHUNK_LEN; + DST.vmempool->obmats = BLI_memblock_create_ex(sizeof(DRWObjectMatrix), chunk_len); + } + if (DST.vmempool->obinfos == NULL) { + uint chunk_len = sizeof(DRWObjectInfos) * DRW_RESOURCE_CHUNK_LEN; + DST.vmempool->obinfos = BLI_memblock_create_ex(sizeof(DRWObjectInfos), chunk_len); } if (DST.vmempool->cullstates == NULL) { - DST.vmempool->cullstates = BLI_memblock_create(sizeof(DRWCullingState)); + uint chunk_len = sizeof(DRWCullingState) * DRW_RESOURCE_CHUNK_LEN; + DST.vmempool->cullstates = BLI_memblock_create_ex(sizeof(DRWCullingState), chunk_len); } if (DST.vmempool->shgroups == NULL) { DST.vmempool->shgroups = BLI_memblock_create(sizeof(DRWShadingGroup)); } if (DST.vmempool->uniforms == NULL) { - DST.vmempool->uniforms = BLI_memblock_create(sizeof(DRWUniform)); + DST.vmempool->uniforms = BLI_memblock_create(sizeof(DRWUniformChunk)); } if 
(DST.vmempool->views == NULL) { DST.vmempool->views = BLI_memblock_create(sizeof(DRWView)); } if (DST.vmempool->passes == NULL) { - DST.vmempool->passes = BLI_memblock_create(sizeof(DRWPass)); + uint chunk_len = sizeof(DRWPass) * DRW_RESOURCE_CHUNK_LEN; + DST.vmempool->passes = BLI_memblock_create_ex(sizeof(DRWPass), chunk_len); } if (DST.vmempool->images == NULL) { DST.vmempool->images = BLI_memblock_create(sizeof(GPUTexture *)); } - /* Alloc default unit state */ - DST.unit_state = draw_unit_state_create(); + DST.resource_handle = 0; + DST.pass_handle = 0; + + draw_unit_state_create(); DST.idatalist = GPU_viewport_instance_data_list_get(DST.viewport); DRW_instance_data_list_reset(DST.idatalist); @@ -672,8 +690,6 @@ static void drw_viewport_var_init(void) DST.default_framebuffer = NULL; DST.vmempool = NULL; - - DST.unit_state = NULL; } DST.primary_view_ct = 0; @@ -716,6 +732,10 @@ static void drw_viewport_var_init(void) G_draw.view_ubo = DRW_uniformbuffer_create(sizeof(DRWViewUboStorage), NULL); } + if (DST.draw_list == NULL) { + DST.draw_list = GPU_draw_list_create(DRW_DRAWLIST_LEN); + } + memset(DST.object_instance_data, 0x0, sizeof(DST.object_instance_data)); } @@ -1099,7 +1119,7 @@ static void drw_engines_world_update(Scene *scene) static void drw_engines_cache_populate(Object *ob) { - DST.ob_state = NULL; + DST.ob_handle = 0; /* HACK: DrawData is copied by COW from the duplicated object. * This is valid for IDs that cannot be instantiated but this @@ -1917,6 +1937,8 @@ void DRW_render_gpencil(struct RenderEngine *engine, struct Depsgraph *depsgraph RenderResult *render_result = RE_engine_get_result(engine); RenderLayer *render_layer = RE_GetRenderLayer(render_result, view_layer->name); + DST.buffer_finish_called = false; + DRW_render_gpencil_to_image(engine, render_layer, &render_rect); /* Force cache to reset. 
*/ @@ -2027,13 +2049,15 @@ void DRW_render_to_image(RenderEngine *engine, struct Depsgraph *depsgraph) RE_SetActiveRenderView(render, render_view->name); drw_view_reset(); engine_type->draw_engine->render_to_image(data, engine, render_layer, &render_rect); + DST.buffer_finish_called = false; + /* grease pencil: render result is merged in the previous render result. */ if (DRW_render_check_grease_pencil(depsgraph)) { DRW_state_reset(); drw_view_reset(); DRW_render_gpencil_to_image(engine, render_layer, &render_rect); + DST.buffer_finish_called = false; } - DST.buffer_finish_called = false; } RE_engine_end_result(engine, render_result, false, false, false); @@ -2079,7 +2103,7 @@ void DRW_render_object_iter( if ((object_type_exclude_viewport & (1 << ob->type)) == 0) { DST.dupli_parent = data_.dupli_parent; DST.dupli_source = data_.dupli_object_current; - DST.ob_state = NULL; + DST.ob_handle = 0; drw_duplidata_load(DST.dupli_source); if (!DST.dupli_source) { @@ -2186,6 +2210,7 @@ void DRW_render_instance_buffer_finish(void) BLI_assert(!DST.buffer_finish_called && "DRW_render_instance_buffer_finish called twice!"); DST.buffer_finish_called = true; DRW_instance_buffer_finish(DST.idatalist); + drw_resource_buffer_finish(DST.vmempool); } /** @@ -2209,7 +2234,7 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph, Object *obact = OBACT(view_layer); Object *obedit = OBEDIT_FROM_OBACT(obact); #ifndef USE_GPU_SELECT - UNUSED_VARS(vc, scene, view_layer, v3d, ar, rect); + UNUSED_VARS(scene, view_layer, v3d, ar, rect); #else RegionView3D *rv3d = ar->regiondata; @@ -2934,6 +2959,10 @@ void DRW_engines_free(void) MEM_SAFE_FREE(DST.uniform_names.buffer); + if (DST.draw_list) { + GPU_draw_list_discard(DST.draw_list); + } + DRW_opengl_context_disable(); } diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h index 85f6cf05e83..b55a84b2765 100644 --- a/source/blender/draw/intern/draw_manager.h +++ 
b/source/blender/draw/intern/draw_manager.h @@ -28,8 +28,10 @@ #include "DRW_engine.h" #include "DRW_render.h" +#include "BLI_assert.h" #include "BLI_linklist.h" #include "BLI_threads.h" +#include "BLI_memblock.h" #include "GPU_batch.h" #include "GPU_context.h" @@ -43,6 +45,9 @@ /* Use draw manager to call GPU_select, see: DRW_draw_select_loop */ #define USE_GPU_SELECT +/* Use drawcall batching using instanced rendering. */ +#define USE_BATCHING 1 + // #define DRW_DEBUG_CULLING #define DRW_DEBUG_USE_UNIFORM_NAME 0 #define DRW_UNIFORM_BUFFER_NAME 64 @@ -90,20 +95,6 @@ * > DRWUniform */ -/* Used by DRWCallState.flag */ -enum { - DRW_CALL_NEGSCALE = (1 << 1), -}; - -/* Used by DRWCallState.matflag */ -enum { - DRW_CALL_MODELINVERSE = (1 << 0), - DRW_CALL_MODELVIEWPROJECTION = (1 << 1), - DRW_CALL_ORCOTEXFAC = (1 << 2), - DRW_CALL_OBJECTINFO = (1 << 3), - DRW_CALL_OBJECTCOLOR = (1 << 4), -}; - typedef struct DRWCullingState { uint32_t mask; /* Culling: Using Bounding Sphere for now for faster culling. @@ -113,38 +104,161 @@ typedef struct DRWCullingState { void *user_data; } DRWCullingState; -typedef struct DRWCallState { - DRWCullingState *culling; - uchar flag; - uchar matflag; /* Which matrices to compute. */ - short ob_index; - /* Matrices */ +/* Minimum max UBO size is 64KiB. We take the largest + * UBO struct and alloc the max number. + * ((1 << 16) / sizeof(DRWObjectMatrix)) = 512 + * Keep in sync with common_view_lib.glsl */ +#define DRW_RESOURCE_CHUNK_LEN 512 + +/** + * Identifier used to sort similar drawcalls together. + * Also used to reference elements inside memory blocks. + * + * From MSB to LSB + * 1 bit for negative scale. + * 22 bits for chunk id. + * 9 bits for resource id inside the chunk. (can go up to 511) + * |-|----------------------|---------| + * + * Use manual bitsift and mask instead of bitfields to avoid + * compiler dependant behavior that would mess the ordering of + * the members thus changing the sorting order. 
+ */ +typedef uint32_t DRWResourceHandle; + +BLI_INLINE uint32_t DRW_handle_negative_scale_get(const DRWResourceHandle *handle) +{ + return (*handle & 0x80000000) != 0; +} + +BLI_INLINE uint32_t DRW_handle_chunk_get(const DRWResourceHandle *handle) +{ + return (*handle & 0x7FFFFFFF) >> 9; +} + +BLI_INLINE uint32_t DRW_handle_id_get(const DRWResourceHandle *handle) +{ + return (*handle & 0x000001FF); +} + +BLI_INLINE void DRW_handle_increment(DRWResourceHandle *handle) +{ + *handle += 1; +} + +BLI_INLINE void DRW_handle_negative_scale_enable(DRWResourceHandle *handle) +{ + *handle |= 0x80000000; +} + +BLI_INLINE void *DRW_memblock_elem_from_handle(struct BLI_memblock *memblock, + const DRWResourceHandle *handle) +{ + int elem = DRW_handle_id_get(handle); + int chunk = DRW_handle_chunk_get(handle); + return BLI_memblock_elem_get(memblock, chunk, elem); +} + +typedef struct DRWObjectMatrix { float model[4][4]; float modelinverse[4][4]; - float orcotexfac[2][3]; - float ob_random; +} DRWObjectMatrix; + +typedef struct DRWObjectInfos { + float orcotexfac[2][4]; float ob_color[4]; -} DRWCallState; + float ob_index; + float pad; /* UNUSED*/ + float ob_random; + float ob_neg_scale; +} DRWObjectInfos; + +BLI_STATIC_ASSERT_ALIGN(DRWObjectMatrix, 16) +BLI_STATIC_ASSERT_ALIGN(DRWObjectInfos, 16) -typedef struct DRWCall { - struct DRWCall *next; - DRWCallState *state; +typedef enum { + /* Draw Commands */ + DRW_CMD_DRAW = 0, /* Only sortable type. Must be 0. */ + DRW_CMD_DRAW_RANGE = 1, + DRW_CMD_DRAW_INSTANCE = 2, + DRW_CMD_DRAW_PROCEDURAL = 3, + /* Other Commands */ + DRW_CMD_CLEAR = 12, + DRW_CMD_DRWSTATE = 13, + DRW_CMD_STENCIL = 14, + DRW_CMD_SELECTID = 15, + /* Needs to fit in 4bits */ +} eDRWCommandType; + +#define DRW_MAX_DRAW_CMD_TYPE DRW_CMD_DRAW_PROCEDURAL + +typedef struct DRWCommandDraw { + GPUBatch *batch; + DRWResourceHandle handle; +} DRWCommandDraw; +/* Assume DRWResourceHandle to be 0. 
*/ +typedef struct DRWCommandDrawRange { GPUBatch *batch; uint vert_first; uint vert_count; +} DRWCommandDrawRange; + +typedef struct DRWCommandDrawInstance { + GPUBatch *batch; + DRWResourceHandle handle; uint inst_count; +} DRWCommandDrawInstance; -#ifdef USE_GPU_SELECT - /* TODO(fclem) remove once we have a dedicated selection engine. */ - int select_id; - GPUVertBuf *inst_selectid; -#endif -} DRWCall; +typedef struct DRWCommandDrawProcedural { + GPUBatch *batch; + DRWResourceHandle handle; + uint vert_count; +} DRWCommandDrawProcedural; + +typedef struct DRWCommandSetMutableState { + /** State changes (or'd or and'd with the pass's state) */ + DRWState enable; + DRWState disable; +} DRWCommandSetMutableState; + +typedef struct DRWCommandSetStencil { + uint mask; +} DRWCommandSetStencil; + +typedef struct DRWCommandSetSelectID { + GPUVertBuf *select_buf; + uint select_id; +} DRWCommandSetSelectID; + +typedef struct DRWCommandClear { + eGPUFrameBufferBits clear_channels; + uchar r, g, b, a; /* [0..1] for each channels. Normalized. */ + float depth; /* [0..1] for depth. Normalized. */ + uchar stencil; /* Stencil value [0..255] */ +} DRWCommandClear; + +typedef union DRWCommand { + DRWCommandDraw draw; + DRWCommandDrawRange range; + DRWCommandDrawInstance instance; + DRWCommandDrawProcedural procedural; + DRWCommandSetMutableState state; + DRWCommandSetStencil stencil; + DRWCommandSetSelectID select_id; + DRWCommandClear clear; +} DRWCommand; + +/* Used for agregating calls into GPUVertBufs. 
*/ +struct DRWCallBuffer { + GPUVertBuf *buf; + GPUVertBuf *buf_select; + int count; +}; /* Used by DRWUniform.type */ typedef enum { - DRW_UNIFORM_INT, + DRW_UNIFORM_INT = 0, DRW_UNIFORM_INT_COPY, DRW_UNIFORM_FLOAT, DRW_UNIFORM_FLOAT_COPY, @@ -153,55 +267,56 @@ typedef enum { DRW_UNIFORM_TEXTURE_REF, DRW_UNIFORM_BLOCK, DRW_UNIFORM_BLOCK_PERSIST, + DRW_UNIFORM_TFEEDBACK_TARGET, + /** Per drawcall uniforms/UBO */ + DRW_UNIFORM_BLOCK_OBMATS, + DRW_UNIFORM_BLOCK_OBINFOS, + DRW_UNIFORM_RESOURCE_CHUNK, + /** Legacy / Fallback */ + DRW_UNIFORM_BASE_INSTANCE, + DRW_UNIFORM_MODEL_MATRIX, + DRW_UNIFORM_MODEL_MATRIX_INVERSE, + DRW_UNIFORM_MODELVIEWPROJECTION_MATRIX, + /* WARNING: set DRWUniform->type + * bit length accordingly. */ } DRWUniformType; struct DRWUniform { - DRWUniform *next; /* single-linked list */ union { /* For reference or array/vector types. */ const void *pvalue; /* Single values. */ - float fvalue[2]; - int ivalue[2]; + float fvalue[4]; + int ivalue[4]; }; - int name_ofs; /* name offset in name buffer. */ int location; - char type; /* DRWUniformType */ - char length; /* cannot be more than 16 */ - char arraysize; /* cannot be more than 16 too */ + uint32_t type : 5; /* DRWUniformType */ + uint32_t length : 5; /* cannot be more than 16 */ + uint32_t arraysize : 5; /* cannot be more than 16 too */ + uint32_t name_ofs : 17; /* name offset in name buffer. */ }; struct DRWShadingGroup { DRWShadingGroup *next; - GPUShader *shader; /* Shader to bind */ - DRWUniform *uniforms; /* Uniforms pointers */ + GPUShader *shader; /* Shader to bind */ + struct DRWUniformChunk *uniforms; /* Uniforms pointers */ struct { - DRWCall *first, *last; /* Linked list of DRWCall */ - } calls; + /* Chunks of draw calls. 
*/ + struct DRWCommandChunk *first, *last; + } cmd; - /** TODO Maybe remove from here */ - struct GPUVertBuf *tfeedback_target; - - /** State changes for this batch only (or'd with the pass's state) */ - DRWState state_extra; - /** State changes for this batch only (and'd with the pass's state) */ - DRWState state_extra_disable; - /** Stencil mask to use for stencil test / write operations */ - uint stencil_mask; - - /* Builtin matrices locations */ - int model; - int modelinverse; - int modelviewprojection; - int orcotexfac; - int callid; - int objectinfo; - int objectcolor; - uchar matflag; /* Matrices needed, same as DRWCall.flag */ - - DRWPass *pass_parent; /* backlink to pass we're in */ + union { + struct { + int objectinfo; /* Equal to 1 if the shader needs obinfos. */ + DRWResourceHandle pass_handle; /* Memblock key to parent pass. */ + }; + struct { + float distance; /* Distance from camera. */ + uint original_index; /* Original position inside the shgroup list. */ + } z_sorting; + }; }; #define MAX_PASS_NAME 32 @@ -213,6 +328,7 @@ struct DRWPass { DRWShadingGroup *last; } shgroups; + DRWResourceHandle handle; DRWState state; char name[MAX_PASS_NAME]; }; @@ -232,6 +348,8 @@ typedef struct DRWViewUboStorage { float viewcamtexcofac[4]; } DRWViewUboStorage; +BLI_STATIC_ASSERT_ALIGN(DRWViewUboStorage, 16) + #define MAX_CULLED_VIEWS 32 struct DRWView { @@ -253,13 +371,45 @@ struct DRWView { void *user_data; }; -/* TODO(fclem): Future awaits */ -#if 0 -typedef struct ModelUboStorage { - float model[4][4]; - float modelinverse[4][4]; -} ModelUboStorage; -#endif +/* ------------ Data Chunks --------------- */ +/** + * In order to keep a cache friendly data structure, + * we alloc most of our little data into chunks of multiple item. + * Iteration, allocation and memory usage are better. + * We loose a bit of memory by allocating more than what we need + * but it's counterbalanced by not needing the linked-list pointers + * for each item. 
+ **/ + +typedef struct DRWUniformChunk { + struct DRWUniformChunk *next; /* single-linked list */ + uint32_t uniform_len; + uint32_t uniform_used; + DRWUniform uniforms[10]; +} DRWUniformChunk; + +typedef struct DRWCommandChunk { + struct DRWCommandChunk *next; + uint32_t command_len; + uint32_t command_used; + /* 4bits for each command. */ + uint64_t command_type[6]; + /* -- 64 bytes aligned -- */ + DRWCommand commands[96]; + /* -- 64 bytes aligned -- */ +} DRWCommandChunk; + +typedef struct DRWCommandSmallChunk { + struct DRWCommandChunk *next; + uint32_t command_len; + uint32_t command_used; + /* 4bits for each command. */ + /* TODO reduce size of command_type. */ + uint64_t command_type[6]; + DRWCommand commands[6]; +} DRWCommandSmallChunk; + +BLI_STATIC_ASSERT_ALIGN(DRWCommandChunk, 16); /* ------------- DRAW DEBUG ------------ */ @@ -280,21 +430,31 @@ typedef struct DRWDebugSphere { #define DST_MAX_SLOTS 64 /* Cannot be changed without modifying RST.bound_tex_slots */ #define MAX_CLIP_PLANES 6 /* GL_MAX_CLIP_PLANES is at least 6 */ #define STENCIL_UNDEFINED 256 +#define DRW_DRAWLIST_LEN 256 typedef struct DRWManager { /* TODO clean up this struct a bit */ /* Cache generation */ ViewportMemoryPool *vmempool; DRWInstanceDataList *idatalist; - DRWInstanceData *object_instance_data[MAX_INSTANCE_DATA_SIZE]; - /* Default Unit model matrix state without culling. */ - DRWCallState *unit_state; /* State of the object being evaluated if already allocated. */ - DRWCallState *ob_state; + DRWResourceHandle ob_handle; + /** True if current DST.ob_state has its matching DRWObjectInfos init. */ + bool ob_state_obinfo_init; + /** Handle of current object resource in object resource arrays (DRWObjectMatrices/Infos). */ + DRWResourceHandle resource_handle; + /** Handle of next DRWPass to be allocated. */ + DRWResourceHandle pass_handle; + + /** Dupli state. NULL if not dupli. 
*/ struct DupliObject *dupli_source; struct Object *dupli_parent; struct Object *dupli_origin; + /** Ghash containing original objects. */ struct GHash *dupli_ghash; - void **dupli_datas; /* Array of dupli_data (one for each enabled engine) to handle duplis. */ + /** TODO(fclem) try to remove usage of this. */ + DRWInstanceData *object_instance_data[MAX_INSTANCE_DATA_SIZE]; + /* Array of dupli_data (one for each enabled engine) to handle duplis. */ + void **dupli_datas; /* Rendering state */ GPUShader *shader; @@ -357,6 +517,8 @@ typedef struct DRWManager { /** Mutex to lock the drw manager and avoid concurrent context usage. */ TicketMutex *gl_context_mutex; + GPUDrawList *draw_list; + /** GPU Resource State: Memory storage between drawing. */ struct { /* High end GPUs supports up to 32 binds per shader stage. @@ -397,9 +559,13 @@ void drw_state_set(DRWState state); void drw_debug_draw(void); void drw_debug_init(void); +eDRWCommandType command_type_get(uint64_t *command_type_bits, int index); + void drw_batch_cache_validate(Object *ob); void drw_batch_cache_generate_requested(struct Object *ob); +void drw_resource_buffer_finish(ViewportMemoryPool *vmempool); + /* Procedural Drawing */ GPUBatch *drw_cache_procedural_points_get(void); GPUBatch *drw_cache_procedural_lines_get(void); diff --git a/source/blender/draw/intern/draw_manager_data.c b/source/blender/draw/intern/draw_manager_data.c index cd174e899d4..cdf2c208351 100644 --- a/source/blender/draw/intern/draw_manager_data.c +++ b/source/blender/draw/intern/draw_manager_data.c @@ -34,6 +34,7 @@ #include "DNA_mesh_types.h" #include "DNA_meta_types.h" +#include "BLI_alloca.h" #include "BLI_hash.h" #include "BLI_link_utils.h" #include "BLI_mempool.h" @@ -51,6 +52,35 @@ /** \name Uniform Buffer Object (DRW_uniformbuffer) * \{ */ +static void draw_call_sort(DRWCommand *array, DRWCommand *array_tmp, int array_len) +{ + /* Count unique batches. Tt's not really important if + * there is colisions. 
If there is a lot of different batches, + * the sorting benefit will be negligeable. So at least + * sort fast! */ + uchar idx[128] = {0}; + /* Shift by 6 positions knowing each GPUBatch is > 64 bytes */ +#define KEY(a) ((((size_t)((a).draw.batch)) >> 6) % ARRAY_SIZE(idx)) + BLI_assert(array_len <= ARRAY_SIZE(idx)); + + for (int i = 0; i < array_len; i++) { + /* Early out if nothing to sort. */ + if (++idx[KEY(array[i])] == array_len) + return; + } + /* Cumulate batch indices */ + for (int i = 1; i < ARRAY_SIZE(idx); i++) { + idx[i] += idx[i - 1]; + } + /* Traverse in reverse to not change the order of the resource ids. */ + for (int src = array_len - 1; src >= 0; src--) { + array_tmp[--idx[KEY(array[src])]] = array[src]; + } +#undef KEY + + memcpy(array, array_tmp, sizeof(*array) * array_len); +} + GPUUniformBuffer *DRW_uniformbuffer_create(int size, const void *data) { return GPU_uniformbuffer_create(size, data, NULL); @@ -66,20 +96,94 @@ void DRW_uniformbuffer_free(GPUUniformBuffer *ubo) GPU_uniformbuffer_free(ubo); } +void drw_resource_buffer_finish(ViewportMemoryPool *vmempool) +{ + int chunk_id = DRW_handle_chunk_get(&DST.resource_handle); + int elem_id = DRW_handle_id_get(&DST.resource_handle); + int ubo_len = 1 + chunk_id - ((elem_id == 0) ? 1 : 0); + size_t list_size = sizeof(GPUUniformBuffer *) * ubo_len; + + /* TODO find a better system. currently a lot of obinfos UBO are going to be unused + * if not rendering with Eevee. 
*/ + + if (vmempool->matrices_ubo == NULL) { + vmempool->matrices_ubo = MEM_callocN(list_size, __func__); + vmempool->obinfos_ubo = MEM_callocN(list_size, __func__); + vmempool->ubo_len = ubo_len; + } + + /* Remove unecessary buffers */ + for (int i = ubo_len; i < vmempool->ubo_len; i++) { + GPU_uniformbuffer_free(vmempool->matrices_ubo[i]); + GPU_uniformbuffer_free(vmempool->obinfos_ubo[i]); + } + + if (ubo_len != vmempool->ubo_len) { + vmempool->matrices_ubo = MEM_recallocN(vmempool->matrices_ubo, list_size); + vmempool->obinfos_ubo = MEM_recallocN(vmempool->obinfos_ubo, list_size); + vmempool->ubo_len = ubo_len; + } + + /* Create/Update buffers. */ + for (int i = 0; i < ubo_len; i++) { + void *data_obmat = BLI_memblock_elem_get(vmempool->obmats, i, 0); + void *data_infos = BLI_memblock_elem_get(vmempool->obinfos, i, 0); + if (vmempool->matrices_ubo[i] == NULL) { + vmempool->matrices_ubo[i] = GPU_uniformbuffer_create( + sizeof(DRWObjectMatrix) * DRW_RESOURCE_CHUNK_LEN, data_obmat, NULL); + vmempool->obinfos_ubo[i] = GPU_uniformbuffer_create( + sizeof(DRWObjectInfos) * DRW_RESOURCE_CHUNK_LEN, data_infos, NULL); + } + else { + GPU_uniformbuffer_update(vmempool->matrices_ubo[i], data_obmat); + GPU_uniformbuffer_update(vmempool->obinfos_ubo[i], data_infos); + } + } + + /* Aligned alloc to avoid unaligned memcpy. */ + DRWCommandChunk *chunk_tmp = MEM_mallocN_aligned(sizeof(DRWCommandChunk), 16, "tmp call chunk"); + DRWCommandChunk *chunk; + BLI_memblock_iter iter; + BLI_memblock_iternew(vmempool->commands, &iter); + while ((chunk = BLI_memblock_iterstep(&iter))) { + bool sortable = true; + /* We can only sort chunks that contain DRWCommandDraw only. 
*/ + for (int i = 0; i < ARRAY_SIZE(chunk->command_type) && sortable; i++) { + if (chunk->command_type[i] != 0) { + sortable = false; + } + } + if (sortable) { + draw_call_sort(chunk->commands, chunk_tmp->commands, chunk->command_used); + } + } + MEM_freeN(chunk_tmp); +} + /** \} */ /* -------------------------------------------------------------------- */ /** \name Uniforms (DRW_shgroup_uniform) * \{ */ -static void drw_shgroup_uniform_create_ex(DRWShadingGroup *shgroup, - int loc, - DRWUniformType type, - const void *value, - int length, - int arraysize) +static DRWUniform *drw_shgroup_uniform_create_ex(DRWShadingGroup *shgroup, + int loc, + DRWUniformType type, + const void *value, + int length, + int arraysize) { - DRWUniform *uni = BLI_memblock_alloc(DST.vmempool->uniforms); + DRWUniformChunk *unichunk = shgroup->uniforms; + /* Happens on first uniform or if chunk is full. */ + if (!unichunk || unichunk->uniform_used == unichunk->uniform_len) { + unichunk = BLI_memblock_alloc(DST.vmempool->uniforms); + unichunk->uniform_len = ARRAY_SIZE(shgroup->uniforms->uniforms); + unichunk->uniform_used = 0; + BLI_LINKS_PREPEND(shgroup->uniforms, unichunk); + } + + DRWUniform *uni = unichunk->uniforms + unichunk->uniform_used++; + uni->location = loc; uni->type = type; uni->length = length; @@ -87,11 +191,11 @@ static void drw_shgroup_uniform_create_ex(DRWShadingGroup *shgroup, switch (type) { case DRW_UNIFORM_INT_COPY: - BLI_assert(length <= 2); + BLI_assert(length <= 4); memcpy(uni->ivalue, value, sizeof(int) * length); break; case DRW_UNIFORM_FLOAT_COPY: - BLI_assert(length <= 2); + BLI_assert(length <= 4); memcpy(uni->fvalue, value, sizeof(float) * length); break; default: @@ -99,7 +203,7 @@ static void drw_shgroup_uniform_create_ex(DRWShadingGroup *shgroup, break; } - BLI_LINKS_PREPEND(shgroup->uniforms, uni); + return uni; } static void drw_shgroup_builtin_uniform( @@ -136,7 +240,8 @@ static void drw_shgroup_uniform(DRWShadingGroup *shgroup, BLI_assert(arraysize > 0 
&& arraysize <= 16); BLI_assert(length >= 0 && length <= 16); - drw_shgroup_uniform_create_ex(shgroup, location, type, value, length, arraysize); + DRWUniform *uni = drw_shgroup_uniform_create_ex( + shgroup, location, type, value, length, arraysize); /* If location is -2, the uniform has not yet been queried. * We save the name for query just before drawing. */ @@ -155,7 +260,7 @@ static void drw_shgroup_uniform(DRWShadingGroup *shgroup, memcpy(dst, name, len); /* Copies NULL terminator. */ DST.uniform_names.buffer_ofs += len; - shgroup->uniforms->name_ofs = ofs; + uni->name_ofs = ofs; } } @@ -286,6 +391,21 @@ void DRW_shgroup_uniform_int_copy(DRWShadingGroup *shgroup, const char *name, co drw_shgroup_uniform(shgroup, name, DRW_UNIFORM_INT_COPY, &value, 1, 1); } +void DRW_shgroup_uniform_ivec2_copy(DRWShadingGroup *shgroup, const char *name, const int *value) +{ + drw_shgroup_uniform(shgroup, name, DRW_UNIFORM_INT_COPY, value, 2, 1); +} + +void DRW_shgroup_uniform_ivec3_copy(DRWShadingGroup *shgroup, const char *name, const int *value) +{ + drw_shgroup_uniform(shgroup, name, DRW_UNIFORM_INT_COPY, value, 3, 1); +} + +void DRW_shgroup_uniform_ivec4_copy(DRWShadingGroup *shgroup, const char *name, const int *value) +{ + drw_shgroup_uniform(shgroup, name, DRW_UNIFORM_INT_COPY, value, 4, 1); +} + void DRW_shgroup_uniform_bool_copy(DRWShadingGroup *shgroup, const char *name, const bool value) { int ival = value; @@ -302,13 +422,23 @@ void DRW_shgroup_uniform_vec2_copy(DRWShadingGroup *shgroup, const char *name, c drw_shgroup_uniform(shgroup, name, DRW_UNIFORM_FLOAT_COPY, value, 2, 1); } +void DRW_shgroup_uniform_vec3_copy(DRWShadingGroup *shgroup, const char *name, const float *value) +{ + drw_shgroup_uniform(shgroup, name, DRW_UNIFORM_FLOAT_COPY, value, 3, 1); +} + +void DRW_shgroup_uniform_vec4_copy(DRWShadingGroup *shgroup, const char *name, const float *value) +{ + drw_shgroup_uniform(shgroup, name, DRW_UNIFORM_FLOAT_COPY, value, 4, 1); +} + /** \} */ /* 
-------------------------------------------------------------------- */ /** \name Draw Call (DRW_calls) * \{ */ -static void drw_call_calc_orco(Object *ob, float (*r_orcofacs)[3]) +static void drw_call_calc_orco(Object *ob, float (*r_orcofacs)[4]) { ID *ob_data = (ob) ? ob->data : NULL; float *texcoloc = NULL; @@ -351,159 +481,283 @@ static void drw_call_calc_orco(Object *ob, float (*r_orcofacs)[3]) } } -static void drw_call_state_update_matflag(DRWCallState *state, - DRWShadingGroup *shgroup, - Object *ob) +BLI_INLINE void drw_call_matrix_init(DRWObjectMatrix *ob_mats, Object *ob, float (*obmat)[4]) { - uchar new_flags = ((state->matflag ^ shgroup->matflag) & shgroup->matflag); - - /* HACK: Here we set the matflags bit to 1 when computing the value - * so that it's not recomputed for other drawcalls. - * This is the opposite of what draw_matrices_model_prepare() does. */ - state->matflag |= shgroup->matflag; - - if (new_flags & DRW_CALL_MODELINVERSE) { - if (ob) { - copy_m4_m4(state->modelinverse, ob->imat); - } - else { - invert_m4_m4(state->modelinverse, state->model); - } + copy_m4_m4(ob_mats->model, obmat); + if (ob) { + copy_m4_m4(ob_mats->modelinverse, ob->imat); } - - /* Orco factors: We compute this at creation to not have to save the *ob_data */ - if (new_flags & DRW_CALL_ORCOTEXFAC) { - drw_call_calc_orco(ob, state->orcotexfac); - } - - if (new_flags & DRW_CALL_OBJECTINFO) { - state->ob_index = ob ? ob->index : 0; - uint random; - if (DST.dupli_source) { - random = DST.dupli_source->random_id; - } - else { - random = BLI_hash_int_2d(BLI_hash_string(ob->id.name + 2), 0); - } - state->ob_random = random * (1.0f / (float)0xFFFFFFFF); - } - - if (new_flags & DRW_CALL_OBJECTCOLOR) { - copy_v4_v4(state->ob_color, ob->color); + else { + /* WATCH: Can be costly. 
*/ + invert_m4_m4(ob_mats->modelinverse, ob_mats->model); } } -static DRWCallState *drw_call_state_create(DRWShadingGroup *shgroup, float (*obmat)[4], Object *ob) +static void drw_call_obinfos_init(DRWObjectInfos *ob_infos, Object *ob) { - DRWCallState *state = BLI_memblock_alloc(DST.vmempool->states); - state->flag = 0; - state->matflag = 0; - - /* Matrices */ - copy_m4_m4(state->model, obmat); - - if (ob && (ob->transflag & OB_NEG_SCALE)) { - state->flag |= DRW_CALL_NEGSCALE; - } - - drw_call_state_update_matflag(state, shgroup, ob); - - DRWCullingState *cull = BLI_memblock_alloc(DST.vmempool->cullstates); - state->culling = cull; + BLI_assert(ob); + /* Index. */ + ob_infos->ob_index = ob->index; + /* Orco factors. */ + drw_call_calc_orco(ob, ob_infos->orcotexfac); + /* Random float value. */ + uint random = (DST.dupli_source) ? + DST.dupli_source->random_id : + /* TODO(fclem) this is rather costly to do at runtime. Maybe we can + * put it in ob->runtime and make depsgraph ensure it is up to date. */ + BLI_hash_int_2d(BLI_hash_string(ob->id.name + 2), 0); + ob_infos->ob_random = random * (1.0f / (float)0xFFFFFFFF); + /* Negative scalling. */ + ob_infos->ob_neg_scale = (ob->transflag & OB_NEG_SCALE) ? -1.0f : 1.0f; + /* Object Color. */ + copy_v4_v4(ob_infos->ob_color, ob->color); +} +static void drw_call_culling_init(DRWCullingState *cull, Object *ob) +{ BoundBox *bbox; if (ob != NULL && (bbox = BKE_object_boundbox_get(ob))) { float corner[3]; /* Get BoundSphere center and radius from the BoundBox. */ mid_v3_v3v3(cull->bsphere.center, bbox->vec[0], bbox->vec[6]); - mul_v3_m4v3(corner, obmat, bbox->vec[0]); - mul_m4_v3(obmat, cull->bsphere.center); + mul_v3_m4v3(corner, ob->obmat, bbox->vec[0]); + mul_m4_v3(ob->obmat, cull->bsphere.center); cull->bsphere.radius = len_v3v3(cull->bsphere.center, corner); } else { - /* TODO(fclem) Bypass alloc if we can (see if eevee's - * probe visibility collection still works). */ /* Bypass test. 
*/ cull->bsphere.radius = -1.0f; } + /* Reset user data */ + cull->user_data = NULL; +} + +static DRWResourceHandle drw_resource_handle_new(float (*obmat)[4], Object *ob) +{ + DRWCullingState *culling = BLI_memblock_alloc(DST.vmempool->cullstates); + DRWObjectMatrix *ob_mats = BLI_memblock_alloc(DST.vmempool->obmats); + /* FIXME Meh, not always needed but can be accessed after creation. + * Also it needs to have the same resource handle. */ + DRWObjectInfos *ob_infos = BLI_memblock_alloc(DST.vmempool->obinfos); + UNUSED_VARS(ob_infos); + + DRWResourceHandle handle = DST.resource_handle; + DRW_handle_increment(&DST.resource_handle); - return state; + if (ob && (ob->transflag & OB_NEG_SCALE)) { + DRW_handle_negative_scale_enable(&handle); + } + + drw_call_matrix_init(ob_mats, ob, obmat); + drw_call_culling_init(culling, ob); + /* ob_infos is init only if needed. */ + + return handle; } -static DRWCallState *drw_call_state_object(DRWShadingGroup *shgroup, float (*obmat)[4], Object *ob) +static DRWResourceHandle drw_resource_handle(DRWShadingGroup *shgroup, + float (*obmat)[4], + Object *ob) { if (ob == NULL) { if (obmat == NULL) { - BLI_assert(DST.unit_state); - return DST.unit_state; + DRWResourceHandle handle = 0; + return handle; } else { - return drw_call_state_create(shgroup, obmat, ob); + return drw_resource_handle_new(obmat, NULL); } } else { - if (DST.ob_state == NULL) { - DST.ob_state = drw_call_state_create(shgroup, obmat, ob); + if (DST.ob_handle == 0) { + DST.ob_handle = drw_resource_handle_new(obmat, ob); + DST.ob_state_obinfo_init = false; } - else { - /* If the DRWCallState is reused, add necessary matrices. 
*/ - drw_call_state_update_matflag(DST.ob_state, shgroup, ob); + + if (shgroup->objectinfo) { + if (!DST.ob_state_obinfo_init) { + DST.ob_state_obinfo_init = true; + DRWObjectInfos *ob_infos = DRW_memblock_elem_from_handle(DST.vmempool->obinfos, + &DST.ob_handle); + + drw_call_obinfos_init(ob_infos, ob); + } } - return DST.ob_state; + return DST.ob_handle; + } +} + +static void command_type_set(uint64_t *command_type_bits, int index, eDRWCommandType type) +{ + command_type_bits[index / 16] |= ((uint64_t)type) << ((index % 16) * 4); +} + +eDRWCommandType command_type_get(uint64_t *command_type_bits, int index) +{ + return ((command_type_bits[index / 16] >> ((index % 16) * 4)) & 0xF); +} + +static void *drw_command_create(DRWShadingGroup *shgroup, eDRWCommandType type) +{ + DRWCommandChunk *chunk = shgroup->cmd.last; + + if (chunk == NULL) { + DRWCommandSmallChunk *smallchunk = BLI_memblock_alloc(DST.vmempool->commands_small); + smallchunk->command_len = ARRAY_SIZE(smallchunk->commands); + smallchunk->command_used = 0; + smallchunk->command_type[0] = 0x0lu; + chunk = (DRWCommandChunk *)smallchunk; + BLI_LINKS_APPEND(&shgroup->cmd, chunk); } + else if (chunk->command_used == chunk->command_len) { + chunk = BLI_memblock_alloc(DST.vmempool->commands); + chunk->command_len = ARRAY_SIZE(chunk->commands); + chunk->command_used = 0; + memset(chunk->command_type, 0x0, sizeof(chunk->command_type)); + BLI_LINKS_APPEND(&shgroup->cmd, chunk); + } + + command_type_set(chunk->command_type, chunk->command_used, type); + + return chunk->commands + chunk->command_used++; +} + +static void drw_command_draw(DRWShadingGroup *shgroup, GPUBatch *batch, DRWResourceHandle handle) +{ + DRWCommandDraw *cmd = drw_command_create(shgroup, DRW_CMD_DRAW); + cmd->batch = batch; + cmd->handle = handle; +} + +static void drw_command_draw_range(DRWShadingGroup *shgroup, + GPUBatch *batch, + uint start, + uint count) +{ + DRWCommandDrawRange *cmd = drw_command_create(shgroup, DRW_CMD_DRAW_RANGE); + 
cmd->batch = batch; + cmd->vert_first = start; + cmd->vert_count = count; +} + +static void drw_command_draw_instance(DRWShadingGroup *shgroup, + GPUBatch *batch, + DRWResourceHandle handle, + uint count) +{ + DRWCommandDrawInstance *cmd = drw_command_create(shgroup, DRW_CMD_DRAW_INSTANCE); + cmd->batch = batch; + cmd->handle = handle; + cmd->inst_count = count; +} + +static void drw_command_draw_procedural(DRWShadingGroup *shgroup, + GPUBatch *batch, + DRWResourceHandle handle, + uint vert_count) +{ + DRWCommandDrawProcedural *cmd = drw_command_create(shgroup, DRW_CMD_DRAW_PROCEDURAL); + cmd->batch = batch; + cmd->handle = handle; + cmd->vert_count = vert_count; +} + +static void drw_command_set_select_id(DRWShadingGroup *shgroup, GPUVertBuf *buf, uint select_id) +{ + /* Only one can be valid. */ + BLI_assert(buf == NULL || select_id == -1); + DRWCommandSetSelectID *cmd = drw_command_create(shgroup, DRW_CMD_SELECTID); + cmd->select_buf = buf; + cmd->select_id = select_id; +} + +static void drw_command_set_stencil_mask(DRWShadingGroup *shgroup, uint mask) +{ + BLI_assert(mask <= 0xFF); + DRWCommandSetStencil *cmd = drw_command_create(shgroup, DRW_CMD_STENCIL); + cmd->mask = mask; +} + +static void drw_command_clear(DRWShadingGroup *shgroup, + eGPUFrameBufferBits channels, + uchar r, + uchar g, + uchar b, + uchar a, + float depth, + uchar stencil) +{ + DRWCommandClear *cmd = drw_command_create(shgroup, DRW_CMD_CLEAR); + cmd->clear_channels = channels; + cmd->r = r; + cmd->g = g; + cmd->b = b; + cmd->a = a; + cmd->depth = depth; + cmd->stencil = stencil; +} + +static void drw_command_set_mutable_state(DRWShadingGroup *shgroup, + DRWState enable, + DRWState disable) +{ + /* TODO Restrict what state can be changed. 
*/ + DRWCommandSetMutableState *cmd = drw_command_create(shgroup, DRW_CMD_DRWSTATE); + cmd->enable = enable; + cmd->disable = disable; } void DRW_shgroup_call_ex(DRWShadingGroup *shgroup, Object *ob, float (*obmat)[4], struct GPUBatch *geom, - uint v_sta, - uint v_ct, bool bypass_culling, void *user_data) { BLI_assert(geom != NULL); + if (G.f & G_FLAG_PICKSEL) { + drw_command_set_select_id(shgroup, NULL, DST.select_id); + } + DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->obmat : obmat, ob); + drw_command_draw(shgroup, geom, handle); - DRWCall *call = BLI_memblock_alloc(DST.vmempool->calls); - BLI_LINKS_APPEND(&shgroup->calls, call); - - call->state = drw_call_state_object(shgroup, ob ? ob->obmat : obmat, ob); - call->batch = geom; - call->vert_first = v_sta; - call->vert_count = v_ct; /* 0 means auto from batch. */ - call->inst_count = 0; -#ifdef USE_GPU_SELECT - call->select_id = DST.select_id; - call->inst_selectid = NULL; -#endif - if (call->state->culling) { - call->state->culling->user_data = user_data; + /* Culling data. */ + if (user_data || bypass_culling) { + DRWCullingState *culling = DRW_memblock_elem_from_handle(DST.vmempool->cullstates, + &DST.ob_handle); + + if (user_data) { + culling->user_data = user_data; + } if (bypass_culling) { /* NOTE this will disable culling for the whole object. */ - call->state->culling->bsphere.radius = -1.0f; + culling->bsphere.radius = -1.0f; } } } +void DRW_shgroup_call_range(DRWShadingGroup *shgroup, struct GPUBatch *geom, uint v_sta, uint v_ct) +{ + BLI_assert(geom != NULL); + if (G.f & G_FLAG_PICKSEL) { + drw_command_set_select_id(shgroup, NULL, DST.select_id); + } + drw_command_draw_range(shgroup, geom, v_sta, v_ct); +} + static void drw_shgroup_call_procedural_add_ex(DRWShadingGroup *shgroup, GPUBatch *geom, Object *ob, uint vert_count) { - - DRWCall *call = BLI_memblock_alloc(DST.vmempool->calls); - BLI_LINKS_APPEND(&shgroup->calls, call); - - call->state = drw_call_state_object(shgroup, ob ? 
ob->obmat : NULL, ob); - call->batch = geom; - call->vert_first = 0; - call->vert_count = vert_count; - call->inst_count = 0; -#ifdef USE_GPU_SELECT - call->select_id = DST.select_id; - call->inst_selectid = NULL; -#endif + BLI_assert(vert_count > 0); + BLI_assert(geom != NULL); + if (G.f & G_FLAG_PICKSEL) { + drw_command_set_select_id(shgroup, NULL, DST.select_id); + } + DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->obmat : NULL, ob); + drw_command_draw_procedural(shgroup, geom, handle, vert_count); } void DRW_shgroup_call_procedural_points(DRWShadingGroup *shgroup, Object *ob, uint point_len) @@ -524,25 +778,18 @@ void DRW_shgroup_call_procedural_triangles(DRWShadingGroup *shgroup, Object *ob, drw_shgroup_call_procedural_add_ex(shgroup, geom, ob, tria_count * 3); } +/* Should be removed */ void DRW_shgroup_call_instances(DRWShadingGroup *shgroup, Object *ob, struct GPUBatch *geom, uint count) { BLI_assert(geom != NULL); - - DRWCall *call = BLI_memblock_alloc(DST.vmempool->calls); - BLI_LINKS_APPEND(&shgroup->calls, call); - - call->state = drw_call_state_object(shgroup, ob ? ob->obmat : NULL, ob); - call->batch = geom; - call->vert_first = 0; - call->vert_count = 0; /* Auto from batch. */ - call->inst_count = count; -#ifdef USE_GPU_SELECT - call->select_id = DST.select_id; - call->inst_selectid = NULL; -#endif + if (G.f & G_FLAG_PICKSEL) { + drw_command_set_select_id(shgroup, NULL, DST.select_id); + } + DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->obmat : NULL, ob); + drw_command_draw_instance(shgroup, geom, handle, count); } void DRW_shgroup_call_instances_with_attribs(DRWShadingGroup *shgroup, @@ -552,21 +799,13 @@ void DRW_shgroup_call_instances_with_attribs(DRWShadingGroup *shgroup, { BLI_assert(geom != NULL); BLI_assert(inst_attributes->verts[0] != NULL); - + if (G.f & G_FLAG_PICKSEL) { + drw_command_set_select_id(shgroup, NULL, DST.select_id); + } + DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? 
ob->obmat : NULL, ob); GPUVertBuf *buf_inst = inst_attributes->verts[0]; - - DRWCall *call = BLI_memblock_alloc(DST.vmempool->calls); - BLI_LINKS_APPEND(&shgroup->calls, call); - - call->state = drw_call_state_object(shgroup, ob ? ob->obmat : NULL, ob); - call->batch = DRW_temp_batch_instance_request(DST.idatalist, buf_inst, geom); - call->vert_first = 0; - call->vert_count = 0; /* Auto from batch. */ - call->inst_count = buf_inst->vertex_len; -#ifdef USE_GPU_SELECT - call->select_id = DST.select_id; - call->inst_selectid = NULL; -#endif + GPUBatch *batch = DRW_temp_batch_instance_request(DST.idatalist, buf_inst, geom); + drw_command_draw(shgroup, batch, handle); } // #define SCULPT_DEBUG_BUFFERS @@ -719,27 +958,26 @@ DRWCallBuffer *DRW_shgroup_call_buffer(DRWShadingGroup *shgroup, BLI_assert(ELEM(prim_type, GPU_PRIM_POINTS, GPU_PRIM_LINES, GPU_PRIM_TRI_FAN)); BLI_assert(format != NULL); - DRWCall *call = BLI_memblock_alloc(DST.vmempool->calls); - BLI_LINKS_APPEND(&shgroup->calls, call); + DRWCallBuffer *callbuf = BLI_memblock_alloc(DST.vmempool->callbuffers); + callbuf->buf = DRW_temp_buffer_request(DST.idatalist, format, &callbuf->count); + callbuf->buf_select = NULL; + callbuf->count = 0; - call->state = drw_call_state_object(shgroup, NULL, NULL); - GPUVertBuf *buf = DRW_temp_buffer_request(DST.idatalist, format, &call->vert_count); - call->batch = DRW_temp_batch_request(DST.idatalist, buf, prim_type); - call->vert_first = 0; - call->vert_count = 0; - call->inst_count = 0; - -#ifdef USE_GPU_SELECT if (G.f & G_FLAG_PICKSEL) { /* Not actually used for rendering but alloced in one chunk. 
*/ if (inst_select_format.attr_len == 0) { GPU_vertformat_attr_add(&inst_select_format, "selectId", GPU_COMP_I32, 1, GPU_FETCH_INT); } - call->inst_selectid = DRW_temp_buffer_request( - DST.idatalist, &inst_select_format, &call->vert_count); + callbuf->buf_select = DRW_temp_buffer_request( + DST.idatalist, &inst_select_format, &callbuf->count); + drw_command_set_select_id(shgroup, callbuf->buf_select, -1); } -#endif - return (DRWCallBuffer *)call; + + DRWResourceHandle handle = drw_resource_handle(shgroup, NULL, NULL); + GPUBatch *batch = DRW_temp_batch_request(DST.idatalist, callbuf->buf, prim_type); + drw_command_draw(shgroup, batch, handle); + + return callbuf; } DRWCallBuffer *DRW_shgroup_call_buffer_instance(DRWShadingGroup *shgroup, @@ -749,56 +987,52 @@ DRWCallBuffer *DRW_shgroup_call_buffer_instance(DRWShadingGroup *shgroup, BLI_assert(geom != NULL); BLI_assert(format != NULL); - DRWCall *call = BLI_memblock_alloc(DST.vmempool->calls); - BLI_LINKS_APPEND(&shgroup->calls, call); + DRWCallBuffer *callbuf = BLI_memblock_alloc(DST.vmempool->callbuffers); + callbuf->buf = DRW_temp_buffer_request(DST.idatalist, format, &callbuf->count); + callbuf->buf_select = NULL; + callbuf->count = 0; - call->state = drw_call_state_object(shgroup, NULL, NULL); - GPUVertBuf *buf = DRW_temp_buffer_request(DST.idatalist, format, &call->inst_count); - call->batch = DRW_temp_batch_instance_request(DST.idatalist, buf, geom); - call->vert_first = 0; - call->vert_count = 0; /* Auto from batch. */ - call->inst_count = 0; - -#ifdef USE_GPU_SELECT if (G.f & G_FLAG_PICKSEL) { /* Not actually used for rendering but alloced in one chunk. 
*/ if (inst_select_format.attr_len == 0) { GPU_vertformat_attr_add(&inst_select_format, "selectId", GPU_COMP_I32, 1, GPU_FETCH_INT); } - call->inst_selectid = DRW_temp_buffer_request( - DST.idatalist, &inst_select_format, &call->inst_count); + callbuf->buf_select = DRW_temp_buffer_request( + DST.idatalist, &inst_select_format, &callbuf->count); + drw_command_set_select_id(shgroup, callbuf->buf_select, -1); } -#endif - return (DRWCallBuffer *)call; + + DRWResourceHandle handle = drw_resource_handle(shgroup, NULL, NULL); + GPUBatch *batch = DRW_temp_batch_instance_request(DST.idatalist, callbuf->buf, geom); + drw_command_draw(shgroup, batch, handle); + + return callbuf; } void DRW_buffer_add_entry_array(DRWCallBuffer *callbuf, const void *attr[], uint attr_len) { - DRWCall *call = (DRWCall *)callbuf; - const bool is_instance = call->batch->inst != NULL; - GPUVertBuf *buf = is_instance ? call->batch->inst : call->batch->verts[0]; - uint count = is_instance ? call->inst_count++ : call->vert_count++; - const bool resize = (count == buf->vertex_alloc); + GPUVertBuf *buf = callbuf->buf; + const bool resize = (callbuf->count == buf->vertex_alloc); BLI_assert(attr_len == buf->format.attr_len); UNUSED_VARS_NDEBUG(attr_len); if (UNLIKELY(resize)) { - GPU_vertbuf_data_resize(buf, count + DRW_BUFFER_VERTS_CHUNK); + GPU_vertbuf_data_resize(buf, callbuf->count + DRW_BUFFER_VERTS_CHUNK); } for (int i = 0; i < attr_len; i++) { - GPU_vertbuf_attr_set(buf, i, count, attr[i]); + GPU_vertbuf_attr_set(buf, i, callbuf->count, attr[i]); } -#ifdef USE_GPU_SELECT if (G.f & G_FLAG_PICKSEL) { if (UNLIKELY(resize)) { - GPU_vertbuf_data_resize(call->inst_selectid, count + DRW_BUFFER_VERTS_CHUNK); + GPU_vertbuf_data_resize(callbuf->buf_select, callbuf->count + DRW_BUFFER_VERTS_CHUNK); } - GPU_vertbuf_attr_set(call->inst_selectid, 0, count, &DST.select_id); + GPU_vertbuf_attr_set(callbuf->buf_select, 0, callbuf->count, &DST.select_id); } -#endif + + callbuf->count++; } /** \} */ @@ -811,7 
+1045,54 @@ static void drw_shgroup_init(DRWShadingGroup *shgroup, GPUShader *shader) { shgroup->uniforms = NULL; + /* TODO(fclem) make them builtin. */ int view_ubo_location = GPU_shader_get_uniform_block(shader, "viewBlock"); + int model_ubo_location = GPU_shader_get_uniform_block(shader, "modelBlock"); + int info_ubo_location = GPU_shader_get_uniform_block(shader, "infoBlock"); + int baseinst_location = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_BASE_INSTANCE); + int chunkid_location = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_RESOURCE_CHUNK); + + if (chunkid_location != -1) { + drw_shgroup_uniform_create_ex( + shgroup, chunkid_location, DRW_UNIFORM_RESOURCE_CHUNK, NULL, 0, 1); + } + + if (baseinst_location != -1) { + drw_shgroup_uniform_create_ex( + shgroup, baseinst_location, DRW_UNIFORM_BASE_INSTANCE, NULL, 0, 1); + } + + if (model_ubo_location != -1) { + drw_shgroup_uniform_create_ex( + shgroup, model_ubo_location, DRW_UNIFORM_BLOCK_OBMATS, NULL, 0, 1); + } + else { + int model = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODEL); + int modelinverse = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODEL_INV); + int modelviewprojection = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MVP); + if (model != -1) { + drw_shgroup_uniform_create_ex(shgroup, model, DRW_UNIFORM_MODEL_MATRIX, NULL, 0, 1); + } + if (modelinverse != -1) { + drw_shgroup_uniform_create_ex( + shgroup, modelinverse, DRW_UNIFORM_MODEL_MATRIX_INVERSE, NULL, 0, 1); + } + if (modelviewprojection != -1) { + drw_shgroup_uniform_create_ex( + shgroup, modelviewprojection, DRW_UNIFORM_MODELVIEWPROJECTION_MATRIX, NULL, 0, 1); + } + } + + if (info_ubo_location != -1) { + drw_shgroup_uniform_create_ex( + shgroup, info_ubo_location, DRW_UNIFORM_BLOCK_OBINFOS, NULL, 0, 1); + + /* Abusing this loc to tell shgroup we need the obinfos. 
*/ + shgroup->objectinfo = 1; + } + else { + shgroup->objectinfo = 0; + } if (view_ubo_location != -1) { drw_shgroup_uniform_create_ex( @@ -834,31 +1115,6 @@ static void drw_shgroup_init(DRWShadingGroup *shgroup, GPUShader *shader) BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODELVIEW_INV) == -1); BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODELVIEW) == -1); BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_NORMAL) == -1); - - shgroup->model = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODEL); - shgroup->modelinverse = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODEL_INV); - shgroup->modelviewprojection = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MVP); - shgroup->orcotexfac = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_ORCO); - shgroup->objectinfo = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_OBJECT_INFO); - shgroup->objectcolor = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_OBJECT_COLOR); - shgroup->callid = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_CALLID); - - shgroup->matflag = 0; - if (shgroup->modelinverse > -1) { - shgroup->matflag |= DRW_CALL_MODELINVERSE; - } - if (shgroup->modelviewprojection > -1) { - shgroup->matflag |= DRW_CALL_MODELVIEWPROJECTION; - } - if (shgroup->orcotexfac > -1) { - shgroup->matflag |= DRW_CALL_ORCOTEXFAC; - } - if (shgroup->objectinfo > -1) { - shgroup->matflag |= DRW_CALL_OBJECTINFO; - } - if (shgroup->objectcolor > -1) { - shgroup->matflag |= DRW_CALL_OBJECTCOLOR; - } } static DRWShadingGroup *drw_shgroup_create_ex(struct GPUShader *shader, DRWPass *pass) @@ -868,13 +1124,9 @@ static DRWShadingGroup *drw_shgroup_create_ex(struct GPUShader *shader, DRWPass BLI_LINKS_APPEND(&pass->shgroups, shgroup); shgroup->shader = shader; - shgroup->state_extra = 0; - shgroup->state_extra_disable = ~0x0; - shgroup->stencil_mask = 0; - shgroup->calls.first = NULL; - shgroup->calls.last = NULL; - shgroup->tfeedback_target = NULL; - 
shgroup->pass_parent = pass; + shgroup->cmd.first = NULL; + shgroup->cmd.last = NULL; + shgroup->pass_handle = pass->handle; return shgroup; } @@ -975,7 +1227,7 @@ DRWShadingGroup *DRW_shgroup_transform_feedback_create(struct GPUShader *shader, BLI_assert(tf_target != NULL); DRWShadingGroup *shgroup = drw_shgroup_create_ex(shader, pass); drw_shgroup_init(shgroup, shader); - shgroup->tfeedback_target = tf_target; + drw_shgroup_uniform_create_ex(shgroup, 0, DRW_UNIFORM_TFEEDBACK_TARGET, tf_target, 0, 1); return shgroup; } @@ -985,37 +1237,42 @@ DRWShadingGroup *DRW_shgroup_transform_feedback_create(struct GPUShader *shader, */ void DRW_shgroup_state_enable(DRWShadingGroup *shgroup, DRWState state) { - shgroup->state_extra |= state; + drw_command_set_mutable_state(shgroup, state, 0x0); } void DRW_shgroup_state_disable(DRWShadingGroup *shgroup, DRWState state) { - shgroup->state_extra_disable &= ~state; + drw_command_set_mutable_state(shgroup, 0x0, state); } void DRW_shgroup_stencil_mask(DRWShadingGroup *shgroup, uint mask) { - BLI_assert(mask <= 255); - shgroup->stencil_mask = mask; -} - -bool DRW_shgroup_is_empty(DRWShadingGroup *shgroup) -{ - return shgroup->calls.first == NULL; + drw_command_set_stencil_mask(shgroup, mask); } -/* This is a workaround function waiting for the clearing operation to be available inside the - * shgroups. */ -DRWShadingGroup *DRW_shgroup_get_next(DRWShadingGroup *shgroup) +void DRW_shgroup_clear_framebuffer(DRWShadingGroup *shgroup, + eGPUFrameBufferBits channels, + uchar r, + uchar g, + uchar b, + uchar a, + float depth, + uchar stencil) { - return shgroup->next; + drw_command_clear(shgroup, channels, r, g, b, a, depth, stencil); } -/* This is a workaround function waiting for the clearing operation to be available inside the - * shgroups. 
*/ -uint DRW_shgroup_stencil_mask_get(DRWShadingGroup *shgroup) +bool DRW_shgroup_is_empty(DRWShadingGroup *shgroup) { - return shgroup->stencil_mask; + DRWCommandChunk *chunk = shgroup->cmd.first; + for (; chunk; chunk = chunk->next) { + for (int i = 0; i < chunk->command_used; i++) { + if (command_type_get(chunk->command_type, i) <= DRW_MAX_DRAW_CMD_TYPE) { + return false; + } + } + } + return true; } DRWShadingGroup *DRW_shgroup_create_sub(DRWShadingGroup *shgroup) @@ -1023,11 +1280,14 @@ DRWShadingGroup *DRW_shgroup_create_sub(DRWShadingGroup *shgroup) DRWShadingGroup *shgroup_new = BLI_memblock_alloc(DST.vmempool->shgroups); *shgroup_new = *shgroup; - shgroup_new->uniforms = NULL; - shgroup_new->calls.first = NULL; - shgroup_new->calls.last = NULL; + drw_shgroup_init(shgroup_new, shgroup_new->shader); + shgroup_new->cmd.first = NULL; + shgroup_new->cmd.last = NULL; - BLI_LINKS_INSERT_AFTER(&shgroup->pass_parent->shgroups, shgroup, shgroup_new); + DRWPass *parent_pass = DRW_memblock_elem_from_handle(DST.vmempool->passes, + &shgroup->pass_handle); + + BLI_LINKS_INSERT_AFTER(&parent_pass->shgroups, shgroup, shgroup_new); return shgroup_new; } @@ -1522,6 +1782,8 @@ DRWPass *DRW_pass_create(const char *name, DRWState state) pass->shgroups.first = NULL; pass->shgroups.last = NULL; + pass->handle = DST.pass_handle; + DRW_handle_increment(&DST.pass_handle); return pass; } @@ -1565,42 +1827,22 @@ typedef struct ZSortData { const float *origin; } ZSortData; -static int pass_shgroup_dist_sort(void *thunk, const void *a, const void *b) +static int pass_shgroup_dist_sort(const void *a, const void *b) { - const ZSortData *zsortdata = (ZSortData *)thunk; const DRWShadingGroup *shgrp_a = (const DRWShadingGroup *)a; const DRWShadingGroup *shgrp_b = (const DRWShadingGroup *)b; - const DRWCall *call_a = (DRWCall *)shgrp_a->calls.first; - const DRWCall *call_b = (DRWCall *)shgrp_b->calls.first; - - if (call_a == NULL) { - return -1; - } - if (call_b == NULL) { - return -1; - } - 
- float tmp[3]; - sub_v3_v3v3(tmp, zsortdata->origin, call_a->state->model[3]); - const float a_sq = dot_v3v3(zsortdata->axis, tmp); - sub_v3_v3v3(tmp, zsortdata->origin, call_b->state->model[3]); - const float b_sq = dot_v3v3(zsortdata->axis, tmp); - - if (a_sq < b_sq) { + if (shgrp_a->z_sorting.distance < shgrp_b->z_sorting.distance) { return 1; } - else if (a_sq > b_sq) { + else if (shgrp_a->z_sorting.distance > shgrp_b->z_sorting.distance) { return -1; } else { - /* If there is a depth prepass put it before */ - if ((shgrp_a->state_extra & DRW_STATE_WRITE_DEPTH) != 0) { + /* If distances are the same, keep original order. */ + if (shgrp_a->z_sorting.original_index > shgrp_b->z_sorting.original_index) { return -1; } - else if ((shgrp_b->state_extra & DRW_STATE_WRITE_DEPTH) != 0) { - return 1; - } else { return 0; } @@ -1611,35 +1853,61 @@ static int pass_shgroup_dist_sort(void *thunk, const void *a, const void *b) #define SORT_IMPL_LINKTYPE DRWShadingGroup -#define SORT_IMPL_USE_THUNK #define SORT_IMPL_FUNC shgroup_sort_fn_r #include "../../blenlib/intern/list_sort_impl.h" #undef SORT_IMPL_FUNC -#undef SORT_IMPL_USE_THUNK #undef SORT_IMPL_LINKTYPE /** * Sort Shading groups by decreasing Z of their first draw call. - * This is useful for order dependent effect such as transparency. + * This is useful for order dependent effect such as alpha-blending. 
*/ void DRW_pass_sort_shgroup_z(DRWPass *pass) { const float(*viewinv)[4] = DST.view_active->storage.viewinv; - ZSortData zsortdata = {viewinv[2], viewinv[3]}; - - if (pass->shgroups.first && pass->shgroups.first->next) { - pass->shgroups.first = shgroup_sort_fn_r( - pass->shgroups.first, pass_shgroup_dist_sort, &zsortdata); + if (!(pass->shgroups.first && pass->shgroups.first->next)) { + /* Nothing to sort */ + return; + } - /* Find the next last */ - DRWShadingGroup *last = pass->shgroups.first; - while ((last = last->next)) { - /* Do nothing */ + uint index = 0; + DRWShadingGroup *shgroup = pass->shgroups.first; + do { + DRWResourceHandle handle = 0; + /* Find first DRWCommandDraw. */ + DRWCommandChunk *cmd_chunk = shgroup->cmd.first; + for (; cmd_chunk && handle == 0; cmd_chunk = cmd_chunk->next) { + for (int i = 0; i < cmd_chunk->command_used && handle == 0; i++) { + if (DRW_CMD_DRAW == command_type_get(cmd_chunk->command_type, i)) { + handle = cmd_chunk->commands[i].draw.handle; + } + } } - pass->shgroups.last = last; + /* To be sorted a shgroup needs to have at least one draw command. */ + BLI_assert(handle != 0); + + DRWObjectMatrix *obmats = DRW_memblock_elem_from_handle(DST.vmempool->obmats, &handle); + + /* Compute distance to camera. */ + float tmp[3]; + sub_v3_v3v3(tmp, viewinv[3], obmats->model[3]); + shgroup->z_sorting.distance = dot_v3v3(viewinv[2], tmp); + shgroup->z_sorting.original_index = index++; + + } while ((shgroup = shgroup->next)); + + /* Sort using computed distances. */ + pass->shgroups.first = shgroup_sort_fn_r(pass->shgroups.first, pass_shgroup_dist_sort); + + /* Find the new last */ + DRWShadingGroup *last = pass->shgroups.first; + while ((last = last->next)) { + /* Reset the pass id for debugging. 
*/ + last->pass_handle = pass->handle; } + pass->shgroups.last = last; } /** \} */ diff --git a/source/blender/draw/intern/draw_manager_exec.c b/source/blender/draw/intern/draw_manager_exec.c index 1dcaf39fc19..78da744abf8 100644 --- a/source/blender/draw/intern/draw_manager_exec.c +++ b/source/blender/draw/intern/draw_manager_exec.c @@ -36,16 +36,41 @@ # include "GPU_select.h" #endif -#ifdef USE_GPU_SELECT void DRW_select_load_id(uint id) { +#ifdef USE_GPU_SELECT BLI_assert(G.f & G_FLAG_PICKSEL); DST.select_id = id; -} #endif +} #define DEBUG_UBO_BINDING +typedef struct DRWCommandsState { + GPUBatch *batch; + int resource_chunk; + int base_inst; + int inst_count; + int v_first; + int v_count; + bool neg_scale; + /* Resource location. */ + int obmats_loc; + int obinfos_loc; + int baseinst_loc; + int chunkid_loc; + /* Legacy matrix support. */ + int obmat_loc; + int obinv_loc; + int mvp_loc; + /* Selection ID state. */ + GPUVertBuf *select_buf; + uint select_id; + /* Drawing State */ + DRWState drw_state_enabled; + DRWState drw_state_disabled; +} DRWCommandsState; + /* -------------------------------------------------------------------- */ /** \name Draw State (DRW_state) * \{ */ @@ -407,9 +432,10 @@ void DRW_state_reset(void) /** \name Culling (DRW_culling) * \{ */ -static bool draw_call_is_culled(DRWCall *call, DRWView *view) +static bool draw_call_is_culled(const DRWResourceHandle *handle, DRWView *view) { - return (call->state->culling->mask & view->culling_mask) != 0; + DRWCullingState *culling = DRW_memblock_elem_from_handle(DST.vmempool->cullstates, handle); + return (culling->mask & view->culling_mask) != 0; } /* Set active view for rendering. 
*/ @@ -588,66 +614,96 @@ static void draw_compute_culling(DRWView *view) /** \name Draw (DRW_draw) * \{ */ -static void draw_geometry_prepare(DRWShadingGroup *shgroup, DRWCall *call) +BLI_INLINE void draw_legacy_matrix_update(DRWShadingGroup *shgroup, + DRWResourceHandle *handle, + float obmat_loc, + float obinv_loc, + float mvp_loc) { - BLI_assert(call); - DRWCallState *state = call->state; - - if (shgroup->model != -1) { - GPU_shader_uniform_vector(shgroup->shader, shgroup->model, 16, 1, (float *)state->model); - } - if (shgroup->modelinverse != -1) { - GPU_shader_uniform_vector( - shgroup->shader, shgroup->modelinverse, 16, 1, (float *)state->modelinverse); - } - if (shgroup->objectinfo != -1) { - float infos[4]; - infos[0] = state->ob_index; - // infos[1]; /* UNUSED. */ - infos[2] = state->ob_random; - infos[3] = (state->flag & DRW_CALL_NEGSCALE) ? -1.0f : 1.0f; - GPU_shader_uniform_vector(shgroup->shader, shgroup->objectinfo, 4, 1, (float *)infos); - } - if (shgroup->objectcolor != -1) { - GPU_shader_uniform_vector( - shgroup->shader, shgroup->objectcolor, 4, 1, (float *)state->ob_color); + /* Still supported for compatibility with gpu_shader_* but should be forbidden. */ + DRWObjectMatrix *ob_mats = DRW_memblock_elem_from_handle(DST.vmempool->obmats, handle); + if (obmat_loc != -1) { + GPU_shader_uniform_vector(shgroup->shader, obmat_loc, 16, 1, (float *)ob_mats->model); } - if (shgroup->orcotexfac != -1) { - GPU_shader_uniform_vector( - shgroup->shader, shgroup->orcotexfac, 3, 2, (float *)state->orcotexfac); + if (obinv_loc != -1) { + GPU_shader_uniform_vector(shgroup->shader, obinv_loc, 16, 1, (float *)ob_mats->modelinverse); } /* Still supported for compatibility with gpu_shader_* but should be forbidden * and is slow (since it does not cache the result). 
*/ - if (shgroup->modelviewprojection != -1) { + if (mvp_loc != -1) { float mvp[4][4]; - mul_m4_m4m4(mvp, DST.view_active->storage.persmat, state->model); - GPU_shader_uniform_vector(shgroup->shader, shgroup->modelviewprojection, 16, 1, (float *)mvp); + mul_m4_m4m4(mvp, DST.view_active->storage.persmat, ob_mats->model); + GPU_shader_uniform_vector(shgroup->shader, mvp_loc, 16, 1, (float *)mvp); } } +BLI_INLINE void draw_geometry_bind(DRWShadingGroup *shgroup, GPUBatch *geom) +{ + /* XXX hacking #GPUBatch. we don't want to call glUseProgram! (huge performance loss) */ + if (DST.batch) { + DST.batch->program_in_use = false; + } + + DST.batch = geom; + + GPU_batch_program_set_no_use( + geom, GPU_shader_get_program(shgroup->shader), GPU_shader_get_interface(shgroup->shader)); + + geom->program_in_use = true; /* XXX hacking #GPUBatch */ + + GPU_batch_bind(geom); +} + BLI_INLINE void draw_geometry_execute(DRWShadingGroup *shgroup, GPUBatch *geom, - uint vert_first, - uint vert_count, - uint inst_first, - uint inst_count) + int vert_first, + int vert_count, + int inst_first, + int inst_count, + int baseinst_loc) { + /* inst_count can be -1. */ + inst_count = max_ii(0, inst_count); + + if (baseinst_loc != -1) { + /* Fallback when ARB_shader_draw_parameters is not supported. */ + GPU_shader_uniform_vector_int(shgroup->shader, baseinst_loc, 1, 1, (int *)&inst_first); + /* Avoids VAO reconfiguration on older hardware. (see GPU_batch_draw_advanced) */ + inst_first = 0; + } + /* bind vertex array */ if (DST.batch != geom) { - DST.batch = geom; - - GPU_batch_program_set_no_use( - geom, GPU_shader_get_program(shgroup->shader), GPU_shader_get_interface(shgroup->shader)); - - GPU_batch_bind(geom); + draw_geometry_bind(shgroup, geom); } - /* XXX hacking #GPUBatch. we don't want to call glUseProgram! 
(huge performance loss) */ - geom->program_in_use = true; - GPU_batch_draw_advanced(geom, vert_first, vert_count, inst_first, inst_count); +} - geom->program_in_use = false; /* XXX hacking #GPUBatch */ +BLI_INLINE void draw_indirect_call(DRWShadingGroup *shgroup, DRWCommandsState *state) +{ + if (state->inst_count == 0) { + return; + } + if (state->baseinst_loc == -1) { + /* bind vertex array */ + if (DST.batch != state->batch) { + GPU_draw_list_submit(DST.draw_list); + draw_geometry_bind(shgroup, state->batch); + } + GPU_draw_list_command_add( + DST.draw_list, state->v_first, state->v_count, state->base_inst, state->inst_count); + } + /* Fallback when unsupported */ + else { + draw_geometry_execute(shgroup, + state->batch, + state->v_first, + state->v_count, + state->base_inst, + state->inst_count, + state->baseinst_loc); + } } enum { @@ -719,6 +775,9 @@ static void bind_ubo(GPUUniformBuffer *ubo, char bind_type) /* UBO isn't bound yet. Find an empty slot and bind it. */ idx = get_empty_slot_index(DST.RST.bound_ubo_slots); + /* [0..1] are reserved ubo slots. */ + idx += 2; + if (idx < GPU_max_ubo_binds()) { GPUUniformBuffer **gpu_ubo_slot = &DST.RST.bound_ubos[idx]; /* Unbind any previous UBO. */ @@ -738,10 +797,13 @@ static void bind_ubo(GPUUniformBuffer *ubo, char bind_type) } } else { + BLI_assert(idx < 64); /* This UBO slot was released but the UBO is * still bound here. Just flag the slot again. */ BLI_assert(DST.RST.bound_ubos[idx] == ubo); } + /* Remove offset for flag bitfield. 
*/ + idx -= 2; set_bound_flags(&DST.RST.bound_ubo_slots, &DST.RST.bound_ubo_slots_persist, idx, bind_type); } @@ -785,8 +847,12 @@ static bool ubo_bindings_validate(DRWShadingGroup *shgroup) printf("Trying to draw with missing UBO binding.\n"); valid = false; } + + DRWPass *parent_pass = DRW_memblock_elem_from_handle(DST.vmempool->passes, + &shgroup->pass_handle); + printf("Pass : %s, Shader : %s, Block : %s\n", - shgroup->pass_parent->name, + parent_pass->name, shgroup->shader->name, blockname); } @@ -818,119 +884,331 @@ static void release_ubo_slots(bool with_persist) } } -static void draw_update_uniforms(DRWShadingGroup *shgroup) +static void draw_update_uniforms(DRWShadingGroup *shgroup, + DRWCommandsState *state, + bool *use_tfeedback) { - for (DRWUniform *uni = shgroup->uniforms; uni; uni = uni->next) { - GPUTexture *tex; - GPUUniformBuffer *ubo; - if (uni->location == -2) { - uni->location = GPU_shader_get_uniform_ensure(shgroup->shader, - DST.uniform_names.buffer + uni->name_ofs); - if (uni->location == -1) { - continue; + for (DRWUniformChunk *unichunk = shgroup->uniforms; unichunk; unichunk = unichunk->next) { + DRWUniform *uni = unichunk->uniforms; + for (int i = 0; i < unichunk->uniform_used; i++, uni++) { + GPUTexture *tex; + GPUUniformBuffer *ubo; + if (uni->location == -2) { + uni->location = GPU_shader_get_uniform_ensure(shgroup->shader, + DST.uniform_names.buffer + uni->name_ofs); + if (uni->location == -1) { + continue; + } + } + const void *data = uni->pvalue; + if (ELEM(uni->type, DRW_UNIFORM_INT_COPY, DRW_UNIFORM_FLOAT_COPY)) { + data = uni->fvalue; + } + switch (uni->type) { + case DRW_UNIFORM_INT_COPY: + case DRW_UNIFORM_INT: + GPU_shader_uniform_vector_int( + shgroup->shader, uni->location, uni->length, uni->arraysize, data); + break; + case DRW_UNIFORM_FLOAT_COPY: + case DRW_UNIFORM_FLOAT: + GPU_shader_uniform_vector( + shgroup->shader, uni->location, uni->length, uni->arraysize, data); + break; + case DRW_UNIFORM_TEXTURE: + tex = 
(GPUTexture *)uni->pvalue; + BLI_assert(tex); + bind_texture(tex, BIND_TEMP); + GPU_shader_uniform_texture(shgroup->shader, uni->location, tex); + break; + case DRW_UNIFORM_TEXTURE_PERSIST: + tex = (GPUTexture *)uni->pvalue; + BLI_assert(tex); + bind_texture(tex, BIND_PERSIST); + GPU_shader_uniform_texture(shgroup->shader, uni->location, tex); + break; + case DRW_UNIFORM_TEXTURE_REF: + tex = *((GPUTexture **)uni->pvalue); + BLI_assert(tex); + bind_texture(tex, BIND_TEMP); + GPU_shader_uniform_texture(shgroup->shader, uni->location, tex); + break; + case DRW_UNIFORM_BLOCK: + ubo = (GPUUniformBuffer *)uni->pvalue; + bind_ubo(ubo, BIND_TEMP); + GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo); + break; + case DRW_UNIFORM_BLOCK_PERSIST: + ubo = (GPUUniformBuffer *)uni->pvalue; + bind_ubo(ubo, BIND_PERSIST); + GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo); + break; + case DRW_UNIFORM_BLOCK_OBMATS: + state->obmats_loc = uni->location; + ubo = DST.vmempool->matrices_ubo[0]; + GPU_uniformbuffer_bind(ubo, 0); + GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo); + break; + case DRW_UNIFORM_BLOCK_OBINFOS: + state->obinfos_loc = uni->location; + ubo = DST.vmempool->obinfos_ubo[0]; + GPU_uniformbuffer_bind(ubo, 1); + GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo); + break; + case DRW_UNIFORM_RESOURCE_CHUNK: + state->chunkid_loc = uni->location; + GPU_shader_uniform_int(shgroup->shader, uni->location, 0); + break; + case DRW_UNIFORM_TFEEDBACK_TARGET: + BLI_assert(data && (*use_tfeedback == false)); + *use_tfeedback = GPU_shader_transform_feedback_enable(shgroup->shader, + ((GPUVertBuf *)data)->vbo_id); + break; + /* Legacy/Fallback support. 
*/ + case DRW_UNIFORM_BASE_INSTANCE: + state->baseinst_loc = uni->location; + break; + case DRW_UNIFORM_MODEL_MATRIX: + state->obmat_loc = uni->location; + break; + case DRW_UNIFORM_MODEL_MATRIX_INVERSE: + state->obinv_loc = uni->location; + break; + case DRW_UNIFORM_MODELVIEWPROJECTION_MATRIX: + state->mvp_loc = uni->location; + break; } - } - const void *data = uni->pvalue; - if (ELEM(uni->type, DRW_UNIFORM_INT_COPY, DRW_UNIFORM_FLOAT_COPY)) { - data = uni->fvalue; - } - switch (uni->type) { - case DRW_UNIFORM_INT_COPY: - case DRW_UNIFORM_INT: - GPU_shader_uniform_vector_int( - shgroup->shader, uni->location, uni->length, uni->arraysize, data); - break; - case DRW_UNIFORM_FLOAT_COPY: - case DRW_UNIFORM_FLOAT: - GPU_shader_uniform_vector( - shgroup->shader, uni->location, uni->length, uni->arraysize, data); - break; - case DRW_UNIFORM_TEXTURE: - tex = (GPUTexture *)uni->pvalue; - BLI_assert(tex); - bind_texture(tex, BIND_TEMP); - GPU_shader_uniform_texture(shgroup->shader, uni->location, tex); - break; - case DRW_UNIFORM_TEXTURE_PERSIST: - tex = (GPUTexture *)uni->pvalue; - BLI_assert(tex); - bind_texture(tex, BIND_PERSIST); - GPU_shader_uniform_texture(shgroup->shader, uni->location, tex); - break; - case DRW_UNIFORM_TEXTURE_REF: - tex = *((GPUTexture **)uni->pvalue); - BLI_assert(tex); - bind_texture(tex, BIND_TEMP); - GPU_shader_uniform_texture(shgroup->shader, uni->location, tex); - break; - case DRW_UNIFORM_BLOCK: - ubo = (GPUUniformBuffer *)uni->pvalue; - bind_ubo(ubo, BIND_TEMP); - GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo); - break; - case DRW_UNIFORM_BLOCK_PERSIST: - ubo = (GPUUniformBuffer *)uni->pvalue; - bind_ubo(ubo, BIND_PERSIST); - GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo); - break; } } BLI_assert(ubo_bindings_validate(shgroup)); } -BLI_INLINE bool draw_select_do_call(DRWShadingGroup *shgroup, DRWCall *call) +BLI_INLINE void draw_select_buffer(DRWShadingGroup *shgroup, + DRWCommandsState *state, + GPUBatch 
*batch, + const DRWResourceHandle *handle) { -#ifdef USE_GPU_SELECT - if ((G.f & G_FLAG_PICKSEL) == 0) { - return false; + const bool is_instancing = (batch->inst != NULL); + int start = 0; + int count = 1; + int tot = is_instancing ? batch->inst->vertex_len : batch->verts[0]->vertex_len; + /* Hack : get "vbo" data without actually drawing. */ + int *select_id = (void *)state->select_buf->data; + + /* Batching */ + if (!is_instancing) { + /* FIXME: Meh a bit nasty. */ + if (batch->gl_prim_type == convert_prim_type_to_gl(GPU_PRIM_TRIS)) { + count = 3; + } + else if (batch->gl_prim_type == convert_prim_type_to_gl(GPU_PRIM_LINES)) { + count = 2; + } } - if (call->inst_selectid != NULL) { - const bool is_instancing = (call->inst_count != 0); - uint start = 0; - uint count = 1; - uint tot = is_instancing ? call->inst_count : call->vert_count; - /* Hack : get vbo data without actually drawing. */ - GPUVertBufRaw raw; - GPU_vertbuf_attr_get_raw_data(call->inst_selectid, 0, &raw); - int *select_id = GPU_vertbuf_raw_step(&raw); - - /* Batching */ - if (!is_instancing) { - /* FIXME: Meh a bit nasty. 
*/ - if (call->batch->gl_prim_type == convert_prim_type_to_gl(GPU_PRIM_TRIS)) { - count = 3; - } - else if (call->batch->gl_prim_type == convert_prim_type_to_gl(GPU_PRIM_LINES)) { - count = 2; - } + + while (start < tot) { + GPU_select_load_id(select_id[start]); + if (is_instancing) { + draw_geometry_execute(shgroup, batch, 0, 0, start, count, state->baseinst_loc); + } + else { + draw_geometry_execute( + shgroup, batch, start, count, DRW_handle_id_get(handle), 0, state->baseinst_loc); } + start += count; + } +} - while (start < tot) { - GPU_select_load_id(select_id[start]); - if (is_instancing) { - draw_geometry_execute(shgroup, call->batch, 0, 0, start, count); - } - else { - draw_geometry_execute(shgroup, call->batch, start, count, 0, 0); +typedef struct DRWCommandIterator { + int cmd_index; + DRWCommandChunk *curr_chunk; +} DRWCommandIterator; + +static void draw_command_iter_begin(DRWCommandIterator *iter, DRWShadingGroup *shgroup) +{ + iter->curr_chunk = shgroup->cmd.first; + iter->cmd_index = 0; +} + +static DRWCommand *draw_command_iter_step(DRWCommandIterator *iter, eDRWCommandType *cmd_type) +{ + if (iter->curr_chunk) { + if (iter->cmd_index == iter->curr_chunk->command_len) { + iter->curr_chunk = iter->curr_chunk->next; + iter->cmd_index = 0; + } + if (iter->curr_chunk) { + *cmd_type = command_type_get(iter->curr_chunk->command_type, iter->cmd_index); + if (iter->cmd_index < iter->curr_chunk->command_used) { + return iter->curr_chunk->commands + iter->cmd_index++; } - start += count; } - return true; } + return NULL; +} + +static void draw_call_resource_bind(DRWCommandsState *state, const DRWResourceHandle *handle) +{ + /* Front face is not a resource but it is inside the resource handle. */ + bool neg_scale = DRW_handle_negative_scale_get(handle); + if (neg_scale != state->neg_scale) { + glFrontFace((neg_scale) ? 
GL_CW : GL_CCW); + state->neg_scale = neg_scale; + } + + int chunk = DRW_handle_chunk_get(handle); + if (state->resource_chunk != chunk) { + if (state->chunkid_loc != -1) { + GPU_shader_uniform_int(NULL, state->chunkid_loc, chunk); + } + if (state->obmats_loc != -1) { + GPU_uniformbuffer_unbind(DST.vmempool->matrices_ubo[state->resource_chunk]); + GPU_uniformbuffer_bind(DST.vmempool->matrices_ubo[chunk], 0); + } + if (state->obinfos_loc != -1) { + GPU_uniformbuffer_unbind(DST.vmempool->obinfos_ubo[state->resource_chunk]); + GPU_uniformbuffer_bind(DST.vmempool->obinfos_ubo[chunk], 1); + } + state->resource_chunk = chunk; + } +} + +static void draw_call_batching_flush(DRWShadingGroup *shgroup, DRWCommandsState *state) +{ + draw_indirect_call(shgroup, state); + GPU_draw_list_submit(DST.draw_list); + + state->batch = NULL; + state->inst_count = 0; + state->base_inst = -1; +} + +static void draw_call_single_do(DRWShadingGroup *shgroup, + DRWCommandsState *state, + GPUBatch *batch, + DRWResourceHandle handle, + int vert_first, + int vert_count, + int inst_count) +{ + draw_call_batching_flush(shgroup, state); + + draw_call_resource_bind(state, &handle); + + /* TODO This is Legacy. Need to be removed. 
*/ + if (state->obmats_loc == -1 && + (state->obmat_loc != -1 || state->obinv_loc != -1 || state->mvp_loc != -1)) { + draw_legacy_matrix_update( + shgroup, &handle, state->obmat_loc, state->obinv_loc, state->mvp_loc); + } + + if (G.f & G_FLAG_PICKSEL) { + if (state->select_buf != NULL) { + draw_select_buffer(shgroup, state, batch, &handle); + return; + } + else { + GPU_select_load_id(state->select_id); + } + } + + draw_geometry_execute(shgroup, + batch, + vert_first, + vert_count, + DRW_handle_id_get(&handle), + inst_count, + state->baseinst_loc); +} + +static void draw_call_batching_start(DRWCommandsState *state) +{ + state->neg_scale = false; + state->resource_chunk = 0; + state->base_inst = 0; + state->inst_count = 0; + state->v_first = 0; + state->v_count = 0; + state->batch = NULL; + + state->select_id = -1; + state->select_buf = NULL; +} + +/* NOTE: Does not support batches with instancing VBOs. */ +static void draw_call_batching_do(DRWShadingGroup *shgroup, + DRWCommandsState *state, + DRWCommandDraw *call) +{ + /* If any condition requires to interrupt the merging. */ + bool neg_scale = DRW_handle_negative_scale_get(&call->handle); + int chunk = DRW_handle_chunk_get(&call->handle); + int id = DRW_handle_id_get(&call->handle); + if ((state->neg_scale != neg_scale) || /* Need to change state. */ + (state->resource_chunk != chunk) || /* Need to change UBOs. */ + (state->batch != call->batch) /* Need to change VAO. */ + ) { + draw_call_batching_flush(shgroup, state); + + state->batch = call->batch; + state->v_first = (call->batch->elem) ? call->batch->elem->index_start : 0; + state->v_count = (call->batch->elem) ? call->batch->elem->index_len : + call->batch->verts[0]->vertex_len; + state->inst_count = 1; + state->base_inst = id; + + draw_call_resource_bind(state, &call->handle); + + GPU_draw_list_init(DST.draw_list, state->batch); + } + /* Is the id consecutive? 
*/ + else if (id != state->base_inst + state->inst_count) { + /* We need to add a draw command for the pending instances. */ + draw_indirect_call(shgroup, state); + state->inst_count = 1; + state->base_inst = id; + } + /* We avoid a drawcall by merging with the precedent + * drawcall using instancing. */ else { - GPU_select_load_id(call->select_id); - return false; + state->inst_count++; + } +} + +/* Flush remaining pending drawcalls. */ +static void draw_call_batching_finish(DRWShadingGroup *shgroup, DRWCommandsState *state) +{ + draw_call_batching_flush(shgroup, state); + + /* Reset state */ + if (state->neg_scale) { + glFrontFace(GL_CCW); + } + if (state->obmats_loc != -1) { + GPU_uniformbuffer_unbind(DST.vmempool->matrices_ubo[state->resource_chunk]); + } + if (state->obinfos_loc != -1) { + GPU_uniformbuffer_unbind(DST.vmempool->obinfos_ubo[state->resource_chunk]); } -#else - return false; -#endif } static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state) { BLI_assert(shgroup->shader); + DRWCommandsState state = { + .obmats_loc = -1, + .obinfos_loc = -1, + .baseinst_loc = -1, + .chunkid_loc = -1, + .obmat_loc = -1, + .obinv_loc = -1, + .mvp_loc = -1, + .drw_state_enabled = 0, + .drw_state_disabled = 0, + }; + const bool shader_changed = (DST.shader != shgroup->shader); bool use_tfeedback = false; @@ -940,56 +1218,116 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state) } GPU_shader_bind(shgroup->shader); DST.shader = shgroup->shader; + /* XXX hacking gawain */ + if (DST.batch) { + DST.batch->program_in_use = false; + } DST.batch = NULL; } - if (shgroup->tfeedback_target != NULL) { - use_tfeedback = GPU_shader_transform_feedback_enable(shgroup->shader, - shgroup->tfeedback_target->vbo_id); - } - release_ubo_slots(shader_changed); release_texture_slots(shader_changed); - drw_state_set((pass_state & shgroup->state_extra_disable) | shgroup->state_extra); - drw_stencil_set(shgroup->stencil_mask); + draw_update_uniforms(shgroup, 
&state, &use_tfeedback); - draw_update_uniforms(shgroup); + drw_state_set(pass_state); /* Rendering Calls */ { - bool prev_neg_scale = false; - int callid = 0; - for (DRWCall *call = shgroup->calls.first; call; call = call->next) { - - if (draw_call_is_culled(call, DST.view_active)) { - continue; - } - - /* XXX small exception/optimisation for outline rendering. */ - if (shgroup->callid != -1) { - GPU_shader_uniform_vector_int(shgroup->shader, shgroup->callid, 1, 1, &callid); - callid += 1; - } - - /* Negative scale objects */ - bool neg_scale = call->state->flag & DRW_CALL_NEGSCALE; - if (neg_scale != prev_neg_scale) { - glFrontFace((neg_scale) ? GL_CW : GL_CCW); - prev_neg_scale = neg_scale; + DRWCommandIterator iter; + DRWCommand *cmd; + eDRWCommandType cmd_type; + + draw_command_iter_begin(&iter, shgroup); + + draw_call_batching_start(&state); + + while ((cmd = draw_command_iter_step(&iter, &cmd_type))) { + + switch (cmd_type) { + case DRW_CMD_DRWSTATE: + case DRW_CMD_STENCIL: + draw_call_batching_flush(shgroup, &state); + break; + case DRW_CMD_DRAW: + case DRW_CMD_DRAW_PROCEDURAL: + case DRW_CMD_DRAW_INSTANCE: + if (draw_call_is_culled(&cmd->instance.handle, DST.view_active)) { + continue; + } + break; + default: + break; } - draw_geometry_prepare(shgroup, call); - - if (draw_select_do_call(shgroup, call)) { - continue; + switch (cmd_type) { + case DRW_CMD_CLEAR: + GPU_framebuffer_clear( +#ifndef NDEBUG + GPU_framebuffer_active_get(), +#else + NULL, +#endif + cmd->clear.clear_channels, + (float[4]){cmd->clear.r / 255.0f, + cmd->clear.g / 255.0f, + cmd->clear.b / 255.0f, + cmd->clear.a / 255.0f}, + cmd->clear.depth, + cmd->clear.stencil); + break; + case DRW_CMD_DRWSTATE: + state.drw_state_enabled |= cmd->state.enable; + state.drw_state_disabled |= cmd->state.disable; + drw_state_set((pass_state & ~state.drw_state_disabled) | state.drw_state_enabled); + break; + case DRW_CMD_STENCIL: + drw_stencil_set(cmd->stencil.mask); + break; + case DRW_CMD_SELECTID: + 
state.select_id = cmd->select_id.select_id; + state.select_buf = cmd->select_id.select_buf; + break; + case DRW_CMD_DRAW: + if (!USE_BATCHING || state.obmats_loc == -1 || (G.f & G_FLAG_PICKSEL) || + cmd->draw.batch->inst) { + draw_call_single_do(shgroup, &state, cmd->draw.batch, cmd->draw.handle, 0, 0, 0); + } + else { + draw_call_batching_do(shgroup, &state, &cmd->draw); + } + break; + case DRW_CMD_DRAW_PROCEDURAL: + draw_call_single_do(shgroup, + &state, + cmd->procedural.batch, + cmd->procedural.handle, + 0, + cmd->procedural.vert_count, + 1); + break; + case DRW_CMD_DRAW_INSTANCE: + draw_call_single_do(shgroup, + &state, + cmd->instance.batch, + cmd->instance.handle, + 0, + 0, + cmd->instance.inst_count); + break; + case DRW_CMD_DRAW_RANGE: + draw_call_single_do(shgroup, + &state, + cmd->range.batch, + (DRWResourceHandle)0, + cmd->range.vert_first, + cmd->range.vert_count, + 1); + break; } - - draw_geometry_execute( - shgroup, call->batch, call->vert_first, call->vert_count, 0, call->inst_count); } - /* Reset state */ - glFrontFace(GL_CCW); + + draw_call_batching_finish(shgroup, &state); } if (use_tfeedback) { @@ -1065,6 +1403,11 @@ static void drw_draw_pass_ex(DRWPass *pass, DST.shader = NULL; } + if (DST.batch) { + DST.batch->program_in_use = false; + DST.batch = NULL; + } + /* HACK: Rasterized discard can affect clear commands which are not * part of a DRWPass (as of now). So disable rasterized discard here * if it has been enabled. */ |