From fefc9c95e4818768ba08c665111d2e405ae72672 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?=
Date: Fri, 5 Apr 2019 20:45:32 +0200
Subject: DRW: Opti: Replace bound tex/ubo tracking array by bitfields

release_texture_slots() and release_ubo_slots() were a hotspot when drawing,
taking ~9% of total CPU time for no reason. This was because the loops
iterated over GPU_max_textures() slots, which was overkill and slow.
Replace them with simple 64-bit bitwise operations.
---
 source/blender/draw/intern/draw_manager.c      |  18 +--
 source/blender/draw/intern/draw_manager.h      |  17 ++-
 source/blender/draw/intern/draw_manager_exec.c | 148 +++++++++++++++----------
 3 files changed, 100 insertions(+), 83 deletions(-)

(limited to 'source/blender/draw')

diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c
index e31c2f5cbbd..cde7b283976 100644
--- a/source/blender/draw/intern/draw_manager.c
+++ b/source/blender/draw/intern/draw_manager.c
@@ -644,18 +644,7 @@ static void drw_viewport_var_init(void)
   }

   /* Alloc array of texture reference. */
-  if (DST.RST.bound_texs == NULL) {
-    DST.RST.bound_texs = MEM_callocN(sizeof(GPUTexture *) * GPU_max_textures(), "Bound GPUTexture refs");
-  }
-  if (DST.RST.bound_tex_slots == NULL) {
-    DST.RST.bound_tex_slots = MEM_callocN(sizeof(char) * GPU_max_textures(), "Bound Texture Slots");
-  }
-  if (DST.RST.bound_ubos == NULL) {
-    DST.RST.bound_ubos = MEM_callocN(sizeof(GPUUniformBuffer *) * GPU_max_ubo_binds(), "Bound GPUUniformBuffer refs");
-  }
-  if (DST.RST.bound_ubo_slots == NULL) {
-    DST.RST.bound_ubo_slots = MEM_callocN(sizeof(char) * GPU_max_ubo_binds(), "Bound Ubo Slots");
-  }
+  memset(&DST.RST, 0x0, sizeof(DST.RST));

   if (G_draw.view_ubo == NULL) {
     G_draw.view_ubo = DRW_uniformbuffer_create(sizeof(ViewUboStorage), NULL);
@@ -2796,11 +2785,6 @@ void DRW_engines_free(void)
   DRW_TEXTURE_FREE_SAFE(G_draw.weight_ramp);
   MEM_SAFE_FREE(g_pos_format);

-  MEM_SAFE_FREE(DST.RST.bound_texs);
-  MEM_SAFE_FREE(DST.RST.bound_tex_slots);
-  MEM_SAFE_FREE(DST.RST.bound_ubos);
-  MEM_SAFE_FREE(DST.RST.bound_ubo_slots);
-
   MEM_SAFE_FREE(DST.uniform_names.buffer);

   DRW_opengl_context_disable();
diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h
index 45721951abf..03f6eef225e 100644
--- a/source/blender/draw/intern/draw_manager.h
+++ b/source/blender/draw/intern/draw_manager.h
@@ -313,6 +313,7 @@ typedef struct DRWDebugSphere {

 /* ------------- DRAW MANAGER ------------ */

+#define DST_MAX_SLOTS 64 /* Cannot be changed without modifying RST.bound_tex_slots */
 #define MAX_CLIP_PLANES 6 /* GL_MAX_CLIP_PLANES is at least 6 */
 #define STENCIL_UNDEFINED 256
 typedef struct DRWManager {
@@ -394,12 +395,16 @@ typedef struct DRWManager {

   /** GPU Resource State: Memory storage between drawing. */
   struct {
-    GPUTexture **bound_texs;
-    char *bound_tex_slots;
-    int bind_tex_inc;
-    GPUUniformBuffer **bound_ubos;
-    char *bound_ubo_slots;
-    int bind_ubo_inc;
+    /* High-end GPUs support up to 32 binds per shader stage.
+     * We only use textures during the vertex and fragment stages,
+     * so 2 * 32 slots is a nice limit. */
+    GPUTexture *bound_texs[DST_MAX_SLOTS];
+    uint64_t bound_tex_slots;
+    uint64_t bound_tex_slots_persist;
+
+    GPUUniformBuffer *bound_ubos[DST_MAX_SLOTS];
+    uint64_t bound_ubo_slots;
+    uint64_t bound_ubo_slots_persist;
   } RST;

   struct {
diff --git a/source/blender/draw/intern/draw_manager_exec.c b/source/blender/draw/intern/draw_manager_exec.c
index 7dc42c4d459..52c3f773e77 100644
--- a/source/blender/draw/intern/draw_manager_exec.c
+++ b/source/blender/draw/intern/draw_manager_exec.c
@@ -22,9 +22,9 @@

 #include "draw_manager.h"

+#include "BLI_math_bits.h"
 #include "BLI_mempool.h"
-

 #include "BKE_global.h"

 #include "GPU_draw.h"
@@ -892,55 +892,97 @@ enum {
   BIND_PERSIST = 2, /* Release slot only after the next shader change. */
 };

+static void set_bound_flags(uint64_t *slots, uint64_t *persist_slots, int slot_idx, char bind_type)
+{
+  uint64_t slot = 1llu << slot_idx;
+  *slots |= slot;
+  if (bind_type == BIND_PERSIST) {
+    *persist_slots |= slot;
+  }
+}
+
+static int get_empty_slot_index(uint64_t slots)
+{
+  uint64_t empty_slots = ~slots;
+  /* Find first empty slot using bitscan. */
+  if (empty_slots != 0) {
+    if ((empty_slots & 0xFFFFFFFFlu) != 0) {
+      return (int)bitscan_forward_uint(empty_slots);
+    }
+    else {
+      return (int)bitscan_forward_uint(empty_slots >> 32) + 32;
+    }
+  }
+  else {
+    /* Greater than GPU_max_textures() */
+    return 99999;
+  }
+}
+
 static void bind_texture(GPUTexture *tex, char bind_type)
 {
-  int index;
-  char *slot_flags = DST.RST.bound_tex_slots;
-  int bind_num = GPU_texture_bound_number(tex);
-  if (bind_num == -1) {
-    for (int i = 0; i < GPU_max_textures(); ++i) {
-      index = DST.RST.bind_tex_inc = (DST.RST.bind_tex_inc + 1) % GPU_max_textures();
-      if (slot_flags[index] == BIND_NONE) {
-        if (DST.RST.bound_texs[index] != NULL) {
-          GPU_texture_unbind(DST.RST.bound_texs[index]);
-        }
-        GPU_texture_bind(tex, index);
-        DST.RST.bound_texs[index] = tex;
-        slot_flags[index] = bind_type;
-        // printf("Binds Texture %d %p\n", DST.RST.bind_tex_inc, tex);
-        return;
+  int idx = GPU_texture_bound_number(tex);
+  if (idx == -1) {
+    /* Texture isn't bound yet. Find an empty slot and bind it. */
+    idx = get_empty_slot_index(DST.RST.bound_tex_slots);
+
+    if (idx < GPU_max_textures()) {
+      GPUTexture **gpu_tex_slot = &DST.RST.bound_texs[idx];
+      /* Unbind any previous texture. */
+      if (*gpu_tex_slot != NULL) {
+        GPU_texture_unbind(*gpu_tex_slot);
       }
+      GPU_texture_bind(tex, idx);
+      *gpu_tex_slot = tex;
     }
-    printf("Not enough texture slots! Reduce number of textures used by your shader.\n");
+    else {
+      printf("Not enough texture slots! Reduce number of textures used by your shader.\n");
+      return;
+    }
+  }
+  else {
+    /* This texture slot was released but the texture
+     * is still bound. Just flag the slot again. */
+    BLI_assert(DST.RST.bound_texs[idx] == tex);
   }
-  slot_flags[bind_num] = bind_type;
+  set_bound_flags(&DST.RST.bound_tex_slots,
+                  &DST.RST.bound_tex_slots_persist,
+                  idx, bind_type);
 }

 static void bind_ubo(GPUUniformBuffer *ubo, char bind_type)
 {
-  int index;
-  char *slot_flags = DST.RST.bound_ubo_slots;
-  int bind_num = GPU_uniformbuffer_bindpoint(ubo);
-  if (bind_num == -1) {
-    for (int i = 0; i < GPU_max_ubo_binds(); ++i) {
-      index = DST.RST.bind_ubo_inc = (DST.RST.bind_ubo_inc + 1) % GPU_max_ubo_binds();
-      if (slot_flags[index] == BIND_NONE) {
-        if (DST.RST.bound_ubos[index] != NULL) {
-          GPU_uniformbuffer_unbind(DST.RST.bound_ubos[index]);
-        }
-        GPU_uniformbuffer_bind(ubo, index);
-        DST.RST.bound_ubos[index] = ubo;
-        slot_flags[index] = bind_type;
-        return;
+  int idx = GPU_uniformbuffer_bindpoint(ubo);
+  if (idx == -1) {
+    /* UBO isn't bound yet. Find an empty slot and bind it. */
+    idx = get_empty_slot_index(DST.RST.bound_ubo_slots);
+
+    if (idx < GPU_max_ubo_binds()) {
+      GPUUniformBuffer **gpu_ubo_slot = &DST.RST.bound_ubos[idx];
+      /* Unbind any previous UBO. */
+      if (*gpu_ubo_slot != NULL) {
+        GPU_uniformbuffer_unbind(*gpu_ubo_slot);
      }
+      GPU_uniformbuffer_bind(ubo, idx);
+      *gpu_ubo_slot = ubo;
     }
-    /* printf so user can report bad behavior */
-    printf("Not enough ubo slots! This should not happen!\n");
-    /* This is not depending on user input.
-     * It is our responsibility to make sure there is enough slots. */
-    BLI_assert(0);
+    else {
+      /* printf so the user can report bad behavior. */
+      printf("Not enough ubo slots! This should not happen!\n");
+      /* This does not depend on user input. It is our
+       * responsibility to make sure there are enough slots. */
+      BLI_assert(0);
+      return;
+    }
+  }
+  else {
+    /* This UBO slot was released but the UBO is
+     * still bound here. Just flag the slot again. */
+    BLI_assert(DST.RST.bound_ubos[idx] == ubo);
   }
-  slot_flags[bind_num] = bind_type;
+  set_bound_flags(&DST.RST.bound_ubo_slots,
+                  &DST.RST.bound_ubo_slots_persist,
+                  idx, bind_type);
 }

 #ifndef NDEBUG
@@ -994,37 +1036,23 @@ static bool ubo_bindings_validate(DRWShadingGroup *shgroup)

 static void release_texture_slots(bool with_persist)
 {
   if (with_persist) {
-    memset(DST.RST.bound_tex_slots, 0x0, sizeof(*DST.RST.bound_tex_slots) * GPU_max_textures());
+    DST.RST.bound_tex_slots = 0;
+    DST.RST.bound_tex_slots_persist = 0;
   }
   else {
-    for (int i = 0; i < GPU_max_textures(); ++i) {
-      if (DST.RST.bound_tex_slots[i] != BIND_PERSIST) {
-        DST.RST.bound_tex_slots[i] = BIND_NONE;
-      }
-    }
+    DST.RST.bound_tex_slots &= DST.RST.bound_tex_slots_persist;
   }
-
-  /* Reset so that slots are consistently assigned for different shader
-   * draw calls, to avoid shader specialization/patching by the driver. */
-  DST.RST.bind_tex_inc = 0;
 }

 static void release_ubo_slots(bool with_persist)
 {
   if (with_persist) {
-    memset(DST.RST.bound_ubo_slots, 0x0, sizeof(*DST.RST.bound_ubo_slots) * GPU_max_ubo_binds());
+    DST.RST.bound_ubo_slots = 0;
+    DST.RST.bound_ubo_slots_persist = 0;
   }
   else {
-    for (int i = 0; i < GPU_max_ubo_binds(); ++i) {
-      if (DST.RST.bound_ubo_slots[i] != BIND_PERSIST) {
-        DST.RST.bound_ubo_slots[i] = BIND_NONE;
-      }
-    }
+    DST.RST.bound_ubo_slots &= DST.RST.bound_ubo_slots_persist;
   }
-
-  /* Reset so that slots are consistently assigned for different shader
-   * draw calls, to avoid shader specialization/patching by the driver. */
-  DST.RST.bind_ubo_inc = 0;
 }

 static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
@@ -1331,7 +1359,7 @@ static void drw_draw_pass_ex(DRWPass *pass, DRWShadingGroup *start_group, DRWSha
   }

   /* Clear Bound textures */
-  for (int i = 0; i < GPU_max_textures(); i++) {
+  for (int i = 0; i < DST_MAX_SLOTS; i++) {
     if (DST.RST.bound_texs[i] != NULL) {
       GPU_texture_unbind(DST.RST.bound_texs[i]);
       DST.RST.bound_texs[i] = NULL;
@@ -1339,7 +1367,7 @@ static void drw_draw_pass_ex(DRWPass *pass, DRWShadingGroup *start_group, DRWSha
   }

   /* Clear Bound Ubos */
-  for (int i = 0; i < GPU_max_ubo_binds(); i++) {
+  for (int i = 0; i < DST_MAX_SLOTS; i++) {
     if (DST.RST.bound_ubos[i] != NULL) {
       GPU_uniformbuffer_unbind(DST.RST.bound_ubos[i]);
       DST.RST.bound_ubos[i] = NULL;
--
cgit v1.2.3
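
The core trick in this patch is that a binding table of at most 64 slots can be tracked with two uint64_t masks, so "release everything that is not persistent" collapses from a per-slot loop into a single AND. Below is a minimal standalone sketch of that technique, independent of Blender: all names here (SlotTable, slot_acquire, slot_release, MAX_SLOTS) are hypothetical, and the bitscan uses the GCC/Clang builtin __builtin_ctzll instead of Blender's BLI_math_bits helpers (MSVC would use _BitScanForward64).

/* slot_bits_demo.c - sketch of bitfield slot tracking, not Blender code.
 * Build: cc -std=c99 slot_bits_demo.c -o slot_bits_demo */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_SLOTS 64 /* One bit per slot in a uint64_t. */

typedef struct SlotTable {
  void *bound[MAX_SLOTS]; /* What is bound in each slot (NULL = empty). */
  uint64_t used;          /* Bit i set = slot i is flagged in use. */
  uint64_t persist;       /* Bit i set = slot i survives a normal release. */
} SlotTable;

/* Lowest 0 bit in `used` = first free slot; MAX_SLOTS when all are taken. */
static int slot_find_empty(uint64_t used)
{
  uint64_t empty = ~used;
  if (empty == 0) {
    return MAX_SLOTS;
  }
  return __builtin_ctzll(empty); /* Count trailing zeros = first set bit. */
}

/* Bind `res` into the first free slot; returns the slot index or -1. */
static int slot_acquire(SlotTable *st, void *res, bool persistent)
{
  int idx = slot_find_empty(st->used);
  if (idx >= MAX_SLOTS) {
    return -1; /* Caller must reduce the number of simultaneous bindings. */
  }
  uint64_t bit = 1llu << idx; /* 64-bit literal: idx can be >= 32. */
  st->bound[idx] = res;
  st->used |= bit;
  if (persistent) {
    st->persist |= bit;
  }
  return idx;
}

/* The release that replaced the per-slot loop: one AND keeps exactly the
 * persistent slots; everything else becomes reusable. */
static void slot_release(SlotTable *st, bool with_persist)
{
  if (with_persist) {
    st->used = 0;
    st->persist = 0;
  }
  else {
    st->used &= st->persist;
  }
}

int main(void)
{
  SlotTable st = {{0}, 0, 0};
  int tex_a = 0, tex_b = 0; /* Stand-ins for GPU resources. */

  int a = slot_acquire(&st, &tex_a, false);
  int b = slot_acquire(&st, &tex_b, true); /* Persistent: survives release. */
  printf("a=%d b=%d used=%llx\n", a, b, (unsigned long long)st.used);

  slot_release(&st, false); /* Only slot b stays flagged. */
  assert(st.used == (1llu << b));

  slot_release(&st, true); /* Full reset. */
  assert(st.used == 0 && st.persist == 0);
  return 0;
}

Note that, as in the patch, slot_release deliberately leaves the bound[] pointers intact: releasing a slot only makes it reusable, and the resource stays bound on the GPU until its slot is actually claimed by something else (or explicitly unbound at the end of a pass).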