Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
authorClément Foucault <foucault.clem@gmail.com>2019-04-05 21:45:32 +0300
committerClément Foucault <foucault.clem@gmail.com>2019-04-05 22:15:25 +0300
commitfefc9c95e4818768ba08c665111d2e405ae72672 (patch)
tree3eaf8cdd0a7ead15111c618009c32c2480642a45 /source
parent2219f28a68b69ba227270abadbcaa431601d42dc (diff)
DRW: Opti: Replace bound tex/ubo tracking array by bitfields
release_texture_slots() and release_ubo_slots() were a hotspot when drawing, taking ~9% of total CPU counters for no reason. This was because the loops iterated over GPU_max_textures() slots, which was overkill and slow. Replace them with a simple 64-bit bitwise OR operation.
Diffstat (limited to 'source')
-rw-r--r--source/blender/draw/intern/draw_manager.c18
-rw-r--r--source/blender/draw/intern/draw_manager.h17
-rw-r--r--source/blender/draw/intern/draw_manager_exec.c148
3 files changed, 100 insertions, 83 deletions
diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c
index e31c2f5cbbd..cde7b283976 100644
--- a/source/blender/draw/intern/draw_manager.c
+++ b/source/blender/draw/intern/draw_manager.c
@@ -644,18 +644,7 @@ static void drw_viewport_var_init(void)
}
/* Alloc array of texture reference. */
- if (DST.RST.bound_texs == NULL) {
- DST.RST.bound_texs = MEM_callocN(sizeof(GPUTexture *) * GPU_max_textures(), "Bound GPUTexture refs");
- }
- if (DST.RST.bound_tex_slots == NULL) {
- DST.RST.bound_tex_slots = MEM_callocN(sizeof(char) * GPU_max_textures(), "Bound Texture Slots");
- }
- if (DST.RST.bound_ubos == NULL) {
- DST.RST.bound_ubos = MEM_callocN(sizeof(GPUUniformBuffer *) * GPU_max_ubo_binds(), "Bound GPUUniformBuffer refs");
- }
- if (DST.RST.bound_ubo_slots == NULL) {
- DST.RST.bound_ubo_slots = MEM_callocN(sizeof(char) * GPU_max_ubo_binds(), "Bound Ubo Slots");
- }
+ memset(&DST.RST, 0x0, sizeof(DST.RST));
if (G_draw.view_ubo == NULL) {
G_draw.view_ubo = DRW_uniformbuffer_create(sizeof(ViewUboStorage), NULL);
@@ -2796,11 +2785,6 @@ void DRW_engines_free(void)
DRW_TEXTURE_FREE_SAFE(G_draw.weight_ramp);
MEM_SAFE_FREE(g_pos_format);
- MEM_SAFE_FREE(DST.RST.bound_texs);
- MEM_SAFE_FREE(DST.RST.bound_tex_slots);
- MEM_SAFE_FREE(DST.RST.bound_ubos);
- MEM_SAFE_FREE(DST.RST.bound_ubo_slots);
-
MEM_SAFE_FREE(DST.uniform_names.buffer);
DRW_opengl_context_disable();
diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h
index 45721951abf..03f6eef225e 100644
--- a/source/blender/draw/intern/draw_manager.h
+++ b/source/blender/draw/intern/draw_manager.h
@@ -313,6 +313,7 @@ typedef struct DRWDebugSphere {
/* ------------- DRAW MANAGER ------------ */
+#define DST_MAX_SLOTS 64 /* Cannot be changed without modifying RST.bound_tex_slots */
#define MAX_CLIP_PLANES 6 /* GL_MAX_CLIP_PLANES is at least 6 */
#define STENCIL_UNDEFINED 256
typedef struct DRWManager {
@@ -394,12 +395,16 @@ typedef struct DRWManager {
/** GPU Resource State: Memory storage between drawing. */
struct {
- GPUTexture **bound_texs;
- char *bound_tex_slots;
- int bind_tex_inc;
- GPUUniformBuffer **bound_ubos;
- char *bound_ubo_slots;
- int bind_ubo_inc;
+ /* High end GPUs supports up to 32 binds per shader stage.
+ * We only use textures during the vertex and fragment stage,
+ * so 2 * 32 slots is a nice limit. */
+ GPUTexture *bound_texs[DST_MAX_SLOTS];
+ uint64_t bound_tex_slots;
+ uint64_t bound_tex_slots_persist;
+
+ GPUUniformBuffer *bound_ubos[DST_MAX_SLOTS];
+ uint64_t bound_ubo_slots;
+ uint64_t bound_ubo_slots_persist;
} RST;
struct {
diff --git a/source/blender/draw/intern/draw_manager_exec.c b/source/blender/draw/intern/draw_manager_exec.c
index 7dc42c4d459..52c3f773e77 100644
--- a/source/blender/draw/intern/draw_manager_exec.c
+++ b/source/blender/draw/intern/draw_manager_exec.c
@@ -22,9 +22,9 @@
#include "draw_manager.h"
+#include "BLI_math_bits.h"
#include "BLI_mempool.h"
-
#include "BKE_global.h"
#include "GPU_draw.h"
@@ -892,55 +892,97 @@ enum {
BIND_PERSIST = 2, /* Release slot only after the next shader change. */
};
+static void set_bound_flags(uint64_t *slots, uint64_t *persist_slots, int slot_idx, char bind_type)
+{
+ uint64_t slot = 1lu << slot_idx;
+ *slots |= slot;
+ if (bind_type == BIND_PERSIST) {
+ *persist_slots |= slot;
+ }
+}
+
+static int get_empty_slot_index(uint64_t slots)
+{
+ uint64_t empty_slots = ~slots;
+ /* Find first empty slot using bitscan. */
+ if (empty_slots != 0) {
+ if ((empty_slots & 0xFFFFFFFFlu) != 0) {
+ return (int)bitscan_forward_uint(empty_slots);
+ }
+ else {
+ return (int)bitscan_forward_uint(empty_slots >> 32) + 32;
+ }
+ }
+ else {
+ /* Greater than GPU_max_textures() */
+ return 99999;
+ }
+}
+
static void bind_texture(GPUTexture *tex, char bind_type)
{
- int index;
- char *slot_flags = DST.RST.bound_tex_slots;
- int bind_num = GPU_texture_bound_number(tex);
- if (bind_num == -1) {
- for (int i = 0; i < GPU_max_textures(); ++i) {
- index = DST.RST.bind_tex_inc = (DST.RST.bind_tex_inc + 1) % GPU_max_textures();
- if (slot_flags[index] == BIND_NONE) {
- if (DST.RST.bound_texs[index] != NULL) {
- GPU_texture_unbind(DST.RST.bound_texs[index]);
- }
- GPU_texture_bind(tex, index);
- DST.RST.bound_texs[index] = tex;
- slot_flags[index] = bind_type;
- // printf("Binds Texture %d %p\n", DST.RST.bind_tex_inc, tex);
- return;
+ int idx = GPU_texture_bound_number(tex);
+ if (idx == -1) {
+ /* Texture isn't bound yet. Find an empty slot and bind it. */
+ idx = get_empty_slot_index(DST.RST.bound_tex_slots);
+
+ if (idx < GPU_max_textures()) {
+ GPUTexture **gpu_tex_slot = &DST.RST.bound_texs[idx];
+ /* Unbind any previous texture. */
+ if (*gpu_tex_slot != NULL) {
+ GPU_texture_unbind(*gpu_tex_slot);
}
+ GPU_texture_bind(tex, idx);
+ *gpu_tex_slot = tex;
}
- printf("Not enough texture slots! Reduce number of textures used by your shader.\n");
+ else {
+ printf("Not enough texture slots! Reduce number of textures used by your shader.\n");
+ return;
+ }
+ }
+ else {
+ /* This texture slot was released but the tex
+ * is still bound. Just flag the slot again. */
+ BLI_assert(DST.RST.bound_texs[idx] == tex);
}
- slot_flags[bind_num] = bind_type;
+ set_bound_flags(&DST.RST.bound_tex_slots,
+ &DST.RST.bound_tex_slots_persist,
+ idx, bind_type);
}
static void bind_ubo(GPUUniformBuffer *ubo, char bind_type)
{
- int index;
- char *slot_flags = DST.RST.bound_ubo_slots;
- int bind_num = GPU_uniformbuffer_bindpoint(ubo);
- if (bind_num == -1) {
- for (int i = 0; i < GPU_max_ubo_binds(); ++i) {
- index = DST.RST.bind_ubo_inc = (DST.RST.bind_ubo_inc + 1) % GPU_max_ubo_binds();
- if (slot_flags[index] == BIND_NONE) {
- if (DST.RST.bound_ubos[index] != NULL) {
- GPU_uniformbuffer_unbind(DST.RST.bound_ubos[index]);
- }
- GPU_uniformbuffer_bind(ubo, index);
- DST.RST.bound_ubos[index] = ubo;
- slot_flags[index] = bind_type;
- return;
+ int idx = GPU_uniformbuffer_bindpoint(ubo);
+ if (idx == -1) {
+ /* UBO isn't bound yet. Find an empty slot and bind it. */
+ idx = get_empty_slot_index(DST.RST.bound_ubo_slots);
+
+ if (idx < GPU_max_ubo_binds()) {
+ GPUUniformBuffer **gpu_ubo_slot = &DST.RST.bound_ubos[idx];
+ /* Unbind any previous UBO. */
+ if (*gpu_ubo_slot != NULL) {
+ GPU_uniformbuffer_unbind(*gpu_ubo_slot);
}
+ GPU_uniformbuffer_bind(ubo, idx);
+ *gpu_ubo_slot = ubo;
}
- /* printf so user can report bad behavior */
- printf("Not enough ubo slots! This should not happen!\n");
- /* This is not depending on user input.
- * It is our responsibility to make sure there is enough slots. */
- BLI_assert(0);
+ else {
+ /* printf so user can report bad behavior */
+ printf("Not enough ubo slots! This should not happen!\n");
+ /* This is not depending on user input.
+ * It is our responsibility to make sure there is enough slots. */
+ BLI_assert(0);
+ return;
+ }
+ }
+ else {
+ /* This UBO slot was released but the UBO is
+ * still bound here. Just flag the slot again. */
+ BLI_assert(DST.RST.bound_ubos[idx] == ubo);
}
- slot_flags[bind_num] = bind_type;
+ set_bound_flags(&DST.RST.bound_ubo_slots,
+ &DST.RST.bound_ubo_slots_persist,
+ idx, bind_type);
}
#ifndef NDEBUG
@@ -994,37 +1036,23 @@ static bool ubo_bindings_validate(DRWShadingGroup *shgroup)
static void release_texture_slots(bool with_persist)
{
if (with_persist) {
- memset(DST.RST.bound_tex_slots, 0x0, sizeof(*DST.RST.bound_tex_slots) * GPU_max_textures());
+ DST.RST.bound_tex_slots = 0;
+ DST.RST.bound_tex_slots_persist = 0;
}
else {
- for (int i = 0; i < GPU_max_textures(); ++i) {
- if (DST.RST.bound_tex_slots[i] != BIND_PERSIST) {
- DST.RST.bound_tex_slots[i] = BIND_NONE;
- }
- }
+ DST.RST.bound_tex_slots &= DST.RST.bound_tex_slots_persist;
}
-
- /* Reset so that slots are consistently assigned for different shader
- * draw calls, to avoid shader specialization/patching by the driver. */
- DST.RST.bind_tex_inc = 0;
}
static void release_ubo_slots(bool with_persist)
{
if (with_persist) {
- memset(DST.RST.bound_ubo_slots, 0x0, sizeof(*DST.RST.bound_ubo_slots) * GPU_max_ubo_binds());
+ DST.RST.bound_ubo_slots = 0;
+ DST.RST.bound_ubo_slots_persist = 0;
}
else {
- for (int i = 0; i < GPU_max_ubo_binds(); ++i) {
- if (DST.RST.bound_ubo_slots[i] != BIND_PERSIST) {
- DST.RST.bound_ubo_slots[i] = BIND_NONE;
- }
- }
+ DST.RST.bound_ubo_slots &= DST.RST.bound_ubo_slots_persist;
}
-
- /* Reset so that slots are consistently assigned for different shader
- * draw calls, to avoid shader specialization/patching by the driver. */
- DST.RST.bind_ubo_inc = 0;
}
static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
@@ -1331,7 +1359,7 @@ static void drw_draw_pass_ex(DRWPass *pass, DRWShadingGroup *start_group, DRWSha
}
/* Clear Bound textures */
- for (int i = 0; i < GPU_max_textures(); i++) {
+ for (int i = 0; i < DST_MAX_SLOTS; i++) {
if (DST.RST.bound_texs[i] != NULL) {
GPU_texture_unbind(DST.RST.bound_texs[i]);
DST.RST.bound_texs[i] = NULL;
@@ -1339,7 +1367,7 @@ static void drw_draw_pass_ex(DRWPass *pass, DRWShadingGroup *start_group, DRWSha
}
/* Clear Bound Ubos */
- for (int i = 0; i < GPU_max_ubo_binds(); i++) {
+ for (int i = 0; i < DST_MAX_SLOTS; i++) {
if (DST.RST.bound_ubos[i] != NULL) {
GPU_uniformbuffer_unbind(DST.RST.bound_ubos[i]);
DST.RST.bound_ubos[i] = NULL;