diff options
author | Clément Foucault <foucault.clem@gmail.com> | 2019-06-02 01:20:50 +0300 |
---|---|---|
committer | Clément Foucault <foucault.clem@gmail.com> | 2019-08-17 15:48:48 +0300 |
commit | 542d51881a533e25c1d767180f57b970bf3a9138 (patch) | |
tree | effe248797f0c5fbd80923fc8c1a849f326d0186 | |
parent | 31ff2fd530bc0ff808c5f5d5e8cdf435313ae9ac (diff) |
DRW: Replace single DRWUniform alloc by DRWUniformChunk
This improves cache locality and lowers memory
usage.
The memory saved is spent on widening the inline value storage from
2 to 4 components, which makes vec4_copy uniforms possible.
-rw-r--r-- | source/blender/draw/intern/draw_manager.c | 2 | ||||
-rw-r--r-- | source/blender/draw/intern/draw_manager.h | 32 | ||||
-rw-r--r-- | source/blender/draw/intern/draw_manager_data.c | 39 | ||||
-rw-r--r-- | source/blender/draw/intern/draw_manager_exec.c | 144 |
4 files changed, 125 insertions, 92 deletions
diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c index 0ee572f3f15..eb75abaa8af 100644 --- a/source/blender/draw/intern/draw_manager.c +++ b/source/blender/draw/intern/draw_manager.c @@ -654,7 +654,7 @@ static void drw_viewport_var_init(void) DST.vmempool->shgroups = BLI_memblock_create(sizeof(DRWShadingGroup)); } if (DST.vmempool->uniforms == NULL) { - DST.vmempool->uniforms = BLI_memblock_create(sizeof(DRWUniform)); + DST.vmempool->uniforms = BLI_memblock_create(sizeof(DRWUniformChunk)); } if (DST.vmempool->views == NULL) { DST.vmempool->views = BLI_memblock_create(sizeof(DRWView)); diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h index 941bc6cba84..b150cb3cbc3 100644 --- a/source/blender/draw/intern/draw_manager.h +++ b/source/blender/draw/intern/draw_manager.h @@ -184,26 +184,27 @@ typedef enum { } DRWUniformType; struct DRWUniform { - DRWUniform *next; /* single-linked list */ union { /* For reference or array/vector types. */ const void *pvalue; /* Single values. */ - float fvalue[2]; - int ivalue[2]; + float fvalue[4]; + int ivalue[4]; }; int location; uint32_t type : 4; /* DRWUniformType */ - uint32_t length : 4; /* cannot be more than 16 */ - uint32_t arraysize : 4; /* cannot be more than 16 too */ - uint32_t name_ofs : 20; /* name offset in name buffer. */ + uint32_t length : 5; /* cannot be more than 16 */ + uint32_t arraysize : 5; /* cannot be more than 16 too */ + uint32_t name_ofs : 18; /* name offset in name buffer. 
*/ }; struct DRWShadingGroup { DRWShadingGroup *next; - GPUShader *shader; /* Shader to bind */ - DRWUniform *uniforms; /* Uniforms pointers */ + GPUShader *shader; /* Shader to bind */ + struct DRWUniformChunk *uniforms; /* Uniforms pointers */ + + uint32_t uniform_count; struct { DRWCall *first, *last; /* Linked list of DRWCall */ @@ -289,6 +290,21 @@ typedef struct ModelUboStorage { } ModelUboStorage; #endif +/* ------------ Data Chunks --------------- */ +/** + * In order to keep a cache friendly data structure, + * we alloc most of our little data into chunks of multiple item. + * Iteration, allocation and memory usage are better. + * We loose a bit of memory by allocating more than what we need + * but it's counterbalanced by not needing the linked-list pointers + * for each item. + **/ + +typedef struct DRWUniformChunk { + struct DRWUniformChunk *next; /* single-linked list */ + DRWUniform uniforms[5]; +} DRWUniformChunk; + /* ------------- DRAW DEBUG ------------ */ typedef struct DRWDebugLine { diff --git a/source/blender/draw/intern/draw_manager_data.c b/source/blender/draw/intern/draw_manager_data.c index 0142da8c170..969694ac869 100644 --- a/source/blender/draw/intern/draw_manager_data.c +++ b/source/blender/draw/intern/draw_manager_data.c @@ -115,14 +115,23 @@ void drw_resource_buffer_finish(ViewportMemoryPool *vmempool) /** \name Uniforms (DRW_shgroup_uniform) * \{ */ -static void drw_shgroup_uniform_create_ex(DRWShadingGroup *shgroup, - int loc, - DRWUniformType type, - const void *value, - int length, - int arraysize) -{ - DRWUniform *uni = BLI_memblock_alloc(DST.vmempool->uniforms); +static DRWUniform *drw_shgroup_uniform_create_ex(DRWShadingGroup *shgroup, + int loc, + DRWUniformType type, + const void *value, + int length, + int arraysize) +{ + /* Happens on first uniform or if chunk is full. 
*/ + if (shgroup->uniform_count == 0) { + DRWUniformChunk *unichunk = BLI_memblock_alloc(DST.vmempool->uniforms); + BLI_LINKS_PREPEND(shgroup->uniforms, unichunk); + } + + DRWUniform *uni = &shgroup->uniforms->uniforms[shgroup->uniform_count]; + + shgroup->uniform_count = (shgroup->uniform_count + 1) % ARRAY_SIZE(shgroup->uniforms->uniforms); + uni->location = loc; uni->type = type; uni->length = length; @@ -130,11 +139,11 @@ static void drw_shgroup_uniform_create_ex(DRWShadingGroup *shgroup, switch (type) { case DRW_UNIFORM_INT_COPY: - BLI_assert(length <= 2); + BLI_assert(length <= 4); memcpy(uni->ivalue, value, sizeof(int) * length); break; case DRW_UNIFORM_FLOAT_COPY: - BLI_assert(length <= 2); + BLI_assert(length <= 4); memcpy(uni->fvalue, value, sizeof(float) * length); break; default: @@ -142,7 +151,7 @@ static void drw_shgroup_uniform_create_ex(DRWShadingGroup *shgroup, break; } - BLI_LINKS_PREPEND(shgroup->uniforms, uni); + return uni; } static void drw_shgroup_builtin_uniform( @@ -179,7 +188,8 @@ static void drw_shgroup_uniform(DRWShadingGroup *shgroup, BLI_assert(arraysize > 0 && arraysize <= 16); BLI_assert(length >= 0 && length <= 16); - drw_shgroup_uniform_create_ex(shgroup, location, type, value, length, arraysize); + DRWUniform *uni = drw_shgroup_uniform_create_ex( + shgroup, location, type, value, length, arraysize); /* If location is -2, the uniform has not yet been queried. * We save the name for query just before drawing. */ @@ -198,7 +208,7 @@ static void drw_shgroup_uniform(DRWShadingGroup *shgroup, memcpy(dst, name, len); /* Copies NULL terminator. */ DST.uniform_names.buffer_ofs += len; - shgroup->uniforms->name_ofs = ofs; + uni->name_ofs = ofs; } } @@ -860,6 +870,7 @@ void DRW_buffer_add_entry_array(DRWCallBuffer *callbuf, const void *attr[], uint static void drw_shgroup_init(DRWShadingGroup *shgroup, GPUShader *shader) { shgroup->uniforms = NULL; + shgroup->uniform_count = 0; /* TODO(fclem) make them builtin. 
*/ int view_ubo_location = GPU_shader_get_uniform_block(shader, "viewBlock"); @@ -1084,7 +1095,7 @@ DRWShadingGroup *DRW_shgroup_create_sub(DRWShadingGroup *shgroup) DRWShadingGroup *shgroup_new = BLI_memblock_alloc(DST.vmempool->shgroups); *shgroup_new = *shgroup; - shgroup_new->uniforms = NULL; + drw_shgroup_init(shgroup_new, shgroup_new->shader); shgroup_new->calls.first = NULL; shgroup_new->calls.last = NULL; diff --git a/source/blender/draw/intern/draw_manager_exec.c b/source/blender/draw/intern/draw_manager_exec.c index 9afcf8ce44e..51562df2268 100644 --- a/source/blender/draw/intern/draw_manager_exec.c +++ b/source/blender/draw/intern/draw_manager_exec.c @@ -802,74 +802,81 @@ static void draw_update_uniforms(DRWShadingGroup *shgroup, int *obinfos_loc, int *baseinst_loc) { - for (DRWUniform *uni = shgroup->uniforms; uni; uni = uni->next) { - GPUTexture *tex; - GPUUniformBuffer *ubo; - if (uni->location == -2) { - uni->location = GPU_shader_get_uniform_ensure(shgroup->shader, - DST.uniform_names.buffer + uni->name_ofs); - if (uni->location == -1) { - continue; + for (DRWUniformChunk *unichunk = shgroup->uniforms; unichunk; unichunk = unichunk->next) { + DRWUniform *uni = unichunk->uniforms; + /* shgroup->uniform_count is 0 if the first chunk is full. */ + int uniform_count = ((unichunk == shgroup->uniforms) && shgroup->uniform_count > 0) ? 
+ shgroup->uniform_count : + ARRAY_SIZE(shgroup->uniforms->uniforms); + for (int i = 0; i < uniform_count; i++, uni++) { + GPUTexture *tex; + GPUUniformBuffer *ubo; + if (uni->location == -2) { + uni->location = GPU_shader_get_uniform_ensure(shgroup->shader, + DST.uniform_names.buffer + uni->name_ofs); + if (uni->location == -1) { + continue; + } + } + const void *data = uni->pvalue; + if (ELEM(uni->type, DRW_UNIFORM_INT_COPY, DRW_UNIFORM_FLOAT_COPY)) { + data = uni->fvalue; + } + switch (uni->type) { + case DRW_UNIFORM_INT_COPY: + case DRW_UNIFORM_INT: + GPU_shader_uniform_vector_int( + shgroup->shader, uni->location, uni->length, uni->arraysize, data); + break; + case DRW_UNIFORM_FLOAT_COPY: + case DRW_UNIFORM_FLOAT: + GPU_shader_uniform_vector( + shgroup->shader, uni->location, uni->length, uni->arraysize, data); + break; + case DRW_UNIFORM_TEXTURE: + tex = (GPUTexture *)uni->pvalue; + BLI_assert(tex); + bind_texture(tex, BIND_TEMP); + GPU_shader_uniform_texture(shgroup->shader, uni->location, tex); + break; + case DRW_UNIFORM_TEXTURE_PERSIST: + tex = (GPUTexture *)uni->pvalue; + BLI_assert(tex); + bind_texture(tex, BIND_PERSIST); + GPU_shader_uniform_texture(shgroup->shader, uni->location, tex); + break; + case DRW_UNIFORM_TEXTURE_REF: + tex = *((GPUTexture **)uni->pvalue); + BLI_assert(tex); + bind_texture(tex, BIND_TEMP); + GPU_shader_uniform_texture(shgroup->shader, uni->location, tex); + break; + case DRW_UNIFORM_BLOCK: + ubo = (GPUUniformBuffer *)uni->pvalue; + bind_ubo(ubo, BIND_TEMP); + GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo); + break; + case DRW_UNIFORM_BLOCK_PERSIST: + ubo = (GPUUniformBuffer *)uni->pvalue; + bind_ubo(ubo, BIND_PERSIST); + GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo); + break; + case DRW_UNIFORM_BLOCK_OBMATS: + *obmats_loc = uni->location; + ubo = DST.vmempool->matrices_ubo[0]; + GPU_uniformbuffer_bind(ubo, 0); + GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo); + break; + case 
DRW_UNIFORM_BLOCK_OBINFOS: + *obinfos_loc = uni->location; + ubo = DST.vmempool->obinfos_ubo[0]; + GPU_uniformbuffer_bind(ubo, 1); + GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo); + break; + case DRW_UNIFORM_BASE_INSTANCE: + *baseinst_loc = uni->location; + break; } - } - const void *data = uni->pvalue; - if (ELEM(uni->type, DRW_UNIFORM_INT_COPY, DRW_UNIFORM_FLOAT_COPY)) { - data = uni->fvalue; - } - switch (uni->type) { - case DRW_UNIFORM_INT_COPY: - case DRW_UNIFORM_INT: - GPU_shader_uniform_vector_int( - shgroup->shader, uni->location, uni->length, uni->arraysize, data); - break; - case DRW_UNIFORM_FLOAT_COPY: - case DRW_UNIFORM_FLOAT: - GPU_shader_uniform_vector( - shgroup->shader, uni->location, uni->length, uni->arraysize, data); - break; - case DRW_UNIFORM_TEXTURE: - tex = (GPUTexture *)uni->pvalue; - BLI_assert(tex); - bind_texture(tex, BIND_TEMP); - GPU_shader_uniform_texture(shgroup->shader, uni->location, tex); - break; - case DRW_UNIFORM_TEXTURE_PERSIST: - tex = (GPUTexture *)uni->pvalue; - BLI_assert(tex); - bind_texture(tex, BIND_PERSIST); - GPU_shader_uniform_texture(shgroup->shader, uni->location, tex); - break; - case DRW_UNIFORM_TEXTURE_REF: - tex = *((GPUTexture **)uni->pvalue); - BLI_assert(tex); - bind_texture(tex, BIND_TEMP); - GPU_shader_uniform_texture(shgroup->shader, uni->location, tex); - break; - case DRW_UNIFORM_BLOCK: - ubo = (GPUUniformBuffer *)uni->pvalue; - bind_ubo(ubo, BIND_TEMP); - GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo); - break; - case DRW_UNIFORM_BLOCK_PERSIST: - ubo = (GPUUniformBuffer *)uni->pvalue; - bind_ubo(ubo, BIND_PERSIST); - GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo); - break; - case DRW_UNIFORM_BLOCK_OBMATS: - *obmats_loc = uni->location; - ubo = DST.vmempool->matrices_ubo[0]; - GPU_uniformbuffer_bind(ubo, 0); - GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo); - break; - case DRW_UNIFORM_BLOCK_OBINFOS: - *obinfos_loc = uni->location; - ubo = 
DST.vmempool->obinfos_ubo[0]; - GPU_uniformbuffer_bind(ubo, 1); - GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo); - break; - case DRW_UNIFORM_BASE_INSTANCE: - *baseinst_loc = uni->location; - break; } } @@ -986,13 +993,12 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state) if (obmats_loc != -1) { GPU_uniformbuffer_unbind(DST.vmempool->matrices_ubo[resource_chunk]); GPU_uniformbuffer_bind(DST.vmempool->matrices_ubo[handle.chunk], 0); - resource_chunk = handle.chunk; } if (obinfos_loc != -1) { GPU_uniformbuffer_unbind(DST.vmempool->obinfos_ubo[resource_chunk]); GPU_uniformbuffer_bind(DST.vmempool->obinfos_ubo[handle.chunk], 0); - resource_chunk = handle.chunk; } + resource_chunk = handle.chunk; } if (baseinst_loc != -1) { |