From 8bc8a62c57f91326ab3f8850785dce5452b5d703 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?=
Date: Mon, 13 May 2019 17:56:20 +0200
Subject: DRW: Refactor: Use DRWCall to accumulate per instance attributes

This is a big change that cleans up a lot of confusing code:

- The instancing/batching data buffer distribution in draw_instance_data.c.
- The selection & drawing code in draw_manager_exec.c.
- Pretty much all non-mesh object drawing (object_mode.c).

Most of the changes are just renaming, but there is still a chance that a
typo sneaked through.

The Batching/Instancing Shading groups are replaced by DRWCallBuffers. This
is cleaner and conceptually more in line with what a DRWShadingGroup should
be.

There is still a little confusion in draw_common.c, where some functions
take a shgroup as input and some don't.
---
 source/blender/draw/intern/draw_instance_data.c | 343 +++++++++---------------
 1 file changed, 131 insertions(+), 212 deletions(-)

diff --git a/source/blender/draw/intern/draw_instance_data.c b/source/blender/draw/intern/draw_instance_data.c
index e8d91309e06..b88ad936c28 100644
--- a/source/blender/draw/intern/draw_instance_data.c
+++ b/source/blender/draw/intern/draw_instance_data.c
@@ -36,33 +36,9 @@
 #include "MEM_guardedalloc.h"
 
 #include "BLI_utildefines.h"
 #include "BLI_mempool.h"
+#include "BLI_memblock.h"
 
-#define BUFFER_CHUNK_SIZE 32
-#define BUFFER_VERTS_CHUNK 32
-
-typedef struct DRWBatchingBuffer {
-  struct DRWShadingGroup *shgroup; /* Link back to the owning shGroup. Also tells if it's used */
-  GPUVertFormat *format;           /* Identifier. */
-  GPUVertBuf *vert;                /* GPUVertBuf contained in the GPUBatch. */
-  GPUBatch *batch;                 /* GPUBatch containing the GPUVertBuf. */
-} DRWBatchingBuffer;
-
-typedef struct DRWInstancingBuffer {
-  struct DRWShadingGroup *shgroup; /* Link back to the owning shGroup. Also tells if it's used */
-  GPUVertFormat *format;           /* Identifier. */
-  GPUBatch *instance;              /* Identifier. */
-  GPUVertBuf *vert;                /* GPUVertBuf contained in the GPUBatch. */
-  GPUBatch *batch;                 /* GPUBatch containing the GPUVertBuf. */
-} DRWInstancingBuffer;
-
-typedef struct DRWInstanceChunk {
-  size_t cursor;     /* Offset to the next instance data. */
-  size_t alloc_size; /* Number of DRWBatchingBuffer/Batches alloc'd in ibufs/btchs. */
-  union {
-    DRWBatchingBuffer *bbufs;
-    DRWInstancingBuffer *ibufs;
-  };
-} DRWInstanceChunk;
+#include "intern/gpu_primitive_private.h"
 
 struct DRWInstanceData {
   struct DRWInstanceData *next;
@@ -77,212 +53,167 @@ struct DRWInstanceDataList {
   DRWInstanceData *idata_head[MAX_INSTANCE_DATA_SIZE];
   DRWInstanceData *idata_tail[MAX_INSTANCE_DATA_SIZE];
 
-  DRWInstanceChunk instancing;
-  DRWInstanceChunk batching;
+  BLI_memblock *pool_instancing;
+  BLI_memblock *pool_batching;
+  BLI_memblock *pool_buffers;
 };
 
+typedef struct DRWTempBufferHandle {
+  /** Must be first for casting. */
+  GPUVertBuf buf;
+  /** Format pointer for reuse. */
+  GPUVertFormat *format;
+  /** Touched vertex length for resize. */
+  uint *vert_len;
+} DRWTempBufferHandle;
+
 static ListBase g_idatalists = {NULL, NULL};
 
 /* -------------------------------------------------------------------- */
 /** \name Instance Buffer Management
  * \{ */
 
-/**
- * This manager allows to distribute existing batches for instancing
- * attributes. This reduce the number of batches creation.
- * Querying a batch is done with a vertex format.
- * This format should be static so that it's pointer never changes
- * (because we are using this pointer as identifier [we don't want
- * to check the full format that would be too slow]).
- */
-static void instance_batch_free(GPUBatch *batch, void *UNUSED(user_data))
+static void instance_batch_free(GPUBatch *geom, void *UNUSED(user_data))
 {
-  if (batch->verts[0] == NULL) {
+  if (geom->verts[0] == NULL) {
     /** XXX This is a false positive case.
      * The batch has been requested but not init yet
      * and there is a chance that it might become init.
      */
     return;
   }
-  /* Free all batches that have the same key before they are reused. */
+
+  /* Free all batches that use the same VBOs before they are reused. */
   /* TODO: Make it thread safe! Batch freeing can happen from another thread. */
-  /* XXX we need to iterate over all idatalists unless we make some smart
-   * data structure to store the locations to update. */
-  for (DRWInstanceDataList *idatalist = g_idatalists.first; idatalist;
-       idatalist = idatalist->next) {
-    DRWInstancingBuffer *ibuf = idatalist->instancing.ibufs;
-    for (int i = 0; i < idatalist->instancing.alloc_size; i++, ibuf++) {
-      if (ibuf->instance == batch) {
-        BLI_assert(ibuf->shgroup == NULL); /* Make sure it has no other users. */
-        GPU_VERTBUF_DISCARD_SAFE(ibuf->vert);
-        GPU_BATCH_DISCARD_SAFE(ibuf->batch);
-        /* Tag as non alloced. */
-        ibuf->format = NULL;
+  /* FIXME: This is not really correct. The correct way would be to check based on
+   * the vertex buffers. We assume the batch containing the VBO is being used when it should. */
+  /* PERF: This is doing a linear search. This can be very costly. */
+  LISTBASE_FOREACH (DRWInstanceDataList *, data_list, &g_idatalists) {
+    BLI_memblock *memblock = data_list->pool_instancing;
+    BLI_memblock_iter iter;
+    BLI_memblock_iternew(memblock, &iter);
+    GPUBatch *batch;
+    while ((batch = (GPUBatch *)BLI_memblock_iterstep(&iter))) {
+      /* Only check verts[0]; that's enough. */
+      if (batch->verts[0] == geom->verts[0]) {
+        GPU_batch_clear(batch);
       }
     }
   }
 }
 
-void DRW_batching_buffer_request(DRWInstanceDataList *idatalist,
-                                 GPUVertFormat *format,
-                                 GPUPrimType type,
-                                 struct DRWShadingGroup *shgroup,
-                                 GPUBatch **r_batch,
-                                 GPUVertBuf **r_vert)
+/**
+ * This manager allows distributing existing batches for instancing
+ * attributes. This reduces the number of batch creations.
+ * Querying a batch is done with a vertex format. This format should
+ * be static so that its pointer never changes (because we are using
+ * this pointer as an identifier [we don't want to check the full format;
+ * that would be too slow]).
+ */
+GPUVertBuf *DRW_temp_buffer_request(DRWInstanceDataList *idatalist,
+                                    GPUVertFormat *format,
+                                    uint *vert_len)
 {
-  DRWInstanceChunk *chunk = &idatalist->batching;
-  DRWBatchingBuffer *bbuf = idatalist->batching.bbufs;
-  BLI_assert(format);
-  /* Search for an unused batch. */
-  for (int i = 0; i < idatalist->batching.alloc_size; i++, bbuf++) {
-    if (bbuf->shgroup == NULL) {
-      if (bbuf->format == format) {
-        bbuf->shgroup = shgroup;
-        *r_batch = bbuf->batch;
-        *r_vert = bbuf->vert;
-        return;
-      }
-    }
-  }
-  int new_id = 0; /* Find insertion point. */
-  for (; new_id < chunk->alloc_size; ++new_id) {
-    if (chunk->bbufs[new_id].format == NULL) {
-      break;
-    }
+  BLI_assert(format != NULL);
+  BLI_assert(vert_len != NULL);
+
+  DRWTempBufferHandle *handle = BLI_memblock_alloc(idatalist->pool_buffers);
+  GPUVertBuf *vert = &handle->buf;
+  handle->vert_len = vert_len;
+
+  if (handle->format != format) {
+    handle->format = format;
+    /* TODO/PERF: Save the allocated data from freeing to avoid reallocation. */
+    GPU_vertbuf_clear(vert);
+    GPU_vertbuf_init_with_format_ex(vert, format, GPU_USAGE_DYNAMIC);
+    GPU_vertbuf_data_alloc(vert, DRW_BUFFER_VERTS_CHUNK);
   }
-  /* If there is no batch left. Allocate more. */
-  if (new_id == chunk->alloc_size) {
-    new_id = chunk->alloc_size;
-    chunk->alloc_size += BUFFER_CHUNK_SIZE;
-    chunk->bbufs = MEM_reallocN(chunk->bbufs, chunk->alloc_size * sizeof(DRWBatchingBuffer));
-    memset(chunk->bbufs + new_id, 0, sizeof(DRWBatchingBuffer) * BUFFER_CHUNK_SIZE);
-  }
-  /* Create the batch. */
-  bbuf = chunk->bbufs + new_id;
-  bbuf->vert = *r_vert = GPU_vertbuf_create_with_format_ex(format, GPU_USAGE_DYNAMIC);
-  bbuf->batch = *r_batch = GPU_batch_create_ex(type, bbuf->vert, NULL, 0);
-  bbuf->format = format;
-  bbuf->shgroup = shgroup;
-  GPU_vertbuf_data_alloc(*r_vert, BUFFER_VERTS_CHUNK);
+  return vert;
 }
 
-void DRW_instancing_buffer_request(DRWInstanceDataList *idatalist,
-                                   GPUVertFormat *format,
-                                   GPUBatch *instance,
-                                   struct DRWShadingGroup *shgroup,
-                                   GPUBatch **r_batch,
-                                   GPUVertBuf **r_vert)
+/* NOTE: Does not return a valid drawable batch until DRW_instance_buffer_finish has run. */
+GPUBatch *DRW_temp_batch_instance_request(DRWInstanceDataList *idatalist,
+                                          GPUVertBuf *buf,
+                                          GPUBatch *geom)
 {
-  DRWInstanceChunk *chunk = &idatalist->instancing;
-  DRWInstancingBuffer *ibuf = idatalist->instancing.ibufs;
-  BLI_assert(format);
-  /* Search for an unused batch. */
-  for (int i = 0; i < idatalist->instancing.alloc_size; i++, ibuf++) {
-    if (ibuf->shgroup == NULL) {
-      if (ibuf->format == format) {
-        if (ibuf->instance == instance) {
-          ibuf->shgroup = shgroup;
-          *r_batch = ibuf->batch;
-          *r_vert = ibuf->vert;
-          return;
-        }
-      }
+  /* Do not call this with a batch that is already an instancing batch. */
+  BLI_assert(geom->inst == NULL);
+
+  GPUBatch *batch = BLI_memblock_alloc(idatalist->pool_instancing);
+  bool is_compatible = (batch->gl_prim_type == geom->gl_prim_type) && (batch->inst == buf) &&
+                       (batch->phase == GPU_BATCH_READY_TO_DRAW);
+  for (int i = 0; i < GPU_BATCH_VBO_MAX_LEN && is_compatible; i++) {
+    if (batch->verts[i] != geom->verts[i]) {
+      is_compatible = false;
     }
   }
-  int new_id = 0; /* Find insertion point. */
-  for (; new_id < chunk->alloc_size; ++new_id) {
-    if (chunk->ibufs[new_id].format == NULL) {
-      break;
-    }
+
+  if (!is_compatible) {
+    GPU_batch_clear(batch);
+    /* Save args and init later. */
+    batch->inst = buf;
+    batch->phase = GPU_BATCH_READY_TO_BUILD;
+    batch->verts[0] = (void *)geom; /* HACK to save the pointer without another alloc. */
+
+    /* Make sure to free this batch if the instance geom gets freed. */
+    GPU_batch_callback_free_set(geom, &instance_batch_free, NULL);
   }
-  /* If there is no batch left. Allocate more. */
-  if (new_id == chunk->alloc_size) {
-    new_id = chunk->alloc_size;
-    chunk->alloc_size += BUFFER_CHUNK_SIZE;
-    chunk->ibufs = MEM_reallocN(chunk->ibufs, chunk->alloc_size * sizeof(DRWInstancingBuffer));
-    memset(chunk->ibufs + new_id, 0, sizeof(DRWInstancingBuffer) * BUFFER_CHUNK_SIZE);
+  return batch;
+}
+
+/* NOTE: Use only with a buf allocated via DRW_temp_buffer_request. */
+GPUBatch *DRW_temp_batch_request(DRWInstanceDataList *idatalist,
+                                 GPUVertBuf *buf,
+                                 GPUPrimType prim_type)
+{
+  GPUBatch *batch = BLI_memblock_alloc(idatalist->pool_batching);
+  bool is_compatible = (batch->verts[0] == buf) &&
+                       (batch->gl_prim_type == convert_prim_type_to_gl(prim_type));
+  if (!is_compatible) {
+    GPU_batch_clear(batch);
+    GPU_batch_init(batch, prim_type, buf, NULL);
   }
-  /* Create the batch. */
-  ibuf = chunk->ibufs + new_id;
-  ibuf->vert = *r_vert = GPU_vertbuf_create_with_format_ex(format, GPU_USAGE_DYNAMIC);
-  ibuf->batch = *r_batch = MEM_callocN(sizeof(GPUBatch), "GPUBatch");
-  ibuf->format = format;
-  ibuf->shgroup = shgroup;
-  ibuf->instance = instance;
-  GPU_vertbuf_data_alloc(*r_vert, BUFFER_VERTS_CHUNK);
-  /* Make sure to free this ibuf if the instance batch gets free. */
-  GPU_batch_callback_free_set(instance, &instance_batch_free, NULL);
+  return batch;
+}
+
+static void temp_buffer_handle_free(DRWTempBufferHandle *handle)
+{
+  handle->format = NULL;
+  GPU_vertbuf_clear(&handle->buf);
 }
 
 void DRW_instance_buffer_finish(DRWInstanceDataList *idatalist)
 {
-  size_t realloc_size = 1; /* Avoid 0 size realloc. */
-  /* Resize down buffers in use and send data to GPU & free unused buffers. */
-  DRWInstanceChunk *batching = &idatalist->batching;
-  DRWBatchingBuffer *bbuf = batching->bbufs;
-  for (int i = 0; i < batching->alloc_size; i++, bbuf++) {
-    if (bbuf->shgroup != NULL) {
-      realloc_size = i + 1;
-      uint vert_len = DRW_shgroup_get_instance_count(bbuf->shgroup);
-      vert_len += (vert_len == 0) ? 1 : 0; /* Do not realloc to 0 size buffer */
-      if (vert_len + BUFFER_VERTS_CHUNK <= bbuf->vert->vertex_len) {
-        uint size = vert_len + BUFFER_VERTS_CHUNK - 1;
-        size = size - size % BUFFER_VERTS_CHUNK;
-        GPU_vertbuf_data_resize(bbuf->vert, size);
+  /* Resize down buffers in use and send data to GPU. */
+  BLI_memblock_iter iter;
+  DRWTempBufferHandle *handle;
+  BLI_memblock_iternew(idatalist->pool_buffers, &iter);
+  while ((handle = BLI_memblock_iterstep(&iter))) {
+    if (handle->vert_len != NULL) {
+      uint vert_len = *(handle->vert_len);
+      uint target_buf_size = ((vert_len / DRW_BUFFER_VERTS_CHUNK) + 1) * DRW_BUFFER_VERTS_CHUNK;
+      if (target_buf_size < handle->buf.vertex_alloc) {
+        GPU_vertbuf_data_resize(&handle->buf, target_buf_size);
       }
-      GPU_vertbuf_use(bbuf->vert); /* Send data. */
-      bbuf->shgroup = NULL; /* Set as non used for the next round. */
-    }
-    else {
-      GPU_VERTBUF_DISCARD_SAFE(bbuf->vert);
-      GPU_BATCH_DISCARD_SAFE(bbuf->batch);
-      bbuf->format = NULL; /* Tag as non alloced. */
+      GPU_vertbuf_data_len_set(&handle->buf, vert_len);
+      GPU_vertbuf_use(&handle->buf); /* Send data. */
    }
  }
-  /* Rounding up to nearest chunk size. */
-  realloc_size += BUFFER_CHUNK_SIZE - 1;
-  realloc_size -= realloc_size % BUFFER_CHUNK_SIZE;
-  /* Resize down if necessary. */
-  if (realloc_size < batching->alloc_size) {
-    batching->alloc_size = realloc_size;
-    batching->ibufs = MEM_reallocN(batching->ibufs, realloc_size * sizeof(DRWBatchingBuffer));
-  }
-
-  realloc_size = 1;
-  /* Resize down buffers in use and send data to GPU & free unused buffers. */
-  DRWInstanceChunk *instancing = &idatalist->instancing;
-  DRWInstancingBuffer *ibuf = instancing->ibufs;
-  for (int i = 0; i < instancing->alloc_size; i++, ibuf++) {
-    if (ibuf->shgroup != NULL) {
-      realloc_size = i + 1;
-      uint vert_len = DRW_shgroup_get_instance_count(ibuf->shgroup);
-      vert_len += (vert_len == 0) ? 1 : 0; /* Do not realloc to 0 size buffer */
-      if (vert_len + BUFFER_VERTS_CHUNK <= ibuf->vert->vertex_len) {
-        uint size = vert_len + BUFFER_VERTS_CHUNK - 1;
-        size = size - size % BUFFER_VERTS_CHUNK;
-        GPU_vertbuf_data_resize(ibuf->vert, size);
-      }
-      GPU_vertbuf_use(ibuf->vert); /* Send data. */
-      /* Setup batch now that we are sure ibuf->instance is setup. */
-      GPU_batch_copy(ibuf->batch, ibuf->instance);
-      GPU_batch_instbuf_set(ibuf->batch, ibuf->vert, false);
-      ibuf->shgroup = NULL; /* Set as non used for the next round. */
-    }
-    else {
-      GPU_VERTBUF_DISCARD_SAFE(ibuf->vert);
-      GPU_BATCH_DISCARD_SAFE(ibuf->batch);
-      ibuf->format = NULL; /* Tag as non alloced. */
+  /* Finish pending instancing batches. */
+  GPUBatch *batch;
+  BLI_memblock_iternew(idatalist->pool_instancing, &iter);
+  while ((batch = BLI_memblock_iterstep(&iter))) {
+    if (batch->phase == GPU_BATCH_READY_TO_BUILD) {
+      GPUVertBuf *inst = batch->inst;
+      GPUBatch *geom = (void *)batch->verts[0]; /* HACK see DRW_temp_batch_instance_request. */
+      GPU_batch_copy(batch, geom);
+      GPU_batch_instbuf_set(batch, inst, false);
     }
   }
-  /* Rounding up to nearest chunk size. */
-  realloc_size += BUFFER_CHUNK_SIZE - 1;
-  realloc_size -= realloc_size % BUFFER_CHUNK_SIZE;
-  /* Resize down if necessary. */
-  if (realloc_size < instancing->alloc_size) {
-    instancing->alloc_size = realloc_size;
-    instancing->ibufs = MEM_reallocN(instancing->ibufs,
-                                     realloc_size * sizeof(DRWInstancingBuffer));
-  }
+  /* Resize pools and free unused. */
+  BLI_memblock_clear(idatalist->pool_buffers, (MemblockValFreeFP)temp_buffer_handle_free);
+  BLI_memblock_clear(idatalist->pool_instancing, (MemblockValFreeFP)GPU_batch_clear);
+  BLI_memblock_clear(idatalist->pool_batching, (MemblockValFreeFP)GPU_batch_clear);
 }
 
 /** \} */
@@ -352,12 +283,10 @@ DRWInstanceData *DRW_instance_data_request(DRWInstanceDataList *idatalist, uint
 DRWInstanceDataList *DRW_instance_data_list_create(void)
 {
   DRWInstanceDataList *idatalist = MEM_callocN(sizeof(DRWInstanceDataList), "DRWInstanceDataList");
-  idatalist->batching.bbufs = MEM_callocN(sizeof(DRWBatchingBuffer) * BUFFER_CHUNK_SIZE,
-                                          "DRWBatchingBuffers");
-  idatalist->batching.alloc_size = BUFFER_CHUNK_SIZE;
-  idatalist->instancing.ibufs = MEM_callocN(sizeof(DRWInstancingBuffer) * BUFFER_CHUNK_SIZE,
-                                            "DRWInstancingBuffers");
-  idatalist->instancing.alloc_size = BUFFER_CHUNK_SIZE;
+
+  idatalist->pool_batching = BLI_memblock_create(sizeof(GPUBatch), true);
+  idatalist->pool_instancing = BLI_memblock_create(sizeof(GPUBatch), true);
+  idatalist->pool_buffers = BLI_memblock_create(sizeof(DRWTempBufferHandle), true);
 
   BLI_addtail(&g_idatalists, idatalist);
 
@@ -378,19 +307,9 @@ void DRW_instance_data_list_free(DRWInstanceDataList *idatalist)
     idatalist->idata_tail[i] = NULL;
   }
 
-  DRWBatchingBuffer *bbuf = idatalist->batching.bbufs;
-  for (int i = 0; i < idatalist->batching.alloc_size; i++, bbuf++) {
-    GPU_VERTBUF_DISCARD_SAFE(bbuf->vert);
-    GPU_BATCH_DISCARD_SAFE(bbuf->batch);
-  }
-  MEM_freeN(idatalist->batching.bbufs);
-
-  DRWInstancingBuffer *ibuf = idatalist->instancing.ibufs;
-  for (int i = 0; i < idatalist->instancing.alloc_size; i++, ibuf++) {
-    GPU_VERTBUF_DISCARD_SAFE(ibuf->vert);
-    GPU_BATCH_DISCARD_SAFE(ibuf->batch);
-  }
-  MEM_freeN(idatalist->instancing.ibufs);
+  BLI_memblock_destroy(idatalist->pool_buffers, (MemblockValFreeFP)temp_buffer_handle_free);
+  BLI_memblock_destroy(idatalist->pool_instancing, (MemblockValFreeFP)GPU_batch_clear);
+  BLI_memblock_destroy(idatalist->pool_batching, (MemblockValFreeFP)GPU_batch_clear);
 
   BLI_remlink(&g_idatalists, idatalist);
 }
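
To illustrate how the new request functions fit together, here is a minimal usage
sketch in the style of a draw-engine cache-populate callback. It is not part of the
commit: the function request_sphere_instances(), the "inst_pos" attribute, and the
g_sphere_len counter are assumptions for the example; only the DRW_temp_* functions
and DRW_instance_buffer_finish() come from the patch above.

#include "GPU_batch.h"
#include "GPU_vertex_buffer.h"
#include "GPU_vertex_format.h"

#include "draw_instance_data.h"

static uint g_sphere_len = 0; /* Reset each frame; read back via the handle in _finish(). */

static GPUBatch *request_sphere_instances(DRWInstanceDataList *idatalist,
                                          GPUBatch *geom_sphere)
{
  /* The format must be static: its pointer is what identifies the buffer. */
  static GPUVertFormat format = {0};
  static uint attr_pos;
  if (format.attr_len == 0) {
    attr_pos = GPU_vertformat_attr_add(&format, "inst_pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
  }

  GPUVertBuf *buf = DRW_temp_buffer_request(idatalist, &format, &g_sphere_len);

  /* Write one per-instance attribute. A real engine would do this once per object;
   * this single write stays within the initial DRW_BUFFER_VERTS_CHUNK allocation. */
  const float pos[3] = {0.0f, 0.0f, 0.0f};
  GPU_vertbuf_attr_set(buf, attr_pos, g_sphere_len++, pos);

  /* Not drawable until DRW_instance_buffer_finish() has run. */
  return DRW_temp_batch_instance_request(idatalist, buf, geom_sphere);
}

After engines have populated their caches, the draw manager calls
DRW_instance_buffer_finish(), which shrinks each touched buffer to its
chunk-rounded length, uploads it, and builds the pending instancing batches,
as seen in the hunks above.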