diff options
Diffstat (limited to 'source/blender/draw/intern/draw_instance_data.c')
-rw-r--r-- | source/blender/draw/intern/draw_instance_data.c | 436 |
1 files changed, 436 insertions, 0 deletions
diff --git a/source/blender/draw/intern/draw_instance_data.c b/source/blender/draw/intern/draw_instance_data.c new file mode 100644 index 00000000000..89cffd6c19c --- /dev/null +++ b/source/blender/draw/intern/draw_instance_data.c @@ -0,0 +1,436 @@ +/* + * Copyright 2016, Blender Foundation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Contributor(s): Blender Institute + * + */ + +/** \file blender/draw/intern/draw_instance_data.c + * \ingroup draw + */ + +/** + * DRW Instance Data Manager + * This is a special memory manager that keeps memory blocks ready to send as vbo data in one continuous allocation. + * This way we avoid feeding gawain each instance data one by one and unecessary memcpy. + * Since we loose which memory block was used each DRWShadingGroup we need to redistribute them in the same order/size + * to avoid to realloc each frame. + * This is why DRWInstanceDatas are sorted in a list for each different data size. + **/ + +#include "draw_instance_data.h" +#include "DRW_engine.h" +#include "DRW_render.h" /* For DRW_shgroup_get_instance_count() */ + +#include "MEM_guardedalloc.h" +#include "BLI_utildefines.h" +#include "BLI_mempool.h" + +#define BUFFER_CHUNK_SIZE 32 +#define BUFFER_VERTS_CHUNK 32 + +typedef struct DRWBatchingBuffer { + struct DRWShadingGroup *shgroup; /* Link back to the owning shGroup. Also tells if it's used */ + GPUVertFormat *format; /* Identifier. */ + GPUVertBuf *vert; /* GPUVertBuf contained in the GPUBatch. */ + GPUBatch *batch; /* GPUBatch containing the GPUVertBuf. */ +} DRWBatchingBuffer; + +typedef struct DRWInstancingBuffer { + struct DRWShadingGroup *shgroup; /* Link back to the owning shGroup. Also tells if it's used */ + GPUVertFormat *format; /* Identifier. */ + GPUBatch *instance; /* Identifier. */ + GPUVertBuf *vert; /* GPUVertBuf contained in the GPUBatch. */ + GPUBatch *batch; /* GPUBatch containing the GPUVertBuf. */ +} DRWInstancingBuffer; + +typedef struct DRWInstanceChunk { + size_t cursor; /* Offset to the next instance data. */ + size_t alloc_size; /* Number of DRWBatchingBuffer/Batches alloc'd in ibufs/btchs. */ + union { + DRWBatchingBuffer *bbufs; + DRWInstancingBuffer *ibufs; + }; +} DRWInstanceChunk; + +struct DRWInstanceData { + struct DRWInstanceData *next; + bool used; /* If this data is used or not. */ + size_t data_size; /* Size of one instance data. */ + BLI_mempool *mempool; +}; + +struct DRWInstanceDataList { + struct DRWInstanceDataList *next, *prev; + /* Linked lists for all possible data pool size */ + DRWInstanceData *idata_head[MAX_INSTANCE_DATA_SIZE]; + DRWInstanceData *idata_tail[MAX_INSTANCE_DATA_SIZE]; + + DRWInstanceChunk instancing; + DRWInstanceChunk batching; +}; + +static ListBase g_idatalists = {NULL, NULL}; + +/* -------------------------------------------------------------------- */ + +/** \name Instance Buffer Management + * \{ */ + +/** + * This manager allows to distribute existing batches for instancing + * attributes. This reduce the number of batches creation. + * Querying a batch is done with a vertex format. This format should + * be static so that it's pointer never changes (because we are using + * this pointer as identifier [we don't want to check the full format + * that would be too slow]). + **/ + +static void instance_batch_free(GPUBatch *batch, void *UNUSED(user_data)) +{ + /* Free all batches that have the same key before they are reused. */ + /* TODO: Make it thread safe! Batch freeing can happen from another thread. */ + /* XXX we need to iterate over all idatalists unless we make some smart + * data structure to store the locations to update. */ + for (DRWInstanceDataList *idatalist = g_idatalists.first; idatalist; idatalist = idatalist->next) { + DRWInstancingBuffer *ibuf = idatalist->instancing.ibufs; + for (int i = 0; i < idatalist->instancing.alloc_size; i++, ibuf++) { + if (ibuf->instance == batch) { + BLI_assert(ibuf->shgroup == NULL); /* Make sure it has no other users. */ + GPU_VERTBUF_DISCARD_SAFE(ibuf->vert); + GPU_BATCH_DISCARD_SAFE(ibuf->batch); + /* Tag as non alloced. */ + ibuf->format = NULL; + } + } + } +} + +void DRW_batching_buffer_request( + DRWInstanceDataList *idatalist, GPUVertFormat *format, GPUPrimType type, struct DRWShadingGroup *shgroup, + GPUBatch **r_batch, GPUVertBuf **r_vert) +{ + DRWInstanceChunk *chunk = &idatalist->batching; + DRWBatchingBuffer *bbuf = idatalist->batching.bbufs; + BLI_assert(format); + /* Search for an unused batch. */ + for (int i = 0; i < idatalist->batching.alloc_size; i++, bbuf++) { + if (bbuf->shgroup == NULL) { + if (bbuf->format == format) { + bbuf->shgroup = shgroup; + *r_batch = bbuf->batch; + *r_vert = bbuf->vert; + return; + } + } + } + int new_id = 0; /* Find insertion point. */ + for (; new_id < chunk->alloc_size; ++new_id) { + if (chunk->bbufs[new_id].format == NULL) + break; + } + /* If there is no batch left. Allocate more. */ + if (new_id == chunk->alloc_size) { + new_id = chunk->alloc_size; + chunk->alloc_size += BUFFER_CHUNK_SIZE; + chunk->bbufs = MEM_reallocN(chunk->bbufs, chunk->alloc_size * sizeof(DRWBatchingBuffer)); + memset(chunk->bbufs + new_id, 0, sizeof(DRWBatchingBuffer) * BUFFER_CHUNK_SIZE); + } + /* Create the batch. */ + bbuf = chunk->bbufs + new_id; + bbuf->vert = *r_vert = GPU_vertbuf_create_with_format_ex(format, GPU_USAGE_DYNAMIC); + bbuf->batch = *r_batch = GPU_batch_create_ex(type, bbuf->vert, NULL, 0); + bbuf->format = format; + bbuf->shgroup = shgroup; + GPU_vertbuf_data_alloc(*r_vert, BUFFER_VERTS_CHUNK); +} + +void DRW_instancing_buffer_request( + DRWInstanceDataList *idatalist, GPUVertFormat *format, GPUBatch *instance, struct DRWShadingGroup *shgroup, + GPUBatch **r_batch, GPUVertBuf **r_vert) +{ + DRWInstanceChunk *chunk = &idatalist->instancing; + DRWInstancingBuffer *ibuf = idatalist->instancing.ibufs; + BLI_assert(format); + /* Search for an unused batch. */ + for (int i = 0; i < idatalist->instancing.alloc_size; i++, ibuf++) { + if (ibuf->shgroup == NULL) { + if (ibuf->format == format) { + if (ibuf->instance == instance) { + ibuf->shgroup = shgroup; + *r_batch = ibuf->batch; + *r_vert = ibuf->vert; + return; + } + } + } + } + int new_id = 0; /* Find insertion point. */ + for (; new_id < chunk->alloc_size; ++new_id) { + if (chunk->ibufs[new_id].format == NULL) + break; + } + /* If there is no batch left. Allocate more. */ + if (new_id == chunk->alloc_size) { + new_id = chunk->alloc_size; + chunk->alloc_size += BUFFER_CHUNK_SIZE; + chunk->ibufs = MEM_reallocN(chunk->ibufs, chunk->alloc_size * sizeof(DRWInstancingBuffer)); + memset(chunk->ibufs + new_id, 0, sizeof(DRWInstancingBuffer) * BUFFER_CHUNK_SIZE); + } + /* Create the batch. */ + ibuf = chunk->ibufs + new_id; + ibuf->vert = *r_vert = GPU_vertbuf_create_with_format_ex(format, GPU_USAGE_DYNAMIC); + ibuf->batch = *r_batch = GPU_batch_duplicate(instance); + ibuf->format = format; + ibuf->shgroup = shgroup; + ibuf->instance = instance; + GPU_vertbuf_data_alloc(*r_vert, BUFFER_VERTS_CHUNK); + GPU_batch_instbuf_set(ibuf->batch, ibuf->vert, false); + /* Make sure to free this ibuf if the instance batch gets free. */ + GPU_batch_callback_free_set(instance, &instance_batch_free, NULL); +} + +void DRW_instance_buffer_finish(DRWInstanceDataList *idatalist) +{ + size_t realloc_size = 1; /* Avoid 0 size realloc. */ + /* Resize down buffers in use and send data to GPU & free unused buffers. */ + DRWInstanceChunk *batching = &idatalist->batching; + DRWBatchingBuffer *bbuf = batching->bbufs; + for (int i = 0; i < batching->alloc_size; i++, bbuf++) { + if (bbuf->shgroup != NULL) { + realloc_size = i + 1; + uint vert_len = DRW_shgroup_get_instance_count(bbuf->shgroup); + vert_len += (vert_len == 0) ? 1 : 0; /* Do not realloc to 0 size buffer */ + if (vert_len + BUFFER_VERTS_CHUNK <= bbuf->vert->vertex_len) { + uint size = vert_len + BUFFER_VERTS_CHUNK - 1; + size = size - size % BUFFER_VERTS_CHUNK; + GPU_vertbuf_data_resize(bbuf->vert, size); + } + GPU_vertbuf_use(bbuf->vert); /* Send data. */ + bbuf->shgroup = NULL; /* Set as non used for the next round. */ + } + else { + GPU_VERTBUF_DISCARD_SAFE(bbuf->vert); + GPU_BATCH_DISCARD_SAFE(bbuf->batch); + bbuf->format = NULL; /* Tag as non alloced. */ + } + } + /* Rounding up to nearest chunk size. */ + realloc_size += BUFFER_CHUNK_SIZE - 1; + realloc_size -= realloc_size % BUFFER_CHUNK_SIZE; + /* Resize down if necessary. */ + if (realloc_size < batching->alloc_size) { + batching->alloc_size = realloc_size; + batching->ibufs = MEM_reallocN(batching->ibufs, realloc_size * sizeof(DRWBatchingBuffer)); + } + + realloc_size = 1; + /* Resize down buffers in use and send data to GPU & free unused buffers. */ + DRWInstanceChunk *instancing = &idatalist->instancing; + DRWInstancingBuffer *ibuf = instancing->ibufs; + for (int i = 0; i < instancing->alloc_size; i++, ibuf++) { + if (ibuf->shgroup != NULL) { + realloc_size = i + 1; + uint vert_len = DRW_shgroup_get_instance_count(ibuf->shgroup); + vert_len += (vert_len == 0) ? 1 : 0; /* Do not realloc to 0 size buffer */ + if (vert_len + BUFFER_VERTS_CHUNK <= ibuf->vert->vertex_len) { + uint size = vert_len + BUFFER_VERTS_CHUNK - 1; + size = size - size % BUFFER_VERTS_CHUNK; + GPU_vertbuf_data_resize(ibuf->vert, size); + } + GPU_vertbuf_use(ibuf->vert); /* Send data. */ + ibuf->shgroup = NULL; /* Set as non used for the next round. */ + } + else { + GPU_VERTBUF_DISCARD_SAFE(ibuf->vert); + GPU_BATCH_DISCARD_SAFE(ibuf->batch); + ibuf->format = NULL; /* Tag as non alloced. */ + } + } + /* Rounding up to nearest chunk size. */ + realloc_size += BUFFER_CHUNK_SIZE - 1; + realloc_size -= realloc_size % BUFFER_CHUNK_SIZE; + /* Resize down if necessary. */ + if (realloc_size < instancing->alloc_size) { + instancing->alloc_size = realloc_size; + instancing->ibufs = MEM_reallocN(instancing->ibufs, realloc_size * sizeof(DRWInstancingBuffer)); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ + +/** \name Instance Data (DRWInstanceData) + * \{ */ + +static DRWInstanceData *drw_instance_data_create(DRWInstanceDataList *idatalist, uint attrib_size) +{ + DRWInstanceData *idata = MEM_callocN(sizeof(DRWInstanceData), "DRWInstanceData"); + idata->next = NULL; + idata->used = true; + idata->data_size = attrib_size; + idata->mempool = BLI_mempool_create(sizeof(float) * idata->data_size, 0, 16, 0); + + BLI_assert(attrib_size > 0); + + /* Push to linked list. */ + if (idatalist->idata_head[attrib_size - 1] == NULL) { + idatalist->idata_head[attrib_size - 1] = idata; + } + else { + idatalist->idata_tail[attrib_size - 1]->next = idata; + } + idatalist->idata_tail[attrib_size - 1] = idata; + + return idata; +} + +static void DRW_instance_data_free(DRWInstanceData *idata) +{ + BLI_mempool_destroy(idata->mempool); +} + +/** + * Return a pointer to the next instance data space. + **/ +void *DRW_instance_data_next(DRWInstanceData *idata) +{ + return BLI_mempool_alloc(idata->mempool); +} + +DRWInstanceData *DRW_instance_data_request(DRWInstanceDataList *idatalist, uint attrib_size) +{ + BLI_assert(attrib_size > 0 && attrib_size <= MAX_INSTANCE_DATA_SIZE); + + DRWInstanceData *idata = idatalist->idata_head[attrib_size - 1]; + + /* Search for an unused data chunk. */ + for (; idata; idata = idata->next) { + if (idata->used == false) { + idata->used = true; + return idata; + } + } + + return drw_instance_data_create(idatalist, attrib_size); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ + +/** \name Instance Data List (DRWInstanceDataList) + * \{ */ + +DRWInstanceDataList *DRW_instance_data_list_create(void) +{ + DRWInstanceDataList *idatalist = MEM_callocN(sizeof(DRWInstanceDataList), "DRWInstanceDataList"); + idatalist->batching.bbufs = MEM_callocN(sizeof(DRWBatchingBuffer) * BUFFER_CHUNK_SIZE, "DRWBatchingBuffers"); + idatalist->batching.alloc_size = BUFFER_CHUNK_SIZE; + idatalist->instancing.ibufs = MEM_callocN(sizeof(DRWInstancingBuffer) * BUFFER_CHUNK_SIZE, "DRWInstancingBuffers"); + idatalist->instancing.alloc_size = BUFFER_CHUNK_SIZE; + + BLI_addtail(&g_idatalists, idatalist); + + return idatalist; +} + +void DRW_instance_data_list_free(DRWInstanceDataList *idatalist) +{ + DRWInstanceData *idata, *next_idata; + + for (int i = 0; i < MAX_INSTANCE_DATA_SIZE; ++i) { + for (idata = idatalist->idata_head[i]; idata; idata = next_idata) { + next_idata = idata->next; + DRW_instance_data_free(idata); + MEM_freeN(idata); + } + idatalist->idata_head[i] = NULL; + idatalist->idata_tail[i] = NULL; + } + + DRWBatchingBuffer *bbuf = idatalist->batching.bbufs; + for (int i = 0; i < idatalist->batching.alloc_size; i++, bbuf++) { + GPU_VERTBUF_DISCARD_SAFE(bbuf->vert); + GPU_BATCH_DISCARD_SAFE(bbuf->batch); + } + MEM_freeN(idatalist->batching.bbufs); + + DRWInstancingBuffer *ibuf = idatalist->instancing.ibufs; + for (int i = 0; i < idatalist->instancing.alloc_size; i++, ibuf++) { + GPU_VERTBUF_DISCARD_SAFE(ibuf->vert); + GPU_BATCH_DISCARD_SAFE(ibuf->batch); + } + MEM_freeN(idatalist->instancing.ibufs); + + BLI_remlink(&g_idatalists, idatalist); +} + +void DRW_instance_data_list_reset(DRWInstanceDataList *idatalist) +{ + DRWInstanceData *idata; + + for (int i = 0; i < MAX_INSTANCE_DATA_SIZE; ++i) { + for (idata = idatalist->idata_head[i]; idata; idata = idata->next) { + idata->used = false; + } + } +} + +void DRW_instance_data_list_free_unused(DRWInstanceDataList *idatalist) +{ + DRWInstanceData *idata, *next_idata; + + /* Remove unused data blocks and sanitize each list. */ + for (int i = 0; i < MAX_INSTANCE_DATA_SIZE; ++i) { + idatalist->idata_tail[i] = NULL; + for (idata = idatalist->idata_head[i]; idata; idata = next_idata) { + next_idata = idata->next; + if (idata->used == false) { + if (idatalist->idata_head[i] == idata) { + idatalist->idata_head[i] = next_idata; + } + else { + /* idatalist->idata_tail[i] is garanteed not to be null in this case. */ + idatalist->idata_tail[i]->next = next_idata; + } + DRW_instance_data_free(idata); + MEM_freeN(idata); + } + else { + if (idatalist->idata_tail[i] != NULL) { + idatalist->idata_tail[i]->next = idata; + } + idatalist->idata_tail[i] = idata; + } + } + } +} + +void DRW_instance_data_list_resize(DRWInstanceDataList *idatalist) +{ + DRWInstanceData *idata; + + for (int i = 0; i < MAX_INSTANCE_DATA_SIZE; ++i) { + for (idata = idatalist->idata_head[i]; idata; idata = idata->next) { + BLI_mempool_clear_ex(idata->mempool, BLI_mempool_len(idata->mempool)); + } + } +} + +/** \} */ |