From 8bc8a62c57f91326ab3f8850785dce5452b5d703 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?=
Date: Mon, 13 May 2019 17:56:20 +0200
Subject: DRW: Refactor: Use DRWCall to accumulate per instance attributes

This is a big change that cleans up a lot of confusing code:

- The instancing/batching data buffer distribution in draw_instance_data.c.
- The selection & drawing code in draw_manager_exec.c.
- Pretty much all non-mesh object drawing (object_mode.c).

Most of the changes are just renaming, but there is still a chance that a
typo sneaked through.

The Batching/Instancing Shading groups are replaced by DRWCallBuffers. This
is cleaner and conceptually more in line with what a DRWShadingGroup should
be.

There is still a little confusion in draw_common.c, where some functions
take a shgroup as input and some don't.
---
 source/blender/draw/intern/draw_instance_data.c | 343 +++++++++---------------
 1 file changed, 131 insertions(+), 212 deletions(-)

diff --git a/source/blender/draw/intern/draw_instance_data.c b/source/blender/draw/intern/draw_instance_data.c
index e8d91309e06..b88ad936c28 100644
--- a/source/blender/draw/intern/draw_instance_data.c
+++ b/source/blender/draw/intern/draw_instance_data.c
@@ -36,33 +36,9 @@
 #include "MEM_guardedalloc.h"
 
 #include "BLI_utildefines.h"
 #include "BLI_mempool.h"
+#include "BLI_memblock.h"
 
-#define BUFFER_CHUNK_SIZE 32
-#define BUFFER_VERTS_CHUNK 32
-
-typedef struct DRWBatchingBuffer {
-  struct DRWShadingGroup *shgroup; /* Link back to the owning shGroup. Also tells if it's used */
-  GPUVertFormat *format;           /* Identifier. */
-  GPUVertBuf *vert;                /* GPUVertBuf contained in the GPUBatch. */
-  GPUBatch *batch;                 /* GPUBatch containing the GPUVertBuf. */
-} DRWBatchingBuffer;
-
-typedef struct DRWInstancingBuffer {
-  struct DRWShadingGroup *shgroup; /* Link back to the owning shGroup. Also tells if it's used */
-  GPUVertFormat *format;           /* Identifier. */
-  GPUBatch *instance;              /* Identifier. */
-  GPUVertBuf *vert;                /* GPUVertBuf contained in the GPUBatch. */
-  GPUBatch *batch;                 /* GPUBatch containing the GPUVertBuf. */
-} DRWInstancingBuffer;
-
-typedef struct DRWInstanceChunk {
-  size_t cursor;     /* Offset to the next instance data. */
-  size_t alloc_size; /* Number of DRWBatchingBuffer/Batches alloc'd in ibufs/btchs. */
-  union {
-    DRWBatchingBuffer *bbufs;
-    DRWInstancingBuffer *ibufs;
-  };
-} DRWInstanceChunk;
+#include "intern/gpu_primitive_private.h"
 
 struct DRWInstanceData {
   struct DRWInstanceData *next;
@@ -77,212 +53,167 @@ struct DRWInstanceDataList {
   DRWInstanceData *idata_head[MAX_INSTANCE_DATA_SIZE];
   DRWInstanceData *idata_tail[MAX_INSTANCE_DATA_SIZE];
 
-  DRWInstanceChunk instancing;
-  DRWInstanceChunk batching;
+  BLI_memblock *pool_instancing;
+  BLI_memblock *pool_batching;
+  BLI_memblock *pool_buffers;
 };
 
+typedef struct DRWTempBufferHandle {
+  /** Must be first for casting. */
+  GPUVertBuf buf;
+  /** Format pointer for reuse. */
+  GPUVertFormat *format;
+  /** Touched vertex length for resize. */
+  uint *vert_len;
+} DRWTempBufferHandle;
+
 static ListBase g_idatalists = {NULL, NULL};
 
 /* -------------------------------------------------------------------- */
 /** \name Instance Buffer Management
  * \{ */
 
-/**
- * This manager allows to distribute existing batches for instancing
- * attributes. This reduce the number of batches creation.
- * Querying a batch is done with a vertex format.
- * This format should be static so that it's pointer never changes
- * (because we are using this pointer as identifier [we don't want
- * to check the full format that would be too slow]).
- */
-static void instance_batch_free(GPUBatch *batch, void *UNUSED(user_data))
+static void instance_batch_free(GPUBatch *geom, void *UNUSED(user_data))
 {
-  if (batch->verts[0] == NULL) {
+  if (geom->verts[0] == NULL) {
     /** XXX This is a false positive case.
      * The batch has been requested but not init yet
      * and there is a chance that it might become init.
      */
     return;
   }
-  /* Free all batches that have the same key before they are reused. */
+
+  /* Free all batches that use the same VBOs before they are reused. */
   /* TODO: Make it thread safe! Batch freeing can happen from another thread. */
-  /* XXX we need to iterate over all idatalists unless we make some smart
-   * data structure to store the locations to update. */
-  for (DRWInstanceDataList *idatalist = g_idatalists.first; idatalist;
-       idatalist = idatalist->next) {
-    DRWInstancingBuffer *ibuf = idatalist->instancing.ibufs;
-    for (int i = 0; i < idatalist->instancing.alloc_size; i++, ibuf++) {
-      if (ibuf->instance == batch) {
-        BLI_assert(ibuf->shgroup == NULL); /* Make sure it has no other users. */
-        GPU_VERTBUF_DISCARD_SAFE(ibuf->vert);
-        GPU_BATCH_DISCARD_SAFE(ibuf->batch);
-        /* Tag as non alloced. */
-        ibuf->format = NULL;
+  /* FIXME: This is not really correct. The correct way would be to check based on
+   * the vertex buffers. We assume the batch containing the VBO is being used when it should. */
+  /* PERF: This is doing a linear search. This can be very costly. */
+  LISTBASE_FOREACH (DRWInstanceDataList *, data_list, &g_idatalists) {
+    BLI_memblock *memblock = data_list->pool_instancing;
+    BLI_memblock_iter iter;
+    BLI_memblock_iternew(memblock, &iter);
+    GPUBatch *batch;
+    while ((batch = (GPUBatch *)BLI_memblock_iterstep(&iter))) {
+      /* Only check verts[0]; that's enough. */
+      if (batch->verts[0] == geom->verts[0]) {
+        GPU_batch_clear(batch);
       }
     }
   }
 }
 
-void DRW_batching_buffer_request(DRWInstanceDataList *idatalist,
-                                 GPUVertFormat *format,
-                                 GPUPrimType type,
-                                 struct DRWShadingGroup *shgroup,
-                                 GPUBatch **r_batch,
-                                 GPUVertBuf **r_vert)
+/**
+ * This manager allows distributing existing batches for instancing
+ * attributes. This reduces the number of batch creations.
+ * Querying a batch is done with a vertex format. This format should
+ * be static so that its pointer never changes (because we are using
+ * this pointer as an identifier [we don't want to check the full format;
+ * that would be too slow]).
+ */
+GPUVertBuf *DRW_temp_buffer_request(DRWInstanceDataList *idatalist,
+                                    GPUVertFormat *format,
+                                    uint *vert_len)
 {
-  DRWInstanceChunk *chunk = &idatalist->batching;
-  DRWBatchingBuffer *bbuf = idatalist->batching.bbufs;
-  BLI_assert(format);
-  /* Search for an unused batch. */
-  for (int i = 0; i < idatalist->batching.alloc_size; i++, bbuf++) {
-    if (bbuf->shgroup == NULL) {
-      if (bbuf->format == format) {
-        bbuf->shgroup = shgroup;
-        *r_batch = bbuf->batch;
-        *r_vert = bbuf->vert;
-        return;
-      }
-    }
-  }
-  int new_id = 0; /* Find insertion point. */
-  for (; new_id < chunk->alloc_size; ++new_id) {
-    if (chunk->bbufs[new_id].format == NULL) {
-      break;
-    }
+  BLI_assert(format != NULL);
+  BLI_assert(vert_len != NULL);
+
+  DRWTempBufferHandle *handle = BLI_memblock_alloc(idatalist->pool_buffers);
+  GPUVertBuf *vert = &handle->buf;
+  handle->vert_len = vert_len;
+
+  if (handle->format != format) {
+    handle->format = format;
+    /* TODO/PERF: Save the allocated data from freeing to avoid reallocation. */
+    GPU_vertbuf_clear(vert);
+    GPU_vertbuf_init_with_format_ex(vert, format, GPU_USAGE_DYNAMIC);
+    GPU_vertbuf_data_alloc(vert, DRW_BUFFER_VERTS_CHUNK);
   }
-  /* If there is no batch left. Allocate more. */
-  if (new_id == chunk->alloc_size) {
-    new_id = chunk->alloc_size;
-    chunk->alloc_size += BUFFER_CHUNK_SIZE;
-    chunk->bbufs = MEM_reallocN(chunk->bbufs, chunk->alloc_size * sizeof(DRWBatchingBuffer));
-    memset(chunk->bbufs + new_id, 0, sizeof(DRWBatchingBuffer) * BUFFER_CHUNK_SIZE);
-  }
-  /* Create the batch. */
-  bbuf = chunk->bbufs + new_id;
-  bbuf->vert = *r_vert = GPU_vertbuf_create_with_format_ex(format, GPU_USAGE_DYNAMIC);
-  bbuf->batch = *r_batch = GPU_batch_create_ex(type, bbuf->vert, NULL, 0);
-  bbuf->format = format;
-  bbuf->shgroup = shgroup;
-  GPU_vertbuf_data_alloc(*r_vert, BUFFER_VERTS_CHUNK);
+  return vert;
 }
 
-void DRW_instancing_buffer_request(DRWInstanceDataList *idatalist,
-                                   GPUVertFormat *format,
-                                   GPUBatch *instance,
-                                   struct DRWShadingGroup *shgroup,
-                                   GPUBatch **r_batch,
-                                   GPUVertBuf **r_vert)
+/* NOTE: Does not return a valid drawable batch until DRW_instance_buffer_finish has run. */
+GPUBatch *DRW_temp_batch_instance_request(DRWInstanceDataList *idatalist,
+                                          GPUVertBuf *buf,
+                                          GPUBatch *geom)
 {
-  DRWInstanceChunk *chunk = &idatalist->instancing;
-  DRWInstancingBuffer *ibuf = idatalist->instancing.ibufs;
-  BLI_assert(format);
-  /* Search for an unused batch. */
-  for (int i = 0; i < idatalist->instancing.alloc_size; i++, ibuf++) {
-    if (ibuf->shgroup == NULL) {
-      if (ibuf->format == format) {
-        if (ibuf->instance == instance) {
-          ibuf->shgroup = shgroup;
-          *r_batch = ibuf->batch;
-          *r_vert = ibuf->vert;
-          return;
-        }
-      }
+  /* Do not call this with a batch that is already an instancing batch. */
+  BLI_assert(geom->inst == NULL);
+
+  GPUBatch *batch = BLI_memblock_alloc(idatalist->pool_instancing);
+  bool is_compatible = (batch->gl_prim_type == geom->gl_prim_type) && (batch->inst == buf) &&
+                       (batch->phase == GPU_BATCH_READY_TO_DRAW);
+  for (int i = 0; i < GPU_BATCH_VBO_MAX_LEN && is_compatible; i++) {
+    if (batch->verts[i] != geom->verts[i]) {
+      is_compatible = false;
     }
   }
-  int new_id = 0; /* Find insertion point. */
-  for (; new_id < chunk->alloc_size; ++new_id) {
-    if (chunk->ibufs[new_id].format == NULL) {
-      break;
-    }
+
+  if (!is_compatible) {
+    GPU_batch_clear(batch);
+    /* Save args and init later. */
+    batch->inst = buf;
+    batch->phase = GPU_BATCH_READY_TO_BUILD;
+    batch->verts[0] = (void *)geom; /* HACK to save the pointer without another alloc. */
+
+    /* Make sure to free this batch if the instance geom gets freed. */
+    GPU_batch_callback_free_set(geom, &instance_batch_free, NULL);
   }
-  /* If there is no batch left. Allocate more. */
-  if (new_id == chunk->alloc_size) {
-    new_id = chunk->alloc_size;
-    chunk->alloc_size += BUFFER_CHUNK_SIZE;
-    chunk->ibufs = MEM_reallocN(chunk->ibufs, chunk->alloc_size * sizeof(DRWInstancingBuffer));
-    memset(chunk->ibufs + new_id, 0, sizeof(DRWInstancingBuffer) * BUFFER_CHUNK_SIZE);
+  return batch;
+}
+
+/* NOTE: Use only with a buf allocated via DRW_temp_buffer_request. */
+GPUBatch *DRW_temp_batch_request(DRWInstanceDataList *idatalist,
+                                 GPUVertBuf *buf,
+                                 GPUPrimType prim_type)
+{
+  GPUBatch *batch = BLI_memblock_alloc(idatalist->pool_batching);
+  bool is_compatible = (batch->verts[0] == buf) &&
+                       (batch->gl_prim_type == convert_prim_type_to_gl(prim_type));
+  if (!is_compatible) {
+    GPU_batch_clear(batch);
+    GPU_batch_init(batch, prim_type, buf, NULL);
   }
-  /* Create the batch. */
-  ibuf = chunk->ibufs + new_id;
-  ibuf->vert = *r_vert = GPU_vertbuf_create_with_format_ex(format, GPU_USAGE_DYNAMIC);
-  ibuf->batch = *r_batch = MEM_callocN(sizeof(GPUBatch), "GPUBatch");
-  ibuf->format = format;
-  ibuf->shgroup = shgroup;
-  ibuf->instance = instance;
-  GPU_vertbuf_data_alloc(*r_vert, BUFFER_VERTS_CHUNK);
-  /* Make sure to free this ibuf if the instance batch gets free. */
-  GPU_batch_callback_free_set(instance, &instance_batch_free, NULL);
+  return batch;
+}
+
+static void temp_buffer_handle_free(DRWTempBufferHandle *handle)
+{
+  handle->format = NULL;
+  GPU_vertbuf_clear(&handle->buf);
 }
 
 void DRW_instance_buffer_finish(DRWInstanceDataList *idatalist)
 {
-  size_t realloc_size = 1; /* Avoid 0 size realloc. */
-  /* Resize down buffers in use and send data to GPU & free unused buffers. */
-  DRWInstanceChunk *batching = &idatalist->batching;
-  DRWBatchingBuffer *bbuf = batching->bbufs;
-  for (int i = 0; i < batching->alloc_size; i++, bbuf++) {
-    if (bbuf->shgroup != NULL) {
-      realloc_size = i + 1;
-      uint vert_len = DRW_shgroup_get_instance_count(bbuf->shgroup);
-      vert_len += (vert_len == 0) ? 1 : 0; /* Do not realloc to 0 size buffer */
-      if (vert_len + BUFFER_VERTS_CHUNK <= bbuf->vert->vertex_len) {
-        uint size = vert_len + BUFFER_VERTS_CHUNK - 1;
-        size = size - size % BUFFER_VERTS_CHUNK;
-        GPU_vertbuf_data_resize(bbuf->vert, size);
+  /* Resize down buffers in use and send data to GPU. */
+  BLI_memblock_iter iter;
+  DRWTempBufferHandle *handle;
+  BLI_memblock_iternew(idatalist->pool_buffers, &iter);
+  while ((handle = BLI_memblock_iterstep(&iter))) {
+    if (handle->vert_len != NULL) {
+      uint vert_len = *(handle->vert_len);
+      uint target_buf_size = ((vert_len / DRW_BUFFER_VERTS_CHUNK) + 1) * DRW_BUFFER_VERTS_CHUNK;
+      if (target_buf_size < handle->buf.vertex_alloc) {
+        GPU_vertbuf_data_resize(&handle->buf, target_buf_size);
       }
-      GPU_vertbuf_use(bbuf->vert); /* Send data. */
-      bbuf->shgroup = NULL; /* Set as non used for the next round. */
-    }
-    else {
-      GPU_VERTBUF_DISCARD_SAFE(bbuf->vert);
-      GPU_BATCH_DISCARD_SAFE(bbuf->batch);
-      bbuf->format = NULL; /* Tag as non alloced. */
+      GPU_vertbuf_data_len_set(&handle->buf, vert_len);
+      GPU_vertbuf_use(&handle->buf); /* Send data. */
    }
  }
-  /* Rounding up to nearest chunk size. */
-  realloc_size += BUFFER_CHUNK_SIZE - 1;
-  realloc_size -= realloc_size % BUFFER_CHUNK_SIZE;
-  /* Resize down if necessary. */
-  if (realloc_size < batching->alloc_size) {
-    batching->alloc_size = realloc_size;
-    batching->ibufs = MEM_reallocN(batching->ibufs, realloc_size * sizeof(DRWBatchingBuffer));
-  }
-
-  realloc_size = 1;
-  /* Resize down buffers in use and send data to GPU & free unused buffers. */
-  DRWInstanceChunk *instancing = &idatalist->instancing;
-  DRWInstancingBuffer *ibuf = instancing->ibufs;
-  for (int i = 0; i < instancing->alloc_size; i++, ibuf++) {
-    if (ibuf->shgroup != NULL) {
-      realloc_size = i + 1;
-      uint vert_len = DRW_shgroup_get_instance_count(ibuf->shgroup);
-      vert_len += (vert_len == 0) ? 1 : 0; /* Do not realloc to 0 size buffer */
-      if (vert_len + BUFFER_VERTS_CHUNK <= ibuf->vert->vertex_len) {
-        uint size = vert_len + BUFFER_VERTS_CHUNK - 1;
-        size = size - size % BUFFER_VERTS_CHUNK;
-        GPU_vertbuf_data_resize(ibuf->vert, size);
-      }
-      GPU_vertbuf_use(ibuf->vert); /* Send data. */
-      /* Setup batch now that we are sure ibuf->instance is setup. */
-      GPU_batch_copy(ibuf->batch, ibuf->instance);
-      GPU_batch_instbuf_set(ibuf->batch, ibuf->vert, false);
-      ibuf->shgroup = NULL; /* Set as non used for the next round. */
-    }
-    else {
-      GPU_VERTBUF_DISCARD_SAFE(ibuf->vert);
-      GPU_BATCH_DISCARD_SAFE(ibuf->batch);
-      ibuf->format = NULL; /* Tag as non alloced. */
+  /* Finish pending instancing batches. */
+  GPUBatch *batch;
+  BLI_memblock_iternew(idatalist->pool_instancing, &iter);
+  while ((batch = BLI_memblock_iterstep(&iter))) {
+    if (batch->phase == GPU_BATCH_READY_TO_BUILD) {
+      GPUVertBuf *inst = batch->inst;
+      GPUBatch *geom = (void *)batch->verts[0]; /* HACK see DRW_temp_batch_instance_request. */
+      GPU_batch_copy(batch, geom);
+      GPU_batch_instbuf_set(batch, inst, false);
     }
   }
-  /* Rounding up to nearest chunk size. */
-  realloc_size += BUFFER_CHUNK_SIZE - 1;
-  realloc_size -= realloc_size % BUFFER_CHUNK_SIZE;
-  /* Resize down if necessary. */
-  if (realloc_size < instancing->alloc_size) {
-    instancing->alloc_size = realloc_size;
-    instancing->ibufs = MEM_reallocN(instancing->ibufs,
-                                     realloc_size * sizeof(DRWInstancingBuffer));
-  }
+  /* Resize pools and free unused. */
+  BLI_memblock_clear(idatalist->pool_buffers, (MemblockValFreeFP)temp_buffer_handle_free);
+  BLI_memblock_clear(idatalist->pool_instancing, (MemblockValFreeFP)GPU_batch_clear);
+  BLI_memblock_clear(idatalist->pool_batching, (MemblockValFreeFP)GPU_batch_clear);
 }
 
 /** \} */
@@ -352,12 +283,10 @@ DRWInstanceData *DRW_instance_data_request(DRWInstanceDataList *idatalist, uint
 DRWInstanceDataList *DRW_instance_data_list_create(void)
 {
   DRWInstanceDataList *idatalist = MEM_callocN(sizeof(DRWInstanceDataList), "DRWInstanceDataList");
-  idatalist->batching.bbufs = MEM_callocN(sizeof(DRWBatchingBuffer) * BUFFER_CHUNK_SIZE,
-                                          "DRWBatchingBuffers");
-  idatalist->batching.alloc_size = BUFFER_CHUNK_SIZE;
-  idatalist->instancing.ibufs = MEM_callocN(sizeof(DRWInstancingBuffer) * BUFFER_CHUNK_SIZE,
-                                            "DRWInstancingBuffers");
-  idatalist->instancing.alloc_size = BUFFER_CHUNK_SIZE;
+
+  idatalist->pool_batching = BLI_memblock_create(sizeof(GPUBatch), true);
+  idatalist->pool_instancing = BLI_memblock_create(sizeof(GPUBatch), true);
+  idatalist->pool_buffers = BLI_memblock_create(sizeof(DRWTempBufferHandle), true);
 
   BLI_addtail(&g_idatalists, idatalist);
 
@@ -378,19 +307,9 @@ void DRW_instance_data_list_free(DRWInstanceDataList *idatalist)
     idatalist->idata_tail[i] = NULL;
   }
 
-  DRWBatchingBuffer *bbuf = idatalist->batching.bbufs;
-  for (int i = 0; i < idatalist->batching.alloc_size; i++, bbuf++) {
-    GPU_VERTBUF_DISCARD_SAFE(bbuf->vert);
-    GPU_BATCH_DISCARD_SAFE(bbuf->batch);
-  }
-  MEM_freeN(idatalist->batching.bbufs);
-
-  DRWInstancingBuffer *ibuf = idatalist->instancing.ibufs;
-  for (int i = 0; i < idatalist->instancing.alloc_size; i++, ibuf++) {
-    GPU_VERTBUF_DISCARD_SAFE(ibuf->vert);
-    GPU_BATCH_DISCARD_SAFE(ibuf->batch);
-  }
-  MEM_freeN(idatalist->instancing.ibufs);
+  BLI_memblock_destroy(idatalist->pool_buffers, (MemblockValFreeFP)temp_buffer_handle_free);
+  BLI_memblock_destroy(idatalist->pool_instancing, (MemblockValFreeFP)GPU_batch_clear);
+  BLI_memblock_destroy(idatalist->pool_batching, (MemblockValFreeFP)GPU_batch_clear);
 
   BLI_remlink(&g_idatalists, idatalist);
 }
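
To illustrate how the new request functions fit together, here is a minimal usage
sketch in the style of a draw-engine cache-populate callback. It is not part of the
commit: the function request_sphere_instances(), the "inst_pos" attribute, and the
g_sphere_len counter are assumptions for the example; only the DRW_temp_* functions
and DRW_instance_buffer_finish() come from the patch above.

#include "GPU_batch.h"
#include "GPU_vertex_buffer.h"
#include "GPU_vertex_format.h"

#include "draw_instance_data.h"

static uint g_sphere_len = 0; /* Reset each frame; read back via the handle in _finish(). */

static GPUBatch *request_sphere_instances(DRWInstanceDataList *idatalist,
                                          GPUBatch *geom_sphere)
{
  /* The format must be static: its pointer is what identifies the buffer. */
  static GPUVertFormat format = {0};
  static uint attr_pos;
  if (format.attr_len == 0) {
    attr_pos = GPU_vertformat_attr_add(&format, "inst_pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
  }

  GPUVertBuf *buf = DRW_temp_buffer_request(idatalist, &format, &g_sphere_len);

  /* Write one per-instance attribute. A real engine would do this once per object;
   * this single write stays within the initial DRW_BUFFER_VERTS_CHUNK allocation. */
  const float pos[3] = {0.0f, 0.0f, 0.0f};
  GPU_vertbuf_attr_set(buf, attr_pos, g_sphere_len++, pos);

  /* Not drawable until DRW_instance_buffer_finish() has run. */
  return DRW_temp_batch_instance_request(idatalist, buf, geom_sphere);
}

After engines have populated their caches, the draw manager calls
DRW_instance_buffer_finish(), which shrinks each touched buffer to its
chunk-rounded length, uploads it, and builds the pending instancing batches,
as seen in the hunks above.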