GPUImmediate: Use 2 Buffers For (Un)Strict

We used to have a single buffer that was shared between strict and unstrict draw calls. This led to many recreation events for the draw buffers. This patch separates the unstrict draw buffer from the strict draw buffer. This improves performance on the Windows Intel 10th gen platform. On a reference platform I got 10 FPS before the patch; after this patch it became 34 FPS. Note that the same test on a low-end GPU can normally reach 60 FPS, so this does not solve all the bottlenecks yet. Reviewed By: Clément Foucault Differential Revision: https://developer.blender.org/D7421
author: Jeroen Bakker <jbakker> 2020-04-16 09:43:32 +0300
committer: Jeroen Bakker <j.bakker@atmind.nl> 2020-04-16 09:46:31 +0300
commit: 5d9d246851082c785104388399b3766eff7d2228 (patch)
tree: e5444e6b8f664dd0e38697c8718aa0570c54e65a /source/blender/gpu
parent: 502b8e0f08cfddc1f1c1793f14584c02e7eea39d (diff)
1 files changed, 52 insertions, 31 deletions
diff --git a/source/blender/gpu/intern/gpu_immediate.c b/source/blender/gpu/intern/gpu_immediate.c
index b30fbd66670..72e17dce776 100644
--- a/source/blender/gpu/intern/gpu_immediate.c
+++ b/source/blender/gpu/intern/gpu_immediate.c
@@ -43,6 +43,14 @@
 extern void GPU_matrix_bind(const GPUShaderInterface *);
 extern bool GPU_matrix_dirty_get(void);
 
+typedef struct ImmediateDrawBuffer {
+  GLuint vbo_id;
+  GLubyte *buffer_data;
+  uint buffer_offset;
+  uint buffer_size;
+  uint default_size;
+} ImmediateDrawBuffer;
+
 typedef struct {
   /* TODO: organize this struct by frequency of change (run-time) */
 
@@ -50,14 +58,14 @@ typedef struct {
   GPUContext *context;
 
   /* current draw call */
-  GLubyte *buffer_data;
-  uint buffer_offset;
-  uint buffer_bytes_mapped;
-  uint vertex_len;
   bool strict_vertex_len;
+  uint vertex_len;
+  uint buffer_bytes_mapped;
+  ImmediateDrawBuffer *active_buffer;
   GPUPrimType prim_type;
-
   GPUVertFormat vertex_format;
+  ImmediateDrawBuffer draw_buffer;
+  ImmediateDrawBuffer draw_buffer_strict;
 
   /* current vertex */
   uint vertex_idx;
@@ -65,7 +73,6 @@ typedef struct {
   uint16_t
       unassigned_attr_bits; /* which attributes of current vertex have not been given values? */
 
-  GLuint vbo_id;
   GLuint vao_id;
 
   GLuint bound_program;
@@ -76,7 +83,6 @@ typedef struct {
 
 /* size of internal buffer */
 #define DEFAULT_INTERNAL_BUFFER_SIZE (4 * 1024 * 1024)
-static uint imm_buffer_size = DEFAULT_INTERNAL_BUFFER_SIZE;
 
 static bool initialized = false;
 static Immediate imm;
@@ -88,9 +94,16 @@ void immInit(void)
 #endif
   memset(&imm, 0, sizeof(Immediate));
 
-  imm.vbo_id = GPU_buf_alloc();
-  glBindBuffer(GL_ARRAY_BUFFER, imm.vbo_id);
-  glBufferData(GL_ARRAY_BUFFER, imm_buffer_size, NULL, GL_DYNAMIC_DRAW);
+  imm.draw_buffer.vbo_id = GPU_buf_alloc();
+  imm.draw_buffer.buffer_size = DEFAULT_INTERNAL_BUFFER_SIZE;
+  imm.draw_buffer.default_size = DEFAULT_INTERNAL_BUFFER_SIZE;
+  glBindBuffer(GL_ARRAY_BUFFER, imm.draw_buffer.vbo_id);
+  glBufferData(GL_ARRAY_BUFFER, imm.draw_buffer.buffer_size, NULL, GL_DYNAMIC_DRAW);
+  imm.draw_buffer_strict.vbo_id = GPU_buf_alloc();
+  imm.draw_buffer_strict.buffer_size = 0;
+  imm.draw_buffer_strict.default_size = 0;
+  glBindBuffer(GL_ARRAY_BUFFER, imm.draw_buffer_strict.vbo_id);
+  glBufferData(GL_ARRAY_BUFFER, imm.draw_buffer_strict.buffer_size, NULL, GL_DYNAMIC_DRAW);
 
   imm.prim_type = GPU_PRIM_NONE;
   imm.strict_vertex_len = true;
@@ -124,7 +137,8 @@ void immDeactivate(void)
 
 void immDestroy(void)
 {
-  GPU_buf_free(imm.vbo_id);
+  GPU_buf_free(imm.draw_buffer.vbo_id);
+  GPU_buf_free(imm.draw_buffer_strict.vbo_id);
   initialized = false;
 }
 
@@ -213,6 +227,7 @@ void immBegin(GPUPrimType prim_type, uint vertex_len)
   assert(initialized);
   assert(imm.prim_type == GPU_PRIM_NONE); /* make sure we haven't already begun */
   assert(vertex_count_makes_sense_for_primitive(vertex_len, prim_type));
+  assert(imm.active_buffer == NULL);
 #endif
   imm.prim_type = prim_type;
   imm.vertex_len = vertex_len;
@@ -221,54 +236,58 @@ void immBegin(GPUPrimType prim_type, uint vertex_len)
 
   /* how many bytes do we need for this draw call? */
   const uint bytes_needed = vertex_buffer_size(&imm.vertex_format, vertex_len);
+  ImmediateDrawBuffer *active_buffer = imm.strict_vertex_len ? &imm.draw_buffer_strict :
+                                                               &imm.draw_buffer;
+  imm.active_buffer = active_buffer;
 
-  glBindBuffer(GL_ARRAY_BUFFER, imm.vbo_id);
+  glBindBuffer(GL_ARRAY_BUFFER, active_buffer->vbo_id);
 
   /* does the current buffer have enough room? */
-  const uint available_bytes = imm_buffer_size - imm.buffer_offset;
+  const uint available_bytes = active_buffer->buffer_size - active_buffer->buffer_offset;
 
   bool recreate_buffer = false;
-  if (bytes_needed > imm_buffer_size) {
+  if (bytes_needed > active_buffer->buffer_size) {
     /* expand the internal buffer */
-    imm_buffer_size = bytes_needed;
+    active_buffer->buffer_size = bytes_needed;
     recreate_buffer = true;
   }
-  else if (bytes_needed < DEFAULT_INTERNAL_BUFFER_SIZE &&
-           imm_buffer_size > DEFAULT_INTERNAL_BUFFER_SIZE) {
+  else if (bytes_needed < active_buffer->default_size &&
+           active_buffer->buffer_size > active_buffer->default_size) {
     /* shrink the internal buffer */
-    imm_buffer_size = DEFAULT_INTERNAL_BUFFER_SIZE;
+    active_buffer->buffer_size = active_buffer->default_size;
     recreate_buffer = true;
   }
 
   /* ensure vertex data is aligned */
   /* Might waste a little space, but it's safe. */
-  const uint pre_padding = padding(imm.buffer_offset, imm.vertex_format.stride);
+  const uint pre_padding = padding(active_buffer->buffer_offset, imm.vertex_format.stride);
 
   if (!recreate_buffer && ((bytes_needed + pre_padding) <= available_bytes)) {
-    imm.buffer_offset += pre_padding;
+    active_buffer->buffer_offset += pre_padding;
   }
   else {
     /* orphan this buffer & start with a fresh one */
     /* this method works on all platforms, old & new */
-    glBufferData(GL_ARRAY_BUFFER, imm_buffer_size, NULL, GL_DYNAMIC_DRAW);
+    glBufferData(GL_ARRAY_BUFFER, active_buffer->buffer_size, NULL, GL_DYNAMIC_DRAW);
 
-    imm.buffer_offset = 0;
+    active_buffer->buffer_offset = 0;
   }
 
   /*  printf("mapping %u to %u\n", imm.buffer_offset, imm.buffer_offset + bytes_needed - 1); */
 
-  imm.buffer_data = glMapBufferRange(GL_ARRAY_BUFFER,
-                                     imm.buffer_offset,
-                                     bytes_needed,
-                                     GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT |
-                                         (imm.strict_vertex_len ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT));
+  active_buffer->buffer_data = glMapBufferRange(
+      GL_ARRAY_BUFFER,
+      active_buffer->buffer_offset,
+      bytes_needed,
+      GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT |
+          (imm.strict_vertex_len ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT));
 
 #if TRUST_NO_ONE
-  assert(imm.buffer_data != NULL);
+  assert(active_buffer->buffer_data != NULL);
 #endif
 
   imm.buffer_bytes_mapped = bytes_needed;
-  imm.vertex_data = imm.buffer_data;
+  imm.vertex_data = active_buffer->buffer_data;
 }
 
 void immBeginAtMost(GPUPrimType prim_type, uint vertex_len)
@@ -338,7 +357,7 @@ static void immDrawSetup(void)
   for (uint a_idx = 0; a_idx < imm.vertex_format.attr_len; a_idx++) {
     const GPUVertAttr *a = &imm.vertex_format.attrs[a_idx];
 
-    const uint offset = imm.buffer_offset + a->offset;
+    const uint offset = imm.active_buffer->buffer_offset + a->offset;
     const GLvoid *pointer = (const GLubyte *)0 + offset;
 
     const uint loc = read_attr_location(&imm.attr_binding, a_idx);
@@ -365,6 +384,7 @@ void immEnd(void)
 {
 #if TRUST_NO_ONE
   assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */
+  assert(imm.active_buffer);
 #endif
 
   uint buffer_bytes_used;
@@ -421,12 +441,13 @@ void immEnd(void)
     // glBindBuffer(GL_ARRAY_BUFFER, 0);
     // glBindVertexArray(0);
     /* prep for next immBegin */
-    imm.buffer_offset += buffer_bytes_used;
+    imm.active_buffer->buffer_offset += buffer_bytes_used;
   }
 
   /* prep for next immBegin */
   imm.prim_type = GPU_PRIM_NONE;
   imm.strict_vertex_len = true;
+  imm.active_buffer = NULL;
 }
 
 static void setAttrValueBit(uint attr_id)
author	Jeroen Bakker <jbakker>	2020-04-16 09:43:32 +0300
committer	Jeroen Bakker <j.bakker@atmind.nl>	2020-04-16 09:46:31 +0300
commit	5d9d246851082c785104388399b3766eff7d2228 (patch)
tree	e5444e6b8f664dd0e38697c8718aa0570c54e65a /source/blender/gpu
parent	502b8e0f08cfddc1f1c1793f14584c02e7eea39d (diff)