git.blender.org/blender.git

author     Clément Foucault <foucault.clem@gmail.com>   2020-08-11 02:31:40 +0300
committer  Clément Foucault <foucault.clem@gmail.com>   2020-08-13 15:20:24 +0300
commit     efc97b3919ea4fd46b9d2e931ca3fea27e7ea31c (patch)
tree       125e4239c90902a967487f44d5aca1696d49be68 /source/blender/gpu
parent     47bfb0f7ad2f70017585fe55a68e49ae09f1150c (diff)
GPUBatch: GL backend isolation
This changes the drawing paradigm a bit. The VAO configuration is now done JIT-style and depends on the context's active shader. This gives implementations more flexibility to optimize at a lower level. The VAO cache is now its own class, to isolate the concept; it is this class that the GLContext references for ownership of the contained VAO ids.
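
In client terms the new flow looks roughly like this — a minimal sketch assuming a valid GPU context and an already-built batch and shader (the "color" uniform and the helper name are illustrative):

#include "GPU_batch.h"
#include "GPU_shader.h"

/* Sketch only: no VAO is touched here. GPU_batch_set_shader() just binds the
 * shader and records it for the uniform setters; the VAO matching the bound
 * shader's interface is looked up or created by GLBatch::bind() at draw time. */
static void draw_batch_with(GPUBatch *batch, GPUShader *shader)
{
  GPU_batch_set_shader(batch, shader);
  GPU_batch_uniform_4f(batch, "color", 1.0f, 0.5f, 0.0f, 1.0f);
  GPU_batch_draw(batch);
}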
Diffstat (limited to 'source/blender/gpu')
-rw-r--r--  source/blender/gpu/CMakeLists.txt                  |    2
-rw-r--r--  source/blender/gpu/GPU_batch.h                     |   32
-rw-r--r--  source/blender/gpu/GPU_shader_interface.h          |    6
-rw-r--r--  source/blender/gpu/intern/gpu_batch.cc             |  433
-rw-r--r--  source/blender/gpu/intern/gpu_batch_private.hh     |    2
-rw-r--r--  source/blender/gpu/intern/gpu_context.cc           |   12
-rw-r--r--  source/blender/gpu/intern/gpu_context_private.hh   |    3
-rw-r--r--  source/blender/gpu/intern/gpu_shader.cc            |    4
-rw-r--r--  source/blender/gpu/intern/gpu_shader_interface.cc  |   22
-rw-r--r--  source/blender/gpu/opengl/gl_batch.cc              |  321
-rw-r--r--  source/blender/gpu/opengl/gl_batch.hh              |   49
-rw-r--r--  source/blender/gpu/opengl/gl_context.cc            |   15
-rw-r--r--  source/blender/gpu/opengl/gl_context.hh            |   13
-rw-r--r--  source/blender/gpu/opengl/gl_vertex_array.cc       |  158
-rw-r--r--  source/blender/gpu/opengl/gl_vertex_array.hh       |   44
15 files changed, 623 insertions, 493 deletions
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt
index fcbe53e599a..906ae31fbc7 100644
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -92,6 +92,7 @@ set(SRC
opengl/gl_batch.cc
opengl/gl_context.cc
opengl/gl_drawlist.cc
+ opengl/gl_vertex_array.cc
GPU_attr_binding.h
GPU_batch.h
@@ -143,6 +144,7 @@ set(SRC
opengl/gl_batch.hh
opengl/gl_context.hh
opengl/gl_drawlist.hh
+ opengl/gl_vertex_array.hh
)
set(LIB
diff --git a/source/blender/gpu/GPU_batch.h b/source/blender/gpu/GPU_batch.h
index d71d4d5435f..33d539e3a9e 100644
--- a/source/blender/gpu/GPU_batch.h
+++ b/source/blender/gpu/GPU_batch.h
@@ -30,7 +30,6 @@
#include "GPU_element.h"
#include "GPU_shader.h"
-#include "GPU_shader_interface.h"
#include "GPU_vertex_buffer.h"
#define GPU_BATCH_VBO_MAX_LEN 6
@@ -59,9 +58,7 @@ typedef enum eGPUBatchFlag {
/** Batch is initialized but it's VBOs are still being populated. (optional) */
GPU_BATCH_BUILDING = (1 << 16),
/** Cached data need to be rebuild. (VAO, PSO, ...) */
- GPU_BATCH_DIRTY_BINDINGS = (1 << 17),
- GPU_BATCH_DIRTY_INTERFACE = (1 << 18),
- GPU_BATCH_DIRTY = (GPU_BATCH_DIRTY_BINDINGS | GPU_BATCH_DIRTY_INTERFACE),
+ GPU_BATCH_DIRTY = (1 << 17),
} eGPUBatchFlag;
#define GPU_BATCH_OWNS_NONE GPU_BATCH_INVALID
@@ -78,6 +75,7 @@ extern "C" {
/**
* IMPORTANT: Do not allocate manually as the real struct is bigger (i.e: GLBatch). This is only
* the common and "public" part of the struct. Use the provided allocator.
+ * TODO(fclem) Make the content of this struct hidden and expose getters/setters.
**/
typedef struct GPUBatch {
/** verts[0] is required, others can be NULL */
@@ -90,32 +88,8 @@ typedef struct GPUBatch {
eGPUBatchFlag flag;
/** Type of geometry to draw. */
GPUPrimType prim_type;
-
- /** Current assigned shader. */
+ /** Current assigned shader. DEPRECATED. Here only for uniform binding. */
struct GPUShader *shader;
- /** Last context used to draw this batch. */
- struct GPUContext *context;
-
- struct GPUShaderInterface *interface;
- GLuint vao_id;
-
- /* Vao management: remembers all geometry state (vertex attribute bindings & element buffer)
- * for each shader interface. Start with a static number of vaos and fallback to dynamic count
- * if necessary. Once a batch goes dynamic it does not go back. */
- bool is_dynamic_vao_count;
- union {
- /** Static handle count */
- struct {
- const struct GPUShaderInterface *interfaces[GPU_BATCH_VAO_STATIC_LEN];
- uint32_t vao_ids[GPU_BATCH_VAO_STATIC_LEN];
- } static_vaos;
- /** Dynamic handle count */
- struct {
- uint count;
- const struct GPUShaderInterface **interfaces;
- uint32_t *vao_ids;
- } dynamic_vaos;
- };
} GPUBatch;
GPUBatch *GPU_batch_calloc(void);
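
With the two dirty bits collapsed into one, any buffer change invalidates the whole VAO cache on the next bind. A small sketch of the resulting contract (parameter names are illustrative):

#include "GPU_batch.h"

/* Sketch: swapping the index buffer sets GPU_BATCH_DIRTY; the GL backend
 * notices the flag in GLBatch::bind(), clears its GLVaoCache, and fresh VAOs
 * are built lazily on the next draw. */
static void swap_indices_and_draw(GPUBatch *batch, GPUIndexBuf *new_elem)
{
  GPU_batch_elembuf_set(batch, new_elem, true); /* true: batch owns new_elem */
  GPU_batch_draw(batch);
}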
diff --git a/source/blender/gpu/GPU_shader_interface.h b/source/blender/gpu/GPU_shader_interface.h
index 8aba1236b65..47e4e432d66 100644
--- a/source/blender/gpu/GPU_shader_interface.h
+++ b/source/blender/gpu/GPU_shader_interface.h
@@ -80,7 +80,7 @@ typedef struct GPUShaderInterface {
/** Buffer containing all inputs names separated by '\0'. */
char *name_buffer;
/** Reference to GPUBatches using this interface */
- struct GPUBatch **batches;
+ void **batches;
uint batches_len;
/** Input counts. */
uint attribute_len;
@@ -109,8 +109,8 @@ const GPUShaderInput *GPU_shaderinterface_ubo(const GPUShaderInterface *, const
const GPUShaderInput *GPU_shaderinterface_attr(const GPUShaderInterface *, const char *name);
/* keep track of batches using this interface */
-void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *, struct GPUBatch *);
-void GPU_shaderinterface_remove_batch_ref(GPUShaderInterface *, struct GPUBatch *);
+void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *interface, void *cache);
+void GPU_shaderinterface_remove_batch_ref(GPUShaderInterface *interface, void *cache);
#ifdef __cplusplus
}
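
The switch to void * is type erasure: the interface no longer needs the GPUBatch type, only opaque handles to the VAO caches it must invalidate on destruction. Both this registry and GLVaoCache::insert() share the same find-first-free/grow-by-chunk scheme, sketched standalone below (illustrative, not Blender code):

#include <cstdlib>
#include <cstring>

struct RefList {
  void **slots = nullptr;
  int len = 0;

  void add(void *ref, int chunk = 4) /* chunk mirrors GPU_SHADERINTERFACE_REF_ALLOC_COUNT */
  {
    for (int i = 0; i < len; i++) {
      if (slots[i] == nullptr) {
        slots[i] = ref; /* reuse the first free slot */
        return;
      }
    }
    /* No free slot: grow and zero the new tail, as MEM_recallocN() does. */
    slots = static_cast<void **>(realloc(slots, sizeof(void *) * (len + chunk)));
    memset(slots + len, 0, sizeof(void *) * chunk);
    slots[len] = ref;
    len += chunk;
  }
};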
diff --git a/source/blender/gpu/intern/gpu_batch.cc b/source/blender/gpu/intern/gpu_batch.cc
index 27196413b20..995e1afb236 100644
--- a/source/blender/gpu/intern/gpu_batch.cc
+++ b/source/blender/gpu/intern/gpu_batch.cc
@@ -26,6 +26,8 @@
#include "MEM_guardedalloc.h"
+#include "BLI_math_base.h"
+
#include "GPU_batch.h"
#include "GPU_batch_presets.h"
#include "GPU_extensions.h"
@@ -46,49 +48,15 @@
using namespace blender::gpu;
-static GLuint g_default_attr_vbo = 0;
-
-static void gpu_batch_bind(GPUBatch *batch);
-static void batch_update_program_bindings(GPUBatch *batch, uint i_first);
-
-void GPU_batch_vao_cache_clear(GPUBatch *batch)
+void GPU_batch_vao_cache_clear(GPUBatch *UNUSED(batch))
{
- if (batch->context == NULL) {
- return;
- }
- if (batch->is_dynamic_vao_count) {
- for (int i = 0; i < batch->dynamic_vaos.count; i++) {
- if (batch->dynamic_vaos.vao_ids[i]) {
- GPU_vao_free(batch->dynamic_vaos.vao_ids[i], batch->context);
- }
- if (batch->dynamic_vaos.interfaces[i]) {
- GPU_shaderinterface_remove_batch_ref(
- (GPUShaderInterface *)batch->dynamic_vaos.interfaces[i], batch);
- }
- }
- MEM_freeN((void *)batch->dynamic_vaos.interfaces);
- MEM_freeN(batch->dynamic_vaos.vao_ids);
- }
- else {
- for (int i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) {
- if (batch->static_vaos.vao_ids[i]) {
- GPU_vao_free(batch->static_vaos.vao_ids[i], batch->context);
- }
- if (batch->static_vaos.interfaces[i]) {
- GPU_shaderinterface_remove_batch_ref(
- (GPUShaderInterface *)batch->static_vaos.interfaces[i], batch);
- }
- }
- }
- batch->is_dynamic_vao_count = false;
- for (int i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) {
- batch->static_vaos.vao_ids[i] = 0;
- batch->static_vaos.interfaces[i] = NULL;
- }
- gpu_context_remove_batch(batch->context, batch);
- batch->context = NULL;
+ /* TODO remove */
}
+/* -------------------------------------------------------------------- */
+/** \name Creation & Deletion
+ * \{ */
+
GPUBatch *GPU_batch_calloc(void)
{
GPUBatch *batch = GPUBackend::get()->batch_alloc();
@@ -126,7 +94,6 @@ void GPU_batch_init_ex(GPUBatch *batch,
batch->elem = elem;
batch->prim_type = prim_type;
batch->flag = owns_flag | GPU_BATCH_INIT | GPU_BATCH_DIRTY;
- batch->context = NULL;
batch->shader = NULL;
}
@@ -144,7 +111,6 @@ void GPU_batch_copy(GPUBatch *batch_dst, GPUBatch *batch_src)
void GPU_batch_clear(GPUBatch *batch)
{
- GPU_batch_vao_cache_clear(batch);
if (batch->flag & GPU_BATCH_OWNS_INDEX) {
GPU_indexbuf_discard(batch->elem);
}
@@ -172,11 +138,17 @@ void GPU_batch_discard(GPUBatch *batch)
delete static_cast<Batch *>(batch);
}
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Buffers Management
+ * \{ */
+
/* NOTE: Override ONLY the first instance vbo (and free them if owned). */
void GPU_batch_instbuf_set(GPUBatch *batch, GPUVertBuf *inst, bool own_vbo)
{
BLI_assert(inst);
- batch->flag |= GPU_BATCH_DIRTY_BINDINGS;
+ batch->flag |= GPU_BATCH_DIRTY;
if (batch->inst[0] && (batch->flag & GPU_BATCH_OWNS_INST_VBO)) {
GPU_vertbuf_discard(batch->inst[0]);
@@ -190,7 +162,7 @@ void GPU_batch_instbuf_set(GPUBatch *batch, GPUVertBuf *inst, bool own_vbo)
void GPU_batch_elembuf_set(GPUBatch *batch, GPUIndexBuf *elem, bool own_ibo)
{
BLI_assert(elem);
- batch->flag |= GPU_BATCH_DIRTY_BINDINGS;
+ batch->flag |= GPU_BATCH_DIRTY;
if (batch->elem && (batch->flag & GPU_BATCH_OWNS_INDEX)) {
GPU_indexbuf_discard(batch->elem);
@@ -203,7 +175,7 @@ void GPU_batch_elembuf_set(GPUBatch *batch, GPUIndexBuf *elem, bool own_ibo)
int GPU_batch_instbuf_add_ex(GPUBatch *batch, GPUVertBuf *insts, bool own_vbo)
{
BLI_assert(insts);
- batch->flag |= GPU_BATCH_DIRTY_BINDINGS;
+ batch->flag |= GPU_BATCH_DIRTY;
for (uint v = 0; v < GPU_BATCH_INST_VBO_MAX_LEN; v++) {
if (batch->inst[v] == NULL) {
@@ -228,7 +200,7 @@ int GPU_batch_instbuf_add_ex(GPUBatch *batch, GPUVertBuf *insts, bool own_vbo)
int GPU_batch_vertbuf_add_ex(GPUBatch *batch, GPUVertBuf *verts, bool own_vbo)
{
BLI_assert(verts);
- batch->flag |= GPU_BATCH_DIRTY_BINDINGS;
+ batch->flag |= GPU_BATCH_DIRTY;
for (uint v = 0; v < GPU_BATCH_VBO_MAX_LEN; v++) {
if (batch->verts[v] == NULL) {
@@ -246,254 +218,20 @@ int GPU_batch_vertbuf_add_ex(GPUBatch *batch, GPUVertBuf *verts, bool own_vbo)
return -1;
}
-static GLuint batch_vao_get(GPUBatch *batch)
-{
- /* Search through cache */
- if (batch->is_dynamic_vao_count) {
- for (int i = 0; i < batch->dynamic_vaos.count; i++) {
- if (batch->dynamic_vaos.interfaces[i] == batch->interface) {
- return batch->dynamic_vaos.vao_ids[i];
- }
- }
- }
- else {
- for (int i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) {
- if (batch->static_vaos.interfaces[i] == batch->interface) {
- return batch->static_vaos.vao_ids[i];
- }
- }
- }
-
- /* Set context of this batch.
- * It will be bound to it until GPU_batch_vao_cache_clear is called.
- * Until then it can only be drawn with this context. */
- if (batch->context == NULL) {
- batch->context = GPU_context_active_get();
- gpu_context_add_batch(batch->context, batch);
- }
-#if TRUST_NO_ONE
- else {
- /* Make sure you are not trying to draw this batch in another context. */
- assert(batch->context == GPU_context_active_get());
- }
-#endif
-
- /* Cache miss, time to add a new entry! */
- GLuint new_vao = 0;
- if (!batch->is_dynamic_vao_count) {
- int i; /* find first unused slot */
- for (i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) {
- if (batch->static_vaos.vao_ids[i] == 0) {
- break;
- }
- }
-
- if (i < GPU_BATCH_VAO_STATIC_LEN) {
- batch->static_vaos.interfaces[i] = batch->interface;
- batch->static_vaos.vao_ids[i] = new_vao = GPU_vao_alloc();
- }
- else {
- /* Not enough place switch to dynamic. */
- batch->is_dynamic_vao_count = true;
- /* Erase previous entries, they will be added back if drawn again. */
- for (int j = 0; j < GPU_BATCH_VAO_STATIC_LEN; j++) {
- GPU_shaderinterface_remove_batch_ref(
- (GPUShaderInterface *)batch->static_vaos.interfaces[j], batch);
- GPU_vao_free(batch->static_vaos.vao_ids[j], batch->context);
- }
- /* Init dynamic arrays and let the branch below set the values. */
- batch->dynamic_vaos.count = GPU_BATCH_VAO_DYN_ALLOC_COUNT;
- batch->dynamic_vaos.interfaces = (const GPUShaderInterface **)MEM_callocN(
- batch->dynamic_vaos.count * sizeof(GPUShaderInterface *), "dyn vaos interfaces");
- batch->dynamic_vaos.vao_ids = (GLuint *)MEM_callocN(
- batch->dynamic_vaos.count * sizeof(GLuint), "dyn vaos ids");
- }
- }
-
- if (batch->is_dynamic_vao_count) {
- int i; /* find first unused slot */
- for (i = 0; i < batch->dynamic_vaos.count; i++) {
- if (batch->dynamic_vaos.vao_ids[i] == 0) {
- break;
- }
- }
-
- if (i == batch->dynamic_vaos.count) {
- /* Not enough place, realloc the array. */
- i = batch->dynamic_vaos.count;
- batch->dynamic_vaos.count += GPU_BATCH_VAO_DYN_ALLOC_COUNT;
- batch->dynamic_vaos.interfaces = (const GPUShaderInterface **)MEM_recallocN(
- (void *)batch->dynamic_vaos.interfaces,
- sizeof(GPUShaderInterface *) * batch->dynamic_vaos.count);
- batch->dynamic_vaos.vao_ids = (GLuint *)MEM_recallocN(
- batch->dynamic_vaos.vao_ids, sizeof(GLuint) * batch->dynamic_vaos.count);
- }
- batch->dynamic_vaos.interfaces[i] = batch->interface;
- batch->dynamic_vaos.vao_ids[i] = new_vao = GPU_vao_alloc();
- }
-
- GPU_shaderinterface_add_batch_ref((GPUShaderInterface *)batch->interface, batch);
-
-#if TRUST_NO_ONE
- assert(new_vao != 0);
-#endif
-
- /* We just got a fresh VAO we need to initialize it. */
- glBindVertexArray(new_vao);
- batch_update_program_bindings(batch, 0);
- glBindVertexArray(0);
+/** \} */
- return new_vao;
-}
+/* -------------------------------------------------------------------- */
+/** \name Uniform setters
+ *
+ * TODO(fclem) port this to GPUShader.
+ * \{ */
void GPU_batch_set_shader(GPUBatch *batch, GPUShader *shader)
{
- batch->interface = shader->interface;
batch->shader = shader;
- if (batch->flag & GPU_BATCH_DIRTY_BINDINGS) {
- GPU_batch_vao_cache_clear(batch);
- }
- batch->vao_id = batch_vao_get(batch);
GPU_shader_bind(batch->shader);
- GPU_matrix_bind(batch->shader->interface);
- GPU_shader_set_srgb_uniform(batch->shader->interface);
- gpu_batch_bind(batch);
-}
-
-void gpu_batch_remove_interface_ref(GPUBatch *batch, const GPUShaderInterface *interface)
-{
- if (batch->is_dynamic_vao_count) {
- for (int i = 0; i < batch->dynamic_vaos.count; i++) {
- if (batch->dynamic_vaos.interfaces[i] == interface) {
- GPU_vao_free(batch->dynamic_vaos.vao_ids[i], batch->context);
- batch->dynamic_vaos.vao_ids[i] = 0;
- batch->dynamic_vaos.interfaces[i] = NULL;
- break; /* cannot have duplicates */
- }
- }
- }
- else {
- int i;
- for (i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) {
- if (batch->static_vaos.interfaces[i] == interface) {
- GPU_vao_free(batch->static_vaos.vao_ids[i], batch->context);
- batch->static_vaos.vao_ids[i] = 0;
- batch->static_vaos.interfaces[i] = NULL;
- break; /* cannot have duplicates */
- }
- }
- }
-}
-
-static void create_bindings(GPUVertBuf *verts,
- const GPUShaderInterface *interface,
- uint16_t *attr_mask,
- uint v_first,
- const bool use_instancing)
-{
- const GPUVertFormat *format = &verts->format;
-
- const uint attr_len = format->attr_len;
- uint stride = format->stride;
- uint offset = 0;
-
- GPU_vertbuf_use(verts);
-
- for (uint a_idx = 0; a_idx < attr_len; a_idx++) {
- const GPUVertAttr *a = &format->attrs[a_idx];
-
- if (format->deinterleaved) {
- offset += ((a_idx == 0) ? 0 : format->attrs[a_idx - 1].sz) * verts->vertex_len;
- stride = a->sz;
- }
- else {
- offset = a->offset;
- }
-
- const GLvoid *pointer = (const GLubyte *)0 + offset + v_first * stride;
- const GLenum type = convert_comp_type_to_gl(static_cast<GPUVertCompType>(a->comp_type));
-
- for (uint n_idx = 0; n_idx < a->name_len; n_idx++) {
- const char *name = GPU_vertformat_attr_name_get(format, a, n_idx);
- const GPUShaderInput *input = GPU_shaderinterface_attr(interface, name);
-
- if (input == NULL) {
- continue;
- }
-
- *attr_mask &= ~(1 << input->location);
-
- if (a->comp_len == 16 || a->comp_len == 12 || a->comp_len == 8) {
- BLI_assert(a->fetch_mode == GPU_FETCH_FLOAT);
- BLI_assert(a->comp_type == GPU_COMP_F32);
- for (int i = 0; i < a->comp_len / 4; i++) {
- glEnableVertexAttribArray(input->location + i);
- glVertexAttribDivisor(input->location + i, (use_instancing) ? 1 : 0);
- glVertexAttribPointer(
- input->location + i, 4, type, GL_FALSE, stride, (const GLubyte *)pointer + i * 16);
- }
- }
- else {
- glEnableVertexAttribArray(input->location);
- glVertexAttribDivisor(input->location, (use_instancing) ? 1 : 0);
-
- switch (a->fetch_mode) {
- case GPU_FETCH_FLOAT:
- case GPU_FETCH_INT_TO_FLOAT:
- glVertexAttribPointer(input->location, a->comp_len, type, GL_FALSE, stride, pointer);
- break;
- case GPU_FETCH_INT_TO_FLOAT_UNIT:
- glVertexAttribPointer(input->location, a->comp_len, type, GL_TRUE, stride, pointer);
- break;
- case GPU_FETCH_INT:
- glVertexAttribIPointer(input->location, a->comp_len, type, stride, pointer);
- break;
- }
- }
- }
- }
-}
-
-static void batch_update_program_bindings(GPUBatch *batch, uint i_first)
-{
- uint16_t attr_mask = batch->interface->enabled_attr_mask;
-
- /* Reverse order so first VBO'S have more prevalence (in term of attribute override). */
- for (int v = GPU_BATCH_VBO_MAX_LEN - 1; v > -1; v--) {
- if (batch->verts[v] != NULL) {
- create_bindings(batch->verts[v], batch->interface, &attr_mask, 0, false);
- }
- }
-
- for (int v = GPU_BATCH_INST_VBO_MAX_LEN - 1; v > -1; v--) {
- if (batch->inst[v]) {
- create_bindings(batch->inst[v], batch->interface, &attr_mask, i_first, true);
- }
- }
-
- if (attr_mask != 0 && GLEW_ARB_vertex_attrib_binding) {
- for (uint16_t mask = 1, a = 0; a < 16; a++, mask <<= 1) {
- if (attr_mask & mask) {
- /* This replaces glVertexAttrib4f(a, 0.0f, 0.0f, 0.0f, 1.0f); with a more modern style.
- * Fix issues for some drivers (see T75069). */
- glBindVertexBuffer(a, g_default_attr_vbo, (intptr_t)0, (intptr_t)0);
-
- glEnableVertexAttribArray(a);
- glVertexAttribFormat(a, 4, GL_FLOAT, GL_FALSE, 0);
- glVertexAttribBinding(a, a);
- }
- }
- }
-
- if (batch->elem) {
- GPU_indexbuf_use(batch->elem);
- }
}
-/* -------------------------------------------------------------------- */
-/** \name Uniform setters
- * \{ */
-
#define GET_UNIFORM \
const GPUShaderInput *uniform = GPU_shaderinterface_uniform(batch->shader->interface, name); \
BLI_assert(uniform);
@@ -581,31 +319,6 @@ void GPU_batch_uniform_mat4(GPUBatch *batch, const char *name, const float data[
/** \name Drawing / Drawcall functions
* \{ */
-static void *elem_offset(const GPUIndexBuf *el, int v_first)
-{
-#if GPU_TRACK_INDEX_RANGE
- if (el->index_type == GPU_INDEX_U16) {
- return (GLushort *)0 + v_first + el->index_start;
- }
-#endif
- return (GLuint *)0 + v_first + el->index_start;
-}
-
-/* Use when drawing with GPU_batch_draw_advanced */
-static void gpu_batch_bind(GPUBatch *batch)
-{
- glBindVertexArray(batch->vao_id);
-
-#if GPU_TRACK_INDEX_RANGE
- /* Can be removed if GL 4.3 is required. */
- if (!GLEW_ARB_ES3_compatibility && batch->elem != NULL) {
- GLuint restart_index = (batch->elem->index_type == GPU_INDEX_U16) ? (GLuint)0xFFFF :
- (GLuint)0xFFFFFFFF;
- glPrimitiveRestartIndex(restart_index);
- }
-#endif
-}
-
void GPU_batch_draw(GPUBatch *batch)
{
GPU_shader_bind(batch->shader);
@@ -630,18 +343,9 @@ void GPU_batch_draw_instanced(GPUBatch *batch, int i_count)
GPU_shader_unbind();
}
-#if GPU_TRACK_INDEX_RANGE
-# define BASE_INDEX(el) ((el)->base_index)
-# define INDEX_TYPE(el) ((el)->gl_index_type)
-#else
-# define BASE_INDEX(el) 0
-# define INDEX_TYPE(el) GL_UNSIGNED_INT
-#endif
-
void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_first, int i_count)
{
BLI_assert(GPU_context_active_get()->shader != NULL);
- /* TODO could assert that VAO is bound. */
if (v_count == 0) {
v_count = (batch->elem) ? batch->elem->index_len : batch->verts[0]->vertex_len;
@@ -649,8 +353,8 @@ void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_fi
if (i_count == 0) {
i_count = (batch->inst[0]) ? batch->inst[0]->vertex_len : 1;
/* Meh. This is to be able to use different numbers of verts in instance vbos. */
- if (batch->inst[1] && i_count > batch->inst[1]->vertex_len) {
- i_count = batch->inst[1]->vertex_len;
+ if (batch->inst[1] != NULL) {
+ i_count = min_ii(i_count, batch->inst[1]->vertex_len);
}
}
@@ -659,78 +363,7 @@ void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_fi
return;
}
- /* Verify there is enough data do draw. */
- /* TODO(fclem) Nice to have but this is invalid when using procedural draw-calls.
- * The right assert would be to check if there is an enabled attribute from each VBO
- * and check their length. */
- // BLI_assert(i_first + i_count <= (batch->inst ? batch->inst->vertex_len : INT_MAX));
- // BLI_assert(v_first + v_count <=
- // (batch->elem ? batch->elem->index_len : batch->verts[0]->vertex_len));
-
-#ifdef __APPLE__
- GLuint vao = 0;
-#endif
-
- if (!GPU_arb_base_instance_is_supported()) {
- if (i_first > 0) {
-#ifdef __APPLE__
- /**
- * There seems to be a nasty bug when drawing using the same VAO reconfiguring. (see T71147)
- * We just use a throwaway VAO for that. Note that this is likely to degrade performance.
- **/
- glGenVertexArrays(1, &vao);
- glBindVertexArray(vao);
-#else
- /* If using offset drawing with instancing, we must
- * use the default VAO and redo bindings. */
- glBindVertexArray(GPU_vao_default());
-#endif
- batch_update_program_bindings(batch, i_first);
- }
- else {
- /* Previous call could have bind the default vao
- * see above. */
- glBindVertexArray(batch->vao_id);
- }
- }
-
- GLenum gl_prim_type = convert_prim_type_to_gl(batch->prim_type);
-
- if (batch->elem) {
- const GPUIndexBuf *el = batch->elem;
- GLenum index_type = INDEX_TYPE(el);
- GLint base_index = BASE_INDEX(el);
- void *v_first_ofs = elem_offset(el, v_first);
-
- if (GPU_arb_base_instance_is_supported()) {
- glDrawElementsInstancedBaseVertexBaseInstance(
- gl_prim_type, v_count, index_type, v_first_ofs, i_count, base_index, i_first);
- }
- else {
- glDrawElementsInstancedBaseVertex(
- gl_prim_type, v_count, index_type, v_first_ofs, i_count, base_index);
- }
- }
- else {
-#ifdef __APPLE__
- glDisable(GL_PRIMITIVE_RESTART);
-#endif
- if (GPU_arb_base_instance_is_supported()) {
- glDrawArraysInstancedBaseInstance(gl_prim_type, v_first, v_count, i_count, i_first);
- }
- else {
- glDrawArraysInstanced(gl_prim_type, v_first, v_count, i_count);
- }
-#ifdef __APPLE__
- glEnable(GL_PRIMITIVE_RESTART);
-#endif
- }
-
-#ifdef __APPLE__
- if (vao != 0) {
- glDeleteVertexArrays(1, &vao);
- }
-#endif
+ static_cast<Batch *>(batch)->draw(v_first, v_count, i_first, i_count);
}
/* just draw some vertices and let shader place them where we want. */
@@ -782,23 +415,11 @@ void GPU_batch_program_set_imm_shader(GPUBatch *batch)
void gpu_batch_init(void)
{
- if (g_default_attr_vbo == 0) {
- g_default_attr_vbo = GPU_buf_alloc();
-
- float default_attrib_data[4] = {0.0f, 0.0f, 0.0f, 1.0f};
- glBindBuffer(GL_ARRAY_BUFFER, g_default_attr_vbo);
- glBufferData(GL_ARRAY_BUFFER, sizeof(float[4]), default_attrib_data, GL_STATIC_DRAW);
- glBindBuffer(GL_ARRAY_BUFFER, 0);
- }
-
gpu_batch_presets_init();
}
void gpu_batch_exit(void)
{
- GPU_buf_free(g_default_attr_vbo);
- g_default_attr_vbo = 0;
-
gpu_batch_presets_exit();
}
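
The draw entry point above now only resolves default counts and delegates; the GL specifics live behind a virtual call. A simplified sketch of the layering this relies on (stub names; the real classes live in gpu_batch_private.hh and gl_batch.hh):

struct GPUBatchSketch {
  /* public C view: vertex/index buffers, flag, prim_type, shader */
};

namespace blender {
namespace gpu {

class BatchSketch : public GPUBatchSketch {
 public:
  virtual ~BatchSketch() {}
  /* GLBatch overrides this with the code added in gl_batch.cc below. */
  virtual void draw(int v_first, int v_count, int i_first, int i_count) = 0;
};

}  // namespace gpu
}  // namespace blender

/* GPU_batch_draw_advanced() can then end with
 *   static_cast<Batch *>(batch)->draw(v_first, v_count, i_first, i_count);
 * which is safe because GPU_batch_calloc() always allocates the full subclass. */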
diff --git a/source/blender/gpu/intern/gpu_batch_private.hh b/source/blender/gpu/intern/gpu_batch_private.hh
index a9293a5b206..3a8044efc1d 100644
--- a/source/blender/gpu/intern/gpu_batch_private.hh
+++ b/source/blender/gpu/intern/gpu_batch_private.hh
@@ -43,5 +43,3 @@ class Batch : public GPUBatch {
} // namespace gpu
} // namespace blender
-
-void gpu_batch_remove_interface_ref(GPUBatch *batch, const GPUShaderInterface *interface);
diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc
index 66514deefbc..e04631910c1 100644
--- a/source/blender/gpu/intern/gpu_context.cc
+++ b/source/blender/gpu/intern/gpu_context.cc
@@ -188,18 +188,6 @@ void GPU_tex_free(GLuint tex_id)
* which are not shared across contexts. So we need to keep track of
* ownership. */
-void gpu_context_add_batch(GPUContext *ctx, GPUBatch *batch)
-{
- BLI_assert(ctx);
- static_cast<GLContext *>(ctx)->batch_register(batch);
-}
-
-void gpu_context_remove_batch(GPUContext *ctx, GPUBatch *batch)
-{
- BLI_assert(ctx);
- static_cast<GLContext *>(ctx)->batch_unregister(batch);
-}
-
void gpu_context_add_framebuffer(GPUContext *ctx, GPUFrameBuffer *fb)
{
#ifdef DEBUG
diff --git a/source/blender/gpu/intern/gpu_context_private.hh b/source/blender/gpu/intern/gpu_context_private.hh
index 9d3b5c3fc85..3f9fca16ff7 100644
--- a/source/blender/gpu/intern/gpu_context_private.hh
+++ b/source/blender/gpu/intern/gpu_context_private.hh
@@ -78,9 +78,6 @@ void GPU_tex_free(GLuint tex_id);
void GPU_vao_free(GLuint vao_id, GPUContext *ctx);
void GPU_fbo_free(GLuint fbo_id, GPUContext *ctx);
-void gpu_context_add_batch(GPUContext *ctx, GPUBatch *batch);
-void gpu_context_remove_batch(GPUContext *ctx, GPUBatch *batch);
-
void gpu_context_add_framebuffer(GPUContext *ctx, struct GPUFrameBuffer *fb);
void gpu_context_remove_framebuffer(GPUContext *ctx, struct GPUFrameBuffer *fb);
diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc
index 508d9112fc0..e4af25500af 100644
--- a/source/blender/gpu/intern/gpu_shader.cc
+++ b/source/blender/gpu/intern/gpu_shader.cc
@@ -575,6 +575,10 @@ void GPU_shader_bind(GPUShader *shader)
GPU_matrix_bind(shader->interface);
GPU_shader_set_srgb_uniform(shader->interface);
}
+
+ if (GPU_matrix_dirty_get()) {
+ GPU_matrix_bind(shader->interface);
+ }
}
void GPU_shader_unbind(void)
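
The added GPU_matrix_dirty_get() check covers the case where the same shader stays bound while the matrix stack changes between draws. A sketch assuming the GPU_matrix API (the uniform refresh happens inside GPU_shader_bind()):

#include "GPU_batch.h"
#include "GPU_matrix.h"

/* Sketch: the second GPU_batch_draw() re-enters GPU_shader_bind() with the
 * same shader; without the dirty check the translated matrix would never
 * reach the ModelViewProjection uniform. */
static void draw_twice(GPUBatch *batch, GPUShader *shader)
{
  GPU_batch_set_shader(batch, shader);
  GPU_batch_draw(batch);

  GPU_matrix_push();
  GPU_matrix_translate_3f(1.0f, 0.0f, 0.0f); /* marks the matrix state dirty */
  GPU_batch_set_shader(batch, shader);
  GPU_batch_draw(batch); /* dirty check triggers the matrix rebind */
  GPU_matrix_pop();
}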
diff --git a/source/blender/gpu/intern/gpu_shader_interface.cc b/source/blender/gpu/intern/gpu_shader_interface.cc
index d825dde7e10..ef90dde1877 100644
--- a/source/blender/gpu/intern/gpu_shader_interface.cc
+++ b/source/blender/gpu/intern/gpu_shader_interface.cc
@@ -35,6 +35,8 @@
#include "gpu_batch_private.hh"
#include "gpu_context_private.hh"
+#include "gl_batch.hh"
+
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
@@ -45,6 +47,8 @@
# include <stdio.h>
#endif
+using namespace blender::gpu;
+
static const char *BuiltinUniform_name(GPUUniformBuiltin u)
{
switch (u) {
@@ -400,8 +404,8 @@ GPUShaderInterface *GPU_shaderinterface_create(int32_t program)
/* Batches ref buffer */
shaderface->batches_len = GPU_SHADERINTERFACE_REF_ALLOC_COUNT;
- shaderface->batches = (GPUBatch **)MEM_callocN(shaderface->batches_len * sizeof(GPUBatch *),
- "GPUShaderInterface batches");
+ shaderface->batches = (void **)MEM_callocN(shaderface->batches_len * sizeof(GPUBatch *),
+ "GPUShaderInterface batches");
MEM_freeN(uniforms_from_blocks);
MEM_freeN(inputs_tmp);
@@ -468,7 +472,8 @@ void GPU_shaderinterface_discard(GPUShaderInterface *shaderface)
/* Remove this interface from all linked Batches vao cache. */
for (int i = 0; i < shaderface->batches_len; i++) {
if (shaderface->batches[i] != NULL) {
- gpu_batch_remove_interface_ref(shaderface->batches[i], shaderface);
+ /* XXX GL specific. To be removed during refactor. */
+ reinterpret_cast<GLVaoCache *>(shaderface->batches[i])->remove(shaderface);
}
}
MEM_freeN(shaderface->batches);
@@ -511,7 +516,7 @@ int32_t GPU_shaderinterface_block_builtin(const GPUShaderInterface *shaderface,
return shaderface->builtin_blocks[builtin];
}
-void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *shaderface, GPUBatch *batch)
+void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *shaderface, void *batch)
{
int i; /* find first unused slot */
for (i = 0; i < shaderface->batches_len; i++) {
@@ -523,13 +528,14 @@ void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *shaderface, GPUBatch
/* Not enough place, realloc the array. */
i = shaderface->batches_len;
shaderface->batches_len += GPU_SHADERINTERFACE_REF_ALLOC_COUNT;
- shaderface->batches = (GPUBatch **)MEM_recallocN(shaderface->batches,
- sizeof(GPUBatch *) * shaderface->batches_len);
+ shaderface->batches = (void **)MEM_recallocN(shaderface->batches,
+ sizeof(void *) * shaderface->batches_len);
}
- shaderface->batches[i] = batch;
+ /** XXX todo cleanup. */
+ shaderface->batches[i] = reinterpret_cast<void *>(batch);
}
-void GPU_shaderinterface_remove_batch_ref(GPUShaderInterface *shaderface, GPUBatch *batch)
+void GPU_shaderinterface_remove_batch_ref(GPUShaderInterface *shaderface, void *batch)
{
for (int i = 0; i < shaderface->batches_len; i++) {
if (shaderface->batches[i] == batch) {
diff --git a/source/blender/gpu/opengl/gl_batch.cc b/source/blender/gpu/opengl/gl_batch.cc
index 62d81ad9f5a..00e1a61f7cf 100644
--- a/source/blender/gpu/opengl/gl_batch.cc
+++ b/source/blender/gpu/opengl/gl_batch.cc
@@ -29,13 +29,254 @@
#include "glew-mx.h"
+#include "GPU_extensions.h"
+
#include "gpu_batch_private.hh"
#include "gpu_primitive_private.h"
+#include "gpu_shader_private.h"
#include "gl_batch.hh"
+#include "gl_context.hh"
+#include "gl_vertex_array.hh"
using namespace blender::gpu;
+/* -------------------------------------------------------------------- */
+/** \name Vao cache
+ *
+ * Each GLBatch has a small cache of VAO objects that are used to avoid VAO reconfiguration.
+ * TODO(fclem) Could be revisited to avoid so much cross references.
+ * \{ */
+
+GLVaoCache::GLVaoCache(void)
+{
+ init();
+}
+
+GLVaoCache::~GLVaoCache()
+{
+ this->clear();
+}
+
+void GLVaoCache::init(void)
+{
+ context_ = NULL;
+ interface_ = NULL;
+ is_dynamic_vao_count = false;
+ for (int i = 0; i < GPU_VAO_STATIC_LEN; i++) {
+ static_vaos.interfaces[i] = NULL;
+ static_vaos.vao_ids[i] = 0;
+ }
+ vao_base_instance_ = 0;
+ base_instance_ = 0;
+}
+
+/* Create a new VAO object and store it in the cache. */
+void GLVaoCache::insert(const GPUShaderInterface *interface, GLuint vao)
+{
+ /* Now insert into the cache. */
+ if (!is_dynamic_vao_count) {
+ int i; /* find first unused slot */
+ for (i = 0; i < GPU_VAO_STATIC_LEN; i++) {
+ if (static_vaos.vao_ids[i] == 0) {
+ break;
+ }
+ }
+
+ if (i < GPU_VAO_STATIC_LEN) {
+ static_vaos.interfaces[i] = interface;
+ static_vaos.vao_ids[i] = vao;
+ }
+ else {
+ /* Erase previous entries, they will be added back if drawn again. */
+ for (int i = 0; i < GPU_VAO_STATIC_LEN; i++) {
+ if (static_vaos.interfaces[i] != NULL) {
+ GPU_shaderinterface_remove_batch_ref(
+ const_cast<GPUShaderInterface *>(static_vaos.interfaces[i]), this);
+ context_->vao_free(static_vaos.vao_ids[i]);
+ }
+ }
+ /* Not enough space, switch to dynamic. */
+ is_dynamic_vao_count = true;
+ /* Init dynamic arrays and let the branch below set the values. */
+ dynamic_vaos.count = GPU_BATCH_VAO_DYN_ALLOC_COUNT;
+ dynamic_vaos.interfaces = (const GPUShaderInterface **)MEM_callocN(
+ dynamic_vaos.count * sizeof(GPUShaderInterface *), "dyn vaos interfaces");
+ dynamic_vaos.vao_ids = (GLuint *)MEM_callocN(dynamic_vaos.count * sizeof(GLuint),
+ "dyn vaos ids");
+ }
+ }
+
+ if (is_dynamic_vao_count) {
+ int i; /* find first unused slot */
+ for (i = 0; i < dynamic_vaos.count; i++) {
+ if (dynamic_vaos.vao_ids[i] == 0) {
+ break;
+ }
+ }
+
+ if (i == dynamic_vaos.count) {
+ /* Not enough space, realloc the array. */
+ i = dynamic_vaos.count;
+ dynamic_vaos.count += GPU_BATCH_VAO_DYN_ALLOC_COUNT;
+ dynamic_vaos.interfaces = (const GPUShaderInterface **)MEM_recallocN(
+ (void *)dynamic_vaos.interfaces, sizeof(GPUShaderInterface *) * dynamic_vaos.count);
+ dynamic_vaos.vao_ids = (GLuint *)MEM_recallocN(dynamic_vaos.vao_ids,
+ sizeof(GLuint) * dynamic_vaos.count);
+ }
+ dynamic_vaos.interfaces[i] = interface;
+ dynamic_vaos.vao_ids[i] = vao;
+ }
+
+ GPU_shaderinterface_add_batch_ref(const_cast<GPUShaderInterface *>(interface), this);
+}
+
+void GLVaoCache::remove(const GPUShaderInterface *interface)
+{
+ const int count = (is_dynamic_vao_count) ? dynamic_vaos.count : GPU_VAO_STATIC_LEN;
+ GLuint *vaos = (is_dynamic_vao_count) ? dynamic_vaos.vao_ids : static_vaos.vao_ids;
+ const GPUShaderInterface **interfaces = (is_dynamic_vao_count) ? dynamic_vaos.interfaces :
+ static_vaos.interfaces;
+ for (int i = 0; i < count; i++) {
+ if (interfaces[i] == interface) {
+ context_->vao_free(vaos[i]);
+ vaos[i] = 0;
+ interfaces[i] = NULL;
+ break; /* cannot have duplicates */
+ }
+ }
+}
+
+void GLVaoCache::clear(void)
+{
+ GLContext *ctx = static_cast<GLContext *>(GPU_context_active_get());
+ const int count = (is_dynamic_vao_count) ? dynamic_vaos.count : GPU_VAO_STATIC_LEN;
+ GLuint *vaos = (is_dynamic_vao_count) ? dynamic_vaos.vao_ids : static_vaos.vao_ids;
+ const GPUShaderInterface **interfaces = (is_dynamic_vao_count) ? dynamic_vaos.interfaces :
+ static_vaos.interfaces;
+ /* Early out, nothing to free. */
+ if (context_ == NULL) {
+ return;
+ }
+
+ if (context_ == ctx) {
+ glDeleteVertexArrays(count, vaos);
+ glDeleteVertexArrays(1, &vao_base_instance_);
+ }
+ else {
+ /* TODO(fclem) Slow way. Could avoid multiple mutex locks here. */
+ for (int i = 0; i < count; i++) {
+ context_->vao_free(vaos[i]);
+ }
+ context_->vao_free(vao_base_instance_);
+ }
+
+ for (int i = 0; i < count; i++) {
+ if (interfaces[i] == NULL) {
+ continue;
+ }
+ GPU_shaderinterface_remove_batch_ref(const_cast<GPUShaderInterface *>(interfaces[i]), this);
+ }
+
+ if (is_dynamic_vao_count) {
+ MEM_freeN((void *)dynamic_vaos.interfaces);
+ MEM_freeN(dynamic_vaos.vao_ids);
+ }
+
+ if (context_) {
+ context_->vao_cache_unregister(this);
+ }
+ /* Reinit. */
+ this->init();
+}
+
+/* Return 0 on cache miss (invalid VAO) */
+GLuint GLVaoCache::lookup(const GPUShaderInterface *interface)
+{
+ const int count = (is_dynamic_vao_count) ? dynamic_vaos.count : GPU_VAO_STATIC_LEN;
+ const GPUShaderInterface **interfaces = (is_dynamic_vao_count) ? dynamic_vaos.interfaces :
+ static_vaos.interfaces;
+ for (int i = 0; i < count; i++) {
+ if (interfaces[i] == interface) {
+ return (is_dynamic_vao_count) ? dynamic_vaos.vao_ids[i] : static_vaos.vao_ids[i];
+ }
+ }
+ return 0;
+}
+
+/* The GLVaoCache object is only valid for one GLContext.
+ * Reset the cache if trying to draw in another context. */
+void GLVaoCache::context_check(void)
+{
+ GLContext *ctx = static_cast<GLContext *>(GPU_context_active_get());
+ BLI_assert(ctx);
+
+ if (context_ != ctx) {
+ if (context_ != NULL) {
+ /* IMPORTANT: Trying to draw a batch in multiple different contexts will trash the VAO cache.
+ * This has a major performance impact and should be avoided in most cases. */
+ context_->vao_cache_unregister(this);
+ }
+ this->clear();
+ context_ = ctx;
+ context_->vao_cache_register(this);
+ }
+}
+
+GLuint GLVaoCache::base_instance_vao_get(GPUBatch *batch, int i_first)
+{
+ this->context_check();
+ /* Make sure the interface is up to date. */
+ if (interface_ != GPU_context_active_get()->shader->interface) {
+ vao_get(batch);
+ /* Trigger update. */
+ base_instance_ = 0;
+ }
+ /**
+ * There seems to be a nasty bug when drawing with the same VAO while reconfiguring it (T71147).
+ * We just use a throwaway VAO for that. Note that this is likely to degrade performance.
+ **/
+#ifdef __APPLE__
+ glDeleteVertexArrays(1, &vao_base_instance_);
+ vao_base_instance_ = 0;
+#endif
+
+ if (vao_base_instance_ == 0) {
+ glGenVertexArrays(1, &vao_base_instance_);
+ }
+
+ if (base_instance_ != i_first) {
+ base_instance_ = i_first;
+ GLVertArray::update_bindings(vao_base_instance_, batch, interface_, i_first);
+ }
+ return vao_base_instance_;
+}
+
+GLuint GLVaoCache::vao_get(GPUBatch *batch)
+{
+ this->context_check();
+
+ GPUContext *ctx = GPU_context_active_get();
+ if (interface_ != ctx->shader->interface) {
+ interface_ = ctx->shader->interface;
+ vao_id_ = this->lookup(interface_);
+
+ if (vao_id_ == 0) {
+ /* Cache miss, create a new VAO. */
+ glGenVertexArrays(1, &vao_id_);
+ this->insert(interface_, vao_id_);
+ GLVertArray::update_bindings(vao_id_, batch, interface_, 0);
+ }
+ }
+
+ return vao_id_;
+}
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Creation & Deletion
+ * \{ */
+
GLBatch::GLBatch(void)
{
}
@@ -44,7 +285,83 @@ GLBatch::~GLBatch()
{
}
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Drawing
+ * \{ */
+
+#if GPU_TRACK_INDEX_RANGE
+# define BASE_INDEX(el) ((el)->base_index)
+# define INDEX_TYPE(el) ((el)->gl_index_type)
+#else
+# define BASE_INDEX(el) 0
+# define INDEX_TYPE(el) GL_UNSIGNED_INT
+#endif
+
+void GLBatch::bind(int i_first)
+{
+ if (flag & GPU_BATCH_DIRTY) {
+ vao_cache_.clear();
+ }
+
+#if GPU_TRACK_INDEX_RANGE
+ /* Can be removed if GL 4.3 is required. */
+ if (!GLEW_ARB_ES3_compatibility && (elem != NULL)) {
+ glPrimitiveRestartIndex((elem->index_type == GPU_INDEX_U16) ? 0xFFFFu : 0xFFFFFFFFu);
+ }
+#endif
+
+ /* Can be removed if GL 4.2 is required. */
+ if (!GPU_arb_base_instance_is_supported() && (i_first > 0)) {
+ glBindVertexArray(vao_cache_.base_instance_vao_get(this, i_first));
+ }
+ else {
+ glBindVertexArray(vao_cache_.vao_get(this));
+ }
+}
+
void GLBatch::draw(int v_first, int v_count, int i_first, int i_count)
{
- UNUSED_VARS(v_first, v_count, i_first, i_count);
-}
\ No newline at end of file
+ this->bind(i_first);
+
+ GLenum gl_type = convert_prim_type_to_gl(prim_type);
+
+ if (elem) {
+ const GPUIndexBuf *el = elem;
+ GLenum index_type = INDEX_TYPE(el);
+ GLint base_index = BASE_INDEX(el);
+ void *v_first_ofs = (GLuint *)0 + v_first + el->index_start;
+
+#if GPU_TRACK_INDEX_RANGE
+ if (el->index_type == GPU_INDEX_U16) {
+ v_first_ofs = (GLushort *)0 + v_first + el->index_start;
+ }
+#endif
+
+ if (GPU_arb_base_instance_is_supported()) {
+ glDrawElementsInstancedBaseVertexBaseInstance(
+ gl_type, v_count, index_type, v_first_ofs, i_count, base_index, i_first);
+ }
+ else {
+ glDrawElementsInstancedBaseVertex(
+ gl_type, v_count, index_type, v_first_ofs, i_count, base_index);
+ }
+ }
+ else {
+#ifdef __APPLE__
+ glDisable(GL_PRIMITIVE_RESTART);
+#endif
+ if (GPU_arb_base_instance_is_supported()) {
+ glDrawArraysInstancedBaseInstance(gl_type, v_first, v_count, i_count, i_first);
+ }
+ else {
+ glDrawArraysInstanced(gl_type, v_first, v_count, i_count);
+ }
+#ifdef __APPLE__
+ glEnable(GL_PRIMITIVE_RESTART);
+#endif
+ }
+}
+
+/** \} */
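
Putting GLVaoCache together: three static slots, then a permanent switch to dynamic arrays on the fourth distinct interface. A usage-level sketch (shader names are illustrative):

#include "GPU_batch.h"

static void vao_cache_walkthrough(GPUBatch *batch, GPUShader *shader_a, GPUShader *shader_b)
{
  GPU_batch_set_shader(batch, shader_a);
  GPU_batch_draw(batch); /* cache miss: VAO created for shader_a's interface */

  GPU_batch_set_shader(batch, shader_b);
  GPU_batch_draw(batch); /* miss again: second static slot filled */

  GPU_batch_set_shader(batch, shader_a);
  GPU_batch_draw(batch); /* hit: first VAO reused, no reconfiguration */

  /* A fourth distinct interface would flush the static slots and switch this
   * cache to heap-allocated dynamic arrays for good. */
}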
diff --git a/source/blender/gpu/opengl/gl_batch.hh b/source/blender/gpu/opengl/gl_batch.hh
index 290c113205a..490c9180a99 100644
--- a/source/blender/gpu/opengl/gl_batch.hh
+++ b/source/blender/gpu/opengl/gl_batch.hh
@@ -37,18 +37,29 @@
namespace blender {
namespace gpu {
-#define GPU_BATCH_VAO_STATIC_LEN 3
+#define GPU_VAO_STATIC_LEN 3
+/* Vao management: remembers all geometry state (vertex attribute bindings & element buffer)
+ * for each shader interface. Start with a static number of vaos and fallback to dynamic count
+ * if necessary. Once a batch goes dynamic it does not go back. */
class GLVaoCache {
- /* Vao management: remembers all geometry state (vertex attribute bindings & element buffer)
- * for each shader interface. Start with a static number of vaos and fallback to dynamic count
- * if necessary. Once a batch goes dynamic it does not go back. */
+ private:
+ /** Context for which the vao_cache_ was generated. */
+ struct GLContext *context_ = NULL;
+ /** Last interface this batch was drawn with. */
+ GPUShaderInterface *interface_ = NULL;
+ /** Cached vao for the last interface. */
+ GLuint vao_id_ = 0;
+ /** Used when arb_base_instance is not supported. */
+ GLuint vao_base_instance_ = 0;
+ int base_instance_ = 0;
+
bool is_dynamic_vao_count = false;
union {
/** Static handle count */
struct {
- const GPUShaderInterface *interfaces[GPU_BATCH_VAO_STATIC_LEN];
- GLuint vao_ids[GPU_BATCH_VAO_STATIC_LEN];
+ const GPUShaderInterface *interfaces[GPU_VAO_STATIC_LEN];
+ GLuint vao_ids[GPU_VAO_STATIC_LEN];
} static_vaos;
/** Dynamic handle count */
struct {
@@ -58,18 +69,27 @@ class GLVaoCache {
} dynamic_vaos;
};
- GLuint search(const GPUShaderInterface *interface);
- void insert(GLuint vao_id, const GPUShaderInterface *interface);
+ public:
+ GLVaoCache();
+ ~GLVaoCache();
+
+ GLuint vao_get(GPUBatch *batch);
+ GLuint base_instance_vao_get(GPUBatch *batch, int i_first);
+
+ GLuint lookup(const GPUShaderInterface *interface);
+ void insert(const GPUShaderInterface *interface, GLuint vao_id);
+ void remove(const GPUShaderInterface *interface);
void clear(void);
- void interface_remove(const GPUShaderInterface *interface);
+
+ private:
+ void init(void);
+ void context_check(void);
};
class GLBatch : public Batch {
- private:
- /** Cached values (avoid dereferencing later). */
- GLuint vao_id;
+ public:
/** All vaos corresponding to all the GPUShaderInterface this batch was drawn with. */
- GLVaoCache vaos;
+ GLVaoCache vao_cache_;
public:
GLBatch();
@@ -77,6 +97,9 @@ class GLBatch : public Batch {
void draw(int v_first, int v_count, int i_first, int i_count) override;
+ private:
+ void bind(int i_first);
+
MEM_CXX_CLASS_ALLOC_FUNCS("GLBatch");
};
diff --git a/source/blender/gpu/opengl/gl_context.cc b/source/blender/gpu/opengl/gl_context.cc
index 00a10924ff6..dd413612879 100644
--- a/source/blender/gpu/opengl/gl_context.cc
+++ b/source/blender/gpu/opengl/gl_context.cc
@@ -63,8 +63,8 @@ GLContext::~GLContext()
/* For now don't allow GPUFrameBuffers to be reuse in another context. */
BLI_assert(framebuffers_.is_empty());
/* Delete vaos so the batch can be reused in another context. */
- for (GPUBatch *batch : batches_) {
- GPU_batch_vao_cache_clear(batch);
+ for (GLVaoCache *cache : vao_caches_) {
+ cache->clear();
}
glDeleteVertexArrays(1, &default_vao_);
glDeleteBuffers(1, &default_attr_vbo_);
@@ -197,20 +197,17 @@ void GLBackend::tex_free(GLuint tex_id)
* is discarded.
* \{ */
-void GLContext::batch_register(struct GPUBatch *batch)
+void GLContext::vao_cache_register(GLVaoCache *cache)
{
lists_mutex_.lock();
- batches_.add(batch);
+ vao_caches_.add(cache);
lists_mutex_.unlock();
}
-void GLContext::batch_unregister(struct GPUBatch *batch)
+void GLContext::vao_cache_unregister(GLVaoCache *cache)
{
- /* vao_cache_clear() can acquire lists_mutex_ so avoid deadlock. */
- // reinterpret_cast<GLBatch *>(batch)->vao_cache_clear();
-
lists_mutex_.lock();
- batches_.remove(batch);
+ vao_caches_.remove(cache);
lists_mutex_.unlock();
}
diff --git a/source/blender/gpu/opengl/gl_context.hh b/source/blender/gpu/opengl/gl_context.hh
index 3b55965b9d1..f3ff5cb47f4 100644
--- a/source/blender/gpu/opengl/gl_context.hh
+++ b/source/blender/gpu/opengl/gl_context.hh
@@ -25,15 +25,16 @@
#include "gpu_context_private.hh"
+#include "GPU_framebuffer.h"
+
#include "BLI_set.hh"
#include "BLI_vector.hh"
#include "glew-mx.h"
-#include <iostream>
+#include "gl_batch.hh"
+
#include <mutex>
-#include <unordered_set>
-#include <vector>
namespace blender {
namespace gpu {
@@ -63,7 +64,7 @@ class GLContext : public GPUContext {
* GPUBatch & GPUFramebuffer have references to the context they are from, in the case the
* context is destroyed, we need to remove any reference to it.
*/
- Set<GPUBatch *> batches_;
+ Set<GLVaoCache *> vao_caches_;
Set<GPUFrameBuffer *> framebuffers_;
/** Mutex for the bellow structures. */
std::mutex lists_mutex_;
@@ -87,8 +88,8 @@ class GLContext : public GPUContext {
void vao_free(GLuint vao_id);
void fbo_free(GLuint fbo_id);
- void batch_register(struct GPUBatch *batch);
- void batch_unregister(struct GPUBatch *batch);
+ void vao_cache_register(GLVaoCache *cache);
+ void vao_cache_unregister(GLVaoCache *cache);
void framebuffer_register(struct GPUFrameBuffer *fb);
void framebuffer_unregister(struct GPUFrameBuffer *fb);
};
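
The registry swap above keeps the same lifetime contract, now at VAO-cache granularity: caches register with the context that owns their GL names, and the context clears survivors before it is destroyed. A standalone sketch of that contract (stub types, not the Blender classes):

#include <mutex>
#include <set>

struct VaoCacheStub {
  void clear() { /* delete GL names, reset state */ }
};

class ContextStub {
  std::mutex lists_mutex_;
  std::set<VaoCacheStub *> vao_caches_;

 public:
  void vao_cache_register(VaoCacheStub *cache)
  {
    std::lock_guard<std::mutex> lock(lists_mutex_);
    vao_caches_.insert(cache);
  }
  void vao_cache_unregister(VaoCacheStub *cache)
  {
    std::lock_guard<std::mutex> lock(lists_mutex_);
    vao_caches_.erase(cache);
  }
  ~ContextStub()
  {
    /* Batches outlive the context; their cached VAOs must not. */
    for (VaoCacheStub *cache : vao_caches_) {
      cache->clear();
    }
  }
};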
diff --git a/source/blender/gpu/opengl/gl_vertex_array.cc b/source/blender/gpu/opengl/gl_vertex_array.cc
new file mode 100644
index 00000000000..907dc37e46f
--- /dev/null
+++ b/source/blender/gpu/opengl/gl_vertex_array.cc
@@ -0,0 +1,158 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2016 by Mike Erwin.
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#include "GPU_glew.h"
+
+#include "GPU_shader_interface.h"
+#include "GPU_vertex_buffer.h"
+
+#include "gpu_vertex_format_private.h"
+
+#include "gl_batch.hh"
+#include "gl_context.hh"
+
+#include "gl_vertex_array.hh"
+
+using namespace blender::gpu;
+
+/* -------------------------------------------------------------------- */
+/** \name Vertex Array Bindings
+ * \{ */
+
+/* Returns enabled vertex pointers as a bitflag (one bit per attrib). */
+static uint16_t vbo_bind(const GPUShaderInterface *interface,
+ const GPUVertFormat *format,
+ uint v_first,
+ uint v_len,
+ const bool use_instancing)
+{
+ uint16_t enabled_attrib = 0;
+ const uint attr_len = format->attr_len;
+ uint stride = format->stride;
+ uint offset = 0;
+ GLuint divisor = (use_instancing) ? 1 : 0;
+
+ for (uint a_idx = 0; a_idx < attr_len; a_idx++) {
+ const GPUVertAttr *a = &format->attrs[a_idx];
+
+ if (format->deinterleaved) {
+ offset += ((a_idx == 0) ? 0 : format->attrs[a_idx - 1].sz) * v_len;
+ stride = a->sz;
+ }
+ else {
+ offset = a->offset;
+ }
+
+ const GLvoid *pointer = (const GLubyte *)0 + offset + v_first * stride;
+ const GLenum type = convert_comp_type_to_gl(static_cast<GPUVertCompType>(a->comp_type));
+
+ for (uint n_idx = 0; n_idx < a->name_len; n_idx++) {
+ const char *name = GPU_vertformat_attr_name_get(format, a, n_idx);
+ const GPUShaderInput *input = GPU_shaderinterface_attr(interface, name);
+
+ if (input == NULL) {
+ continue;
+ }
+
+ enabled_attrib |= (1 << input->location);
+
+ if (a->comp_len == 16 || a->comp_len == 12 || a->comp_len == 8) {
+ BLI_assert(a->fetch_mode == GPU_FETCH_FLOAT);
+ BLI_assert(a->comp_type == GPU_COMP_F32);
+ for (int i = 0; i < a->comp_len / 4; i++) {
+ glEnableVertexAttribArray(input->location + i);
+ glVertexAttribDivisor(input->location + i, divisor);
+ glVertexAttribPointer(
+ input->location + i, 4, type, GL_FALSE, stride, (const GLubyte *)pointer + i * 16);
+ }
+ }
+ else {
+ glEnableVertexAttribArray(input->location);
+ glVertexAttribDivisor(input->location, divisor);
+
+ switch (a->fetch_mode) {
+ case GPU_FETCH_FLOAT:
+ case GPU_FETCH_INT_TO_FLOAT:
+ glVertexAttribPointer(input->location, a->comp_len, type, GL_FALSE, stride, pointer);
+ break;
+ case GPU_FETCH_INT_TO_FLOAT_UNIT:
+ glVertexAttribPointer(input->location, a->comp_len, type, GL_TRUE, stride, pointer);
+ break;
+ case GPU_FETCH_INT:
+ glVertexAttribIPointer(input->location, a->comp_len, type, stride, pointer);
+ break;
+ }
+ }
+ }
+ }
+ return enabled_attrib;
+}
+
+/* Update the Attrib Binding of the currently bound VAO. */
+void GLVertArray::update_bindings(const GLuint vao,
+ const GPUBatch *batch,
+ const GPUShaderInterface *interface,
+ const int base_instance)
+{
+ uint16_t attr_mask = interface->enabled_attr_mask;
+
+ glBindVertexArray(vao);
+
+ /* Reverse order so first VBOs have more prevalence (in terms of attribute override). */
+ for (int v = GPU_BATCH_VBO_MAX_LEN - 1; v > -1; v--) {
+ GPUVertBuf *vbo = batch->verts[v];
+ if (vbo) {
+ GPU_vertbuf_use(vbo);
+ attr_mask &= ~vbo_bind(interface, &vbo->format, 0, vbo->vertex_len, false);
+ }
+ }
+
+ for (int v = GPU_BATCH_INST_VBO_MAX_LEN - 1; v > -1; v--) {
+ GPUVertBuf *vbo = batch->inst[v];
+ if (vbo) {
+ GPU_vertbuf_use(vbo);
+ attr_mask &= ~vbo_bind(interface, &vbo->format, base_instance, vbo->vertex_len, true);
+ }
+ }
+
+ if (attr_mask != 0 && GLEW_ARB_vertex_attrib_binding) {
+ for (uint16_t mask = 1, a = 0; a < 16; a++, mask <<= 1) {
+ if (attr_mask & mask) {
+ GLContext *ctx = static_cast<GLContext *>(GPU_context_active_get());
+ /* This replaces glVertexAttrib4f(a, 0.0f, 0.0f, 0.0f, 1.0f); with a more modern style.
+ * Fix issues for some drivers (see T75069). */
+ glBindVertexBuffer(a, ctx->default_attr_vbo_, (intptr_t)0, (intptr_t)0);
+ glEnableVertexAttribArray(a);
+ glVertexAttribFormat(a, 4, GL_FLOAT, GL_FALSE, 0);
+ glVertexAttribBinding(a, a);
+ }
+ }
+ }
+
+ if (batch->elem) {
+ /* Binds the index buffer. This state is also saved in the VAO. */
+ GPU_indexbuf_use(batch->elem);
+ }
+}
+
+/** \} */
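
The attr_mask bookkeeping in update_bindings() is a small bitmask subtraction; a standalone sketch with illustrative values:

#include <cstdint>
#include <cstdio>

/* Every attribute location enabled by the shader interface starts as a set
 * bit; each vbo_bind() clears the bits it satisfied; any survivor is bound
 * to the context's default (0, 0, 0, 1) attribute VBO. */
int main()
{
  uint16_t attr_mask = 0x0007;    /* shader wants locations 0, 1, 2 */
  uint16_t bound_by_vbo = 0x0003; /* a VBO provided locations 0 and 1 */
  attr_mask &= ~bound_by_vbo;

  for (int a = 0; a < 16; a++) {
    if (attr_mask & (1 << a)) {
      printf("location %d falls back to the default attr VBO\n", a);
    }
  }
  return 0;
}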
diff --git a/source/blender/gpu/opengl/gl_vertex_array.hh b/source/blender/gpu/opengl/gl_vertex_array.hh
new file mode 100644
index 00000000000..6da414d7e62
--- /dev/null
+++ b/source/blender/gpu/opengl/gl_vertex_array.hh
@@ -0,0 +1,44 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2020 Blender Foundation.
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#pragma once
+
+#include "glew-mx.h"
+
+#include "GPU_batch.h"
+#include "GPU_shader_interface.h"
+
+namespace blender {
+namespace gpu {
+
+namespace GLVertArray {
+
+void update_bindings(const GLuint vao,
+ const GPUBatch *batch,
+ const GPUShaderInterface *interface,
+ const int base_instance);
+
+} // namespace GLVertArray
+
+} // namespace gpu
+} // namespace blender