 source/blender/gpu/CMakeLists.txt                  |   2
 source/blender/gpu/GPU_batch.h                     |  32
 source/blender/gpu/GPU_shader_interface.h          |   6
 source/blender/gpu/intern/gpu_batch.cc             | 433
 source/blender/gpu/intern/gpu_batch_private.hh     |   2
 source/blender/gpu/intern/gpu_context.cc           |  12
 source/blender/gpu/intern/gpu_context_private.hh   |   3
 source/blender/gpu/intern/gpu_shader.cc            |   4
 source/blender/gpu/intern/gpu_shader_interface.cc  |  22
 source/blender/gpu/opengl/gl_batch.cc              | 321
 source/blender/gpu/opengl/gl_batch.hh              |  49
 source/blender/gpu/opengl/gl_context.cc            |  15
 source/blender/gpu/opengl/gl_context.hh            |  13
 source/blender/gpu/opengl/gl_vertex_array.cc       | 158
 source/blender/gpu/opengl/gl_vertex_array.hh       |  44
 15 files changed, 623 insertions(+), 493 deletions(-)
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index fcbe53e599a..906ae31fbc7 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -92,6 +92,7 @@ set(SRC opengl/gl_batch.cc opengl/gl_context.cc opengl/gl_drawlist.cc + opengl/gl_vertex_array.cc GPU_attr_binding.h GPU_batch.h @@ -143,6 +144,7 @@ set(SRC opengl/gl_batch.hh opengl/gl_context.hh opengl/gl_drawlist.hh + opengl/gl_vertex_array.hh ) set(LIB diff --git a/source/blender/gpu/GPU_batch.h b/source/blender/gpu/GPU_batch.h index d71d4d5435f..33d539e3a9e 100644 --- a/source/blender/gpu/GPU_batch.h +++ b/source/blender/gpu/GPU_batch.h @@ -30,7 +30,6 @@ #include "GPU_element.h" #include "GPU_shader.h" -#include "GPU_shader_interface.h" #include "GPU_vertex_buffer.h" #define GPU_BATCH_VBO_MAX_LEN 6 @@ -59,9 +58,7 @@ typedef enum eGPUBatchFlag { /** Batch is initialized but it's VBOs are still being populated. (optional) */ GPU_BATCH_BUILDING = (1 << 16), /** Cached data need to be rebuild. (VAO, PSO, ...) */ - GPU_BATCH_DIRTY_BINDINGS = (1 << 17), - GPU_BATCH_DIRTY_INTERFACE = (1 << 18), - GPU_BATCH_DIRTY = (GPU_BATCH_DIRTY_BINDINGS | GPU_BATCH_DIRTY_INTERFACE), + GPU_BATCH_DIRTY = (1 << 17), } eGPUBatchFlag; #define GPU_BATCH_OWNS_NONE GPU_BATCH_INVALID @@ -78,6 +75,7 @@ extern "C" { /** * IMPORTANT: Do not allocate manually as the real struct is bigger (i.e: GLBatch). This is only * the common and "public" part of the struct. Use the provided allocator. + * TODO(fclem) Make the content of this struct hidden and expose getters/setters. **/ typedef struct GPUBatch { /** verts[0] is required, others can be NULL */ @@ -90,32 +88,8 @@ typedef struct GPUBatch { eGPUBatchFlag flag; /** Type of geometry to draw. */ GPUPrimType prim_type; - - /** Current assigned shader. */ + /** Current assigned shader. DEPRECATED. Here only for uniform binding. */ struct GPUShader *shader; - /** Last context used to draw this batch. */ - struct GPUContext *context; - - struct GPUShaderInterface *interface; - GLuint vao_id; - - /* Vao management: remembers all geometry state (vertex attribute bindings & element buffer) - * for each shader interface. Start with a static number of vaos and fallback to dynamic count - * if necessary. Once a batch goes dynamic it does not go back. */ - bool is_dynamic_vao_count; - union { - /** Static handle count */ - struct { - const struct GPUShaderInterface *interfaces[GPU_BATCH_VAO_STATIC_LEN]; - uint32_t vao_ids[GPU_BATCH_VAO_STATIC_LEN]; - } static_vaos; - /** Dynamic handle count */ - struct { - uint count; - const struct GPUShaderInterface **interfaces; - uint32_t *vao_ids; - } dynamic_vaos; - }; } GPUBatch; GPUBatch *GPU_batch_calloc(void); diff --git a/source/blender/gpu/GPU_shader_interface.h b/source/blender/gpu/GPU_shader_interface.h index 8aba1236b65..47e4e432d66 100644 --- a/source/blender/gpu/GPU_shader_interface.h +++ b/source/blender/gpu/GPU_shader_interface.h @@ -80,7 +80,7 @@ typedef struct GPUShaderInterface { /** Buffer containing all inputs names separated by '\0'. */ char *name_buffer; /** Reference to GPUBatches using this interface */ - struct GPUBatch **batches; + void **batches; uint batches_len; /** Input counts. 
*/ uint attribute_len; @@ -109,8 +109,8 @@ const GPUShaderInput *GPU_shaderinterface_ubo(const GPUShaderInterface *, const const GPUShaderInput *GPU_shaderinterface_attr(const GPUShaderInterface *, const char *name); /* keep track of batches using this interface */ -void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *, struct GPUBatch *); -void GPU_shaderinterface_remove_batch_ref(GPUShaderInterface *, struct GPUBatch *); +void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *interface, void *cache); +void GPU_shaderinterface_remove_batch_ref(GPUShaderInterface *interface, void *cache); #ifdef __cplusplus } diff --git a/source/blender/gpu/intern/gpu_batch.cc b/source/blender/gpu/intern/gpu_batch.cc index 27196413b20..995e1afb236 100644 --- a/source/blender/gpu/intern/gpu_batch.cc +++ b/source/blender/gpu/intern/gpu_batch.cc @@ -26,6 +26,8 @@ #include "MEM_guardedalloc.h" +#include "BLI_math_base.h" + #include "GPU_batch.h" #include "GPU_batch_presets.h" #include "GPU_extensions.h" @@ -46,49 +48,15 @@ using namespace blender::gpu; -static GLuint g_default_attr_vbo = 0; - -static void gpu_batch_bind(GPUBatch *batch); -static void batch_update_program_bindings(GPUBatch *batch, uint i_first); - -void GPU_batch_vao_cache_clear(GPUBatch *batch) +void GPU_batch_vao_cache_clear(GPUBatch *UNUSED(batch)) { - if (batch->context == NULL) { - return; - } - if (batch->is_dynamic_vao_count) { - for (int i = 0; i < batch->dynamic_vaos.count; i++) { - if (batch->dynamic_vaos.vao_ids[i]) { - GPU_vao_free(batch->dynamic_vaos.vao_ids[i], batch->context); - } - if (batch->dynamic_vaos.interfaces[i]) { - GPU_shaderinterface_remove_batch_ref( - (GPUShaderInterface *)batch->dynamic_vaos.interfaces[i], batch); - } - } - MEM_freeN((void *)batch->dynamic_vaos.interfaces); - MEM_freeN(batch->dynamic_vaos.vao_ids); - } - else { - for (int i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) { - if (batch->static_vaos.vao_ids[i]) { - GPU_vao_free(batch->static_vaos.vao_ids[i], batch->context); - } - if (batch->static_vaos.interfaces[i]) { - GPU_shaderinterface_remove_batch_ref( - (GPUShaderInterface *)batch->static_vaos.interfaces[i], batch); - } - } - } - batch->is_dynamic_vao_count = false; - for (int i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) { - batch->static_vaos.vao_ids[i] = 0; - batch->static_vaos.interfaces[i] = NULL; - } - gpu_context_remove_batch(batch->context, batch); - batch->context = NULL; + /* TODO remove */ } +/* -------------------------------------------------------------------- */ +/** \name Creation & Deletion + * \{ */ + GPUBatch *GPU_batch_calloc(void) { GPUBatch *batch = GPUBackend::get()->batch_alloc(); @@ -126,7 +94,6 @@ void GPU_batch_init_ex(GPUBatch *batch, batch->elem = elem; batch->prim_type = prim_type; batch->flag = owns_flag | GPU_BATCH_INIT | GPU_BATCH_DIRTY; - batch->context = NULL; batch->shader = NULL; } @@ -144,7 +111,6 @@ void GPU_batch_copy(GPUBatch *batch_dst, GPUBatch *batch_src) void GPU_batch_clear(GPUBatch *batch) { - GPU_batch_vao_cache_clear(batch); if (batch->flag & GPU_BATCH_OWNS_INDEX) { GPU_indexbuf_discard(batch->elem); } @@ -172,11 +138,17 @@ void GPU_batch_discard(GPUBatch *batch) delete static_cast<Batch *>(batch); } +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Buffers Management + * \{ */ + /* NOTE: Override ONLY the first instance vbo (and free them if owned). 
*/ void GPU_batch_instbuf_set(GPUBatch *batch, GPUVertBuf *inst, bool own_vbo) { BLI_assert(inst); - batch->flag |= GPU_BATCH_DIRTY_BINDINGS; + batch->flag |= GPU_BATCH_DIRTY; if (batch->inst[0] && (batch->flag & GPU_BATCH_OWNS_INST_VBO)) { GPU_vertbuf_discard(batch->inst[0]); @@ -190,7 +162,7 @@ void GPU_batch_instbuf_set(GPUBatch *batch, GPUVertBuf *inst, bool own_vbo) void GPU_batch_elembuf_set(GPUBatch *batch, GPUIndexBuf *elem, bool own_ibo) { BLI_assert(elem); - batch->flag |= GPU_BATCH_DIRTY_BINDINGS; + batch->flag |= GPU_BATCH_DIRTY; if (batch->elem && (batch->flag & GPU_BATCH_OWNS_INDEX)) { GPU_indexbuf_discard(batch->elem); @@ -203,7 +175,7 @@ void GPU_batch_elembuf_set(GPUBatch *batch, GPUIndexBuf *elem, bool own_ibo) int GPU_batch_instbuf_add_ex(GPUBatch *batch, GPUVertBuf *insts, bool own_vbo) { BLI_assert(insts); - batch->flag |= GPU_BATCH_DIRTY_BINDINGS; + batch->flag |= GPU_BATCH_DIRTY; for (uint v = 0; v < GPU_BATCH_INST_VBO_MAX_LEN; v++) { if (batch->inst[v] == NULL) { @@ -228,7 +200,7 @@ int GPU_batch_instbuf_add_ex(GPUBatch *batch, GPUVertBuf *insts, bool own_vbo) int GPU_batch_vertbuf_add_ex(GPUBatch *batch, GPUVertBuf *verts, bool own_vbo) { BLI_assert(verts); - batch->flag |= GPU_BATCH_DIRTY_BINDINGS; + batch->flag |= GPU_BATCH_DIRTY; for (uint v = 0; v < GPU_BATCH_VBO_MAX_LEN; v++) { if (batch->verts[v] == NULL) { @@ -246,254 +218,20 @@ int GPU_batch_vertbuf_add_ex(GPUBatch *batch, GPUVertBuf *verts, bool own_vbo) return -1; } -static GLuint batch_vao_get(GPUBatch *batch) -{ - /* Search through cache */ - if (batch->is_dynamic_vao_count) { - for (int i = 0; i < batch->dynamic_vaos.count; i++) { - if (batch->dynamic_vaos.interfaces[i] == batch->interface) { - return batch->dynamic_vaos.vao_ids[i]; - } - } - } - else { - for (int i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) { - if (batch->static_vaos.interfaces[i] == batch->interface) { - return batch->static_vaos.vao_ids[i]; - } - } - } - - /* Set context of this batch. - * It will be bound to it until GPU_batch_vao_cache_clear is called. - * Until then it can only be drawn with this context. */ - if (batch->context == NULL) { - batch->context = GPU_context_active_get(); - gpu_context_add_batch(batch->context, batch); - } -#if TRUST_NO_ONE - else { - /* Make sure you are not trying to draw this batch in another context. */ - assert(batch->context == GPU_context_active_get()); - } -#endif - - /* Cache miss, time to add a new entry! */ - GLuint new_vao = 0; - if (!batch->is_dynamic_vao_count) { - int i; /* find first unused slot */ - for (i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) { - if (batch->static_vaos.vao_ids[i] == 0) { - break; - } - } - - if (i < GPU_BATCH_VAO_STATIC_LEN) { - batch->static_vaos.interfaces[i] = batch->interface; - batch->static_vaos.vao_ids[i] = new_vao = GPU_vao_alloc(); - } - else { - /* Not enough place switch to dynamic. */ - batch->is_dynamic_vao_count = true; - /* Erase previous entries, they will be added back if drawn again. */ - for (int j = 0; j < GPU_BATCH_VAO_STATIC_LEN; j++) { - GPU_shaderinterface_remove_batch_ref( - (GPUShaderInterface *)batch->static_vaos.interfaces[j], batch); - GPU_vao_free(batch->static_vaos.vao_ids[j], batch->context); - } - /* Init dynamic arrays and let the branch below set the values. 
*/ - batch->dynamic_vaos.count = GPU_BATCH_VAO_DYN_ALLOC_COUNT; - batch->dynamic_vaos.interfaces = (const GPUShaderInterface **)MEM_callocN( - batch->dynamic_vaos.count * sizeof(GPUShaderInterface *), "dyn vaos interfaces"); - batch->dynamic_vaos.vao_ids = (GLuint *)MEM_callocN( - batch->dynamic_vaos.count * sizeof(GLuint), "dyn vaos ids"); - } - } - - if (batch->is_dynamic_vao_count) { - int i; /* find first unused slot */ - for (i = 0; i < batch->dynamic_vaos.count; i++) { - if (batch->dynamic_vaos.vao_ids[i] == 0) { - break; - } - } - - if (i == batch->dynamic_vaos.count) { - /* Not enough place, realloc the array. */ - i = batch->dynamic_vaos.count; - batch->dynamic_vaos.count += GPU_BATCH_VAO_DYN_ALLOC_COUNT; - batch->dynamic_vaos.interfaces = (const GPUShaderInterface **)MEM_recallocN( - (void *)batch->dynamic_vaos.interfaces, - sizeof(GPUShaderInterface *) * batch->dynamic_vaos.count); - batch->dynamic_vaos.vao_ids = (GLuint *)MEM_recallocN( - batch->dynamic_vaos.vao_ids, sizeof(GLuint) * batch->dynamic_vaos.count); - } - batch->dynamic_vaos.interfaces[i] = batch->interface; - batch->dynamic_vaos.vao_ids[i] = new_vao = GPU_vao_alloc(); - } - - GPU_shaderinterface_add_batch_ref((GPUShaderInterface *)batch->interface, batch); - -#if TRUST_NO_ONE - assert(new_vao != 0); -#endif - - /* We just got a fresh VAO we need to initialize it. */ - glBindVertexArray(new_vao); - batch_update_program_bindings(batch, 0); - glBindVertexArray(0); +/** \} */ - return new_vao; -} +/* -------------------------------------------------------------------- */ +/** \name Uniform setters + * + * TODO(fclem) port this to GPUShader. + * \{ */ void GPU_batch_set_shader(GPUBatch *batch, GPUShader *shader) { - batch->interface = shader->interface; batch->shader = shader; - if (batch->flag & GPU_BATCH_DIRTY_BINDINGS) { - GPU_batch_vao_cache_clear(batch); - } - batch->vao_id = batch_vao_get(batch); GPU_shader_bind(batch->shader); - GPU_matrix_bind(batch->shader->interface); - GPU_shader_set_srgb_uniform(batch->shader->interface); - gpu_batch_bind(batch); -} - -void gpu_batch_remove_interface_ref(GPUBatch *batch, const GPUShaderInterface *interface) -{ - if (batch->is_dynamic_vao_count) { - for (int i = 0; i < batch->dynamic_vaos.count; i++) { - if (batch->dynamic_vaos.interfaces[i] == interface) { - GPU_vao_free(batch->dynamic_vaos.vao_ids[i], batch->context); - batch->dynamic_vaos.vao_ids[i] = 0; - batch->dynamic_vaos.interfaces[i] = NULL; - break; /* cannot have duplicates */ - } - } - } - else { - int i; - for (i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) { - if (batch->static_vaos.interfaces[i] == interface) { - GPU_vao_free(batch->static_vaos.vao_ids[i], batch->context); - batch->static_vaos.vao_ids[i] = 0; - batch->static_vaos.interfaces[i] = NULL; - break; /* cannot have duplicates */ - } - } - } -} - -static void create_bindings(GPUVertBuf *verts, - const GPUShaderInterface *interface, - uint16_t *attr_mask, - uint v_first, - const bool use_instancing) -{ - const GPUVertFormat *format = &verts->format; - - const uint attr_len = format->attr_len; - uint stride = format->stride; - uint offset = 0; - - GPU_vertbuf_use(verts); - - for (uint a_idx = 0; a_idx < attr_len; a_idx++) { - const GPUVertAttr *a = &format->attrs[a_idx]; - - if (format->deinterleaved) { - offset += ((a_idx == 0) ? 
0 : format->attrs[a_idx - 1].sz) * verts->vertex_len; - stride = a->sz; - } - else { - offset = a->offset; - } - - const GLvoid *pointer = (const GLubyte *)0 + offset + v_first * stride; - const GLenum type = convert_comp_type_to_gl(static_cast<GPUVertCompType>(a->comp_type)); - - for (uint n_idx = 0; n_idx < a->name_len; n_idx++) { - const char *name = GPU_vertformat_attr_name_get(format, a, n_idx); - const GPUShaderInput *input = GPU_shaderinterface_attr(interface, name); - - if (input == NULL) { - continue; - } - - *attr_mask &= ~(1 << input->location); - - if (a->comp_len == 16 || a->comp_len == 12 || a->comp_len == 8) { - BLI_assert(a->fetch_mode == GPU_FETCH_FLOAT); - BLI_assert(a->comp_type == GPU_COMP_F32); - for (int i = 0; i < a->comp_len / 4; i++) { - glEnableVertexAttribArray(input->location + i); - glVertexAttribDivisor(input->location + i, (use_instancing) ? 1 : 0); - glVertexAttribPointer( - input->location + i, 4, type, GL_FALSE, stride, (const GLubyte *)pointer + i * 16); - } - } - else { - glEnableVertexAttribArray(input->location); - glVertexAttribDivisor(input->location, (use_instancing) ? 1 : 0); - - switch (a->fetch_mode) { - case GPU_FETCH_FLOAT: - case GPU_FETCH_INT_TO_FLOAT: - glVertexAttribPointer(input->location, a->comp_len, type, GL_FALSE, stride, pointer); - break; - case GPU_FETCH_INT_TO_FLOAT_UNIT: - glVertexAttribPointer(input->location, a->comp_len, type, GL_TRUE, stride, pointer); - break; - case GPU_FETCH_INT: - glVertexAttribIPointer(input->location, a->comp_len, type, stride, pointer); - break; - } - } - } - } -} - -static void batch_update_program_bindings(GPUBatch *batch, uint i_first) -{ - uint16_t attr_mask = batch->interface->enabled_attr_mask; - - /* Reverse order so first VBO'S have more prevalence (in term of attribute override). */ - for (int v = GPU_BATCH_VBO_MAX_LEN - 1; v > -1; v--) { - if (batch->verts[v] != NULL) { - create_bindings(batch->verts[v], batch->interface, &attr_mask, 0, false); - } - } - - for (int v = GPU_BATCH_INST_VBO_MAX_LEN - 1; v > -1; v--) { - if (batch->inst[v]) { - create_bindings(batch->inst[v], batch->interface, &attr_mask, i_first, true); - } - } - - if (attr_mask != 0 && GLEW_ARB_vertex_attrib_binding) { - for (uint16_t mask = 1, a = 0; a < 16; a++, mask <<= 1) { - if (attr_mask & mask) { - /* This replaces glVertexAttrib4f(a, 0.0f, 0.0f, 0.0f, 1.0f); with a more modern style. - * Fix issues for some drivers (see T75069). 
*/ - glBindVertexBuffer(a, g_default_attr_vbo, (intptr_t)0, (intptr_t)0); - - glEnableVertexAttribArray(a); - glVertexAttribFormat(a, 4, GL_FLOAT, GL_FALSE, 0); - glVertexAttribBinding(a, a); - } - } - } - - if (batch->elem) { - GPU_indexbuf_use(batch->elem); - } } -/* -------------------------------------------------------------------- */ -/** \name Uniform setters - * \{ */ - #define GET_UNIFORM \ const GPUShaderInput *uniform = GPU_shaderinterface_uniform(batch->shader->interface, name); \ BLI_assert(uniform); @@ -581,31 +319,6 @@ void GPU_batch_uniform_mat4(GPUBatch *batch, const char *name, const float data[ /** \name Drawing / Drawcall functions * \{ */ -static void *elem_offset(const GPUIndexBuf *el, int v_first) -{ -#if GPU_TRACK_INDEX_RANGE - if (el->index_type == GPU_INDEX_U16) { - return (GLushort *)0 + v_first + el->index_start; - } -#endif - return (GLuint *)0 + v_first + el->index_start; -} - -/* Use when drawing with GPU_batch_draw_advanced */ -static void gpu_batch_bind(GPUBatch *batch) -{ - glBindVertexArray(batch->vao_id); - -#if GPU_TRACK_INDEX_RANGE - /* Can be removed if GL 4.3 is required. */ - if (!GLEW_ARB_ES3_compatibility && batch->elem != NULL) { - GLuint restart_index = (batch->elem->index_type == GPU_INDEX_U16) ? (GLuint)0xFFFF : - (GLuint)0xFFFFFFFF; - glPrimitiveRestartIndex(restart_index); - } -#endif -} - void GPU_batch_draw(GPUBatch *batch) { GPU_shader_bind(batch->shader); @@ -630,18 +343,9 @@ void GPU_batch_draw_instanced(GPUBatch *batch, int i_count) GPU_shader_unbind(); } -#if GPU_TRACK_INDEX_RANGE -# define BASE_INDEX(el) ((el)->base_index) -# define INDEX_TYPE(el) ((el)->gl_index_type) -#else -# define BASE_INDEX(el) 0 -# define INDEX_TYPE(el) GL_UNSIGNED_INT -#endif - void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_first, int i_count) { BLI_assert(GPU_context_active_get()->shader != NULL); - /* TODO could assert that VAO is bound. */ if (v_count == 0) { v_count = (batch->elem) ? batch->elem->index_len : batch->verts[0]->vertex_len; @@ -649,8 +353,8 @@ void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_fi if (i_count == 0) { i_count = (batch->inst[0]) ? batch->inst[0]->vertex_len : 1; /* Meh. This is to be able to use different numbers of verts in instance vbos. */ - if (batch->inst[1] && i_count > batch->inst[1]->vertex_len) { - i_count = batch->inst[1]->vertex_len; + if (batch->inst[1] != NULL) { + i_count = min_ii(i_count, batch->inst[1]->vertex_len); } } @@ -659,78 +363,7 @@ void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_fi return; } - /* Verify there is enough data do draw. */ - /* TODO(fclem) Nice to have but this is invalid when using procedural draw-calls. - * The right assert would be to check if there is an enabled attribute from each VBO - * and check their length. */ - // BLI_assert(i_first + i_count <= (batch->inst ? batch->inst->vertex_len : INT_MAX)); - // BLI_assert(v_first + v_count <= - // (batch->elem ? batch->elem->index_len : batch->verts[0]->vertex_len)); - -#ifdef __APPLE__ - GLuint vao = 0; -#endif - - if (!GPU_arb_base_instance_is_supported()) { - if (i_first > 0) { -#ifdef __APPLE__ - /** - * There seems to be a nasty bug when drawing using the same VAO reconfiguring. (see T71147) - * We just use a throwaway VAO for that. Note that this is likely to degrade performance. - **/ - glGenVertexArrays(1, &vao); - glBindVertexArray(vao); -#else - /* If using offset drawing with instancing, we must - * use the default VAO and redo bindings. 
*/ - glBindVertexArray(GPU_vao_default()); -#endif - batch_update_program_bindings(batch, i_first); - } - else { - /* Previous call could have bind the default vao - * see above. */ - glBindVertexArray(batch->vao_id); - } - } - - GLenum gl_prim_type = convert_prim_type_to_gl(batch->prim_type); - - if (batch->elem) { - const GPUIndexBuf *el = batch->elem; - GLenum index_type = INDEX_TYPE(el); - GLint base_index = BASE_INDEX(el); - void *v_first_ofs = elem_offset(el, v_first); - - if (GPU_arb_base_instance_is_supported()) { - glDrawElementsInstancedBaseVertexBaseInstance( - gl_prim_type, v_count, index_type, v_first_ofs, i_count, base_index, i_first); - } - else { - glDrawElementsInstancedBaseVertex( - gl_prim_type, v_count, index_type, v_first_ofs, i_count, base_index); - } - } - else { -#ifdef __APPLE__ - glDisable(GL_PRIMITIVE_RESTART); -#endif - if (GPU_arb_base_instance_is_supported()) { - glDrawArraysInstancedBaseInstance(gl_prim_type, v_first, v_count, i_count, i_first); - } - else { - glDrawArraysInstanced(gl_prim_type, v_first, v_count, i_count); - } -#ifdef __APPLE__ - glEnable(GL_PRIMITIVE_RESTART); -#endif - } - -#ifdef __APPLE__ - if (vao != 0) { - glDeleteVertexArrays(1, &vao); - } -#endif + static_cast<Batch *>(batch)->draw(v_first, v_count, i_first, i_count); } /* just draw some vertices and let shader place them where we want. */ @@ -782,23 +415,11 @@ void GPU_batch_program_set_imm_shader(GPUBatch *batch) void gpu_batch_init(void) { - if (g_default_attr_vbo == 0) { - g_default_attr_vbo = GPU_buf_alloc(); - - float default_attrib_data[4] = {0.0f, 0.0f, 0.0f, 1.0f}; - glBindBuffer(GL_ARRAY_BUFFER, g_default_attr_vbo); - glBufferData(GL_ARRAY_BUFFER, sizeof(float[4]), default_attrib_data, GL_STATIC_DRAW); - glBindBuffer(GL_ARRAY_BUFFER, 0); - } - gpu_batch_presets_init(); } void gpu_batch_exit(void) { - GPU_buf_free(g_default_attr_vbo); - g_default_attr_vbo = 0; - gpu_batch_presets_exit(); } diff --git a/source/blender/gpu/intern/gpu_batch_private.hh b/source/blender/gpu/intern/gpu_batch_private.hh index a9293a5b206..3a8044efc1d 100644 --- a/source/blender/gpu/intern/gpu_batch_private.hh +++ b/source/blender/gpu/intern/gpu_batch_private.hh @@ -43,5 +43,3 @@ class Batch : public GPUBatch { } // namespace gpu } // namespace blender - -void gpu_batch_remove_interface_ref(GPUBatch *batch, const GPUShaderInterface *interface); diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc index 66514deefbc..e04631910c1 100644 --- a/source/blender/gpu/intern/gpu_context.cc +++ b/source/blender/gpu/intern/gpu_context.cc @@ -188,18 +188,6 @@ void GPU_tex_free(GLuint tex_id) * which are not shared across contexts. So we need to keep track of * ownership. 
*/ -void gpu_context_add_batch(GPUContext *ctx, GPUBatch *batch) -{ - BLI_assert(ctx); - static_cast<GLContext *>(ctx)->batch_register(batch); -} - -void gpu_context_remove_batch(GPUContext *ctx, GPUBatch *batch) -{ - BLI_assert(ctx); - static_cast<GLContext *>(ctx)->batch_unregister(batch); -} - void gpu_context_add_framebuffer(GPUContext *ctx, GPUFrameBuffer *fb) { #ifdef DEBUG diff --git a/source/blender/gpu/intern/gpu_context_private.hh b/source/blender/gpu/intern/gpu_context_private.hh index 9d3b5c3fc85..3f9fca16ff7 100644 --- a/source/blender/gpu/intern/gpu_context_private.hh +++ b/source/blender/gpu/intern/gpu_context_private.hh @@ -78,9 +78,6 @@ void GPU_tex_free(GLuint tex_id); void GPU_vao_free(GLuint vao_id, GPUContext *ctx); void GPU_fbo_free(GLuint fbo_id, GPUContext *ctx); -void gpu_context_add_batch(GPUContext *ctx, GPUBatch *batch); -void gpu_context_remove_batch(GPUContext *ctx, GPUBatch *batch); - void gpu_context_add_framebuffer(GPUContext *ctx, struct GPUFrameBuffer *fb); void gpu_context_remove_framebuffer(GPUContext *ctx, struct GPUFrameBuffer *fb); diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc index 508d9112fc0..e4af25500af 100644 --- a/source/blender/gpu/intern/gpu_shader.cc +++ b/source/blender/gpu/intern/gpu_shader.cc @@ -575,6 +575,10 @@ void GPU_shader_bind(GPUShader *shader) GPU_matrix_bind(shader->interface); GPU_shader_set_srgb_uniform(shader->interface); } + + if (GPU_matrix_dirty_get()) { + GPU_matrix_bind(shader->interface); + } } void GPU_shader_unbind(void) diff --git a/source/blender/gpu/intern/gpu_shader_interface.cc b/source/blender/gpu/intern/gpu_shader_interface.cc index d825dde7e10..ef90dde1877 100644 --- a/source/blender/gpu/intern/gpu_shader_interface.cc +++ b/source/blender/gpu/intern/gpu_shader_interface.cc @@ -35,6 +35,8 @@ #include "gpu_batch_private.hh" #include "gpu_context_private.hh" +#include "gl_batch.hh" + #include <stddef.h> #include <stdlib.h> #include <string.h> @@ -45,6 +47,8 @@ # include <stdio.h> #endif +using namespace blender::gpu; + static const char *BuiltinUniform_name(GPUUniformBuiltin u) { switch (u) { @@ -400,8 +404,8 @@ GPUShaderInterface *GPU_shaderinterface_create(int32_t program) /* Batches ref buffer */ shaderface->batches_len = GPU_SHADERINTERFACE_REF_ALLOC_COUNT; - shaderface->batches = (GPUBatch **)MEM_callocN(shaderface->batches_len * sizeof(GPUBatch *), - "GPUShaderInterface batches"); + shaderface->batches = (void **)MEM_callocN(shaderface->batches_len * sizeof(GPUBatch *), + "GPUShaderInterface batches"); MEM_freeN(uniforms_from_blocks); MEM_freeN(inputs_tmp); @@ -468,7 +472,8 @@ void GPU_shaderinterface_discard(GPUShaderInterface *shaderface) /* Remove this interface from all linked Batches vao cache. */ for (int i = 0; i < shaderface->batches_len; i++) { if (shaderface->batches[i] != NULL) { - gpu_batch_remove_interface_ref(shaderface->batches[i], shaderface); + /* XXX GL specific. to be removed during refactor. 
*/ + reinterpret_cast<GLVaoCache *>(shaderface->batches[i])->remove(shaderface); } } MEM_freeN(shaderface->batches); @@ -511,7 +516,7 @@ int32_t GPU_shaderinterface_block_builtin(const GPUShaderInterface *shaderface, return shaderface->builtin_blocks[builtin]; } -void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *shaderface, GPUBatch *batch) +void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *shaderface, void *batch) { int i; /* find first unused slot */ for (i = 0; i < shaderface->batches_len; i++) { @@ -523,13 +528,14 @@ void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *shaderface, GPUBatch /* Not enough place, realloc the array. */ i = shaderface->batches_len; shaderface->batches_len += GPU_SHADERINTERFACE_REF_ALLOC_COUNT; - shaderface->batches = (GPUBatch **)MEM_recallocN(shaderface->batches, - sizeof(GPUBatch *) * shaderface->batches_len); + shaderface->batches = (void **)MEM_recallocN(shaderface->batches, + sizeof(void *) * shaderface->batches_len); } - shaderface->batches[i] = batch; + /** XXX todo cleanup. */ + shaderface->batches[i] = reinterpret_cast<void *>(batch); } -void GPU_shaderinterface_remove_batch_ref(GPUShaderInterface *shaderface, GPUBatch *batch) +void GPU_shaderinterface_remove_batch_ref(GPUShaderInterface *shaderface, void *batch) { for (int i = 0; i < shaderface->batches_len; i++) { if (shaderface->batches[i] == batch) { diff --git a/source/blender/gpu/opengl/gl_batch.cc b/source/blender/gpu/opengl/gl_batch.cc index 62d81ad9f5a..00e1a61f7cf 100644 --- a/source/blender/gpu/opengl/gl_batch.cc +++ b/source/blender/gpu/opengl/gl_batch.cc @@ -29,13 +29,254 @@ #include "glew-mx.h" +#include "GPU_extensions.h" + #include "gpu_batch_private.hh" #include "gpu_primitive_private.h" +#include "gpu_shader_private.h" #include "gl_batch.hh" +#include "gl_context.hh" +#include "gl_vertex_array.hh" using namespace blender::gpu; +/* -------------------------------------------------------------------- */ +/** \name Vao cache + * + * Each GLBatch has a small cache of VAO objects that are used to avoid VAO reconfiguration. + * TODO(fclem) Could be revisited to avoid so much cross references. + * \{ */ + +GLVaoCache::GLVaoCache(void) +{ + init(); +} + +GLVaoCache::~GLVaoCache() +{ + this->clear(); +} + +void GLVaoCache::init(void) +{ + context_ = NULL; + interface_ = NULL; + is_dynamic_vao_count = false; + for (int i = 0; i < GPU_VAO_STATIC_LEN; i++) { + static_vaos.interfaces[i] = NULL; + static_vaos.vao_ids[i] = 0; + } + vao_base_instance_ = 0; + base_instance_ = 0; +} + +/* Create a new VAO object and store it in the cache. */ +void GLVaoCache::insert(const GPUShaderInterface *interface, GLuint vao) +{ + /* Now insert the cache. */ + if (!is_dynamic_vao_count) { + int i; /* find first unused slot */ + for (i = 0; i < GPU_VAO_STATIC_LEN; i++) { + if (static_vaos.vao_ids[i] == 0) { + break; + } + } + + if (i < GPU_VAO_STATIC_LEN) { + static_vaos.interfaces[i] = interface; + static_vaos.vao_ids[i] = vao; + } + else { + /* Erase previous entries, they will be added back if drawn again. */ + for (int i = 0; i < GPU_VAO_STATIC_LEN; i++) { + if (static_vaos.interfaces[i] != NULL) { + GPU_shaderinterface_remove_batch_ref( + const_cast<GPUShaderInterface *>(static_vaos.interfaces[i]), this); + context_->vao_free(static_vaos.vao_ids[i]); + } + } + /* Not enough place switch to dynamic. */ + is_dynamic_vao_count = true; + /* Init dynamic arrays and let the branch below set the values. 
*/ + dynamic_vaos.count = GPU_BATCH_VAO_DYN_ALLOC_COUNT; + dynamic_vaos.interfaces = (const GPUShaderInterface **)MEM_callocN( + dynamic_vaos.count * sizeof(GPUShaderInterface *), "dyn vaos interfaces"); + dynamic_vaos.vao_ids = (GLuint *)MEM_callocN(dynamic_vaos.count * sizeof(GLuint), + "dyn vaos ids"); + } + } + + if (is_dynamic_vao_count) { + int i; /* find first unused slot */ + for (i = 0; i < dynamic_vaos.count; i++) { + if (dynamic_vaos.vao_ids[i] == 0) { + break; + } + } + + if (i == dynamic_vaos.count) { + /* Not enough place, realloc the array. */ + i = dynamic_vaos.count; + dynamic_vaos.count += GPU_BATCH_VAO_DYN_ALLOC_COUNT; + dynamic_vaos.interfaces = (const GPUShaderInterface **)MEM_recallocN( + (void *)dynamic_vaos.interfaces, sizeof(GPUShaderInterface *) * dynamic_vaos.count); + dynamic_vaos.vao_ids = (GLuint *)MEM_recallocN(dynamic_vaos.vao_ids, + sizeof(GLuint) * dynamic_vaos.count); + } + dynamic_vaos.interfaces[i] = interface; + dynamic_vaos.vao_ids[i] = vao; + } + + GPU_shaderinterface_add_batch_ref(const_cast<GPUShaderInterface *>(interface), this); +} + +void GLVaoCache::remove(const GPUShaderInterface *interface) +{ + const int count = (is_dynamic_vao_count) ? dynamic_vaos.count : GPU_VAO_STATIC_LEN; + GLuint *vaos = (is_dynamic_vao_count) ? dynamic_vaos.vao_ids : static_vaos.vao_ids; + const GPUShaderInterface **interfaces = (is_dynamic_vao_count) ? dynamic_vaos.interfaces : + static_vaos.interfaces; + for (int i = 0; i < count; i++) { + if (interfaces[i] == interface) { + context_->vao_free(vaos[i]); + vaos[i] = 0; + interfaces[i] = NULL; + break; /* cannot have duplicates */ + } + } +} + +void GLVaoCache::clear(void) +{ + GLContext *ctx = static_cast<GLContext *>(GPU_context_active_get()); + const int count = (is_dynamic_vao_count) ? dynamic_vaos.count : GPU_VAO_STATIC_LEN; + GLuint *vaos = (is_dynamic_vao_count) ? dynamic_vaos.vao_ids : static_vaos.vao_ids; + const GPUShaderInterface **interfaces = (is_dynamic_vao_count) ? dynamic_vaos.interfaces : + static_vaos.interfaces; + /* Early out, nothing to free. */ + if (context_ == NULL) { + return; + } + + if (context_ == ctx) { + glDeleteVertexArrays(count, vaos); + glDeleteVertexArrays(1, &vao_base_instance_); + } + else { + /* TODO(fclem) Slow way. Could avoid multiple mutex lock here */ + for (int i = 0; i < count; i++) { + context_->vao_free(vaos[i]); + } + context_->vao_free(vao_base_instance_); + } + + for (int i = 0; i < count; i++) { + if (interfaces[i] == NULL) { + continue; + } + GPU_shaderinterface_remove_batch_ref(const_cast<GPUShaderInterface *>(interfaces[i]), this); + } + + if (is_dynamic_vao_count) { + MEM_freeN((void *)dynamic_vaos.interfaces); + MEM_freeN(dynamic_vaos.vao_ids); + } + + if (context_) { + context_->vao_cache_unregister(this); + } + /* Reinit. */ + this->init(); +} + +/* Return 0 on cache miss (invalid VAO) */ +GLuint GLVaoCache::lookup(const GPUShaderInterface *interface) +{ + const int count = (is_dynamic_vao_count) ? dynamic_vaos.count : GPU_VAO_STATIC_LEN; + const GPUShaderInterface **interfaces = (is_dynamic_vao_count) ? dynamic_vaos.interfaces : + static_vaos.interfaces; + for (int i = 0; i < count; i++) { + if (interfaces[i] == interface) { + return (is_dynamic_vao_count) ? dynamic_vaos.vao_ids[i] : static_vaos.vao_ids[i]; + } + } + return 0; +} + +/* The GLVaoCache object is only valid for one GLContext. 
+ * Reset the cache if trying to draw in another context; */ +void GLVaoCache::context_check(void) +{ + GLContext *ctx = static_cast<GLContext *>(GPU_context_active_get()); + BLI_assert(ctx); + + if (context_ != ctx) { + if (context_ != NULL) { + /* IMPORTANT: Trying to draw a batch in multiple different context will trash the VAO cache. + * This has major performance impact and should be avoided in most cases. */ + context_->vao_cache_unregister(this); + } + this->clear(); + context_ = ctx; + context_->vao_cache_register(this); + } +} + +GLuint GLVaoCache::base_instance_vao_get(GPUBatch *batch, int i_first) +{ + this->context_check(); + /* Make sure the interface is up to date. */ + if (interface_ != GPU_context_active_get()->shader->interface) { + vao_get(batch); + /* Trigger update. */ + base_instance_ = 0; + } + /** + * There seems to be a nasty bug when drawing using the same VAO reconfiguring (T71147). + * We just use a throwaway VAO for that. Note that this is likely to degrade performance. + **/ +#ifdef __APPLE__ + glDeleteVertexArrays(1, &vao_base_instance_); + vao_base_instance_ = 0; +#endif + + if (vao_base_instance_ == 0) { + glGenVertexArrays(1, &vao_base_instance_); + } + + if (base_instance_ != i_first) { + base_instance_ = i_first; + GLVertArray::update_bindings(vao_base_instance_, batch, interface_, i_first); + } + return base_instance_; +} + +GLuint GLVaoCache::vao_get(GPUBatch *batch) +{ + this->context_check(); + + GPUContext *ctx = GPU_context_active_get(); + if (interface_ != ctx->shader->interface) { + interface_ = ctx->shader->interface; + vao_id_ = this->lookup(interface_); + + if (vao_id_ == 0) { + /* Cache miss, create a new VAO. */ + glGenVertexArrays(1, &vao_id_); + this->insert(interface_, vao_id_); + GLVertArray::update_bindings(vao_id_, batch, interface_, 0); + } + } + + return vao_id_; +} +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Creation & Deletion + * \{ */ + GLBatch::GLBatch(void) { } @@ -44,7 +285,83 @@ GLBatch::~GLBatch() { } +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Drawing + * \{ */ + +#if GPU_TRACK_INDEX_RANGE +# define BASE_INDEX(el) ((el)->base_index) +# define INDEX_TYPE(el) ((el)->gl_index_type) +#else +# define BASE_INDEX(el) 0 +# define INDEX_TYPE(el) GL_UNSIGNED_INT +#endif + +void GLBatch::bind(int i_first) +{ + if (flag & GPU_BATCH_DIRTY) { + vao_cache_.clear(); + } + +#if GPU_TRACK_INDEX_RANGE + /* Can be removed if GL 4.3 is required. */ + if (!GLEW_ARB_ES3_compatibility && (elem != NULL)) { + glPrimitiveRestartIndex((elem->index_type == GPU_INDEX_U16) ? 0xFFFFu : 0xFFFFFFFFu); + } +#endif + + /* Can be removed if GL 4.2 is required. */ + if (!GPU_arb_base_instance_is_supported() && (i_first > 0)) { + glBindVertexArray(vao_cache_.base_instance_vao_get(this, i_first)); + } + else { + glBindVertexArray(vao_cache_.vao_get(this)); + } +} + void GLBatch::draw(int v_first, int v_count, int i_first, int i_count) { - UNUSED_VARS(v_first, v_count, i_first, i_count); -}
\ No newline at end of file + this->bind(i_first); + + GLenum gl_type = convert_prim_type_to_gl(prim_type); + + if (elem) { + const GPUIndexBuf *el = elem; + GLenum index_type = INDEX_TYPE(el); + GLint base_index = BASE_INDEX(el); + void *v_first_ofs = (GLuint *)0 + v_first + el->index_start; + +#if GPU_TRACK_INDEX_RANGE + if (el->index_type == GPU_INDEX_U16) { + v_first_ofs = (GLushort *)0 + v_first + el->index_start; + } +#endif + + if (GPU_arb_base_instance_is_supported()) { + glDrawElementsInstancedBaseVertexBaseInstance( + gl_type, v_count, index_type, v_first_ofs, i_count, base_index, i_first); + } + else { + glDrawElementsInstancedBaseVertex( + gl_type, v_count, index_type, v_first_ofs, i_count, base_index); + } + } + else { +#ifdef __APPLE__ + glDisable(GL_PRIMITIVE_RESTART); +#endif + if (GPU_arb_base_instance_is_supported()) { + glDrawArraysInstancedBaseInstance(gl_type, v_first, v_count, i_count, i_first); + } + else { + glDrawArraysInstanced(gl_type, v_first, v_count, i_count); + } +#ifdef __APPLE__ + glEnable(GL_PRIMITIVE_RESTART); +#endif + } +} + +/** \} */ diff --git a/source/blender/gpu/opengl/gl_batch.hh b/source/blender/gpu/opengl/gl_batch.hh index 290c113205a..490c9180a99 100644 --- a/source/blender/gpu/opengl/gl_batch.hh +++ b/source/blender/gpu/opengl/gl_batch.hh @@ -37,18 +37,29 @@ namespace blender { namespace gpu { -#define GPU_BATCH_VAO_STATIC_LEN 3 +#define GPU_VAO_STATIC_LEN 3 +/* Vao management: remembers all geometry state (vertex attribute bindings & element buffer) + * for each shader interface. Start with a static number of vaos and fallback to dynamic count + * if necessary. Once a batch goes dynamic it does not go back. */ class GLVaoCache { - /* Vao management: remembers all geometry state (vertex attribute bindings & element buffer) - * for each shader interface. Start with a static number of vaos and fallback to dynamic count - * if necessary. Once a batch goes dynamic it does not go back. */ + private: + /** Context for which the vao_cache_ was generated. */ + struct GLContext *context_ = NULL; + /** Last interface this batch was drawn with. */ + GPUShaderInterface *interface_ = NULL; + /** Cached vao for the last interface. */ + GLuint vao_id_ = 0; + /** Used whend arb_base_instance is not supported. */ + GLuint vao_base_instance_ = 0; + int base_instance_ = 0; + bool is_dynamic_vao_count = false; union { /** Static handle count */ struct { - const GPUShaderInterface *interfaces[GPU_BATCH_VAO_STATIC_LEN]; - GLuint vao_ids[GPU_BATCH_VAO_STATIC_LEN]; + const GPUShaderInterface *interfaces[GPU_VAO_STATIC_LEN]; + GLuint vao_ids[GPU_VAO_STATIC_LEN]; } static_vaos; /** Dynamic handle count */ struct { @@ -58,18 +69,27 @@ class GLVaoCache { } dynamic_vaos; }; - GLuint search(const GPUShaderInterface *interface); - void insert(GLuint vao_id, const GPUShaderInterface *interface); + public: + GLVaoCache(); + ~GLVaoCache(); + + GLuint vao_get(GPUBatch *batch); + GLuint base_instance_vao_get(GPUBatch *batch, int i_first); + + GLuint lookup(const GPUShaderInterface *interface); + void insert(const GPUShaderInterface *interface, GLuint vao_id); + void remove(const GPUShaderInterface *interface); void clear(void); - void interface_remove(const GPUShaderInterface *interface); + + private: + void init(void); + void context_check(void); }; class GLBatch : public Batch { - private: - /** Cached values (avoid dereferencing later). */ - GLuint vao_id; + public: /** All vaos corresponding to all the GPUShaderInterface this batch was drawn with. 
*/ - GLVaoCache vaos; + GLVaoCache vao_cache_; public: GLBatch(); @@ -77,6 +97,9 @@ class GLBatch : public Batch { void draw(int v_first, int v_count, int i_first, int i_count) override; + private: + void bind(int i_first); + MEM_CXX_CLASS_ALLOC_FUNCS("GLBatch"); }; diff --git a/source/blender/gpu/opengl/gl_context.cc b/source/blender/gpu/opengl/gl_context.cc index 00a10924ff6..dd413612879 100644 --- a/source/blender/gpu/opengl/gl_context.cc +++ b/source/blender/gpu/opengl/gl_context.cc @@ -63,8 +63,8 @@ GLContext::~GLContext() /* For now don't allow GPUFrameBuffers to be reuse in another context. */ BLI_assert(framebuffers_.is_empty()); /* Delete vaos so the batch can be reused in another context. */ - for (GPUBatch *batch : batches_) { - GPU_batch_vao_cache_clear(batch); + for (GLVaoCache *cache : vao_caches_) { + cache->clear(); } glDeleteVertexArrays(1, &default_vao_); glDeleteBuffers(1, &default_attr_vbo_); @@ -197,20 +197,17 @@ void GLBackend::tex_free(GLuint tex_id) * is discarded. * \{ */ -void GLContext::batch_register(struct GPUBatch *batch) +void GLContext::vao_cache_register(GLVaoCache *cache) { lists_mutex_.lock(); - batches_.add(batch); + vao_caches_.add(cache); lists_mutex_.unlock(); } -void GLContext::batch_unregister(struct GPUBatch *batch) +void GLContext::vao_cache_unregister(GLVaoCache *cache) { - /* vao_cache_clear() can acquire lists_mutex_ so avoid deadlock. */ - // reinterpret_cast<GLBatch *>(batch)->vao_cache_clear(); - lists_mutex_.lock(); - batches_.remove(batch); + vao_caches_.remove(cache); lists_mutex_.unlock(); } diff --git a/source/blender/gpu/opengl/gl_context.hh b/source/blender/gpu/opengl/gl_context.hh index 3b55965b9d1..f3ff5cb47f4 100644 --- a/source/blender/gpu/opengl/gl_context.hh +++ b/source/blender/gpu/opengl/gl_context.hh @@ -25,15 +25,16 @@ #include "gpu_context_private.hh" +#include "GPU_framebuffer.h" + #include "BLI_set.hh" #include "BLI_vector.hh" #include "glew-mx.h" -#include <iostream> +#include "gl_batch.hh" + #include <mutex> -#include <unordered_set> -#include <vector> namespace blender { namespace gpu { @@ -63,7 +64,7 @@ class GLContext : public GPUContext { * GPUBatch & GPUFramebuffer have references to the context they are from, in the case the * context is destroyed, we need to remove any reference to it. */ - Set<GPUBatch *> batches_; + Set<GLVaoCache *> vao_caches_; Set<GPUFrameBuffer *> framebuffers_; /** Mutex for the bellow structures. */ std::mutex lists_mutex_; @@ -87,8 +88,8 @@ class GLContext : public GPUContext { void vao_free(GLuint vao_id); void fbo_free(GLuint fbo_id); - void batch_register(struct GPUBatch *batch); - void batch_unregister(struct GPUBatch *batch); + void vao_cache_register(GLVaoCache *cache); + void vao_cache_unregister(GLVaoCache *cache); void framebuffer_register(struct GPUFrameBuffer *fb); void framebuffer_unregister(struct GPUFrameBuffer *fb); }; diff --git a/source/blender/gpu/opengl/gl_vertex_array.cc b/source/blender/gpu/opengl/gl_vertex_array.cc new file mode 100644 index 00000000000..907dc37e46f --- /dev/null +++ b/source/blender/gpu/opengl/gl_vertex_array.cc @@ -0,0 +1,158 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + */ + +#include "GPU_glew.h" + +#include "GPU_shader_interface.h" +#include "GPU_vertex_buffer.h" + +#include "gpu_vertex_format_private.h" + +#include "gl_batch.hh" +#include "gl_context.hh" + +#include "gl_vertex_array.hh" + +using namespace blender::gpu; + +/* -------------------------------------------------------------------- */ +/** \name Vertex Array Bindings + * \{ */ + +/* Returns enabled vertex pointers as a bitflag (one bit per attrib). */ +static uint16_t vbo_bind(const GPUShaderInterface *interface, + const GPUVertFormat *format, + uint v_first, + uint v_len, + const bool use_instancing) +{ + uint16_t enabled_attrib = 0; + const uint attr_len = format->attr_len; + uint stride = format->stride; + uint offset = 0; + GLuint divisor = (use_instancing) ? 1 : 0; + + for (uint a_idx = 0; a_idx < attr_len; a_idx++) { + const GPUVertAttr *a = &format->attrs[a_idx]; + + if (format->deinterleaved) { + offset += ((a_idx == 0) ? 0 : format->attrs[a_idx - 1].sz) * v_len; + stride = a->sz; + } + else { + offset = a->offset; + } + + const GLvoid *pointer = (const GLubyte *)0 + offset + v_first * stride; + const GLenum type = convert_comp_type_to_gl(static_cast<GPUVertCompType>(a->comp_type)); + + for (uint n_idx = 0; n_idx < a->name_len; n_idx++) { + const char *name = GPU_vertformat_attr_name_get(format, a, n_idx); + const GPUShaderInput *input = GPU_shaderinterface_attr(interface, name); + + if (input == NULL) { + continue; + } + + enabled_attrib |= (1 << input->location); + + if (a->comp_len == 16 || a->comp_len == 12 || a->comp_len == 8) { + BLI_assert(a->fetch_mode == GPU_FETCH_FLOAT); + BLI_assert(a->comp_type == GPU_COMP_F32); + for (int i = 0; i < a->comp_len / 4; i++) { + glEnableVertexAttribArray(input->location + i); + glVertexAttribDivisor(input->location + i, divisor); + glVertexAttribPointer( + input->location + i, 4, type, GL_FALSE, stride, (const GLubyte *)pointer + i * 16); + } + } + else { + glEnableVertexAttribArray(input->location); + glVertexAttribDivisor(input->location, divisor); + + switch (a->fetch_mode) { + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + glVertexAttribPointer(input->location, a->comp_len, type, GL_FALSE, stride, pointer); + break; + case GPU_FETCH_INT_TO_FLOAT_UNIT: + glVertexAttribPointer(input->location, a->comp_len, type, GL_TRUE, stride, pointer); + break; + case GPU_FETCH_INT: + glVertexAttribIPointer(input->location, a->comp_len, type, stride, pointer); + break; + } + } + } + } + return enabled_attrib; +} + +/* Update the Attrib Binding of the currently bound VAO. */ +void GLVertArray::update_bindings(const GLuint vao, + const GPUBatch *batch, + const GPUShaderInterface *interface, + const int base_instance) +{ + uint16_t attr_mask = interface->enabled_attr_mask; + + glBindVertexArray(vao); + + /* Reverse order so first VBO'S have more prevalence (in term of attribute override). 
*/ + for (int v = GPU_BATCH_VBO_MAX_LEN - 1; v > -1; v--) { + GPUVertBuf *vbo = batch->verts[v]; + if (vbo) { + GPU_vertbuf_use(vbo); + attr_mask &= ~vbo_bind(interface, &vbo->format, 0, vbo->vertex_len, false); + } + } + + for (int v = GPU_BATCH_INST_VBO_MAX_LEN - 1; v > -1; v--) { + GPUVertBuf *vbo = batch->inst[v]; + if (vbo) { + GPU_vertbuf_use(vbo); + attr_mask &= ~vbo_bind(interface, &vbo->format, base_instance, vbo->vertex_len, true); + } + } + + if (attr_mask != 0 && GLEW_ARB_vertex_attrib_binding) { + for (uint16_t mask = 1, a = 0; a < 16; a++, mask <<= 1) { + if (attr_mask & mask) { + GLContext *ctx = static_cast<GLContext *>(GPU_context_active_get()); + /* This replaces glVertexAttrib4f(a, 0.0f, 0.0f, 0.0f, 1.0f); with a more modern style. + * Fix issues for some drivers (see T75069). */ + glBindVertexBuffer(a, ctx->default_attr_vbo_, (intptr_t)0, (intptr_t)0); + glEnableVertexAttribArray(a); + glVertexAttribFormat(a, 4, GL_FLOAT, GL_FALSE, 0); + glVertexAttribBinding(a, a); + } + } + } + + if (batch->elem) { + /* Binds the index buffer. This state is also saved in the VAO. */ + GPU_indexbuf_use(batch->elem); + } +} + +/** \} */ diff --git a/source/blender/gpu/opengl/gl_vertex_array.hh b/source/blender/gpu/opengl/gl_vertex_array.hh new file mode 100644 index 00000000000..6da414d7e62 --- /dev/null +++ b/source/blender/gpu/opengl/gl_vertex_array.hh @@ -0,0 +1,44 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2020 Blender Foundation. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include "glew-mx.h" + +#include "GPU_batch.h" +#include "GPU_shader_interface.h" + +namespace blender { +namespace gpu { + +namespace GLVertArray { + +void update_bindings(const GLuint vao, + const GPUBatch *batch, + const GPUShaderInterface *interface, + const int base_instance); + +} // namespace GLVertArray + +} // namespace gpu +} // namespace blender |
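Note (not part of the commit): the following is a minimal standalone sketch of the static-to-dynamic growth policy implemented by the new GLVaoCache::lookup()/insert() pair added in gl_batch.cc. GL handles and GPUShaderInterface pointers are replaced by plain integers, and the VaoCacheModel type, its member names other than lookup/insert, and the main() harness are illustrative only. The real cache also unregisters and frees its previous static entries when it switches to dynamic storage; that step is elided here for brevity.

// Illustrative sketch only: models the VAO cache lookup/insert growth policy.
#include <cstdio>
#include <vector>

struct VaoCacheModel {
  static constexpr int STATIC_LEN = 3; /* mirrors GPU_VAO_STATIC_LEN */
  /* interface "pointer" -> vao id; 0 means empty slot. */
  std::vector<int> interfaces = std::vector<int>(STATIC_LEN, 0);
  std::vector<int> vao_ids = std::vector<int>(STATIC_LEN, 0);
  bool is_dynamic = false;

  /* Return 0 on cache miss, like GLVaoCache::lookup(). */
  int lookup(int interface) const
  {
    for (size_t i = 0; i < interfaces.size(); i++) {
      if (interfaces[i] == interface) {
        return vao_ids[i];
      }
    }
    return 0;
  }

  /* Store a freshly created VAO for the given interface. */
  void insert(int interface, int vao)
  {
    for (size_t i = 0; i < interfaces.size(); i++) {
      if (vao_ids[i] == 0) {
        interfaces[i] = interface;
        vao_ids[i] = vao;
        return;
      }
    }
    /* Static storage exhausted: switch to a growing array. The real cache
     * additionally drops its previous entries at this point. */
    is_dynamic = true;
    interfaces.push_back(interface);
    vao_ids.push_back(vao);
  }
};

int main()
{
  VaoCacheModel cache;
  /* Draw the same batch with five different shader interfaces. */
  for (int interface = 1; interface <= 5; interface++) {
    if (cache.lookup(interface) == 0) {
      cache.insert(interface, 100 + interface); /* pretend VAO id */
    }
  }
  std::printf("dynamic: %d, entries: %zu\n", cache.is_dynamic, cache.interfaces.size());
  return 0;
}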