Merge branch 'master' into greasepencil-edit-curve

author: Antonio Vazquez <blendergit@gmail.com> 2020-08-13 17:57:42 +0300
committer: Antonio Vazquez <blendergit@gmail.com> 2020-08-13 17:57:42 +0300
commit: 735c717a63c8870d2ef4a910d82a2648cbaaa5e1 (patch)
tree: 4cff1cfe01053b8cc188cc69e1c2c60946fe37cc /source/blender/gpu
parent: cba7391d4a42a44efeddae3ff717e542a3c73738 (diff)
parent: 53683dec7d9ac9f324ff91904c9f80b8018b9b9c (diff)
30 files changed, 1560 insertions, 996 deletions
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt
index 80ea28aca3c..906ae31fbc7 100644
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -63,6 +63,7 @@ set(SRC
   intern/gpu_codegen.c
   intern/gpu_context.cc
   intern/gpu_debug.cc
+  intern/gpu_drawlist.cc
   intern/gpu_element.cc
   intern/gpu_extensions.cc
   intern/gpu_framebuffer.cc
@@ -88,7 +89,10 @@ set(SRC
   intern/gpu_vertex_format.cc
   intern/gpu_viewport.c
 
+  opengl/gl_batch.cc
   opengl/gl_context.cc
+  opengl/gl_drawlist.cc
+  opengl/gl_vertex_array.cc
 
   GPU_attr_binding.h
   GPU_batch.h
@@ -98,6 +102,7 @@ set(SRC
   GPU_common.h
   GPU_context.h
   GPU_debug.h
+  GPU_drawlist.h
   GPU_element.h
   GPU_extensions.h
   GPU_framebuffer.h
@@ -122,9 +127,10 @@ set(SRC
 
   intern/gpu_attr_binding_private.h
   intern/gpu_backend.hh
-  intern/gpu_batch_private.h
+  intern/gpu_batch_private.hh
   intern/gpu_codegen.h
   intern/gpu_context_private.hh
+  intern/gpu_drawlist_private.hh
   intern/gpu_material_library.h
   intern/gpu_matrix_private.h
   intern/gpu_node_graph.h
@@ -135,7 +141,10 @@ set(SRC
   intern/gpu_vertex_format_private.h
 
   opengl/gl_backend.hh
+  opengl/gl_batch.hh
   opengl/gl_context.hh
+  opengl/gl_drawlist.hh
+  opengl/gl_vertex_array.hh
 )
 
 set(LIB
diff --git a/source/blender/gpu/GPU_batch.h b/source/blender/gpu/GPU_batch.h
index 855214c279c..d57739156f8 100644
--- a/source/blender/gpu/GPU_batch.h
+++ b/source/blender/gpu/GPU_batch.h
@@ -26,85 +26,82 @@
 
 #pragma once
 
+#include "BLI_utildefines.h"
+
 #include "GPU_element.h"
 #include "GPU_shader.h"
-#include "GPU_shader_interface.h"
 #include "GPU_vertex_buffer.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef enum {
-  GPU_BATCH_UNUSED,
-  GPU_BATCH_READY_TO_FORMAT,
-  GPU_BATCH_READY_TO_BUILD,
-  GPU_BATCH_BUILDING,
-  GPU_BATCH_READY_TO_DRAW,
-} GPUBatchPhase;
-
 #define GPU_BATCH_VBO_MAX_LEN 6
 #define GPU_BATCH_INST_VBO_MAX_LEN 2
 #define GPU_BATCH_VAO_STATIC_LEN 3
 #define GPU_BATCH_VAO_DYN_ALLOC_COUNT 16
 
-typedef struct GPUBatch {
-  /* geometry */
+typedef enum eGPUBatchFlag {
+  /** Invalid default state. */
+  GPU_BATCH_INVALID = 0,
+
+  /** GPUVertBuf ownership. (One bit per vbo) */
+  GPU_BATCH_OWNS_VBO = (1 << 0),
+  GPU_BATCH_OWNS_VBO_MAX = (GPU_BATCH_OWNS_VBO << (GPU_BATCH_VBO_MAX_LEN - 1)),
+  GPU_BATCH_OWNS_VBO_ANY = ((GPU_BATCH_OWNS_VBO << GPU_BATCH_VBO_MAX_LEN) - 1),
+  /** Instance GPUVertBuf ownership. (One bit per vbo) */
+  GPU_BATCH_OWNS_INST_VBO = (GPU_BATCH_OWNS_VBO_MAX << 1),
+  GPU_BATCH_OWNS_INST_VBO_MAX = (GPU_BATCH_OWNS_INST_VBO << (GPU_BATCH_INST_VBO_MAX_LEN - 1)),
+  GPU_BATCH_OWNS_INST_VBO_ANY = ((GPU_BATCH_OWNS_INST_VBO << GPU_BATCH_INST_VBO_MAX_LEN) - 1) &
+                                ~GPU_BATCH_OWNS_VBO_ANY,
+  /** GPUIndexBuf ownership. */
+  GPU_BATCH_OWNS_INDEX = (GPU_BATCH_OWNS_INST_VBO_MAX << 1),
+
+  /** Has been initialized. At least one VBO is set. */
+  GPU_BATCH_INIT = (1 << 16),
+  /** Batch is initialized but its VBOs are still being populated. (optional) */
+  GPU_BATCH_BUILDING = (1 << 16),
+  /** Cached data needs to be rebuilt. (VAO, PSO, ...) */
+  GPU_BATCH_DIRTY = (1 << 17),
+} eGPUBatchFlag;
+
+#define GPU_BATCH_OWNS_NONE GPU_BATCH_INVALID
+
+BLI_STATIC_ASSERT(GPU_BATCH_OWNS_INDEX < GPU_BATCH_INIT,
+                  "eGPUBatchFlag: Error: status flags are shadowed by the ownership bits!")
+
+ENUM_OPERATORS(eGPUBatchFlag)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
 
+/**
+ * IMPORTANT: Do not allocate manually as the real struct is bigger (i.e: GLBatch). This is only
+ * the common and "public" part of the struct. Use the provided allocator.
+ * TODO(fclem) Make the content of this struct hidden and expose getters/setters.
+ **/
+typedef struct GPUBatch {
   /** verts[0] is required, others can be NULL */
   GPUVertBuf *verts[GPU_BATCH_VBO_MAX_LEN];
   /** Instance attributes. */
   GPUVertBuf *inst[GPU_BATCH_INST_VBO_MAX_LEN];
   /** NULL if element list not needed */
   GPUIndexBuf *elem;
-  uint32_t gl_prim_type;
-
-  /* cached values (avoid dereferencing later) */
-  uint32_t vao_id;
-  uint32_t program;
-  const struct GPUShaderInterface *interface;
-
-  /* book-keeping */
-  uint owns_flag;
-  /** used to free all vaos. this implies all vaos were created under the same context. */
-  struct GPUContext *context;
-  GPUBatchPhase phase;
-  bool program_in_use;
-
-  /* Vao management: remembers all geometry state (vertex attribute bindings & element buffer)
-   * for each shader interface. Start with a static number of vaos and fallback to dynamic count
-   * if necessary. Once a batch goes dynamic it does not go back. */
-  bool is_dynamic_vao_count;
-  union {
-    /** Static handle count */
-    struct {
-      const struct GPUShaderInterface *interfaces[GPU_BATCH_VAO_STATIC_LEN];
-      uint32_t vao_ids[GPU_BATCH_VAO_STATIC_LEN];
-    } static_vaos;
-    /** Dynamic handle count */
-    struct {
-      uint count;
-      const struct GPUShaderInterface **interfaces;
-      uint32_t *vao_ids;
-    } dynamic_vaos;
-  };
-
-  /* XXX This is the only solution if we want to have some data structure using
-   * batches as key to identify nodes. We must destroy these nodes with this callback. */
-  void (*free_callback)(struct GPUBatch *, void *);
-  void *callback_data;
+  /** Bookkeeping. */
+  eGPUBatchFlag flag;
+  /** Type of geometry to draw. */
+  GPUPrimType prim_type;
+  /** Current assigned shader. DEPRECATED. Here only for uniform binding. */
+  struct GPUShader *shader;
 } GPUBatch;
 
-enum {
-  GPU_BATCH_OWNS_VBO = (1 << 0),
-  /* each vbo index gets bit-shifted */
-  GPU_BATCH_OWNS_INSTANCES = (1 << 30),
-  GPU_BATCH_OWNS_INDEX = (1u << 31u),
-};
-
-GPUBatch *GPU_batch_calloc(uint count);
-GPUBatch *GPU_batch_create_ex(GPUPrimType, GPUVertBuf *, GPUIndexBuf *, uint owns_flag);
-void GPU_batch_init_ex(GPUBatch *, GPUPrimType, GPUVertBuf *, GPUIndexBuf *, uint owns_flag);
+GPUBatch *GPU_batch_calloc(void);
+GPUBatch *GPU_batch_create_ex(GPUPrimType prim,
+                              GPUVertBuf *vert,
+                              GPUIndexBuf *elem,
+                              eGPUBatchFlag own_flag);
+void GPU_batch_init_ex(GPUBatch *batch,
+                       GPUPrimType prim,
+                       GPUVertBuf *vert,
+                       GPUIndexBuf *elem,
+                       eGPUBatchFlag own_flag);
 void GPU_batch_copy(GPUBatch *batch_dst, GPUBatch *batch_src);
 
 #define GPU_batch_create(prim, verts, elem) GPU_batch_create_ex(prim, verts, elem, 0)
@@ -115,10 +112,6 @@ void GPU_batch_clear(GPUBatch *);
 
 void GPU_batch_discard(GPUBatch *); /* verts & elem are not discarded */
 
-void GPU_batch_vao_cache_clear(GPUBatch *);
-
-void GPU_batch_callback_free_set(GPUBatch *, void (*callback)(GPUBatch *, void *), void *);
-
 void GPU_batch_instbuf_set(GPUBatch *, GPUVertBuf *, bool own_vbo); /* Instancing */
 void GPU_batch_elembuf_set(GPUBatch *batch, GPUIndexBuf *elem, bool own_ibo);
 
@@ -128,19 +121,13 @@ int GPU_batch_vertbuf_add_ex(GPUBatch *, GPUVertBuf *, bool own_vbo);
 #define GPU_batch_vertbuf_add(batch, verts) GPU_batch_vertbuf_add_ex(batch, verts, false)
 
 void GPU_batch_set_shader(GPUBatch *batch, GPUShader *shader);
-void GPU_batch_set_shader_no_bind(GPUBatch *batch, GPUShader *shader);
 void GPU_batch_program_set_imm_shader(GPUBatch *batch);
 void GPU_batch_program_set_builtin(GPUBatch *batch, eGPUBuiltinShader shader_id);
 void GPU_batch_program_set_builtin_with_config(GPUBatch *batch,
                                                eGPUBuiltinShader shader_id,
                                                eGPUShaderConfig sh_cfg);
-/* Entire batch draws with one shader program, but can be redrawn later with another program. */
-/* Vertex shader's inputs must be compatible with the batch's vertex format. */
-
-void GPU_batch_program_use_begin(GPUBatch *); /* call before Batch_Uniform (temp hack?) */
-void GPU_batch_program_use_end(GPUBatch *);
 
-void GPU_batch_uniform_1ui(GPUBatch *, const char *name, uint value);
+/* Will only work after setting the batch program. */
 void GPU_batch_uniform_1i(GPUBatch *, const char *name, int value);
 void GPU_batch_uniform_1b(GPUBatch *, const char *name, bool value);
 void GPU_batch_uniform_1f(GPUBatch *, const char *name, float value);
@@ -154,10 +141,10 @@ void GPU_batch_uniform_2fv_array(GPUBatch *, const char *name, const int len, co
 void GPU_batch_uniform_4fv_array(GPUBatch *, const char *name, const int len, const float *data);
 void GPU_batch_uniform_mat4(GPUBatch *, const char *name, const float data[4][4]);
 
-void GPU_batch_draw(GPUBatch *);
+void GPU_batch_draw(GPUBatch *batch);
+void GPU_batch_draw_range(GPUBatch *batch, int v_first, int v_count);
+void GPU_batch_draw_instanced(GPUBatch *batch, int i_count);
 
-/* Needs to be called before GPU_batch_draw_advanced. */
-void GPU_batch_bind(GPUBatch *);
 /* This does not bind/unbind shader and does not call GPU_matrix_bind() */
 void GPU_batch_draw_advanced(GPUBatch *, int v_first, int v_count, int i_first, int i_count);
 
@@ -199,19 +186,6 @@ GPUBatch *create_BatchInGeneral(GPUPrimType, VertexBufferStuff, ElementListStuff
 
 #endif /* future plans */
 
-/**
- * #GPUDrawList is an API to do lots of similar draw-calls very fast using multi-draw-indirect.
- * There is a fallback if the feature is not supported.
- */
-typedef struct GPUDrawList GPUDrawList;
-
-GPUDrawList *GPU_draw_list_create(int length);
-void GPU_draw_list_discard(GPUDrawList *list);
-void GPU_draw_list_init(GPUDrawList *list, GPUBatch *batch);
-void GPU_draw_list_command_add(
-    GPUDrawList *list, int v_first, int v_count, int i_first, int i_count);
-void GPU_draw_list_submit(GPUDrawList *list);
-
 void gpu_batch_init(void);
 void gpu_batch_exit(void);
 
diff --git a/source/blender/gpu/GPU_batch_presets.h b/source/blender/gpu/GPU_batch_presets.h
index 1674cf776db..7a235dd0e12 100644
--- a/source/blender/gpu/GPU_batch_presets.h
+++ b/source/blender/gpu/GPU_batch_presets.h
@@ -46,11 +46,8 @@ struct GPUBatch *GPU_batch_preset_panel_drag_widget(const float pixelsize,
 void gpu_batch_presets_init(void);
 void gpu_batch_presets_register(struct GPUBatch *preset_batch);
 bool gpu_batch_presets_unregister(struct GPUBatch *preset_batch);
-void gpu_batch_presets_reset(void);
 void gpu_batch_presets_exit(void);
 
-void GPU_batch_presets_reset(void);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/source/blender/gpu/GPU_drawlist.h b/source/blender/gpu/GPU_drawlist.h
new file mode 100644
index 00000000000..27f70da8cf8
--- /dev/null
+++ b/source/blender/gpu/GPU_drawlist.h
@@ -0,0 +1,46 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2020 Blender Foundation.
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup gpu
+ *
+ * GPUDrawList is an API to do lots of similar draw-calls very fast using
+ * multi-draw-indirect. There is a fallback if the feature is not supported.
+ */
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct GPUBatch;
+
+typedef void *GPUDrawList; /* Opaque pointer. */
+
+/* Create a list with at least `length` draw-calls. Length can affect performance. */
+GPUDrawList GPU_draw_list_create(int length);
+void GPU_draw_list_discard(GPUDrawList list);
+
+void GPU_draw_list_append(GPUDrawList list, GPUBatch *batch, int i_first, int i_count);
+void GPU_draw_list_submit(GPUDrawList list);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/source/blender/gpu/GPU_element.h b/source/blender/gpu/GPU_element.h
index 3d5195b12fc..5cf85b4ea0e 100644
--- a/source/blender/gpu/GPU_element.h
+++ b/source/blender/gpu/GPU_element.h
@@ -54,6 +54,8 @@ typedef struct GPUIndexBuf {
   };
 } GPUIndexBuf;
 
+GPUIndexBuf *GPU_indexbuf_calloc(void);
+
 void GPU_indexbuf_use(GPUIndexBuf *);
 uint GPU_indexbuf_size_get(const GPUIndexBuf *);
 
diff --git a/source/blender/gpu/GPU_shader.h b/source/blender/gpu/GPU_shader.h
index f782742ae53..0b9109fbd4b 100644
--- a/source/blender/gpu/GPU_shader.h
+++ b/source/blender/gpu/GPU_shader.h
@@ -104,6 +104,19 @@ void GPU_shader_uniform_vector_int(
 void GPU_shader_uniform_float(GPUShader *shader, int location, float value);
 void GPU_shader_uniform_int(GPUShader *shader, int location, int value);
 
+void GPU_shader_uniform_1i(GPUShader *sh, const char *name, int value);
+void GPU_shader_uniform_1b(GPUShader *sh, const char *name, bool value);
+void GPU_shader_uniform_1f(GPUShader *sh, const char *name, float value);
+void GPU_shader_uniform_2f(GPUShader *sh, const char *name, float x, float y);
+void GPU_shader_uniform_3f(GPUShader *sh, const char *name, float x, float y, float z);
+void GPU_shader_uniform_4f(GPUShader *sh, const char *name, float x, float y, float z, float w);
+void GPU_shader_uniform_2fv(GPUShader *sh, const char *name, const float data[2]);
+void GPU_shader_uniform_3fv(GPUShader *sh, const char *name, const float data[3]);
+void GPU_shader_uniform_4fv(GPUShader *sh, const char *name, const float data[4]);
+void GPU_shader_uniform_mat4(GPUShader *sh, const char *name, const float data[4][4]);
+void GPU_shader_uniform_2fv_array(GPUShader *sh, const char *name, int len, const float (*val)[2]);
+void GPU_shader_uniform_4fv_array(GPUShader *sh, const char *name, int len, const float (*val)[4]);
+
 int GPU_shader_get_attribute(GPUShader *shader, const char *name);
 
 char *GPU_shader_get_binary(GPUShader *shader, uint *r_binary_format, int *r_binary_len);
diff --git a/source/blender/gpu/GPU_shader_interface.h b/source/blender/gpu/GPU_shader_interface.h
index 8aba1236b65..47e4e432d66 100644
--- a/source/blender/gpu/GPU_shader_interface.h
+++ b/source/blender/gpu/GPU_shader_interface.h
@@ -80,7 +80,7 @@ typedef struct GPUShaderInterface {
   /** Buffer containing all inputs names separated by '\0'. */
   char *name_buffer;
   /** Reference to GPUBatches using this interface */
-  struct GPUBatch **batches;
+  void **batches;
   uint batches_len;
   /** Input counts. */
   uint attribute_len;
@@ -109,8 +109,8 @@ const GPUShaderInput *GPU_shaderinterface_ubo(const GPUShaderInterface *, const
 const GPUShaderInput *GPU_shaderinterface_attr(const GPUShaderInterface *, const char *name);
 
 /* keep track of batches using this interface */
-void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *, struct GPUBatch *);
-void GPU_shaderinterface_remove_batch_ref(GPUShaderInterface *, struct GPUBatch *);
+void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *interface, void *cache);
+void GPU_shaderinterface_remove_batch_ref(GPUShaderInterface *interface, void *cache);
 
 #ifdef __cplusplus
 }
diff --git a/source/blender/gpu/GPU_vertex_buffer.h b/source/blender/gpu/GPU_vertex_buffer.h
index 757255496e0..bd1019bb1f5 100644
--- a/source/blender/gpu/GPU_vertex_buffer.h
+++ b/source/blender/gpu/GPU_vertex_buffer.h
@@ -59,6 +59,8 @@ typedef struct GPUVertBuf {
   uint32_t vbo_id;
   /** Usage hint for GL optimisation. */
   GPUUsageType usage;
+  /** This counter will only avoid freeing the GPUVertBuf, not the data. */
+  char handle_refcount;
   /** Data has been touched and need to be reuploaded to GPU. */
   bool dirty;
   uchar *data; /* NULL indicates data in VRAM (unmapped) */
@@ -73,6 +75,10 @@ GPUVertBuf *GPU_vertbuf_create_with_format_ex(const GPUVertFormat *, GPUUsageTyp
 void GPU_vertbuf_clear(GPUVertBuf *verts);
 void GPU_vertbuf_discard(GPUVertBuf *);
 
+/* Prevent the GPUVertBuf datablock from being freed, but not its data. */
+void GPU_vertbuf_handle_ref_add(GPUVertBuf *verts);
+void GPU_vertbuf_handle_ref_remove(GPUVertBuf *verts);
+
 void GPU_vertbuf_init(GPUVertBuf *, GPUUsageType);
 void GPU_vertbuf_init_with_format_ex(GPUVertBuf *, const GPUVertFormat *, GPUUsageType);
 
diff --git a/source/blender/gpu/intern/gpu_backend.hh b/source/blender/gpu/intern/gpu_backend.hh
index 24f592f214f..ba382e3c3fc 100644
--- a/source/blender/gpu/intern/gpu_backend.hh
+++ b/source/blender/gpu/intern/gpu_backend.hh
@@ -25,13 +25,27 @@
 
 #pragma once
 
-struct GPUContext;
+#include "gpu_context_private.hh"
+#include "gpu_drawlist_private.hh"
+#include "gpu_batch_private.hh"
+
+namespace blender {
+namespace gpu {
 
 class GPUBackend {
  public:
   virtual ~GPUBackend(){};
 
+  static GPUBackend *get(void);
+
   virtual GPUContext *context_alloc(void *ghost_window) = 0;
+
+  virtual Batch *batch_alloc(void) = 0;
+  virtual DrawList *drawlist_alloc(int list_length) = 0;
+  // virtual FrameBuffer *framebuffer_alloc(void) = 0;
+  // virtual Shader *shader_alloc(void) = 0;
+  // virtual Texture *texture_alloc(void) = 0;
 };
 
-GPUBackend *gpu_backend_get(void);
+}  // namespace gpu
+}  // namespace blender
diff --git a/source/blender/gpu/intern/gpu_batch.cc b/source/blender/gpu/intern/gpu_batch.cc
index a6ba4d3d89a..7b006bdc6c2 100644
--- a/source/blender/gpu/intern/gpu_batch.cc
+++ b/source/blender/gpu/intern/gpu_batch.cc
@@ -26,6 +26,8 @@
 
 #include "MEM_guardedalloc.h"
 
+#include "BLI_math_base.h"
+
 #include "GPU_batch.h"
 #include "GPU_batch_presets.h"
 #include "GPU_extensions.h"
@@ -33,7 +35,8 @@
 #include "GPU_platform.h"
 #include "GPU_shader.h"
 
-#include "gpu_batch_private.h"
+#include "gpu_backend.hh"
+#include "gpu_batch_private.hh"
 #include "gpu_context_private.hh"
 #include "gpu_primitive_private.h"
 #include "gpu_shader_private.h"
@@ -43,69 +46,38 @@
 #include <stdlib.h>
 #include <string.h>
 
-static GLuint g_default_attr_vbo = 0;
-
-static void batch_update_program_bindings(GPUBatch *batch, uint i_first);
+using namespace blender::gpu;
 
-void GPU_batch_vao_cache_clear(GPUBatch *batch)
-{
-  if (batch->context == NULL) {
-    return;
-  }
-  if (batch->is_dynamic_vao_count) {
-    for (int i = 0; i < batch->dynamic_vaos.count; i++) {
-      if (batch->dynamic_vaos.vao_ids[i]) {
-        GPU_vao_free(batch->dynamic_vaos.vao_ids[i], batch->context);
-      }
-      if (batch->dynamic_vaos.interfaces[i]) {
-        GPU_shaderinterface_remove_batch_ref(
-            (GPUShaderInterface *)batch->dynamic_vaos.interfaces[i], batch);
-      }
-    }
-    MEM_freeN((void *)batch->dynamic_vaos.interfaces);
-    MEM_freeN(batch->dynamic_vaos.vao_ids);
-  }
-  else {
-    for (int i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) {
-      if (batch->static_vaos.vao_ids[i]) {
-        GPU_vao_free(batch->static_vaos.vao_ids[i], batch->context);
-      }
-      if (batch->static_vaos.interfaces[i]) {
-        GPU_shaderinterface_remove_batch_ref(
-            (GPUShaderInterface *)batch->static_vaos.interfaces[i], batch);
-      }
-    }
-  }
-  batch->is_dynamic_vao_count = false;
-  for (int i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) {
-    batch->static_vaos.vao_ids[i] = 0;
-    batch->static_vaos.interfaces[i] = NULL;
-  }
-  gpu_context_remove_batch(batch->context, batch);
-  batch->context = NULL;
-}
+/* -------------------------------------------------------------------- */
+/** \name Creation & Deletion
+ * \{ */
 
-GPUBatch *GPU_batch_calloc(uint count)
+GPUBatch *GPU_batch_calloc(void)
 {
-  return (GPUBatch *)MEM_callocN(sizeof(GPUBatch) * count, "GPUBatch");
+  GPUBatch *batch = GPUBackend::get()->batch_alloc();
+  memset(batch, 0, sizeof(*batch));
+  return batch;
 }
 
 GPUBatch *GPU_batch_create_ex(GPUPrimType prim_type,
                               GPUVertBuf *verts,
                               GPUIndexBuf *elem,
-                              uint owns_flag)
+                              eGPUBatchFlag owns_flag)
 {
-  GPUBatch *batch = GPU_batch_calloc(1);
+  GPUBatch *batch = GPU_batch_calloc();
   GPU_batch_init_ex(batch, prim_type, verts, elem, owns_flag);
   return batch;
 }
 
-void GPU_batch_init_ex(
-    GPUBatch *batch, GPUPrimType prim_type, GPUVertBuf *verts, GPUIndexBuf *elem, uint owns_flag)
+void GPU_batch_init_ex(GPUBatch *batch,
+                       GPUPrimType prim_type,
+                       GPUVertBuf *verts,
+                       GPUIndexBuf *elem,
+                       eGPUBatchFlag owns_flag)
 {
-#if TRUST_NO_ONE
-  assert(verts != NULL);
-#endif
+  BLI_assert(verts != NULL);
+  /* Do not pass any other flag */
+  BLI_assert((owns_flag & ~(GPU_BATCH_OWNS_VBO | GPU_BATCH_OWNS_INDEX)) == 0);
 
   batch->verts[0] = verts;
   for (int v = 1; v < GPU_BATCH_VBO_MAX_LEN; v++) {
@@ -115,19 +87,18 @@ void GPU_batch_init_ex(
     batch->inst[v] = NULL;
   }
   batch->elem = elem;
-  batch->gl_prim_type = convert_prim_type_to_gl(prim_type);
-  batch->phase = GPU_BATCH_READY_TO_DRAW;
-  batch->is_dynamic_vao_count = false;
-  batch->owns_flag = owns_flag;
-  batch->free_callback = NULL;
+  batch->prim_type = prim_type;
+  batch->flag = owns_flag | GPU_BATCH_INIT | GPU_BATCH_DIRTY;
+  batch->shader = NULL;
 }
 
 /* This will share the VBOs with the new batch. */
 void GPU_batch_copy(GPUBatch *batch_dst, GPUBatch *batch_src)
 {
-  GPU_batch_init_ex(batch_dst, GPU_PRIM_POINTS, batch_src->verts[0], batch_src->elem, 0);
+  GPU_batch_init_ex(
+      batch_dst, GPU_PRIM_POINTS, batch_src->verts[0], batch_src->elem, GPU_BATCH_INVALID);
 
-  batch_dst->gl_prim_type = batch_src->gl_prim_type;
+  batch_dst->prim_type = batch_src->prim_type;
   for (int v = 1; v < GPU_BATCH_VBO_MAX_LEN; v++) {
     batch_dst->verts[v] = batch_src->verts[v];
   }
@@ -135,484 +106,182 @@ void GPU_batch_copy(GPUBatch *batch_dst, GPUBatch *batch_src)
 
 void GPU_batch_clear(GPUBatch *batch)
 {
-  if (batch->owns_flag & GPU_BATCH_OWNS_INDEX) {
+  if (batch->flag & GPU_BATCH_OWNS_INDEX) {
     GPU_indexbuf_discard(batch->elem);
   }
-  if (batch->owns_flag & GPU_BATCH_OWNS_INSTANCES) {
-    GPU_vertbuf_discard(batch->inst[0]);
-    GPU_VERTBUF_DISCARD_SAFE(batch->inst[1]);
-  }
-  if ((batch->owns_flag & ~GPU_BATCH_OWNS_INDEX) != 0) {
-    for (int v = 0; v < GPU_BATCH_VBO_MAX_LEN; v++) {
-      if (batch->verts[v] == NULL) {
-        break;
+  if (batch->flag & GPU_BATCH_OWNS_VBO_ANY) {
+    for (int v = 0; (v < GPU_BATCH_VBO_MAX_LEN) && batch->verts[v]; v++) {
+      if (batch->flag & (GPU_BATCH_OWNS_VBO << v)) {
+        GPU_VERTBUF_DISCARD_SAFE(batch->verts[v]);
       }
-      if (batch->owns_flag & (1 << v)) {
-        GPU_vertbuf_discard(batch->verts[v]);
+    }
+  }
+  if (batch->flag & GPU_BATCH_OWNS_INST_VBO_ANY) {
+    for (int v = 0; (v < GPU_BATCH_INST_VBO_MAX_LEN) && batch->inst[v]; v++) {
+      if (batch->flag & (GPU_BATCH_OWNS_INST_VBO << v)) {
+        GPU_VERTBUF_DISCARD_SAFE(batch->inst[v]);
       }
     }
   }
-  GPU_batch_vao_cache_clear(batch);
-  batch->phase = GPU_BATCH_UNUSED;
+  batch->flag = GPU_BATCH_INVALID;
 }
 
 void GPU_batch_discard(GPUBatch *batch)
 {
-  if (batch->free_callback) {
-    batch->free_callback(batch, batch->callback_data);
-  }
-
   GPU_batch_clear(batch);
-  MEM_freeN(batch);
-}
 
-void GPU_batch_callback_free_set(GPUBatch *batch,
-                                 void (*callback)(GPUBatch *, void *),
-                                 void *user_data)
-{
-  batch->free_callback = callback;
-  batch->callback_data = user_data;
+  delete static_cast<Batch *>(batch);
 }
 
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Buffers Management
+ * \{ */
+
+/* NOTE: Override ONLY the first instance vbo (and free it if owned). */
 void GPU_batch_instbuf_set(GPUBatch *batch, GPUVertBuf *inst, bool own_vbo)
 {
-#if TRUST_NO_ONE
-  assert(inst != NULL);
-#endif
-  /* redo the bindings */
-  GPU_batch_vao_cache_clear(batch);
+  BLI_assert(inst);
+  batch->flag |= GPU_BATCH_DIRTY;
 
-  if (batch->inst[0] != NULL && (batch->owns_flag & GPU_BATCH_OWNS_INSTANCES)) {
+  if (batch->inst[0] && (batch->flag & GPU_BATCH_OWNS_INST_VBO)) {
     GPU_vertbuf_discard(batch->inst[0]);
-    GPU_VERTBUF_DISCARD_SAFE(batch->inst[1]);
   }
   batch->inst[0] = inst;
 
-  if (own_vbo) {
-    batch->owns_flag |= GPU_BATCH_OWNS_INSTANCES;
-  }
-  else {
-    batch->owns_flag &= ~GPU_BATCH_OWNS_INSTANCES;
-  }
+  SET_FLAG_FROM_TEST(batch->flag, own_vbo, GPU_BATCH_OWNS_INST_VBO);
 }
 
+/* NOTE: Override any previously assigned elem (and free it if owned). */
 void GPU_batch_elembuf_set(GPUBatch *batch, GPUIndexBuf *elem, bool own_ibo)
 {
-  BLI_assert(elem != NULL);
-  /* redo the bindings */
-  GPU_batch_vao_cache_clear(batch);
+  BLI_assert(elem);
+  batch->flag |= GPU_BATCH_DIRTY;
 
-  if (batch->elem != NULL && (batch->owns_flag & GPU_BATCH_OWNS_INDEX)) {
+  if (batch->elem && (batch->flag & GPU_BATCH_OWNS_INDEX)) {
     GPU_indexbuf_discard(batch->elem);
   }
   batch->elem = elem;
 
-  if (own_ibo) {
-    batch->owns_flag |= GPU_BATCH_OWNS_INDEX;
-  }
-  else {
-    batch->owns_flag &= ~GPU_BATCH_OWNS_INDEX;
-  }
+  SET_FLAG_FROM_TEST(batch->flag, own_ibo, GPU_BATCH_OWNS_INDEX);
 }
 
-/* A bit of a quick hack. Should be streamlined as the vbos handling */
 int GPU_batch_instbuf_add_ex(GPUBatch *batch, GPUVertBuf *insts, bool own_vbo)
 {
-  /* redo the bindings */
-  GPU_batch_vao_cache_clear(batch);
+  BLI_assert(insts);
+  batch->flag |= GPU_BATCH_DIRTY;
 
   for (uint v = 0; v < GPU_BATCH_INST_VBO_MAX_LEN; v++) {
     if (batch->inst[v] == NULL) {
-#if TRUST_NO_ONE
       /* for now all VertexBuffers must have same vertex_len */
-      if (batch->inst[0] != NULL) {
-        /* Allow for different size of vertex buf (will choose the smallest number of verts). */
-        // assert(insts->vertex_len == batch->inst[0]->vertex_len);
-        assert(own_vbo == ((batch->owns_flag & GPU_BATCH_OWNS_INSTANCES) != 0));
+      if (batch->inst[0]) {
+        /* Allow for different size of vertex buf (will choose the smallest
+         * number of verts). */
+        // BLI_assert(insts->vertex_len == batch->inst[0]->vertex_len);
       }
-#endif
+
       batch->inst[v] = insts;
-      if (own_vbo) {
-        batch->owns_flag |= GPU_BATCH_OWNS_INSTANCES;
-      }
+      SET_FLAG_FROM_TEST(batch->flag, own_vbo, (eGPUBatchFlag)(GPU_BATCH_OWNS_INST_VBO << v));
       return v;
     }
   }
-
   /* we only make it this far if there is no room for another GPUVertBuf */
-#if TRUST_NO_ONE
-  assert(false);
-#endif
+  BLI_assert(0 && "Not enough Instance VBO slot in batch");
   return -1;
 }
 
 /* Returns the index of verts in the batch. */
 int GPU_batch_vertbuf_add_ex(GPUBatch *batch, GPUVertBuf *verts, bool own_vbo)
 {
-  /* redo the bindings */
-  GPU_batch_vao_cache_clear(batch);
+  BLI_assert(verts);
+  batch->flag |= GPU_BATCH_DIRTY;
 
   for (uint v = 0; v < GPU_BATCH_VBO_MAX_LEN; v++) {
     if (batch->verts[v] == NULL) {
-#if TRUST_NO_ONE
       /* for now all VertexBuffers must have same vertex_len */
       if (batch->verts[0] != NULL) {
-        assert(verts->vertex_len == batch->verts[0]->vertex_len);
+        BLI_assert(verts->vertex_len == batch->verts[0]->vertex_len);
       }
-#endif
       batch->verts[v] = verts;
-      /* TODO: mark dirty so we can keep attribute bindings up-to-date */
-      if (own_vbo) {
-        batch->owns_flag |= (1 << v);
-      }
+      SET_FLAG_FROM_TEST(batch->flag, own_vbo, (eGPUBatchFlag)(GPU_BATCH_OWNS_VBO << v));
       return v;
     }
   }
-
   /* we only make it this far if there is no room for another GPUVertBuf */
-#if TRUST_NO_ONE
-  assert(false);
-#endif
+  BLI_assert(0 && "Not enough VBO slot in batch");
   return -1;
 }
 
-static GLuint batch_vao_get(GPUBatch *batch)
-{
-  /* Search through cache */
-  if (batch->is_dynamic_vao_count) {
-    for (int i = 0; i < batch->dynamic_vaos.count; i++) {
-      if (batch->dynamic_vaos.interfaces[i] == batch->interface) {
-        return batch->dynamic_vaos.vao_ids[i];
-      }
-    }
-  }
-  else {
-    for (int i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) {
-      if (batch->static_vaos.interfaces[i] == batch->interface) {
-        return batch->static_vaos.vao_ids[i];
-      }
-    }
-  }
-
-  /* Set context of this batch.
-   * It will be bound to it until GPU_batch_vao_cache_clear is called.
-   * Until then it can only be drawn with this context. */
-  if (batch->context == NULL) {
-    batch->context = GPU_context_active_get();
-    gpu_context_add_batch(batch->context, batch);
-  }
-#if TRUST_NO_ONE
-  else {
-    /* Make sure you are not trying to draw this batch in another context. */
-    assert(batch->context == GPU_context_active_get());
-  }
-#endif
-
-  /* Cache miss, time to add a new entry! */
-  GLuint new_vao = 0;
-  if (!batch->is_dynamic_vao_count) {
-    int i; /* find first unused slot */
-    for (i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) {
-      if (batch->static_vaos.vao_ids[i] == 0) {
-        break;
-      }
-    }
-
-    if (i < GPU_BATCH_VAO_STATIC_LEN) {
-      batch->static_vaos.interfaces[i] = batch->interface;
-      batch->static_vaos.vao_ids[i] = new_vao = GPU_vao_alloc();
-    }
-    else {
-      /* Not enough place switch to dynamic. */
-      batch->is_dynamic_vao_count = true;
-      /* Erase previous entries, they will be added back if drawn again. */
-      for (int j = 0; j < GPU_BATCH_VAO_STATIC_LEN; j++) {
-        GPU_shaderinterface_remove_batch_ref(
-            (GPUShaderInterface *)batch->static_vaos.interfaces[j], batch);
-        GPU_vao_free(batch->static_vaos.vao_ids[j], batch->context);
-      }
-      /* Init dynamic arrays and let the branch below set the values. */
-      batch->dynamic_vaos.count = GPU_BATCH_VAO_DYN_ALLOC_COUNT;
-      batch->dynamic_vaos.interfaces = (const GPUShaderInterface **)MEM_callocN(
-          batch->dynamic_vaos.count * sizeof(GPUShaderInterface *), "dyn vaos interfaces");
-      batch->dynamic_vaos.vao_ids = (GLuint *)MEM_callocN(
-          batch->dynamic_vaos.count * sizeof(GLuint), "dyn vaos ids");
-    }
-  }
-
-  if (batch->is_dynamic_vao_count) {
-    int i; /* find first unused slot */
-    for (i = 0; i < batch->dynamic_vaos.count; i++) {
-      if (batch->dynamic_vaos.vao_ids[i] == 0) {
-        break;
-      }
-    }
-
-    if (i == batch->dynamic_vaos.count) {
-      /* Not enough place, realloc the array. */
-      i = batch->dynamic_vaos.count;
-      batch->dynamic_vaos.count += GPU_BATCH_VAO_DYN_ALLOC_COUNT;
-      batch->dynamic_vaos.interfaces = (const GPUShaderInterface **)MEM_recallocN(
-          (void *)batch->dynamic_vaos.interfaces,
-          sizeof(GPUShaderInterface *) * batch->dynamic_vaos.count);
-      batch->dynamic_vaos.vao_ids = (GLuint *)MEM_recallocN(
-          batch->dynamic_vaos.vao_ids, sizeof(GLuint) * batch->dynamic_vaos.count);
-    }
-    batch->dynamic_vaos.interfaces[i] = batch->interface;
-    batch->dynamic_vaos.vao_ids[i] = new_vao = GPU_vao_alloc();
-  }
-
-  GPU_shaderinterface_add_batch_ref((GPUShaderInterface *)batch->interface, batch);
-
-#if TRUST_NO_ONE
-  assert(new_vao != 0);
-#endif
-
-  /* We just got a fresh VAO we need to initialize it. */
-  glBindVertexArray(new_vao);
-  batch_update_program_bindings(batch, 0);
-  glBindVertexArray(0);
-
-  return new_vao;
-}
+/** \} */
 
-void GPU_batch_set_shader_no_bind(GPUBatch *batch, GPUShader *shader)
-{
-#if TRUST_NO_ONE
-  assert(glIsProgram(shader->program));
-  assert(batch->program_in_use == 0);
-#endif
-  batch->interface = shader->interface;
-  batch->program = shader->program;
-  batch->vao_id = batch_vao_get(batch);
-}
+/* -------------------------------------------------------------------- */
+/** \name Uniform setters
+ *
+ * TODO(fclem) port this to GPUShader.
+ * \{ */
 
 void GPU_batch_set_shader(GPUBatch *batch, GPUShader *shader)
 {
-  GPU_batch_set_shader_no_bind(batch, shader);
-  GPU_batch_program_use_begin(batch); /* hack! to make Batch_Uniform* simpler */
-}
-
-void gpu_batch_remove_interface_ref(GPUBatch *batch, const GPUShaderInterface *interface)
-{
-  if (batch->is_dynamic_vao_count) {
-    for (int i = 0; i < batch->dynamic_vaos.count; i++) {
-      if (batch->dynamic_vaos.interfaces[i] == interface) {
-        GPU_vao_free(batch->dynamic_vaos.vao_ids[i], batch->context);
-        batch->dynamic_vaos.vao_ids[i] = 0;
-        batch->dynamic_vaos.interfaces[i] = NULL;
-        break; /* cannot have duplicates */
-      }
-    }
-  }
-  else {
-    int i;
-    for (i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) {
-      if (batch->static_vaos.interfaces[i] == interface) {
-        GPU_vao_free(batch->static_vaos.vao_ids[i], batch->context);
-        batch->static_vaos.vao_ids[i] = 0;
-        batch->static_vaos.interfaces[i] = NULL;
-        break; /* cannot have duplicates */
-      }
-    }
-  }
-}
-
-static void create_bindings(GPUVertBuf *verts,
-                            const GPUShaderInterface *interface,
-                            uint16_t *attr_mask,
-                            uint v_first,
-                            const bool use_instancing)
-{
-  const GPUVertFormat *format = &verts->format;
-
-  const uint attr_len = format->attr_len;
-  uint stride = format->stride;
-  uint offset = 0;
-
-  GPU_vertbuf_use(verts);
-
-  for (uint a_idx = 0; a_idx < attr_len; a_idx++) {
-    const GPUVertAttr *a = &format->attrs[a_idx];
-
-    if (format->deinterleaved) {
-      offset += ((a_idx == 0) ? 0 : format->attrs[a_idx - 1].sz) * verts->vertex_len;
-      stride = a->sz;
-    }
-    else {
-      offset = a->offset;
-    }
-
-    const GLvoid *pointer = (const GLubyte *)0 + offset + v_first * stride;
-    const GLenum type = convert_comp_type_to_gl(static_cast<GPUVertCompType>(a->comp_type));
-
-    for (uint n_idx = 0; n_idx < a->name_len; n_idx++) {
-      const char *name = GPU_vertformat_attr_name_get(format, a, n_idx);
-      const GPUShaderInput *input = GPU_shaderinterface_attr(interface, name);
-
-      if (input == NULL) {
-        continue;
-      }
-
-      *attr_mask &= ~(1 << input->location);
-
-      if (a->comp_len == 16 || a->comp_len == 12 || a->comp_len == 8) {
-        BLI_assert(a->fetch_mode == GPU_FETCH_FLOAT);
-        BLI_assert(a->comp_type == GPU_COMP_F32);
-        for (int i = 0; i < a->comp_len / 4; i++) {
-          glEnableVertexAttribArray(input->location + i);
-          glVertexAttribDivisor(input->location + i, (use_instancing) ? 1 : 0);
-          glVertexAttribPointer(
-              input->location + i, 4, type, GL_FALSE, stride, (const GLubyte *)pointer + i * 16);
-        }
-      }
-      else {
-        glEnableVertexAttribArray(input->location);
-        glVertexAttribDivisor(input->location, (use_instancing) ? 1 : 0);
-
-        switch (a->fetch_mode) {
-          case GPU_FETCH_FLOAT:
-          case GPU_FETCH_INT_TO_FLOAT:
-            glVertexAttribPointer(input->location, a->comp_len, type, GL_FALSE, stride, pointer);
-            break;
-          case GPU_FETCH_INT_TO_FLOAT_UNIT:
-            glVertexAttribPointer(input->location, a->comp_len, type, GL_TRUE, stride, pointer);
-            break;
-          case GPU_FETCH_INT:
-            glVertexAttribIPointer(input->location, a->comp_len, type, stride, pointer);
-            break;
-        }
-      }
-    }
-  }
-}
-
-static void batch_update_program_bindings(GPUBatch *batch, uint i_first)
-{
-  uint16_t attr_mask = batch->interface->enabled_attr_mask;
-
-  /* Reverse order so first VBO'S have more prevalence (in term of attribute override). */
-  for (int v = GPU_BATCH_VBO_MAX_LEN - 1; v > -1; v--) {
-    if (batch->verts[v] != NULL) {
-      create_bindings(batch->verts[v], batch->interface, &attr_mask, 0, false);
-    }
-  }
-
-  for (int v = GPU_BATCH_INST_VBO_MAX_LEN - 1; v > -1; v--) {
-    if (batch->inst[v]) {
-      create_bindings(batch->inst[v], batch->interface, &attr_mask, i_first, true);
-    }
-  }
-
-  if (attr_mask != 0 && GLEW_ARB_vertex_attrib_binding) {
-    for (uint16_t mask = 1, a = 0; a < 16; a++, mask <<= 1) {
-      if (attr_mask & mask) {
-        /* This replaces glVertexAttrib4f(a, 0.0f, 0.0f, 0.0f, 1.0f); with a more modern style.
-         * Fix issues for some drivers (see T75069). */
-        glBindVertexBuffer(a, g_default_attr_vbo, (intptr_t)0, (intptr_t)0);
-
-        glEnableVertexAttribArray(a);
-        glVertexAttribFormat(a, 4, GL_FLOAT, GL_FALSE, 0);
-        glVertexAttribBinding(a, a);
-      }
-    }
-  }
-
-  if (batch->elem) {
-    GPU_indexbuf_use(batch->elem);
-  }
-}
-
-void GPU_batch_program_use_begin(GPUBatch *batch)
-{
-  /* NOTE: use_program & done_using_program are fragile, depend on staying in sync with
-   *       the GL context's active program.
-   *       use_program doesn't mark other programs as "not used". */
-  /* TODO: make not fragile (somehow) */
-
-  if (!batch->program_in_use) {
-    glUseProgram(batch->program);
-    batch->program_in_use = true;
-  }
+  batch->shader = shader;
+  GPU_shader_bind(batch->shader);
 }
 
-void GPU_batch_program_use_end(GPUBatch *batch)
-{
-  if (batch->program_in_use) {
-#if PROGRAM_NO_OPTI
-    glUseProgram(0);
-#endif
-    batch->program_in_use = false;
-  }
-}
-
-#if TRUST_NO_ONE
-#  define GET_UNIFORM \
-    const GPUShaderInput *uniform = GPU_shaderinterface_uniform(batch->interface, name); \
-    assert(uniform);
-#else
-#  define GET_UNIFORM \
-    const GPUShaderInput *uniform = GPU_shaderinterface_uniform(batch->interface, name);
-#endif
-
-void GPU_batch_uniform_1ui(GPUBatch *batch, const char *name, uint value)
-{
-  GET_UNIFORM
-  glUniform1ui(uniform->location, value);
-}
+#define GET_UNIFORM \
+  const GPUShaderInput *uniform = GPU_shaderinterface_uniform(batch->shader->interface, name); \
+  BLI_assert(uniform);
 
 void GPU_batch_uniform_1i(GPUBatch *batch, const char *name, int value)
 {
   GET_UNIFORM
-  glUniform1i(uniform->location, value);
+  GPU_shader_uniform_int(batch->shader, uniform->location, value);
 }
 
 void GPU_batch_uniform_1b(GPUBatch *batch, const char *name, bool value)
 {
-  GET_UNIFORM
-  glUniform1i(uniform->location, value ? GL_TRUE : GL_FALSE);
+  GPU_batch_uniform_1i(batch, name, value ? GL_TRUE : GL_FALSE);
 }
 
 void GPU_batch_uniform_2f(GPUBatch *batch, const char *name, float x, float y)
 {
-  GET_UNIFORM
-  glUniform2f(uniform->location, x, y);
+  const float data[2] = {x, y};
+  GPU_batch_uniform_2fv(batch, name, data);
 }
 
 void GPU_batch_uniform_3f(GPUBatch *batch, const char *name, float x, float y, float z)
 {
-  GET_UNIFORM
-  glUniform3f(uniform->location, x, y, z);
+  const float data[3] = {x, y, z};
+  GPU_batch_uniform_3fv(batch, name, data);
 }
 
 void GPU_batch_uniform_4f(GPUBatch *batch, const char *name, float x, float y, float z, float w)
 {
-  GET_UNIFORM
-  glUniform4f(uniform->location, x, y, z, w);
+  const float data[4] = {x, y, z, w};
+  GPU_batch_uniform_4fv(batch, name, data);
 }
 
 void GPU_batch_uniform_1f(GPUBatch *batch, const char *name, float x)
 {
   GET_UNIFORM
-  glUniform1f(uniform->location, x);
+  GPU_shader_uniform_float(batch->shader, uniform->location, x);
 }
 
 void GPU_batch_uniform_2fv(GPUBatch *batch, const char *name, const float data[2])
 {
   GET_UNIFORM
-  glUniform2fv(uniform->location, 1, data);
+  GPU_shader_uniform_vector(batch->shader, uniform->location, 2, 1, data);
 }
 
 void GPU_batch_uniform_3fv(GPUBatch *batch, const char *name, const float data[3])
 {
   GET_UNIFORM
-  glUniform3fv(uniform->location, 1, data);
+  GPU_shader_uniform_vector(batch->shader, uniform->location, 3, 1, data);
 }
 
 void GPU_batch_uniform_4fv(GPUBatch *batch, const char *name, const float data[4])
 {
   GET_UNIFORM
-  glUniform4fv(uniform->location, 1, data);
+  GPU_shader_uniform_vector(batch->shader, uniform->location, 4, 1, data);
 }
 
 void GPU_batch_uniform_2fv_array(GPUBatch *batch,
@@ -621,7 +290,7 @@ void GPU_batch_uniform_2fv_array(GPUBatch *batch,
                                  const float *data)
 {
   GET_UNIFORM
-  glUniform2fv(uniform->location, len, data);
+  GPU_shader_uniform_vector(batch->shader, uniform->location, 2, len, data);
 }
 
 void GPU_batch_uniform_4fv_array(GPUBatch *batch,
@@ -630,68 +299,48 @@ void GPU_batch_uniform_4fv_array(GPUBatch *batch,
                                  const float *data)
 {
   GET_UNIFORM
-  glUniform4fv(uniform->location, len, data);
+  GPU_shader_uniform_vector(batch->shader, uniform->location, 4, len, data);
 }
 
 void GPU_batch_uniform_mat4(GPUBatch *batch, const char *name, const float data[4][4])
 {
   GET_UNIFORM
-  glUniformMatrix4fv(uniform->location, 1, GL_FALSE, (const float *)data);
+  GPU_shader_uniform_vector(batch->shader, uniform->location, 16, 1, (const float *)data);
 }
 
-static void *elem_offset(const GPUIndexBuf *el, int v_first)
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Drawing / Drawcall functions
+ * \{ */
+
+void GPU_batch_draw(GPUBatch *batch)
 {
-#if GPU_TRACK_INDEX_RANGE
-  if (el->index_type == GPU_INDEX_U16) {
-    return (GLushort *)0 + v_first + el->index_start;
-  }
-#endif
-  return (GLuint *)0 + v_first + el->index_start;
+  GPU_shader_bind(batch->shader);
+  GPU_batch_draw_advanced(batch, 0, 0, 0, 0);
+  GPU_shader_unbind();
 }
 
-/* Use when drawing with GPU_batch_draw_advanced */
-void GPU_batch_bind(GPUBatch *batch)
+void GPU_batch_draw_range(GPUBatch *batch, int v_first, int v_count)
 {
-  glBindVertexArray(batch->vao_id);
-
-#if GPU_TRACK_INDEX_RANGE
-  /* Can be removed if GL 4.3 is required. */
-  if (!GLEW_ARB_ES3_compatibility && batch->elem != NULL) {
-    GLuint restart_index = (batch->elem->index_type == GPU_INDEX_U16) ? (GLuint)0xFFFF :
-                                                                        (GLuint)0xFFFFFFFF;
-    glPrimitiveRestartIndex(restart_index);
-  }
-#endif
+  GPU_shader_bind(batch->shader);
+  GPU_batch_draw_advanced(batch, v_first, v_count, 0, 0);
+  GPU_shader_unbind();
 }
 
-void GPU_batch_draw(GPUBatch *batch)
+/* Draw multiple instances of a batch without having any instance attributes. */
+void GPU_batch_draw_instanced(GPUBatch *batch, int i_count)
 {
-#if TRUST_NO_ONE
-  assert(batch->phase == GPU_BATCH_READY_TO_DRAW);
-  assert(batch->verts[0]->vbo_id != 0);
-#endif
-  GPU_batch_program_use_begin(batch);
-  GPU_matrix_bind(batch->interface);  // external call.
-  GPU_shader_set_srgb_uniform(batch->interface);
-
-  GPU_batch_bind(batch);
-  GPU_batch_draw_advanced(batch, 0, 0, 0, 0);
+  BLI_assert(batch->inst[0] == NULL);
 
-  GPU_batch_program_use_end(batch);
+  GPU_shader_bind(batch->shader);
+  GPU_batch_draw_advanced(batch, 0, 0, 0, i_count);
+  GPU_shader_unbind();
 }
 
-#if GPU_TRACK_INDEX_RANGE
-#  define BASE_INDEX(el) ((el)->base_index)
-#  define INDEX_TYPE(el) ((el)->gl_index_type)
-#else
-#  define BASE_INDEX(el) 0
-#  define INDEX_TYPE(el) GL_UNSIGNED_INT
-#endif
-
 void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_first, int i_count)
 {
-  BLI_assert(batch->program_in_use);
-  /* TODO could assert that VAO is bound. */
+  BLI_assert(GPU_context_active_get()->shader != NULL);
 
   if (v_count == 0) {
     v_count = (batch->elem) ? batch->elem->index_len : batch->verts[0]->vertex_len;
@@ -699,8 +348,8 @@ void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_fi
   if (i_count == 0) {
     i_count = (batch->inst[0]) ? batch->inst[0]->vertex_len : 1;
     /* Meh. This is to be able to use different numbers of verts in instance vbos. */
-    if (batch->inst[1] && i_count > batch->inst[1]->vertex_len) {
-      i_count = batch->inst[1]->vertex_len;
+    if (batch->inst[1] != NULL) {
+      i_count = min_ii(i_count, batch->inst[1]->vertex_len);
     }
   }
 
@@ -709,76 +358,7 @@ void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_fi
     return;
   }
 
-  /* Verify there is enough data do draw. */
-  /* TODO(fclem) Nice to have but this is invalid when using procedural draw-calls.
-   * The right assert would be to check if there is an enabled attribute from each VBO
-   * and check their length. */
-  // BLI_assert(i_first + i_count <= (batch->inst ? batch->inst->vertex_len : INT_MAX));
-  // BLI_assert(v_first + v_count <=
-  //            (batch->elem ? batch->elem->index_len : batch->verts[0]->vertex_len));
-
-#ifdef __APPLE__
-  GLuint vao = 0;
-#endif
-
-  if (!GPU_arb_base_instance_is_supported()) {
-    if (i_first > 0) {
-#ifdef __APPLE__
-      /**
-       * There seems to be a nasty bug when drawing using the same VAO reconfiguring. (see T71147)
-       * We just use a throwaway VAO for that. Note that this is likely to degrade performance.
-       **/
-      glGenVertexArrays(1, &vao);
-      glBindVertexArray(vao);
-#else
-      /* If using offset drawing with instancing, we must
-       * use the default VAO and redo bindings. */
-      glBindVertexArray(GPU_vao_default());
-#endif
-      batch_update_program_bindings(batch, i_first);
-    }
-    else {
-      /* Previous call could have bind the default vao
-       * see above. */
-      glBindVertexArray(batch->vao_id);
-    }
-  }
-
-  if (batch->elem) {
-    const GPUIndexBuf *el = batch->elem;
-    GLenum index_type = INDEX_TYPE(el);
-    GLint base_index = BASE_INDEX(el);
-    void *v_first_ofs = elem_offset(el, v_first);
-
-    if (GPU_arb_base_instance_is_supported()) {
-      glDrawElementsInstancedBaseVertexBaseInstance(
-          batch->gl_prim_type, v_count, index_type, v_first_ofs, i_count, base_index, i_first);
-    }
-    else {
-      glDrawElementsInstancedBaseVertex(
-          batch->gl_prim_type, v_count, index_type, v_first_ofs, i_count, base_index);
-    }
-  }
-  else {
-#ifdef __APPLE__
-    glDisable(GL_PRIMITIVE_RESTART);
-#endif
-    if (GPU_arb_base_instance_is_supported()) {
-      glDrawArraysInstancedBaseInstance(batch->gl_prim_type, v_first, v_count, i_count, i_first);
-    }
-    else {
-      glDrawArraysInstanced(batch->gl_prim_type, v_first, v_count, i_count);
-    }
-#ifdef __APPLE__
-    glEnable(GL_PRIMITIVE_RESTART);
-#endif
-  }
-
-#ifdef __APPLE__
-  if (vao != 0) {
-    glDeleteVertexArrays(1, &vao);
-  }
-#endif
+  static_cast<Batch *>(batch)->draw(v_first, v_count, i_first, i_count);
 }
 
 /* just draw some vertices and let shader place them where we want. */
@@ -795,191 +375,6 @@ void GPU_draw_primitive(GPUPrimType prim_type, int v_count)
   // glBindVertexArray(0);
 }
 
-/* -------------------------------------------------------------------- */
-/** \name Indirect Draw Calls
- * \{ */
-
-#if 0
-#  define USE_MULTI_DRAW_INDIRECT 0
-#else
-#  define USE_MULTI_DRAW_INDIRECT \
-    (GL_ARB_multi_draw_indirect && GPU_arb_base_instance_is_supported())
-#endif
-
-typedef struct GPUDrawCommand {
-  uint v_count;
-  uint i_count;
-  uint v_first;
-  uint i_first;
-} GPUDrawCommand;
-
-typedef struct GPUDrawCommandIndexed {
-  uint v_count;
-  uint i_count;
-  uint v_first;
-  uint base_index;
-  uint i_first;
-} GPUDrawCommandIndexed;
-
-struct GPUDrawList {
-  GPUBatch *batch;
-  uint base_index;  /* Avoid dereferencing batch. */
-  uint cmd_offset;  /* in bytes, offset  inside indirect command buffer. */
-  uint cmd_len;     /* Number of used command for the next call. */
-  uint buffer_size; /* in bytes, size of indirect command buffer. */
-  GLuint buffer_id; /* Draw Indirect Buffer id */
-  union {
-    GPUDrawCommand *commands;
-    GPUDrawCommandIndexed *commands_indexed;
-  };
-};
-
-GPUDrawList *GPU_draw_list_create(int length)
-{
-  GPUDrawList *list = (GPUDrawList *)MEM_callocN(sizeof(GPUDrawList), "GPUDrawList");
-  /* Alloc the biggest possible command list which is indexed. */
-  list->buffer_size = sizeof(GPUDrawCommandIndexed) * length;
-  if (USE_MULTI_DRAW_INDIRECT) {
-    list->buffer_id = GPU_buf_alloc();
-    glBindBuffer(GL_DRAW_INDIRECT_BUFFER, list->buffer_id);
-    glBufferData(GL_DRAW_INDIRECT_BUFFER, list->buffer_size, NULL, GL_DYNAMIC_DRAW);
-  }
-  else {
-    list->commands = (GPUDrawCommand *)MEM_mallocN(list->buffer_size, "GPUDrawList data");
-  }
-  return list;
-}
-
-void GPU_draw_list_discard(GPUDrawList *list)
-{
-  if (list->buffer_id) {
-    GPU_buf_free(list->buffer_id);
-  }
-  else {
-    MEM_SAFE_FREE(list->commands);
-  }
-  MEM_freeN(list);
-}
-
-void GPU_draw_list_init(GPUDrawList *list, GPUBatch *batch)
-{
-  BLI_assert(batch->phase == GPU_BATCH_READY_TO_DRAW);
-  list->batch = batch;
-  list->base_index = batch->elem ? BASE_INDEX(batch->elem) : UINT_MAX;
-  list->cmd_len = 0;
-
-  if (USE_MULTI_DRAW_INDIRECT) {
-    if (list->commands == NULL) {
-      glBindBuffer(GL_DRAW_INDIRECT_BUFFER, list->buffer_id);
-      if (list->cmd_offset >= list->buffer_size) {
-        /* Orphan buffer data and start fresh. */
-        glBufferData(GL_DRAW_INDIRECT_BUFFER, list->buffer_size, NULL, GL_DYNAMIC_DRAW);
-        list->cmd_offset = 0;
-      }
-      GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_FLUSH_EXPLICIT_BIT;
-      list->commands = (GPUDrawCommand *)glMapBufferRange(
-          GL_DRAW_INDIRECT_BUFFER, list->cmd_offset, list->buffer_size - list->cmd_offset, flags);
-    }
-  }
-  else {
-    list->cmd_offset = 0;
-  }
-}
-
-void GPU_draw_list_command_add(
-    GPUDrawList *list, int v_first, int v_count, int i_first, int i_count)
-{
-  BLI_assert(list->commands);
-
-  if (v_count == 0 || i_count == 0) {
-    return;
-  }
-
-  if (list->base_index != UINT_MAX) {
-    GPUDrawCommandIndexed *cmd = list->commands_indexed + list->cmd_len;
-    cmd->v_first = v_first;
-    cmd->v_count = v_count;
-    cmd->i_count = i_count;
-    cmd->base_index = list->base_index;
-    cmd->i_first = i_first;
-  }
-  else {
-    GPUDrawCommand *cmd = list->commands + list->cmd_len;
-    cmd->v_first = v_first;
-    cmd->v_count = v_count;
-    cmd->i_count = i_count;
-    cmd->i_first = i_first;
-  }
-
-  list->cmd_len++;
-  uint offset = list->cmd_offset + list->cmd_len * sizeof(GPUDrawCommandIndexed);
-
-  if (offset == list->buffer_size) {
-    GPU_draw_list_submit(list);
-    GPU_draw_list_init(list, list->batch);
-  }
-}
-
-void GPU_draw_list_submit(GPUDrawList *list)
-{
-  GPUBatch *batch = list->batch;
-
-  if (list->cmd_len == 0) {
-    return;
-  }
-
-  BLI_assert(list->commands);
-  BLI_assert(batch->program_in_use);
-  /* TODO could assert that VAO is bound. */
-
-  /* TODO We loose a bit of memory here if we only draw arrays. Fix that. */
-  uintptr_t offset = list->cmd_offset;
-  uint cmd_len = list->cmd_len;
-  size_t bytes_used = cmd_len * sizeof(GPUDrawCommandIndexed);
-  list->cmd_len = 0; /* Avoid reuse. */
-
-  /* Only do multi-draw indirect if doing more than 2 drawcall.
-   * This avoids the overhead of buffer mapping if scene is
-   * not very instance friendly.
-   * BUT we also need to take into account the case where only
-   * a few instances are needed to finish filling a call buffer. */
-  const bool do_mdi = (cmd_len > 2) || (list->cmd_offset + bytes_used == list->buffer_size);
-
-  if (USE_MULTI_DRAW_INDIRECT && do_mdi) {
-    GLenum prim = batch->gl_prim_type;
-
-    glBindBuffer(GL_DRAW_INDIRECT_BUFFER, list->buffer_id);
-    glFlushMappedBufferRange(GL_DRAW_INDIRECT_BUFFER, 0, bytes_used);
-    glUnmapBuffer(GL_DRAW_INDIRECT_BUFFER);
-    list->commands = NULL; /* Unmapped */
-    list->cmd_offset += bytes_used;
-
-    if (batch->elem) {
-      glMultiDrawElementsIndirect(prim, INDEX_TYPE(batch->elem), (void *)offset, cmd_len, 0);
-    }
-    else {
-      glMultiDrawArraysIndirect(prim, (void *)offset, cmd_len, 0);
-    }
-  }
-  else {
-    /* Fallback */
-    if (batch->elem) {
-      GPUDrawCommandIndexed *cmd = list->commands_indexed;
-      for (int i = 0; i < cmd_len; i++, cmd++) {
-        /* Index start was added by Draw manager. Avoid counting it twice. */
-        cmd->v_first -= batch->elem->index_start;
-        GPU_batch_draw_advanced(batch, cmd->v_first, cmd->v_count, cmd->i_first, cmd->i_count);
-      }
-    }
-    else {
-      GPUDrawCommand *cmd = list->commands;
-      for (int i = 0; i < cmd_len; i++, cmd++) {
-        GPU_batch_draw_advanced(batch, cmd->v_first, cmd->v_count, cmd->i_first, cmd->i_count);
-      }
-    }
-  }
-}
-
 /** \} */
 
 /* -------------------------------------------------------------------- */
@@ -1015,23 +410,11 @@ void GPU_batch_program_set_imm_shader(GPUBatch *batch)
 
 void gpu_batch_init(void)
 {
-  if (g_default_attr_vbo == 0) {
-    g_default_attr_vbo = GPU_buf_alloc();
-
-    float default_attrib_data[4] = {0.0f, 0.0f, 0.0f, 1.0f};
-    glBindBuffer(GL_ARRAY_BUFFER, g_default_attr_vbo);
-    glBufferData(GL_ARRAY_BUFFER, sizeof(float[4]), default_attrib_data, GL_STATIC_DRAW);
-    glBindBuffer(GL_ARRAY_BUFFER, 0);
-  }
-
   gpu_batch_presets_init();
 }
 
 void gpu_batch_exit(void)
 {
-  GPU_buf_free(g_default_attr_vbo);
-  g_default_attr_vbo = 0;
-
   gpu_batch_presets_exit();
 }
 
diff --git a/source/blender/gpu/intern/gpu_batch_presets.c b/source/blender/gpu/intern/gpu_batch_presets.c
index 3d9b4326c7e..71c971d8656 100644
--- a/source/blender/gpu/intern/gpu_batch_presets.c
+++ b/source/blender/gpu/intern/gpu_batch_presets.c
@@ -380,18 +380,6 @@ bool gpu_batch_presets_unregister(GPUBatch *preset_batch)
   return false;
 }
 
-void gpu_batch_presets_reset(void)
-{
-  BLI_mutex_lock(&g_presets_3d.mutex);
-  /* Reset vao caches for these every time we switch opengl context.
-   * This way they will draw correctly for each window. */
-  LISTBASE_FOREACH (LinkData *, link, &presets_list) {
-    GPUBatch *preset = link->data;
-    GPU_batch_vao_cache_clear(preset);
-  }
-  BLI_mutex_unlock(&g_presets_3d.mutex);
-}
-
 void gpu_batch_presets_exit(void)
 {
   LinkData *link;
@@ -404,17 +392,4 @@ void gpu_batch_presets_exit(void)
   BLI_mutex_end(&g_presets_3d.mutex);
 }
 
-/**
- * This function only needs to be accessed externally because
- * we are drawing UI batches with the DRW old context.
- *
- * And now we use it for drawing the entire area.
- *
- * XXX (Clément) - to cleanup in the upcoming 2.91 refactor.
- **/
-void GPU_batch_presets_reset()
-{
-  gpu_batch_presets_reset();
-}
-
 /** \} */
diff --git a/source/blender/gpu/intern/gpu_batch_private.h b/source/blender/gpu/intern/gpu_batch_private.hh
index 93745b9ca9b..3a8044efc1d 100644
--- a/source/blender/gpu/intern/gpu_batch_private.h
+++ b/source/blender/gpu/intern/gpu_batch_private.hh
@@ -30,12 +30,21 @@
 #include "GPU_context.h"
 #include "GPU_shader_interface.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
+namespace blender {
+namespace gpu {
 
-void gpu_batch_remove_interface_ref(GPUBatch *batch, const GPUShaderInterface *interface);
+/**
+ * Base class of backend batch implementations.
+ * Extends the public #GPUBatch struct with a virtual draw entry point.
+ */
+class Batch : public GPUBatch {
+ public:
+  Batch() {}
+  virtual ~Batch() {}
 
-#ifdef __cplusplus
-}
-#endif
+  /** Issue the draw call; #GPU_batch_draw_advanced forwards its four range arguments here. */
+  virtual void draw(int v_first, int v_count, int i_first, int i_count) = 0;
+};
+
+}  // namespace gpu
+}  // namespace blender
diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc
index 283784aec20..e04631910c1 100644
--- a/source/blender/gpu/intern/gpu_context.cc
+++ b/source/blender/gpu/intern/gpu_context.cc
@@ -40,7 +40,7 @@
 #include "GHOST_C-api.h"
 
 #include "gpu_backend.hh"
-#include "gpu_batch_private.h"
+#include "gpu_batch_private.hh"
 #include "gpu_context_private.hh"
 #include "gpu_matrix_private.h"
 
@@ -83,12 +83,12 @@ bool GPUContext::is_active_on_thread(void)
 
 GPUContext *GPU_context_create(void *ghost_window)
 {
-  if (gpu_backend_get() == NULL) {
+  if (GPUBackend::get() == NULL) {
     /* TODO move where it make sense. */
     GPU_backend_init(GPU_BACKEND_OPENGL);
   }
 
-  GPUContext *ctx = gpu_backend_get()->context_alloc(ghost_window);
+  GPUContext *ctx = GPUBackend::get()->context_alloc(ghost_window);
 
   GPU_context_active_set(ctx);
   return ctx;
@@ -173,14 +173,14 @@ void GPU_fbo_free(GLuint fbo_id, GPUContext *ctx)
 void GPU_buf_free(GLuint buf_id)
 {
   /* TODO avoid using backend */
-  GPUBackend *backend = gpu_backend_get();
+  GPUBackend *backend = GPUBackend::get();
   static_cast<GLBackend *>(backend)->buf_free(buf_id);
 }
 
 void GPU_tex_free(GLuint tex_id)
 {
   /* TODO avoid using backend */
-  GPUBackend *backend = gpu_backend_get();
+  GPUBackend *backend = GPUBackend::get();
   static_cast<GLBackend *>(backend)->tex_free(tex_id);
 }
 
@@ -188,18 +188,6 @@ void GPU_tex_free(GLuint tex_id)
  * which are not shared across contexts. So we need to keep track of
  * ownership. */
 
-void gpu_context_add_batch(GPUContext *ctx, GPUBatch *batch)
-{
-  BLI_assert(ctx);
-  static_cast<GLContext *>(ctx)->batch_register(batch);
-}
-
-void gpu_context_remove_batch(GPUContext *ctx, GPUBatch *batch)
-{
-  BLI_assert(ctx);
-  static_cast<GLContext *>(ctx)->batch_unregister(batch);
-}
-
 void gpu_context_add_framebuffer(GPUContext *ctx, GPUFrameBuffer *fb)
 {
 #ifdef DEBUG
@@ -285,7 +273,7 @@ void GPU_backend_exit(void)
   delete g_backend;
 }
 
-GPUBackend *gpu_backend_get(void)
+GPUBackend *GPUBackend::get(void)
 {
   return g_backend;
 }
diff --git a/source/blender/gpu/intern/gpu_context_private.hh b/source/blender/gpu/intern/gpu_context_private.hh
index d369dbe7402..3f9fca16ff7 100644
--- a/source/blender/gpu/intern/gpu_context_private.hh
+++ b/source/blender/gpu/intern/gpu_context_private.hh
@@ -41,6 +41,7 @@ struct GPUMatrixState;
 struct GPUContext {
  public:
   /** State managment */
+  GPUShader *shader = NULL;
   GPUFrameBuffer *current_fbo = NULL;
   GPUMatrixState *matrix_state = NULL;
 
@@ -77,9 +78,6 @@ void GPU_tex_free(GLuint tex_id);
 void GPU_vao_free(GLuint vao_id, GPUContext *ctx);
 void GPU_fbo_free(GLuint fbo_id, GPUContext *ctx);
 
-void gpu_context_add_batch(GPUContext *ctx, GPUBatch *batch);
-void gpu_context_remove_batch(GPUContext *ctx, GPUBatch *batch);
-
 void gpu_context_add_framebuffer(GPUContext *ctx, struct GPUFrameBuffer *fb);
 void gpu_context_remove_framebuffer(GPUContext *ctx, struct GPUFrameBuffer *fb);
 
diff --git a/source/blender/gpu/intern/gpu_drawlist.cc b/source/blender/gpu/intern/gpu_drawlist.cc
new file mode 100644
index 00000000000..7b807a2fa80
--- /dev/null
+++ b/source/blender/gpu/intern/gpu_drawlist.cc
@@ -0,0 +1,68 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2016 by Mike Erwin.
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Backend-agnostic C API for draw lists (Multi Draw Indirect).
+ */
+
+#include "MEM_guardedalloc.h"
+
+#include "GPU_batch.h"
+#include "GPU_drawlist.h"
+
+#include "gpu_backend.hh"
+
+#include "gpu_drawlist_private.hh"
+
+using namespace blender::gpu;
+
+/**
+ * Allocate a draw list through the current GPU backend.
+ * `list_length` is forwarded unchanged to the backend's `drawlist_alloc()`.
+ * Release the returned handle with #GPU_draw_list_discard.
+ */
+GPUDrawList GPU_draw_list_create(int list_length)
+{
+  DrawList *list_ptr = GPUBackend::get()->drawlist_alloc(list_length);
+  /* Convert to the opaque public handle type. */
+  return reinterpret_cast<GPUDrawList>(list_ptr);
+}
+
+/** Delete a draw list previously returned by #GPU_draw_list_create. */
+void GPU_draw_list_discard(GPUDrawList list)
+{
+  DrawList *list_ptr = reinterpret_cast<DrawList *>(list);
+  delete list_ptr;
+}
+
+/** Forward `batch`, `i_first` and `i_count` to `DrawList::append()`. */
+void GPU_draw_list_append(GPUDrawList list, GPUBatch *batch, int i_first, int i_count)
+{
+  DrawList *list_ptr = reinterpret_cast<DrawList *>(list);
+  list_ptr->append(batch, i_first, i_count);
+}
+
+/** Forward to `DrawList::submit()`. */
+void GPU_draw_list_submit(GPUDrawList list)
+{
+  DrawList *list_ptr = reinterpret_cast<DrawList *>(list);
+  list_ptr->submit();
+}
diff --git a/source/blender/gpu/intern/gpu_drawlist_private.hh b/source/blender/gpu/intern/gpu_drawlist_private.hh
new file mode 100644
index 00000000000..04cc18a5ffd
--- /dev/null
+++ b/source/blender/gpu/intern/gpu_drawlist_private.hh
@@ -0,0 +1,48 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2020 Blender Foundation.
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+
+#include "GPU_batch.h"
+
+namespace blender {
+namespace gpu {
+
+/**
+ * Abstract draw list interface.
+ * Only declares the operations; the behavior is provided by derived classes.
+ */
+class DrawList {
+ public:
+  virtual ~DrawList(){};
+
+  /** Receives the arguments of #GPU_draw_list_append unchanged. */
+  virtual void append(GPUBatch *batch, int i_first, int i_count) = 0;
+  /** Called by #GPU_draw_list_submit. */
+  virtual void submit() = 0;
+};
+
+}  // namespace gpu
+}  // namespace blender
diff --git a/source/blender/gpu/intern/gpu_element.cc b/source/blender/gpu/intern/gpu_element.cc
index cf7cc1d214c..29c95c725fd 100644
--- a/source/blender/gpu/intern/gpu_element.cc
+++ b/source/blender/gpu/intern/gpu_element.cc
@@ -326,6 +326,11 @@ static void squeeze_indices_short(GPUIndexBufBuilder *builder,
 
 #endif /* GPU_TRACK_INDEX_RANGE */
 
+GPUIndexBuf *GPU_indexbuf_calloc(void)
+{
+  return (GPUIndexBuf *)MEM_callocN(sizeof(GPUIndexBuf), __func__);
+}
+
 GPUIndexBuf *GPU_indexbuf_build(GPUIndexBufBuilder *builder)
 {
   GPUIndexBuf *elem = (GPUIndexBuf *)MEM_callocN(sizeof(GPUIndexBuf), "GPUIndexBuf");
diff --git a/source/blender/gpu/intern/gpu_immediate.cc b/source/blender/gpu/intern/gpu_immediate.cc
index 9cededa54f7..2d137c2f21c 100644
--- a/source/blender/gpu/intern/gpu_immediate.cc
+++ b/source/blender/gpu/intern/gpu_immediate.cc
@@ -171,12 +171,8 @@ void immBindBuiltinProgram(eGPUBuiltinShader shader_id)
 
 void immUnbindProgram(void)
 {
-#if TRUST_NO_ONE
-  assert(imm.bound_program != NULL);
-#endif
-#if PROGRAM_NO_OPTI
-  glUseProgram(0);
-#endif
+  BLI_assert(imm.bound_program != NULL);
+  GPU_shader_unbind();
   imm.bound_program = NULL;
 }
 
@@ -321,7 +317,7 @@ GPUBatch *immBeginBatch(GPUPrimType prim_type, uint vertex_len)
   imm.vertex_data = verts->data;
 
   imm.batch = GPU_batch_create_ex(prim_type, verts, NULL, GPU_BATCH_OWNS_VBO);
-  imm.batch->phase = GPU_BATCH_BUILDING;
+  imm.batch->flag |= GPU_BATCH_BUILDING;
 
   return imm.batch;
 }
@@ -423,7 +419,7 @@ void immEnd(void)
       /* TODO: resize only if vertex count is much smaller */
     }
     GPU_batch_set_shader(imm.batch, imm.bound_program);
-    imm.batch->phase = GPU_BATCH_READY_TO_DRAW;
+    imm.batch->flag &= ~GPU_BATCH_BUILDING;
     imm.batch = NULL; /* don't free, batch belongs to caller */
   }
   else {
diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc
index 03b7d5402f5..7a44efce7fb 100644
--- a/source/blender/gpu/intern/gpu_shader.cc
+++ b/source/blender/gpu/intern/gpu_shader.cc
@@ -42,6 +42,7 @@
 #include "GPU_texture.h"
 #include "GPU_uniformbuffer.h"
 
+#include "gpu_context_private.hh"
 #include "gpu_shader_private.h"
 
 extern "C" char datatoc_gpu_shader_colorspace_lib_glsl[];
@@ -258,38 +259,6 @@ GPUShader *GPU_shader_create_from_python(const char *vertexcode,
   return sh;
 }
 
-GPUShader *GPU_shader_load_from_binary(const char *binary,
-                                       const int binary_format,
-                                       const int binary_len,
-                                       const char *shname)
-{
-  BLI_assert(GL_ARB_get_program_binary);
-  int success;
-  int program = glCreateProgram();
-
-  glProgramBinary(program, binary_format, binary, binary_len);
-  glGetProgramiv(program, GL_LINK_STATUS, &success);
-
-  if (success) {
-    glUseProgram(program);
-
-    GPUShader *shader = (GPUShader *)MEM_callocN(sizeof(*shader), __func__);
-    shader->interface = GPU_shaderinterface_create(program);
-    shader->program = program;
-
-#ifndef NDEBUG
-    BLI_snprintf(shader->name, sizeof(shader->name), "%s_%u", shname, g_shaderid++);
-#else
-    UNUSED_VARS(shname);
-#endif
-
-    return shader;
-  }
-
-  glDeleteProgram(program);
-  return NULL;
-}
-
 GPUShader *GPU_shader_create_ex(const char *vertexcode,
                                 const char *fragcode,
                                 const char *geocode,
@@ -598,14 +567,27 @@ void GPU_shader_bind(GPUShader *shader)
 {
   BLI_assert(shader && shader->program);
 
-  glUseProgram(shader->program);
-  GPU_matrix_bind(shader->interface);
-  GPU_shader_set_srgb_uniform(shader->interface);
+  GPUContext *ctx = GPU_context_active_get();
+
+  if (ctx->shader != shader) {
+    ctx->shader = shader;
+    glUseProgram(shader->program);
+    GPU_matrix_bind(shader->interface);
+    GPU_shader_set_srgb_uniform(shader->interface);
+  }
+
+  if (GPU_matrix_dirty_get()) {
+    GPU_matrix_bind(shader->interface);
+  }
 }
 
 void GPU_shader_unbind(void)
 {
+#ifndef NDEBUG
+  GPUContext *ctx = GPU_context_active_get();
+  ctx->shader = NULL;
   glUseProgram(0);
+#endif
 }
 
 /** \} */
@@ -709,38 +691,12 @@ int GPU_shader_get_program(GPUShader *shader)
   return (int)shader->program;
 }
 
-char *GPU_shader_get_binary(GPUShader *shader, uint *r_binary_format, int *r_binary_len)
-{
-  BLI_assert(GLEW_ARB_get_program_binary);
-  char *r_binary;
-  int binary_len = 0;
-
-  glGetProgramiv(shader->program, GL_PROGRAM_BINARY_LENGTH, &binary_len);
-  r_binary = (char *)MEM_mallocN(binary_len, __func__);
-  glGetProgramBinary(shader->program, binary_len, NULL, r_binary_format, r_binary);
-
-  if (r_binary_len) {
-    *r_binary_len = binary_len;
-  }
-
-  return r_binary;
-}
-
 /** \} */
 
 /* -------------------------------------------------------------------- */
 /** \name Uniforms setters
  * \{ */
 
-void GPU_shader_uniform_float(GPUShader *UNUSED(shader), int location, float value)
-{
-  if (location == -1) {
-    return;
-  }
-
-  glUniform1f(location, value);
-}
-
 void GPU_shader_uniform_vector(
     GPUShader *UNUSED(shader), int location, int length, int arraysize, const float *value)
 {
@@ -773,22 +729,9 @@ void GPU_shader_uniform_vector(
   }
 }
 
-void GPU_shader_uniform_int(GPUShader *UNUSED(shader), int location, int value)
-{
-  if (location == -1) {
-    return;
-  }
-
-  glUniform1i(location, value);
-}
-
 void GPU_shader_uniform_vector_int(
     GPUShader *UNUSED(shader), int location, int length, int arraysize, const int *value)
 {
-  if (location == -1) {
-    return;
-  }
-
   switch (length) {
     case 1:
       glUniform1iv(location, arraysize, value);
@@ -808,6 +751,91 @@ void GPU_shader_uniform_vector_int(
   }
 }
 
+void GPU_shader_uniform_int(GPUShader *shader, int location, int value)
+{
+  GPU_shader_uniform_vector_int(shader, location, 1, 1, &value);
+}
+
+void GPU_shader_uniform_float(GPUShader *shader, int location, float value)
+{
+  GPU_shader_uniform_vector(shader, location, 1, 1, &value);
+}
+
+#define GET_UNIFORM \
+  const GPUShaderInput *uniform = GPU_shaderinterface_uniform(sh->interface, name); \
+  BLI_assert(uniform);
+
+void GPU_shader_uniform_1i(GPUShader *sh, const char *name, int value)
+{
+  GET_UNIFORM
+  GPU_shader_uniform_int(sh, uniform->location, value);
+}
+
+void GPU_shader_uniform_1b(GPUShader *sh, const char *name, bool value)
+{
+  GPU_shader_uniform_1i(sh, name, value ? 1 : 0);
+}
+
+void GPU_shader_uniform_2f(GPUShader *sh, const char *name, float x, float y)
+{
+  const float data[2] = {x, y};
+  GPU_shader_uniform_2fv(sh, name, data);
+}
+
+void GPU_shader_uniform_3f(GPUShader *sh, const char *name, float x, float y, float z)
+{
+  const float data[3] = {x, y, z};
+  GPU_shader_uniform_3fv(sh, name, data);
+}
+
+void GPU_shader_uniform_4f(GPUShader *sh, const char *name, float x, float y, float z, float w)
+{
+  const float data[4] = {x, y, z, w};
+  GPU_shader_uniform_4fv(sh, name, data);
+}
+
+void GPU_shader_uniform_1f(GPUShader *sh, const char *name, float x)
+{
+  GET_UNIFORM
+  GPU_shader_uniform_float(sh, uniform->location, x);
+}
+
+void GPU_shader_uniform_2fv(GPUShader *sh, const char *name, const float data[2])
+{
+  GET_UNIFORM
+  GPU_shader_uniform_vector(sh, uniform->location, 2, 1, data);
+}
+
+void GPU_shader_uniform_3fv(GPUShader *sh, const char *name, const float data[3])
+{
+  GET_UNIFORM
+  GPU_shader_uniform_vector(sh, uniform->location, 3, 1, data);
+}
+
+void GPU_shader_uniform_4fv(GPUShader *sh, const char *name, const float data[4])
+{
+  GET_UNIFORM
+  GPU_shader_uniform_vector(sh, uniform->location, 4, 1, data);
+}
+
+void GPU_shader_uniform_mat4(GPUShader *sh, const char *name, const float data[4][4])
+{
+  GET_UNIFORM
+  GPU_shader_uniform_vector(sh, uniform->location, 16, 1, (const float *)data);
+}
+
+void GPU_shader_uniform_2fv_array(GPUShader *sh, const char *name, int len, const float (*val)[2])
+{
+  GET_UNIFORM
+  GPU_shader_uniform_vector(sh, uniform->location, 2, len, (const float *)val);
+}
+
+void GPU_shader_uniform_4fv_array(GPUShader *sh, const char *name, int len, const float (*val)[4])
+{
+  GET_UNIFORM
+  GPU_shader_uniform_vector(sh, uniform->location, 4, len, (const float *)val);
+}
+
 /** \} */
 
 /* -------------------------------------------------------------------- */
diff --git a/source/blender/gpu/intern/gpu_shader_interface.cc b/source/blender/gpu/intern/gpu_shader_interface.cc
index 4511d4a199d..ef90dde1877 100644
--- a/source/blender/gpu/intern/gpu_shader_interface.cc
+++ b/source/blender/gpu/intern/gpu_shader_interface.cc
@@ -32,9 +32,11 @@
 
 #include "GPU_shader_interface.h"
 
-#include "gpu_batch_private.h"
+#include "gpu_batch_private.hh"
 #include "gpu_context_private.hh"
 
+#include "gl_batch.hh"
+
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
@@ -45,6 +47,8 @@
 #  include <stdio.h>
 #endif
 
+using namespace blender::gpu;
+
 static const char *BuiltinUniform_name(GPUUniformBuiltin u)
 {
   switch (u) {
@@ -400,8 +404,8 @@ GPUShaderInterface *GPU_shaderinterface_create(int32_t program)
 
   /* Batches ref buffer */
   shaderface->batches_len = GPU_SHADERINTERFACE_REF_ALLOC_COUNT;
-  shaderface->batches = (GPUBatch **)MEM_callocN(shaderface->batches_len * sizeof(GPUBatch *),
-                                                 "GPUShaderInterface batches");
+  shaderface->batches = (void **)MEM_callocN(shaderface->batches_len * sizeof(GPUBatch *),
+                                             "GPUShaderInterface batches");
 
   MEM_freeN(uniforms_from_blocks);
   MEM_freeN(inputs_tmp);
@@ -468,7 +472,8 @@ void GPU_shaderinterface_discard(GPUShaderInterface *shaderface)
   /* Remove this interface from all linked Batches vao cache. */
   for (int i = 0; i < shaderface->batches_len; i++) {
     if (shaderface->batches[i] != NULL) {
-      gpu_batch_remove_interface_ref(shaderface->batches[i], shaderface);
+      /* XXX GL specific. to be removed during refactor. */
+      reinterpret_cast<GLVaoCache *>(shaderface->batches[i])->remove(shaderface);
     }
   }
   MEM_freeN(shaderface->batches);
@@ -511,7 +516,7 @@ int32_t GPU_shaderinterface_block_builtin(const GPUShaderInterface *shaderface,
   return shaderface->builtin_blocks[builtin];
 }
 
-void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *shaderface, GPUBatch *batch)
+void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *shaderface, void *batch)
 {
   int i; /* find first unused slot */
   for (i = 0; i < shaderface->batches_len; i++) {
@@ -523,13 +528,14 @@ void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *shaderface, GPUBatch
     /* Not enough place, realloc the array. */
     i = shaderface->batches_len;
     shaderface->batches_len += GPU_SHADERINTERFACE_REF_ALLOC_COUNT;
-    shaderface->batches = (GPUBatch **)MEM_recallocN(shaderface->batches,
-                                                     sizeof(GPUBatch *) * shaderface->batches_len);
+    shaderface->batches = (void **)MEM_recallocN(shaderface->batches,
+                                                 sizeof(void *) * shaderface->batches_len);
   }
-  shaderface->batches[i] = batch;
+  /** XXX todo cleanup. */
+  shaderface->batches[i] = reinterpret_cast<void *>(batch);
 }
 
-void GPU_shaderinterface_remove_batch_ref(GPUShaderInterface *shaderface, GPUBatch *batch)
+void GPU_shaderinterface_remove_batch_ref(GPUShaderInterface *shaderface, void *batch)
 {
   for (int i = 0; i < shaderface->batches_len; i++) {
     if (shaderface->batches[i] == batch) {
diff --git a/source/blender/gpu/intern/gpu_vertex_buffer.cc b/source/blender/gpu/intern/gpu_vertex_buffer.cc
index 67ad8835b6a..debf9835c90 100644
--- a/source/blender/gpu/intern/gpu_vertex_buffer.cc
+++ b/source/blender/gpu/intern/gpu_vertex_buffer.cc
@@ -77,6 +77,7 @@ void GPU_vertbuf_init(GPUVertBuf *verts, GPUUsageType usage)
   memset(verts, 0, sizeof(GPUVertBuf));
   verts->usage = usage;
   verts->dirty = true;
+  verts->handle_refcount = 1;
 }
 
 void GPU_vertbuf_init_with_format_ex(GPUVertBuf *verts,
@@ -137,7 +138,23 @@ void GPU_vertbuf_clear(GPUVertBuf *verts)
 void GPU_vertbuf_discard(GPUVertBuf *verts)
 {
   GPU_vertbuf_clear(verts);
-  MEM_freeN(verts);
+  GPU_vertbuf_handle_ref_remove(verts);
+}
+
+void GPU_vertbuf_handle_ref_add(GPUVertBuf *verts)
+{
+  verts->handle_refcount++;
+}
+
+void GPU_vertbuf_handle_ref_remove(GPUVertBuf *verts)
+{
+  BLI_assert(verts->handle_refcount > 0);
+  verts->handle_refcount--;
+  if (verts->handle_refcount == 0) {
+    /* Should already have been cleared. */
+    BLI_assert(verts->vbo_id == 0 && verts->data == NULL);
+    MEM_freeN(verts);
+  }
 }
 
 uint GPU_vertbuf_size_get(const GPUVertBuf *verts)
diff --git a/source/blender/gpu/opengl/gl_backend.hh b/source/blender/gpu/opengl/gl_backend.hh
index f7c01b2f184..eba275f0245 100644
--- a/source/blender/gpu/opengl/gl_backend.hh
+++ b/source/blender/gpu/opengl/gl_backend.hh
@@ -27,7 +27,9 @@
 
 #include "BLI_vector.hh"
 
+#include "gl_batch.hh"
 #include "gl_context.hh"
+#include "gl_drawlist.hh"
 
 namespace blender {
 namespace gpu {
@@ -42,6 +44,16 @@ class GLBackend : public GPUBackend {
     return new GLContext(ghost_window, shared_orphan_list_);
   };
 
+  Batch *batch_alloc(void)
+  {
+    return new GLBatch();
+  };
+
+  DrawList *drawlist_alloc(int list_length)
+  {
+    return new GLDrawList(list_length);
+  };
+
   /* TODO remove */
   void buf_free(GLuint buf_id);
   void tex_free(GLuint tex_id);
diff --git a/source/blender/gpu/opengl/gl_batch.cc b/source/blender/gpu/opengl/gl_batch.cc
new file mode 100644
index 00000000000..00e1a61f7cf
--- /dev/null
+++ b/source/blender/gpu/opengl/gl_batch.cc
@@ -0,0 +1,367 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2016 by Mike Erwin.
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup gpu
+ *
+ * GL implementation of GPUBatch.
+ * The only specificity of GL here is that it caches a list of
+ * Vertex Array Objects based on the bound shader interface.
+ */
+
+#include "BLI_assert.h"
+
+#include "glew-mx.h"
+
+#include "GPU_extensions.h"
+
+#include "gpu_batch_private.hh"
+#include "gpu_primitive_private.h"
+#include "gpu_shader_private.h"
+
+#include "gl_batch.hh"
+#include "gl_context.hh"
+#include "gl_vertex_array.hh"
+
+using namespace blender::gpu;
+
+/* -------------------------------------------------------------------- */
+/** \name Vao cache
+ *
+ * Each GLBatch has a small cache of VAO objects that are used to avoid VAO reconfiguration.
+ * TODO(fclem) Could be revisited to avoid so many cross references.
+ * \{ */
+
+GLVaoCache::GLVaoCache(void)
+{
+  init();
+}
+
+GLVaoCache::~GLVaoCache()
+{
+  this->clear();
+}
+
+void GLVaoCache::init(void)
+{
+  context_ = NULL;
+  interface_ = NULL;
+  is_dynamic_vao_count = false;
+  for (int i = 0; i < GPU_VAO_STATIC_LEN; i++) {
+    static_vaos.interfaces[i] = NULL;
+    static_vaos.vao_ids[i] = 0;
+  }
+  vao_base_instance_ = 0;
+  base_instance_ = 0;
+}
+
+/* Store an already created VAO in the cache, keyed by the interface it is used with. */
+void GLVaoCache::insert(const GPUShaderInterface *interface, GLuint vao)
+{
+  /* Try the fixed-size static storage first. */
+  if (!is_dynamic_vao_count) {
+    int i; /* Find first unused slot. */
+    for (i = 0; i < GPU_VAO_STATIC_LEN; i++) {
+      if (static_vaos.vao_ids[i] == 0) {
+        break;
+      }
+    }
+
+    if (i < GPU_VAO_STATIC_LEN) {
+      static_vaos.interfaces[i] = interface;
+      static_vaos.vao_ids[i] = vao;
+    }
+    else {
+      /* Erase static entries (storage is a shared union), re-added if drawn again. */
+      for (int i = 0; i < GPU_VAO_STATIC_LEN; i++) {
+        if (static_vaos.interfaces[i] != NULL) {
+          GPU_shaderinterface_remove_batch_ref(
+              const_cast<GPUShaderInterface *>(static_vaos.interfaces[i]), this);
+          context_->vao_free(static_vaos.vao_ids[i]);
+        }
+      }
+      /* Not enough room, switch to dynamic storage. */
+      is_dynamic_vao_count = true;
+      /* Init dynamic arrays and let the branch below set the values. */
+      dynamic_vaos.count = GPU_BATCH_VAO_DYN_ALLOC_COUNT;
+      dynamic_vaos.interfaces = (const GPUShaderInterface **)MEM_callocN(
+          dynamic_vaos.count * sizeof(GPUShaderInterface *), "dyn vaos interfaces");
+      dynamic_vaos.vao_ids = (GLuint *)MEM_callocN(dynamic_vaos.count * sizeof(GLuint),
+                                                   "dyn vaos ids");
+    }
+  }
+
+  if (is_dynamic_vao_count) {
+    int i; /* Find first unused slot. */
+    for (i = 0; i < dynamic_vaos.count; i++) {
+      if (dynamic_vaos.vao_ids[i] == 0) {
+        break;
+      }
+    }
+
+    if (i == dynamic_vaos.count) {
+      /* Not enough room, grow both arrays by one allocation step. */
+      i = dynamic_vaos.count;
+      dynamic_vaos.count += GPU_BATCH_VAO_DYN_ALLOC_COUNT;
+      dynamic_vaos.interfaces = (const GPUShaderInterface **)MEM_recallocN(
+          (void *)dynamic_vaos.interfaces, sizeof(GPUShaderInterface *) * dynamic_vaos.count);
+      dynamic_vaos.vao_ids = (GLuint *)MEM_recallocN(dynamic_vaos.vao_ids,
+                                                     sizeof(GLuint) * dynamic_vaos.count);
+    }
+    dynamic_vaos.interfaces[i] = interface;
+    dynamic_vaos.vao_ids[i] = vao;
+  }
+
+  GPU_shaderinterface_add_batch_ref(const_cast<GPUShaderInterface *>(interface), this);
+}
+
+void GLVaoCache::remove(const GPUShaderInterface *interface)
+{
+  const int count = (is_dynamic_vao_count) ? dynamic_vaos.count : GPU_VAO_STATIC_LEN;
+  GLuint *vaos = (is_dynamic_vao_count) ? dynamic_vaos.vao_ids : static_vaos.vao_ids;
+  const GPUShaderInterface **interfaces = (is_dynamic_vao_count) ? dynamic_vaos.interfaces :
+                                                                   static_vaos.interfaces;
+  for (int i = 0; i < count; i++) {
+    if (interfaces[i] == interface) {
+      context_->vao_free(vaos[i]);
+      vaos[i] = 0;
+      interfaces[i] = NULL;
+      break; /* cannot have duplicates */
+    }
+  }
+}
+
+void GLVaoCache::clear(void)
+{
+  GLContext *ctx = static_cast<GLContext *>(GPU_context_active_get());
+  const int count = (is_dynamic_vao_count) ? dynamic_vaos.count : GPU_VAO_STATIC_LEN;
+  GLuint *vaos = (is_dynamic_vao_count) ? dynamic_vaos.vao_ids : static_vaos.vao_ids;
+  const GPUShaderInterface **interfaces = (is_dynamic_vao_count) ? dynamic_vaos.interfaces :
+                                                                   static_vaos.interfaces;
+  /* Early out, nothing to free. */
+  if (context_ == NULL) {
+    return;
+  }
+
+  if (context_ == ctx) {
+    glDeleteVertexArrays(count, vaos);
+    glDeleteVertexArrays(1, &vao_base_instance_);
+  }
+  else {
+    /* TODO(fclem) Slow way. Could avoid locking the mutex multiple times here. */
+    for (int i = 0; i < count; i++) {
+      context_->vao_free(vaos[i]);
+    }
+    context_->vao_free(vao_base_instance_);
+  }
+
+  for (int i = 0; i < count; i++) {
+    if (interfaces[i] == NULL) {
+      continue;
+    }
+    GPU_shaderinterface_remove_batch_ref(const_cast<GPUShaderInterface *>(interfaces[i]), this);
+  }
+
+  if (is_dynamic_vao_count) {
+    MEM_freeN((void *)dynamic_vaos.interfaces);
+    MEM_freeN(dynamic_vaos.vao_ids);
+  }
+
+  if (context_) {
+    context_->vao_cache_unregister(this);
+  }
+  /* Reinit. */
+  this->init();
+}
+
+/* Return 0 on cache miss (invalid VAO) */
+GLuint GLVaoCache::lookup(const GPUShaderInterface *interface)
+{
+  const int count = (is_dynamic_vao_count) ? dynamic_vaos.count : GPU_VAO_STATIC_LEN;
+  const GPUShaderInterface **interfaces = (is_dynamic_vao_count) ? dynamic_vaos.interfaces :
+                                                                   static_vaos.interfaces;
+  for (int i = 0; i < count; i++) {
+    if (interfaces[i] == interface) {
+      return (is_dynamic_vao_count) ? dynamic_vaos.vao_ids[i] : static_vaos.vao_ids[i];
+    }
+  }
+  return 0;
+}
+
+/* The GLVaoCache object is only valid for one GLContext.
+ * Reset the cache if trying to draw in another context. */
+void GLVaoCache::context_check(void)
+{
+  GLContext *ctx = static_cast<GLContext *>(GPU_context_active_get());
+  BLI_assert(ctx);
+
+  if (context_ != ctx) {
+    if (context_ != NULL) {
+      /* IMPORTANT: Trying to draw a batch in multiple different contexts will trash the VAO
+       * cache. This has major performance impact and should be avoided in most cases. */
+      context_->vao_cache_unregister(this);
+    }
+    this->clear(); /* Frees the VAOs and also unregisters from the old context, if any. */
+    context_ = ctx;
+    context_->vao_cache_register(this);
+  }
+}
+
+/* VAO set up for `i_first` as first instance, for when ARB_base_instance is unavailable. */
+GLuint GLVaoCache::base_instance_vao_get(GPUBatch *batch, int i_first)
+{
+  this->context_check();
+  /* Make sure the interface is up to date. */
+  if (interface_ != GPU_context_active_get()->shader->interface) {
+    vao_get(batch);
+    base_instance_ = 0; /* Trigger update. */
+  }
+  /* There seems to be a nasty bug when drawing using the same VAO reconfiguring (T71147).
+   * We just use a throwaway VAO for that. Note that this is likely to degrade performance. */
+#ifdef __APPLE__
+  glDeleteVertexArrays(1, &vao_base_instance_);
+  vao_base_instance_ = 0;
+  base_instance_ = 0; /* The replacement VAO has no bindings yet: force the update below. */
+#endif
+
+  if (vao_base_instance_ == 0) {
+    glGenVertexArrays(1, &vao_base_instance_);
+  }
+
+  if (base_instance_ != i_first) {
+    base_instance_ = i_first;
+    GLVertArray::update_bindings(vao_base_instance_, batch, interface_, i_first);
+  }
+  /* Return the VAO name, not the instance offset: the caller hands it to glBindVertexArray. */
+  return vao_base_instance_;
+}
+
+GLuint GLVaoCache::vao_get(GPUBatch *batch)
+{
+  this->context_check();
+
+  GPUContext *ctx = GPU_context_active_get();
+  if (interface_ != ctx->shader->interface) {
+    interface_ = ctx->shader->interface;
+    vao_id_ = this->lookup(interface_);
+
+    if (vao_id_ == 0) {
+      /* Cache miss, create a new VAO. */
+      glGenVertexArrays(1, &vao_id_);
+      this->insert(interface_, vao_id_);
+      GLVertArray::update_bindings(vao_id_, batch, interface_, 0);
+    }
+  }
+
+  return vao_id_;
+}
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Creation & Deletion
+ * \{ */
+
+GLBatch::GLBatch(void)
+{
+}
+
+GLBatch::~GLBatch()
+{
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Drawing
+ * \{ */
+
+#if GPU_TRACK_INDEX_RANGE
+#  define BASE_INDEX(el) ((el)->base_index)
+#  define INDEX_TYPE(el) ((el)->gl_index_type)
+#else
+#  define BASE_INDEX(el) 0
+#  define INDEX_TYPE(el) GL_UNSIGNED_INT
+#endif
+
+void GLBatch::bind(int i_first)
+{
+  if (flag & GPU_BATCH_DIRTY) {
+    vao_cache_.clear();
+  }
+
+#if GPU_TRACK_INDEX_RANGE
+  /* Can be removed if GL 4.3 is required. */
+  if (!GLEW_ARB_ES3_compatibility && (elem != NULL)) {
+    glPrimitiveRestartIndex((elem->index_type == GPU_INDEX_U16) ? 0xFFFFu : 0xFFFFFFFFu);
+  }
+#endif
+
+  /* Can be removed if GL 4.2 is required. */
+  if (!GPU_arb_base_instance_is_supported() && (i_first > 0)) {
+    glBindVertexArray(vao_cache_.base_instance_vao_get(this, i_first));
+  }
+  else {
+    glBindVertexArray(vao_cache_.vao_get(this));
+  }
+}
+
+void GLBatch::draw(int v_first, int v_count, int i_first, int i_count)
+{
+  this->bind(i_first);
+
+  GLenum gl_type = convert_prim_type_to_gl(prim_type);
+
+  if (elem) {
+    const GPUIndexBuf *el = elem;
+    GLenum index_type = INDEX_TYPE(el);
+    GLint base_index = BASE_INDEX(el);
+    void *v_first_ofs = (GLuint *)0 + v_first + el->index_start;
+
+#if GPU_TRACK_INDEX_RANGE
+    if (el->index_type == GPU_INDEX_U16) {
+      v_first_ofs = (GLushort *)0 + v_first + el->index_start;
+    }
+#endif
+
+    if (GPU_arb_base_instance_is_supported()) {
+      glDrawElementsInstancedBaseVertexBaseInstance(
+          gl_type, v_count, index_type, v_first_ofs, i_count, base_index, i_first);
+    }
+    else {
+      glDrawElementsInstancedBaseVertex(
+          gl_type, v_count, index_type, v_first_ofs, i_count, base_index);
+    }
+  }
+  else {
+#ifdef __APPLE__
+    glDisable(GL_PRIMITIVE_RESTART);
+#endif
+    if (GPU_arb_base_instance_is_supported()) {
+      glDrawArraysInstancedBaseInstance(gl_type, v_first, v_count, i_count, i_first);
+    }
+    else {
+      glDrawArraysInstanced(gl_type, v_first, v_count, i_count);
+    }
+#ifdef __APPLE__
+    glEnable(GL_PRIMITIVE_RESTART);
+#endif
+  }
+}
+
+/** \} */
diff --git a/source/blender/gpu/opengl/gl_batch.hh b/source/blender/gpu/opengl/gl_batch.hh
new file mode 100644
index 00000000000..d70f43aed2a
--- /dev/null
+++ b/source/blender/gpu/opengl/gl_batch.hh
@@ -0,0 +1,105 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright 2020, Blender Foundation.
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup gpu
+ *
+ * GPU geometry batch
+ * Contains VAOs + VBOs + Shader representing a drawable entity.
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+
+#include "gpu_batch_private.hh"
+
+#include "glew-mx.h"
+
+#include "GPU_shader_interface.h"
+
+namespace blender {
+namespace gpu {
+
+#define GPU_VAO_STATIC_LEN 3
+
+/* Vao management: remembers all geometry state (vertex attribute bindings & element buffer)
+ * for each shader interface. Start with a static number of vaos and fallback to dynamic count
+ * if necessary. Once a batch goes dynamic it does not go back. */
+class GLVaoCache {
+ private:
+  /** Context for which the vao_cache_ was generated. */
+  struct GLContext *context_ = NULL;
+  /** Last interface this batch was drawn with. */
+  GPUShaderInterface *interface_ = NULL;
+  /** Cached vao for the last interface. */
+  GLuint vao_id_ = 0;
+  /** Used when arb_base_instance is not supported. */
+  GLuint vao_base_instance_ = 0;
+  int base_instance_ = 0;
+
+  bool is_dynamic_vao_count = false;
+  union {
+    /** Static handle count */
+    struct {
+      const GPUShaderInterface *interfaces[GPU_VAO_STATIC_LEN];
+      GLuint vao_ids[GPU_VAO_STATIC_LEN];
+    } static_vaos;
+    /** Dynamic handle count */
+    struct {
+      uint count;
+      const GPUShaderInterface **interfaces;
+      GLuint *vao_ids;
+    } dynamic_vaos;
+  };
+
+ public:
+  GLVaoCache();
+  ~GLVaoCache();
+
+  GLuint vao_get(GPUBatch *batch);
+  GLuint base_instance_vao_get(GPUBatch *batch, int i_first);
+
+  GLuint lookup(const GPUShaderInterface *interface);
+  void insert(const GPUShaderInterface *interface, GLuint vao_id);
+  void remove(const GPUShaderInterface *interface);
+  void clear(void);
+
+ private:
+  void init(void);
+  void context_check(void);
+};
+
+class GLBatch : public Batch {
+ public:
+  /** All vaos corresponding to all the GPUShaderInterface this batch was drawn with. */
+  GLVaoCache vao_cache_;
+
+ public:
+  GLBatch();
+  ~GLBatch();
+
+  void draw(int v_first, int v_count, int i_first, int i_count) override;
+  void bind(int i_first);
+
+  MEM_CXX_CLASS_ALLOC_FUNCS("GLBatch");
+};
+
+}  // namespace gpu
+}  // namespace blender
diff --git a/source/blender/gpu/opengl/gl_context.cc b/source/blender/gpu/opengl/gl_context.cc
index 00a10924ff6..dd413612879 100644
--- a/source/blender/gpu/opengl/gl_context.cc
+++ b/source/blender/gpu/opengl/gl_context.cc
@@ -63,8 +63,8 @@ GLContext::~GLContext()
   /* For now don't allow GPUFrameBuffers to be reuse in another context. */
   BLI_assert(framebuffers_.is_empty());
   /* Delete vaos so the batch can be reused in another context. */
-  for (GPUBatch *batch : batches_) {
-    GPU_batch_vao_cache_clear(batch);
+  for (GLVaoCache *cache : vao_caches_) {
+    cache->clear();
   }
   glDeleteVertexArrays(1, &default_vao_);
   glDeleteBuffers(1, &default_attr_vbo_);
@@ -197,20 +197,17 @@ void GLBackend::tex_free(GLuint tex_id)
  * is discarded.
  * \{ */
 
-void GLContext::batch_register(struct GPUBatch *batch)
+void GLContext::vao_cache_register(GLVaoCache *cache)
 {
   lists_mutex_.lock();
-  batches_.add(batch);
+  vao_caches_.add(cache);
   lists_mutex_.unlock();
 }
 
-void GLContext::batch_unregister(struct GPUBatch *batch)
+void GLContext::vao_cache_unregister(GLVaoCache *cache)
 {
-  /* vao_cache_clear() can acquire lists_mutex_ so avoid deadlock. */
-  // reinterpret_cast<GLBatch *>(batch)->vao_cache_clear();
-
   lists_mutex_.lock();
-  batches_.remove(batch);
+  vao_caches_.remove(cache);
   lists_mutex_.unlock();
 }
 
diff --git a/source/blender/gpu/opengl/gl_context.hh b/source/blender/gpu/opengl/gl_context.hh
index 3b55965b9d1..0b762c939f1 100644
--- a/source/blender/gpu/opengl/gl_context.hh
+++ b/source/blender/gpu/opengl/gl_context.hh
@@ -25,15 +25,16 @@
 
 #include "gpu_context_private.hh"
 
+#include "GPU_framebuffer.h"
+
 #include "BLI_set.hh"
 #include "BLI_vector.hh"
 
 #include "glew-mx.h"
 
-#include <iostream>
+#include "gl_batch.hh"
+
 #include <mutex>
-#include <unordered_set>
-#include <vector>
 
 namespace blender {
 namespace gpu {
@@ -50,7 +51,7 @@ class GLSharedOrphanLists {
   void orphans_clear(void);
 };
 
-class GLContext : public GPUContext {
+struct GLContext : public GPUContext {
   /* TODO(fclem) these needs to become private. */
  public:
   /** Default VAO for procedural draw calls. */
@@ -63,7 +64,7 @@ class GLContext : public GPUContext {
    * GPUBatch & GPUFramebuffer have references to the context they are from, in the case the
    * context is destroyed, we need to remove any reference to it.
    */
-  Set<GPUBatch *> batches_;
+  Set<GLVaoCache *> vao_caches_;
   Set<GPUFrameBuffer *> framebuffers_;
   /** Mutex for the bellow structures. */
   std::mutex lists_mutex_;
@@ -87,8 +88,8 @@ class GLContext : public GPUContext {
 
   void vao_free(GLuint vao_id);
   void fbo_free(GLuint fbo_id);
-  void batch_register(struct GPUBatch *batch);
-  void batch_unregister(struct GPUBatch *batch);
+  void vao_cache_register(GLVaoCache *cache);
+  void vao_cache_unregister(GLVaoCache *cache);
   void framebuffer_register(struct GPUFrameBuffer *fb);
   void framebuffer_unregister(struct GPUFrameBuffer *fb);
 };
diff --git a/source/blender/gpu/opengl/gl_drawlist.cc b/source/blender/gpu/opengl/gl_drawlist.cc
new file mode 100644
index 00000000000..c121fb9ba2c
--- /dev/null
+++ b/source/blender/gpu/opengl/gl_drawlist.cc
@@ -0,0 +1,240 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2016 by Mike Erwin.
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Implementation of Multi Draw Indirect using OpenGL.
+ * Falls back to regular draw-calls if the needed extensions are not supported.
+ */
+
+#include "BLI_assert.h"
+
+#include "GPU_batch.h"
+#include "GPU_extensions.h"
+
+#include "glew-mx.h"
+
+#include "gpu_context_private.hh"
+#include "gpu_drawlist_private.hh"
+#include "gpu_primitive_private.h"
+
+#include "gl_backend.hh"
+#include "gl_drawlist.hh"
+
+#include <limits.h>
+
+#define USE_MULTI_DRAW_INDIRECT 1 /* Set to 0 to always use the non-MDI fallback path. */
+
+/* TODO remove. Index buffer accessors: fixed values when index ranges are not tracked. */
+#if GPU_TRACK_INDEX_RANGE
+#  define BASE_INDEX(el) ((el)->base_index)
+#  define INDEX_TYPE(el) ((el)->gl_index_type)
+#else
+#  define BASE_INDEX(el) 0
+#  define INDEX_TYPE(el) GL_UNSIGNED_INT
+#endif
+
+using namespace blender::gpu;
+
+typedef struct GLDrawCommand { /* Field order of GL's DrawArraysIndirectCommand. */
+  GLuint v_count; /* Vertex count. */
+  GLuint i_count; /* Instance count. */
+  GLuint v_first; /* First vertex. */
+  GLuint i_first; /* Base instance. */
+} GLDrawCommand;
+
+typedef struct GLDrawCommandIndexed { /* Field order of GL's DrawElementsIndirectCommand. */
+  GLuint v_count;    /* Index count. */
+  GLuint i_count;    /* Instance count. */
+  GLuint v_first;    /* First index. */
+  GLuint base_index; /* Base vertex. */
+  GLuint i_first;    /* Base instance. */
+} GLDrawCommandIndexed;
+
+#define MDI_ENABLED (buffer_size_ != 0) /* The constructor leaves the size at 0 without MDI. */
+#define MDI_DISABLED (buffer_size_ == 0)
+#define MDI_INDEXED (base_index_ != UINT_MAX) /* UINT_MAX: batch has no index buffer. */
+
+/* Set up an empty list able to hold `length` commands. The GL buffer is created in init(). */
+GLDrawList::GLDrawList(int length)
+{
+  BLI_assert(length > 0);
+  batch_ = NULL;
+  context_ = NULL;
+  data_ = NULL;
+  buffer_id_ = 0;
+  command_len_ = 0;
+  command_offset_ = 0;
+  data_offset_ = 0;
+  data_size_ = 0;
+  /* Only read after append() caches them, but never leave members indeterminate. */
+  base_index_ = UINT_MAX;
+  v_first_ = v_count_ = 0;
+
+  const bool use_mdi = USE_MULTI_DRAW_INDIRECT && GLEW_ARB_multi_draw_indirect &&
+                       GPU_arb_base_instance_is_supported();
+  /* Sized for the biggest command type (indexed). A size of 0 indicates MDI is not supported. */
+  buffer_size_ = use_mdi ? sizeof(GLDrawCommandIndexed) * length : 0;
+}
+
+/* Hand buffer_id_ over to the backend's buf_free(). */
+GLDrawList::~GLDrawList()
+{
+  /* TODO: free through the context that created the buffer instead,
+   * i.e. `context_->buf_free(buffer_id_)`. */
+  static_cast<GLBackend *>(GPUBackend::get())->buf_free(buffer_id_);
+}
+
+/* Reset the recording state, (re)allocate the indirect buffer storage and map it for writing. */
+void GLDrawList::init(void)
+{
+  BLI_assert(GPU_context_active_get());
+  BLI_assert(MDI_ENABLED);
+  BLI_assert(data_ == NULL);
+  batch_ = NULL;
+  command_len_ = 0;
+
+  if (buffer_id_ == 0) {
+    /* Allocate on first use. */
+    glGenBuffers(1, &buffer_id_);
+    context_ = static_cast<GLContext *>(GPU_context_active_get());
+  }
+
+  glBindBuffer(GL_DRAW_INDIRECT_BUFFER, buffer_id_);
+  /* Orphan the buffer data and start fresh. NOTE: currently done on every call, the
+   * "only if the buffer is full" (`command_offset_ >= data_size_`) condition is disabled. */
+  glBufferData(GL_DRAW_INDIRECT_BUFFER, buffer_size_, NULL, GL_DYNAMIC_DRAW);
+  data_offset_ = 0;
+  /* Map the remaining range (the whole buffer, since data_offset_ was just reset). */
+  GLbitfield flag = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_FLUSH_EXPLICIT_BIT;
+  data_size_ = buffer_size_ - data_offset_;
+  data_ = (GLbyte *)glMapBufferRange(GL_DRAW_INDIRECT_BUFFER, data_offset_, data_size_, flag);
+  command_offset_ = 0;
+}
+
+/* Record one draw command for `batch`, or draw right away when MultiDrawIndirect is disabled. */
+void GLDrawList::append(GPUBatch *batch, int i_first, int i_count)
+{
+  if (MDI_DISABLED) {
+    GPU_batch_draw_advanced(batch, 0, 0, i_first, i_count);
+    return;
+  }
+
+  /* Flush the previous batch first: submit() may unmap the buffer, so (re)map only after it. */
+  if (batch != batch_) {
+    this->submit();
+  }
+  if (data_ == NULL) {
+    this->init(); /* Also resets batch_, so the cached values below get refreshed. */
+  }
+  if (batch != batch_) {
+    batch_ = batch;
+    /* Cached for faster access. */
+    base_index_ = batch->elem ? BASE_INDEX(batch->elem) : UINT_MAX;
+    v_first_ = batch->elem ? batch->elem->index_start : 0;
+    v_count_ = batch->elem ? batch->elem->index_len : batch->verts[0]->vertex_len;
+  }
+
+  if (MDI_INDEXED) {
+    GLDrawCommandIndexed *cmd = reinterpret_cast<GLDrawCommandIndexed *>(data_ + command_offset_);
+    cmd->v_first = v_first_;
+    cmd->v_count = v_count_;
+    cmd->i_count = i_count;
+    cmd->base_index = base_index_;
+    cmd->i_first = i_first;
+    command_offset_ += sizeof(GLDrawCommandIndexed);
+  }
+  else {
+    GLDrawCommand *cmd = reinterpret_cast<GLDrawCommand *>(data_ + command_offset_);
+    cmd->v_first = v_first_;
+    cmd->v_count = v_count_;
+    cmd->i_count = i_count;
+    cmd->i_first = i_first;
+    command_offset_ += sizeof(GLDrawCommand);
+  }
+
+  command_len_++;
+  /* Submit once the biggest (indexed) command may no longer fit, so no write overruns. */
+  if (command_offset_ + (GLintptr)sizeof(GLDrawCommandIndexed) > data_size_) {
+    this->submit();
+  }
+}
+
+/* Draw the commands recorded for batch_ and reset the command count. No-op when there are none. */
+void GLDrawList::submit(void)
+{
+  if (command_len_ == 0) {
+    return;
+  }
+  /* Something's wrong if we get here without MDI support. */
+  BLI_assert(MDI_ENABLED);
+  BLI_assert(data_);
+  BLI_assert(GPU_context_active_get()->shader != NULL);
+
+  GLBatch *batch = static_cast<GLBatch *>(batch_);
+
+  /* Only do multi-draw indirect if doing more than 2 draw-calls. This avoids the overhead of
+   * buffer mapping if the scene is not very instance friendly. BUT we also need to take into
+   * account the case where only a few instances are needed to finish filling a call buffer. */
+  const bool is_finishing_a_buffer = (command_offset_ >= data_size_);
+  if (command_len_ > 2 || is_finishing_a_buffer) {
+    GLenum prim = convert_prim_type_to_gl(batch_->prim_type);
+    void *offset = (void *)data_offset_;
+
+    glBindBuffer(GL_DRAW_INDIRECT_BUFFER, buffer_id_);
+    glFlushMappedBufferRange(GL_DRAW_INDIRECT_BUFFER, 0, command_offset_);
+    glUnmapBuffer(GL_DRAW_INDIRECT_BUFFER);
+    data_ = NULL; /* Unmapped */
+    data_offset_ += command_offset_;
+
+    batch->bind(0);
+
+    if (MDI_INDEXED) {
+      glMultiDrawElementsIndirect(prim, INDEX_TYPE(batch_->elem), offset, command_len_, 0);
+    }
+    else {
+      glMultiDrawArraysIndirect(prim, offset, command_len_, 0);
+    }
+  }
+  else {
+    /* Fallback: do simple draw-calls from the recorded commands, and don't unmap the buffer. */
+    if (MDI_INDEXED) {
+      GLDrawCommandIndexed *cmd = (GLDrawCommandIndexed *)data_;
+      for (int i = 0; i < command_len_; i++, cmd++) {
+        /* Index start was already added. Avoid counting it twice. */
+        cmd->v_first -= batch->elem->index_start;
+        batch->draw(cmd->v_first, cmd->v_count, cmd->i_first, cmd->i_count);
+      }
+      /* Rewind the write offset so the same mapped space is reused. */
+      command_offset_ -= command_len_ * sizeof(GLDrawCommandIndexed);
+    }
+    else {
+      GLDrawCommand *cmd = (GLDrawCommand *)data_;
+      for (int i = 0; i < command_len_; i++, cmd++) {
+        batch->draw(cmd->v_first, cmd->v_count, cmd->i_first, cmd->i_count);
+      }
+      /* Rewind the write offset so the same mapped space is reused. */
+      command_offset_ -= command_len_ * sizeof(GLDrawCommand);
+    }
+  }
+  /* Do not submit this buffer again. */
+  command_len_ = 0;
+}
+
+/** \} */
+\ No newline at end of file
diff --git a/source/blender/gpu/opengl/gl_drawlist.hh b/source/blender/gpu/opengl/gl_drawlist.hh
new file mode 100644
index 00000000000..4f085149388
--- /dev/null
+++ b/source/blender/gpu/opengl/gl_drawlist.hh
@@ -0,0 +1,80 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2020 Blender Foundation.
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_sys_types.h"
+
+#include "GPU_batch.h"
+#include "GPU_glew.h"
+
+#include "gpu_drawlist_private.hh"
+
+#include "gl_context.hh"
+
+namespace blender {
+namespace gpu {
+
+class GLDrawList : public DrawList {
+ public:
+  GLDrawList(int length);
+  ~GLDrawList();
+
+  void append(GPUBatch *batch, int i_first, int i_count) override;
+  void submit(void) override;
+
+ private:
+  void init(void);
+
+  /** Batch for which commands are being recorded. */
+  GPUBatch *batch_;
+  /** Start of the mapped memory. NULL when the buffer is not mapped. */
+  GLbyte *data_;
+  /** Length of the mapped buffer (in byte). */
+  GLsizeiptr data_size_;
+  /** Current offset inside the mapped buffer (in byte). */
+  GLintptr command_offset_;
+  /** Current number of commands recorded inside the mapped buffer. */
+  uint command_len_;
+  /** Is UINT_MAX if not drawing indexed geometry. Cached to avoid dereferencing the batch. */
+  GLuint base_index_;
+  /** Also cached to avoid dereferencing the batch. */
+  GLuint v_first_, v_count_;
+
+  /** GL Indirect Buffer id. 0 until the buffer is allocated on first use. */
+  GLuint buffer_id_;
+  /** Length of the whole buffer (in byte). 0 means MultiDrawIndirect is not supported/enabled. */
+  GLsizeiptr buffer_size_;
+  /** Offset of data_ inside the whole buffer (in byte). */
+  GLintptr data_offset_;
+
+  /** Context that was active when buffer_id_ was created. Meant to be used to free it. */
+  GLContext *context_;
+
+  MEM_CXX_CLASS_ALLOC_FUNCS("GLDrawList");
+};
+
+}  // namespace gpu
+}  // namespace blender
diff --git a/source/blender/gpu/opengl/gl_vertex_array.cc b/source/blender/gpu/opengl/gl_vertex_array.cc
new file mode 100644
index 00000000000..907dc37e46f
--- /dev/null
+++ b/source/blender/gpu/opengl/gl_vertex_array.cc
@@ -0,0 +1,158 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2016 by Mike Erwin.
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#include "GPU_glew.h"
+
+#include "GPU_shader_interface.h"
+#include "GPU_vertex_buffer.h"
+
+#include "gpu_vertex_format_private.h"
+
+#include "gl_batch.hh"
+#include "gl_context.hh"
+
+#include "gl_vertex_array.hh"
+
+using namespace blender::gpu;
+
+/* -------------------------------------------------------------------- */
+/** \name Vertex Array Bindings
+ * \{ */
+
+/* Set up the attribs of `format` found in `interface`. Returns their locations as a bitflag. */
+static uint16_t vbo_bind(const GPUShaderInterface *interface,
+                         const GPUVertFormat *format,
+                         uint v_first,
+                         uint v_len,
+                         const bool use_instancing)
+{
+  uint16_t enabled_attrib = 0;
+  const uint attr_len = format->attr_len;
+  uint stride = format->stride;
+  uint offset = 0;
+  GLuint divisor = (use_instancing) ? 1 : 0;
+
+  for (uint a_idx = 0; a_idx < attr_len; a_idx++) {
+    const GPUVertAttr *a = &format->attrs[a_idx];
+    /* De-interleaved: each attribute starts after `v_len` elements of the previous one. */
+    if (format->deinterleaved) {
+      offset += ((a_idx == 0) ? 0 : format->attrs[a_idx - 1].sz) * v_len;
+      stride = a->sz;
+    }
+    else {
+      offset = a->offset;
+    }
+
+    const GLvoid *pointer = (const GLubyte *)0 + offset + v_first * stride;
+    const GLenum type = convert_comp_type_to_gl(static_cast<GPUVertCompType>(a->comp_type));
+
+    for (uint n_idx = 0; n_idx < a->name_len; n_idx++) {
+      const char *name = GPU_vertformat_attr_name_get(format, a, n_idx);
+      const GPUShaderInput *input = GPU_shaderinterface_attr(interface, name);
+
+      if (input == NULL) {
+        continue;
+      }
+
+      enabled_attrib |= (1 << input->location);
+      /* Attributes of 8, 12 or 16 components use one location per 4 floats, 16 bytes apart. */
+      if (a->comp_len == 16 || a->comp_len == 12 || a->comp_len == 8) {
+        BLI_assert(a->fetch_mode == GPU_FETCH_FLOAT);
+        BLI_assert(a->comp_type == GPU_COMP_F32);
+        for (int i = 0; i < a->comp_len / 4; i++) {
+          glEnableVertexAttribArray(input->location + i);
+          glVertexAttribDivisor(input->location + i, divisor);
+          glVertexAttribPointer(
+              input->location + i, 4, type, GL_FALSE, stride, (const GLubyte *)pointer + i * 16);
+        }
+      }
+      else {
+        glEnableVertexAttribArray(input->location);
+        glVertexAttribDivisor(input->location, divisor);
+
+        switch (a->fetch_mode) {
+          case GPU_FETCH_FLOAT:
+          case GPU_FETCH_INT_TO_FLOAT:
+            glVertexAttribPointer(input->location, a->comp_len, type, GL_FALSE, stride, pointer);
+            break;
+          case GPU_FETCH_INT_TO_FLOAT_UNIT:
+            glVertexAttribPointer(input->location, a->comp_len, type, GL_TRUE, stride, pointer);
+            break;
+          case GPU_FETCH_INT:
+            glVertexAttribIPointer(input->location, a->comp_len, type, stride, pointer);
+            break;
+        }
+      }
+    }
+  }
+  return enabled_attrib;
+}
+
+/* Bind `vao` and update its attribute bindings (and index buffer binding) from `batch`. */
+void GLVertArray::update_bindings(const GLuint vao,
+                                  const GPUBatch *batch,
+                                  const GPUShaderInterface *interface,
+                                  const int base_instance)
+{
+  uint16_t attr_mask = interface->enabled_attr_mask;
+
+  glBindVertexArray(vao);
+
+  /* Reverse order so the first VBOs take precedence (in terms of attribute override). */
+  for (int v = GPU_BATCH_VBO_MAX_LEN - 1; v > -1; v--) {
+    GPUVertBuf *vbo = batch->verts[v];
+    if (vbo) {
+      GPU_vertbuf_use(vbo);
+      attr_mask &= ~vbo_bind(interface, &vbo->format, 0, vbo->vertex_len, false);
+    }
+  }
+
+  for (int v = GPU_BATCH_INST_VBO_MAX_LEN - 1; v > -1; v--) {
+    GPUVertBuf *vbo = batch->inst[v];
+    if (vbo) {
+      GPU_vertbuf_use(vbo);
+      attr_mask &= ~vbo_bind(interface, &vbo->format, base_instance, vbo->vertex_len, true);
+    }
+  }
+  /* Attributes of the interface's mask that no VBO provided use the default attribute VBO. */
+  if (attr_mask != 0 && GLEW_ARB_vertex_attrib_binding) {
+    for (uint16_t mask = 1, a = 0; a < 16; a++, mask <<= 1) {
+      if (attr_mask & mask) {
+        GLContext *ctx = static_cast<GLContext *>(GPU_context_active_get());
+        /* This replaces glVertexAttrib4f(a, 0.0f, 0.0f, 0.0f, 1.0f); with a more modern style.
+         * Fix issues for some drivers (see T75069). */
+        glBindVertexBuffer(a, ctx->default_attr_vbo_, (intptr_t)0, (intptr_t)0);
+        glEnableVertexAttribArray(a);
+        glVertexAttribFormat(a, 4, GL_FLOAT, GL_FALSE, 0);
+        glVertexAttribBinding(a, a);
+      }
+    }
+  }
+
+  if (batch->elem) {
+    /* Binds the index buffer. This state is also saved in the VAO. */
+    GPU_indexbuf_use(batch->elem);
+  }
+}
+
+/** \} */
diff --git a/source/blender/gpu/opengl/gl_vertex_array.hh b/source/blender/gpu/opengl/gl_vertex_array.hh
new file mode 100644
index 00000000000..6da414d7e62
--- /dev/null
+++ b/source/blender/gpu/opengl/gl_vertex_array.hh
@@ -0,0 +1,44 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) 2020 Blender Foundation.
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#pragma once
+
+#include "glew-mx.h"
+
+#include "GPU_batch.h"
+#include "GPU_shader_interface.h"
+
+namespace blender {
+namespace gpu {
+
+namespace GLVertArray {
+/* Bind `vao` and set its attribute and index buffer bindings from `batch` for `interface`. */
+void update_bindings(const GLuint vao,
+                     const GPUBatch *batch,
+                     const GPUShaderInterface *interface,
+                     const int base_instance);
+
+}  // namespace GLVertArray
+
+}  // namespace gpu
+}  // namespace blender
author	Antonio Vazquez <blendergit@gmail.com>	2020-08-13 17:57:42 +0300
committer	Antonio Vazquez <blendergit@gmail.com>	2020-08-13 17:57:42 +0300
commit	735c717a63c8870d2ef4a910d82a2648cbaaa5e1 (patch)
tree	4cff1cfe01053b8cc188cc69e1c2c60946fe37cc /source/blender/gpu
parent	cba7391d4a42a44efeddae3ff717e542a3c73738 (diff)
parent	53683dec7d9ac9f324ff91904c9f80b8018b9b9c (diff)