diff options
Diffstat (limited to 'source/blender/gpu')
90 files changed, 6965 insertions, 4669 deletions
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 80ea28aca3c..cf0399b776d 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -55,7 +55,6 @@ set(INC_SYS ) set(SRC - intern/gpu_attr_binding.cc intern/gpu_batch.cc intern/gpu_batch_presets.c intern/gpu_batch_utils.c @@ -63,6 +62,7 @@ set(SRC intern/gpu_codegen.c intern/gpu_context.cc intern/gpu_debug.cc + intern/gpu_drawlist.cc intern/gpu_element.cc intern/gpu_extensions.cc intern/gpu_framebuffer.cc @@ -74,7 +74,6 @@ set(SRC intern/gpu_matrix.cc intern/gpu_node_graph.c intern/gpu_platform.cc - intern/gpu_primitive.c intern/gpu_select.c intern/gpu_select_pick.c intern/gpu_select_sample_query.c @@ -83,14 +82,23 @@ set(SRC intern/gpu_shader_interface.cc intern/gpu_state.cc intern/gpu_texture.cc - intern/gpu_uniformbuffer.cc + intern/gpu_uniform_buffer.cc intern/gpu_vertex_buffer.cc intern/gpu_vertex_format.cc intern/gpu_viewport.c + opengl/gl_batch.cc opengl/gl_context.cc + opengl/gl_drawlist.cc + opengl/gl_debug.cc + opengl/gl_framebuffer.cc + opengl/gl_immediate.cc + opengl/gl_shader.cc + opengl/gl_shader_interface.cc + opengl/gl_state.cc + opengl/gl_uniform_buffer.cc + opengl/gl_vertex_array.cc - GPU_attr_binding.h GPU_batch.h GPU_batch_presets.h GPU_batch_utils.h @@ -98,6 +106,7 @@ set(SRC GPU_common.h GPU_context.h GPU_debug.h + GPU_drawlist.h GPU_element.h GPU_extensions.h GPU_framebuffer.h @@ -112,30 +121,45 @@ set(SRC GPU_primitive.h GPU_select.h GPU_shader.h - GPU_shader_interface.h GPU_state.h GPU_texture.h - GPU_uniformbuffer.h + GPU_uniform_buffer.h GPU_vertex_buffer.h GPU_vertex_format.h GPU_viewport.h - intern/gpu_attr_binding_private.h intern/gpu_backend.hh - intern/gpu_batch_private.h + intern/gpu_batch_private.hh intern/gpu_codegen.h intern/gpu_context_private.hh + intern/gpu_drawlist_private.hh + intern/gpu_framebuffer_private.hh + intern/gpu_immediate_private.hh intern/gpu_material_library.h intern/gpu_matrix_private.h 
intern/gpu_node_graph.h - intern/gpu_primitive_private.h intern/gpu_private.h intern/gpu_select_private.h - intern/gpu_shader_private.h + intern/gpu_shader_private.hh + intern/gpu_shader_interface.hh + intern/gpu_state_private.hh + intern/gpu_uniform_buffer_private.hh intern/gpu_vertex_format_private.h opengl/gl_backend.hh + opengl/gl_batch.hh opengl/gl_context.hh + opengl/gl_debug.hh + opengl/gl_drawlist.hh + opengl/gl_framebuffer.hh + opengl/gl_immediate.hh + opengl/gl_primitive.hh + opengl/gl_shader.hh + opengl/gl_shader_interface.hh + opengl/gl_state.hh + opengl/gl_texture.hh + opengl/gl_uniform_buffer.hh + opengl/gl_vertex_array.hh ) set(LIB diff --git a/source/blender/gpu/GPU_batch.h b/source/blender/gpu/GPU_batch.h index 855214c279c..b45898f9c6a 100644 --- a/source/blender/gpu/GPU_batch.h +++ b/source/blender/gpu/GPU_batch.h @@ -26,85 +26,82 @@ #pragma once +#include "BLI_utildefines.h" + #include "GPU_element.h" #include "GPU_shader.h" -#include "GPU_shader_interface.h" #include "GPU_vertex_buffer.h" -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum { - GPU_BATCH_UNUSED, - GPU_BATCH_READY_TO_FORMAT, - GPU_BATCH_READY_TO_BUILD, - GPU_BATCH_BUILDING, - GPU_BATCH_READY_TO_DRAW, -} GPUBatchPhase; - #define GPU_BATCH_VBO_MAX_LEN 6 #define GPU_BATCH_INST_VBO_MAX_LEN 2 #define GPU_BATCH_VAO_STATIC_LEN 3 #define GPU_BATCH_VAO_DYN_ALLOC_COUNT 16 -typedef struct GPUBatch { - /* geometry */ +typedef enum eGPUBatchFlag { + /** Invalid default state. */ + GPU_BATCH_INVALID = 0, + + /** GPUVertBuf ownership. (One bit per vbo) */ + GPU_BATCH_OWNS_VBO = (1 << 0), + GPU_BATCH_OWNS_VBO_MAX = (GPU_BATCH_OWNS_VBO << (GPU_BATCH_VBO_MAX_LEN - 1)), + GPU_BATCH_OWNS_VBO_ANY = ((GPU_BATCH_OWNS_VBO << GPU_BATCH_VBO_MAX_LEN) - 1), + /** Instance GPUVertBuf ownership. 
(One bit per vbo) */ + GPU_BATCH_OWNS_INST_VBO = (GPU_BATCH_OWNS_VBO_MAX << 1), + GPU_BATCH_OWNS_INST_VBO_MAX = (GPU_BATCH_OWNS_INST_VBO << (GPU_BATCH_INST_VBO_MAX_LEN - 1)), + GPU_BATCH_OWNS_INST_VBO_ANY = ((GPU_BATCH_OWNS_INST_VBO << GPU_BATCH_INST_VBO_MAX_LEN) - 1) & + ~GPU_BATCH_OWNS_VBO_ANY, + /** GPUIndexBuf ownership. */ + GPU_BATCH_OWNS_INDEX = (GPU_BATCH_OWNS_INST_VBO_MAX << 1), + + /** Has been initialized. At least one VBO is set. */ + GPU_BATCH_INIT = (1 << 16), + /** Batch is initialized but it's VBOs are still being populated. (optional) */ + GPU_BATCH_BUILDING = (1 << 16), + /** Cached data need to be rebuild. (VAO, PSO, ...) */ + GPU_BATCH_DIRTY = (1 << 17), +} eGPUBatchFlag; + +#define GPU_BATCH_OWNS_NONE GPU_BATCH_INVALID + +BLI_STATIC_ASSERT(GPU_BATCH_OWNS_INDEX < GPU_BATCH_INIT, + "eGPUBatchFlag: Error: status flags are shadowed by the ownership bits!") + +ENUM_OPERATORS(eGPUBatchFlag) + +#ifdef __cplusplus +extern "C" { +#endif +/** + * IMPORTANT: Do not allocate manually as the real struct is bigger (i.e: GLBatch). This is only + * the common and "public" part of the struct. Use the provided allocator. + * TODO(fclem) Make the content of this struct hidden and expose getters/setters. + **/ +typedef struct GPUBatch { /** verts[0] is required, others can be NULL */ GPUVertBuf *verts[GPU_BATCH_VBO_MAX_LEN]; /** Instance attributes. */ GPUVertBuf *inst[GPU_BATCH_INST_VBO_MAX_LEN]; /** NULL if element list not needed */ GPUIndexBuf *elem; - uint32_t gl_prim_type; - - /* cached values (avoid dereferencing later) */ - uint32_t vao_id; - uint32_t program; - const struct GPUShaderInterface *interface; - - /* book-keeping */ - uint owns_flag; - /** used to free all vaos. this implies all vaos were created under the same context. */ - struct GPUContext *context; - GPUBatchPhase phase; - bool program_in_use; - - /* Vao management: remembers all geometry state (vertex attribute bindings & element buffer) - * for each shader interface. 
Start with a static number of vaos and fallback to dynamic count - * if necessary. Once a batch goes dynamic it does not go back. */ - bool is_dynamic_vao_count; - union { - /** Static handle count */ - struct { - const struct GPUShaderInterface *interfaces[GPU_BATCH_VAO_STATIC_LEN]; - uint32_t vao_ids[GPU_BATCH_VAO_STATIC_LEN]; - } static_vaos; - /** Dynamic handle count */ - struct { - uint count; - const struct GPUShaderInterface **interfaces; - uint32_t *vao_ids; - } dynamic_vaos; - }; - - /* XXX This is the only solution if we want to have some data structure using - * batches as key to identify nodes. We must destroy these nodes with this callback. */ - void (*free_callback)(struct GPUBatch *, void *); - void *callback_data; + /** Bookeeping. */ + eGPUBatchFlag flag; + /** Type of geometry to draw. */ + GPUPrimType prim_type; + /** Current assigned shader. DEPRECATED. Here only for uniform binding. */ + struct GPUShader *shader; } GPUBatch; -enum { - GPU_BATCH_OWNS_VBO = (1 << 0), - /* each vbo index gets bit-shifted */ - GPU_BATCH_OWNS_INSTANCES = (1 << 30), - GPU_BATCH_OWNS_INDEX = (1u << 31u), -}; - -GPUBatch *GPU_batch_calloc(uint count); -GPUBatch *GPU_batch_create_ex(GPUPrimType, GPUVertBuf *, GPUIndexBuf *, uint owns_flag); -void GPU_batch_init_ex(GPUBatch *, GPUPrimType, GPUVertBuf *, GPUIndexBuf *, uint owns_flag); +GPUBatch *GPU_batch_calloc(void); +GPUBatch *GPU_batch_create_ex(GPUPrimType prim, + GPUVertBuf *vert, + GPUIndexBuf *elem, + eGPUBatchFlag own_flag); +void GPU_batch_init_ex(GPUBatch *batch, + GPUPrimType prim, + GPUVertBuf *vert, + GPUIndexBuf *elem, + eGPUBatchFlag own_flag); void GPU_batch_copy(GPUBatch *batch_dst, GPUBatch *batch_src); #define GPU_batch_create(prim, verts, elem) GPU_batch_create_ex(prim, verts, elem, 0) @@ -115,10 +112,6 @@ void GPU_batch_clear(GPUBatch *); void GPU_batch_discard(GPUBatch *); /* verts & elem are not discarded */ -void GPU_batch_vao_cache_clear(GPUBatch *); - -void GPU_batch_callback_free_set(GPUBatch 
*, void (*callback)(GPUBatch *, void *), void *); - void GPU_batch_instbuf_set(GPUBatch *, GPUVertBuf *, bool own_vbo); /* Instancing */ void GPU_batch_elembuf_set(GPUBatch *batch, GPUIndexBuf *elem, bool own_ibo); @@ -128,42 +121,39 @@ int GPU_batch_vertbuf_add_ex(GPUBatch *, GPUVertBuf *, bool own_vbo); #define GPU_batch_vertbuf_add(batch, verts) GPU_batch_vertbuf_add_ex(batch, verts, false) void GPU_batch_set_shader(GPUBatch *batch, GPUShader *shader); -void GPU_batch_set_shader_no_bind(GPUBatch *batch, GPUShader *shader); void GPU_batch_program_set_imm_shader(GPUBatch *batch); void GPU_batch_program_set_builtin(GPUBatch *batch, eGPUBuiltinShader shader_id); void GPU_batch_program_set_builtin_with_config(GPUBatch *batch, eGPUBuiltinShader shader_id, eGPUShaderConfig sh_cfg); -/* Entire batch draws with one shader program, but can be redrawn later with another program. */ -/* Vertex shader's inputs must be compatible with the batch's vertex format. */ - -void GPU_batch_program_use_begin(GPUBatch *); /* call before Batch_Uniform (temp hack?) 
*/ -void GPU_batch_program_use_end(GPUBatch *); - -void GPU_batch_uniform_1ui(GPUBatch *, const char *name, uint value); -void GPU_batch_uniform_1i(GPUBatch *, const char *name, int value); -void GPU_batch_uniform_1b(GPUBatch *, const char *name, bool value); -void GPU_batch_uniform_1f(GPUBatch *, const char *name, float value); -void GPU_batch_uniform_2f(GPUBatch *, const char *name, float x, float y); -void GPU_batch_uniform_3f(GPUBatch *, const char *name, float x, float y, float z); -void GPU_batch_uniform_4f(GPUBatch *, const char *name, float x, float y, float z, float w); -void GPU_batch_uniform_2fv(GPUBatch *, const char *name, const float data[2]); -void GPU_batch_uniform_3fv(GPUBatch *, const char *name, const float data[3]); -void GPU_batch_uniform_4fv(GPUBatch *, const char *name, const float data[4]); -void GPU_batch_uniform_2fv_array(GPUBatch *, const char *name, const int len, const float *data); -void GPU_batch_uniform_4fv_array(GPUBatch *, const char *name, const int len, const float *data); -void GPU_batch_uniform_mat4(GPUBatch *, const char *name, const float data[4][4]); - -void GPU_batch_draw(GPUBatch *); - -/* Needs to be called before GPU_batch_draw_advanced. */ -void GPU_batch_bind(GPUBatch *); + +/* Will only work after setting the batch program. */ +/* TODO(fclem) Theses needs to be replaced by GPU_shader_uniform_* with explicit shader. 
*/ +#define GPU_batch_uniform_1i(batch, name, x) GPU_shader_uniform_1i((batch)->shader, name, x); +#define GPU_batch_uniform_1b(batch, name, x) GPU_shader_uniform_1b((batch)->shader, name, x); +#define GPU_batch_uniform_1f(batch, name, x) GPU_shader_uniform_1f((batch)->shader, name, x); +#define GPU_batch_uniform_2f(batch, name, x, y) GPU_shader_uniform_2f((batch)->shader, name, x, y); +#define GPU_batch_uniform_3f(batch, name, x, y, z) \ + GPU_shader_uniform_3f((batch)->shader, name, x, y, z); +#define GPU_batch_uniform_4f(batch, name, x, y, z, w) \ + GPU_shader_uniform_4f((batch)->shader, name, x, y, z, w); +#define GPU_batch_uniform_2fv(batch, name, val) GPU_shader_uniform_2fv((batch)->shader, name, val); +#define GPU_batch_uniform_3fv(batch, name, val) GPU_shader_uniform_3fv((batch)->shader, name, val); +#define GPU_batch_uniform_4fv(batch, name, val) GPU_shader_uniform_4fv((batch)->shader, name, val); +#define GPU_batch_uniform_2fv_array(batch, name, len, val) \ + GPU_shader_uniform_2fv_array((batch)->shader, name, len, val); +#define GPU_batch_uniform_4fv_array(batch, name, len, val) \ + GPU_shader_uniform_4fv_array((batch)->shader, name, len, val); +#define GPU_batch_uniform_mat4(batch, name, val) \ + GPU_shader_uniform_mat4((batch)->shader, name, val); + +void GPU_batch_draw(GPUBatch *batch); +void GPU_batch_draw_range(GPUBatch *batch, int v_first, int v_count); +void GPU_batch_draw_instanced(GPUBatch *batch, int i_count); + /* This does not bind/unbind shader and does not call GPU_matrix_bind() */ void GPU_batch_draw_advanced(GPUBatch *, int v_first, int v_count, int i_first, int i_count); -/* Does not even need batch */ -void GPU_draw_primitive(GPUPrimType, int v_count); - #if 0 /* future plans */ /* Can multiple batches share a GPUVertBuf? Use ref count? 
*/ @@ -199,19 +189,6 @@ GPUBatch *create_BatchInGeneral(GPUPrimType, VertexBufferStuff, ElementListStuff #endif /* future plans */ -/** - * #GPUDrawList is an API to do lots of similar draw-calls very fast using multi-draw-indirect. - * There is a fallback if the feature is not supported. - */ -typedef struct GPUDrawList GPUDrawList; - -GPUDrawList *GPU_draw_list_create(int length); -void GPU_draw_list_discard(GPUDrawList *list); -void GPU_draw_list_init(GPUDrawList *list, GPUBatch *batch); -void GPU_draw_list_command_add( - GPUDrawList *list, int v_first, int v_count, int i_first, int i_count); -void GPU_draw_list_submit(GPUDrawList *list); - void gpu_batch_init(void); void gpu_batch_exit(void); diff --git a/source/blender/gpu/GPU_batch_presets.h b/source/blender/gpu/GPU_batch_presets.h index 1674cf776db..19f200fecbf 100644 --- a/source/blender/gpu/GPU_batch_presets.h +++ b/source/blender/gpu/GPU_batch_presets.h @@ -43,14 +43,13 @@ struct GPUBatch *GPU_batch_preset_panel_drag_widget(const float pixelsize, const float col_dark[4], const float width) ATTR_WARN_UNUSED_RESULT; +struct GPUBatch *GPU_batch_preset_quad(void); + void gpu_batch_presets_init(void); void gpu_batch_presets_register(struct GPUBatch *preset_batch); bool gpu_batch_presets_unregister(struct GPUBatch *preset_batch); -void gpu_batch_presets_reset(void); void gpu_batch_presets_exit(void); -void GPU_batch_presets_reset(void); - #ifdef __cplusplus } #endif diff --git a/source/blender/gpu/GPU_context.h b/source/blender/gpu/GPU_context.h index e3d47cfe084..be7e604fb96 100644 --- a/source/blender/gpu/GPU_context.h +++ b/source/blender/gpu/GPU_context.h @@ -27,7 +27,6 @@ #include "GPU_batch.h" #include "GPU_common.h" -#include "GPU_shader_interface.h" #ifdef __cplusplus extern "C" { diff --git a/source/blender/gpu/GPU_debug.h b/source/blender/gpu/GPU_debug.h index be822056678..09dc02c0fc6 100644 --- a/source/blender/gpu/GPU_debug.h +++ b/source/blender/gpu/GPU_debug.h @@ -30,9 +30,6 @@ extern "C" { /* 
prints something if debug mode is active only */ void GPU_print_error_debug(const char *str); -/* inserts a debug marker message for the debug context messaging system */ -void GPU_string_marker(const char *str); - #ifdef __cplusplus } #endif diff --git a/source/blender/gpu/intern/gpu_attr_binding_private.h b/source/blender/gpu/GPU_drawlist.h index 4d359343c38..27f70da8cf8 100644 --- a/source/blender/gpu/intern/gpu_attr_binding_private.h +++ b/source/blender/gpu/GPU_drawlist.h @@ -13,32 +13,33 @@ * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * The Original Code is Copyright (C) 2016 by Mike Erwin. + * The Original Code is Copyright (C) 2020 Blender Foundation. * All rights reserved. */ /** \file * \ingroup gpu * - * GPU vertex attribute binding + * GPUDrawList is an API to do lots of similar draw-calls very fast using + * multi-draw-indirect. There is a fallback if the feature is not supported. */ #pragma once -#include "GPU_shader_interface.h" -#include "GPU_vertex_format.h" - #ifdef __cplusplus extern "C" { #endif -/* TODO(fclem) remove, use shaderface directly. */ -void AttrBinding_clear(GPUAttrBinding *binding); +struct GPUBatch; + +typedef void *GPUDrawList; /* Opaque pointer. */ + +/* Create a list with at least length drawcalls. Length can affect performance. 
*/ +GPUDrawList GPU_draw_list_create(int length); +void GPU_draw_list_discard(GPUDrawList list); -void get_attr_locations(const GPUVertFormat *format, - GPUAttrBinding *binding, - const GPUShaderInterface *shaderface); -uint read_attr_location(const GPUAttrBinding *binding, uint a_idx); +void GPU_draw_list_append(GPUDrawList list, GPUBatch *batch, int i_first, int i_count); +void GPU_draw_list_submit(GPUDrawList list); #ifdef __cplusplus } diff --git a/source/blender/gpu/GPU_element.h b/source/blender/gpu/GPU_element.h index 3d5195b12fc..5cf85b4ea0e 100644 --- a/source/blender/gpu/GPU_element.h +++ b/source/blender/gpu/GPU_element.h @@ -54,6 +54,8 @@ typedef struct GPUIndexBuf { }; } GPUIndexBuf; +GPUIndexBuf *GPU_indexbuf_calloc(void); + void GPU_indexbuf_use(GPUIndexBuf *); uint GPU_indexbuf_size_get(const GPUIndexBuf *); diff --git a/source/blender/gpu/GPU_extensions.h b/source/blender/gpu/GPU_extensions.h index 2ce6e458378..18ac2265cc4 100644 --- a/source/blender/gpu/GPU_extensions.h +++ b/source/blender/gpu/GPU_extensions.h @@ -40,7 +40,6 @@ int GPU_max_color_texture_samples(void); int GPU_max_cube_map_size(void); int GPU_max_ubo_binds(void); int GPU_max_ubo_size(void); -float GPU_max_line_width(void); void GPU_get_dfdy_factors(float fac[2]); bool GPU_arb_base_instance_is_supported(void); bool GPU_arb_texture_cube_map_array_is_supported(void); diff --git a/source/blender/gpu/GPU_framebuffer.h b/source/blender/gpu/GPU_framebuffer.h index 9dc07fefd4e..e6648c69de7 100644 --- a/source/blender/gpu/GPU_framebuffer.h +++ b/source/blender/gpu/GPU_framebuffer.h @@ -19,6 +19,13 @@ /** \file * \ingroup gpu + * + * GPU Framebuffer + * - this is a wrapper for an OpenGL framebuffer object (FBO). in practice + * multiple FBO's may be created. + * - actual FBO creation & config is deferred until GPU_framebuffer_bind or + * GPU_framebuffer_check_valid to allow creation & config while another + * opengl context is bound (since FBOs are not shared between ogl contexts). 
*/ #pragma once @@ -41,32 +48,28 @@ typedef enum eGPUFrameBufferBits { } eGPUFrameBufferBits; typedef enum eGPUBackBuffer { - GPU_BACKBUFFER = 0, + GPU_BACKBUFFER_LEFT = 0, GPU_BACKBUFFER_RIGHT, - GPU_BACKBUFFER_LEFT, } eGPUBackBuffer; -typedef struct GPUFrameBuffer GPUFrameBuffer; -typedef struct GPUOffScreen GPUOffScreen; +/** Opaque pointer hiding blender::gpu::FrameBuffer. */ +typedef struct GPUFrameBuffer { + void *dummy; +} GPUFrameBuffer; -/* GPU Framebuffer - * - this is a wrapper for an OpenGL framebuffer object (FBO). in practice - * multiple FBO's may be created, to get around limitations on the number - * of attached textures and the dimension requirements. - * - actual FBO creation & config is deferred until GPU_framebuffer_bind or - * GPU_framebuffer_check_valid to allow creation & config while another - * opengl context is bound (since FBOs are not shared between ogl contexts). - */ +typedef struct GPUOffScreen GPUOffScreen; -GPUFrameBuffer *GPU_framebuffer_create(void); +GPUFrameBuffer *GPU_framebuffer_create(const char *name); void GPU_framebuffer_free(GPUFrameBuffer *fb); void GPU_framebuffer_bind(GPUFrameBuffer *fb); +void GPU_framebuffer_bind_no_srgb(GPUFrameBuffer *fb); void GPU_framebuffer_restore(void); bool GPU_framebuffer_bound(GPUFrameBuffer *fb); bool GPU_framebuffer_check_valid(GPUFrameBuffer *fb, char err_out[256]); GPUFrameBuffer *GPU_framebuffer_active_get(void); +GPUFrameBuffer *GPU_framebuffer_back_get(void); #define GPU_FRAMEBUFFER_FREE_SAFE(fb) \ do { \ @@ -79,13 +82,10 @@ GPUFrameBuffer *GPU_framebuffer_active_get(void); /* Framebuffer setup : You need to call GPU_framebuffer_bind for these * to be effective. 
*/ -void GPU_framebuffer_texture_attach(GPUFrameBuffer *fb, struct GPUTexture *tex, int slot, int mip); -void GPU_framebuffer_texture_layer_attach( - GPUFrameBuffer *fb, struct GPUTexture *tex, int slot, int layer, int mip); -void GPU_framebuffer_texture_cubeface_attach( - GPUFrameBuffer *fb, struct GPUTexture *tex, int slot, int face, int mip); +void GPU_framebuffer_texture_attach_ex(GPUFrameBuffer *gpu_fb, + GPUAttachment attachement, + int slot); void GPU_framebuffer_texture_detach(GPUFrameBuffer *fb, struct GPUTexture *tex); -void GPU_framebuffer_texture_detach_slot(GPUFrameBuffer *fb, struct GPUTexture *tex, int type); /** * How to use #GPU_framebuffer_ensure_config(). @@ -109,7 +109,7 @@ void GPU_framebuffer_texture_detach_slot(GPUFrameBuffer *fb, struct GPUTexture * #define GPU_framebuffer_ensure_config(_fb, ...) \ do { \ if (*(_fb) == NULL) { \ - *(_fb) = GPU_framebuffer_create(); \ + *(_fb) = GPU_framebuffer_create(#_fb); \ } \ GPUAttachment config[] = __VA_ARGS__; \ GPU_framebuffer_config_array(*(_fb), config, (sizeof(config) / sizeof(GPUAttachment))); \ @@ -150,9 +150,17 @@ void GPU_framebuffer_config_array(GPUFrameBuffer *fb, const GPUAttachment *confi _tex, _face, _mip, \ } +void GPU_framebuffer_texture_attach(GPUFrameBuffer *fb, GPUTexture *tex, int slot, int mip); +void GPU_framebuffer_texture_layer_attach( + GPUFrameBuffer *fb, GPUTexture *tex, int slot, int layer, int mip); +void GPU_framebuffer_texture_cubeface_attach( + GPUFrameBuffer *fb, GPUTexture *tex, int slot, int face, int mip); + /* Framebuffer operations */ void GPU_framebuffer_viewport_set(GPUFrameBuffer *fb, int x, int y, int w, int h); +void GPU_framebuffer_viewport_get(GPUFrameBuffer *fb, int r_viewport[4]); +void GPU_framebuffer_viewport_reset(GPUFrameBuffer *fb); void GPU_framebuffer_clear(GPUFrameBuffer *fb, eGPUFrameBufferBits buffers, @@ -224,7 +232,6 @@ void GPU_offscreen_viewport_data_get(GPUOffScreen *ofs, void GPU_clear_color(float red, float green, float blue, float alpha); 
void GPU_clear_depth(float depth); -void GPU_clear(eGPUFrameBufferBits flags); void GPU_frontbuffer_read_pixels( int x, int y, int w, int h, int channels, eGPUDataFormat format, void *data); diff --git a/source/blender/gpu/GPU_immediate.h b/source/blender/gpu/GPU_immediate.h index 41d4f5d28d3..6057770d2d9 100644 --- a/source/blender/gpu/GPU_immediate.h +++ b/source/blender/gpu/GPU_immediate.h @@ -29,7 +29,6 @@ #include "GPU_immediate_util.h" #include "GPU_primitive.h" #include "GPU_shader.h" -#include "GPU_shader_interface.h" #include "GPU_texture.h" #include "GPU_vertex_format.h" @@ -103,13 +102,11 @@ void immVertex2iv(uint attr_id, const int data[2]); /* Provide uniform values that don't change for the entire draw call. */ void immUniform1i(const char *name, int x); -void immUniform4iv(const char *name, const int data[4]); void immUniform1f(const char *name, float x); void immUniform2f(const char *name, float x, float y); void immUniform2fv(const char *name, const float data[2]); void immUniform3f(const char *name, float x, float y, float z); void immUniform3fv(const char *name, const float data[3]); -void immUniformArray3fv(const char *name, const float *data, int count); void immUniform4f(const char *name, float x, float y, float z, float w); void immUniform4fv(const char *name, const float data[4]); void immUniformArray4fv(const char *bare_name, const float *data, int count); diff --git a/source/blender/gpu/GPU_material.h b/source/blender/gpu/GPU_material.h index b8957ff1819..680e717e615 100644 --- a/source/blender/gpu/GPU_material.h +++ b/source/blender/gpu/GPU_material.h @@ -39,7 +39,7 @@ struct GPUNode; struct GPUNodeLink; struct GPUNodeStack; struct GPUTexture; -struct GPUUniformBuffer; +struct GPUUniformBuf; struct Image; struct ImageUser; struct ListBase; @@ -112,15 +112,6 @@ typedef enum eGPUMatFlag { GPU_MATFLAG_BARYCENTRIC = (1 << 4), } eGPUMatFlag; -typedef enum eGPUBlendMode { - GPU_BLEND_SOLID = 0, - GPU_BLEND_ADD = 1, - GPU_BLEND_ALPHA = 2, - 
GPU_BLEND_CLIP = 4, - GPU_BLEND_ALPHA_SORT = 8, - GPU_BLEND_ALPHA_TO_COVERAGE = 16, -} eGPUBlendMode; - typedef struct GPUNodeStack { eGPUType type; float vec[4]; @@ -167,10 +158,10 @@ bool GPU_stack_link(GPUMaterial *mat, GPUNodeStack *in, GPUNodeStack *out, ...); -GPUNodeLink *GPU_uniformbuffer_link_out(struct GPUMaterial *mat, - struct bNode *node, - struct GPUNodeStack *stack, - const int index); +GPUNodeLink *GPU_uniformbuf_link_out(struct GPUMaterial *mat, + struct bNode *node, + struct GPUNodeStack *stack, + const int index); void GPU_material_output_link(GPUMaterial *material, GPUNodeLink *link); @@ -178,9 +169,9 @@ void GPU_material_sss_profile_create(GPUMaterial *material, float radii[3], const short *falloff_type, const float *sharpness); -struct GPUUniformBuffer *GPU_material_sss_profile_get(GPUMaterial *material, - int sample_len, - struct GPUTexture **tex_profile); +struct GPUUniformBuf *GPU_material_sss_profile_get(GPUMaterial *material, + int sample_len, + struct GPUTexture **tex_profile); /* High level functions to create and use GPU materials */ GPUMaterial *GPU_material_from_nodetree_find(struct ListBase *gpumaterials, @@ -210,9 +201,9 @@ struct GPUShader *GPU_material_get_shader(GPUMaterial *material); struct Material *GPU_material_get_material(GPUMaterial *material); eGPUMaterialStatus GPU_material_status(GPUMaterial *mat); -struct GPUUniformBuffer *GPU_material_uniform_buffer_get(GPUMaterial *material); +struct GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material); void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs); -struct GPUUniformBuffer *GPU_material_create_sss_profile_ubo(void); +struct GPUUniformBuf *GPU_material_create_sss_profile_ubo(void); bool GPU_material_has_surface_output(GPUMaterial *mat); bool GPU_material_has_volume_output(GPUMaterial *mat); diff --git a/source/blender/gpu/GPU_matrix.h b/source/blender/gpu/GPU_matrix.h index 7b94a535a30..aad6ae9e2ba 100644 --- 
a/source/blender/gpu/GPU_matrix.h +++ b/source/blender/gpu/GPU_matrix.h @@ -29,7 +29,7 @@ extern "C" { #endif -struct GPUShaderInterface; +struct GPUShader; void GPU_matrix_reset(void); /* to Identity transform & empty stack */ @@ -147,7 +147,7 @@ const float (*GPU_matrix_normal_get(float m[3][3]))[3]; const float (*GPU_matrix_normal_inverse_get(float m[3][3]))[3]; /* set uniform values for currently bound shader */ -void GPU_matrix_bind(const struct GPUShaderInterface *); +void GPU_matrix_bind(struct GPUShader *shader); bool GPU_matrix_dirty_get(void); /* since last bind */ /* own working polygon offset */ diff --git a/source/blender/gpu/GPU_primitive.h b/source/blender/gpu/GPU_primitive.h index e910e81fac1..781a10f3636 100644 --- a/source/blender/gpu/GPU_primitive.h +++ b/source/blender/gpu/GPU_primitive.h @@ -56,8 +56,11 @@ typedef enum { GPU_PRIM_CLASS_ANY = GPU_PRIM_CLASS_POINT | GPU_PRIM_CLASS_LINE | GPU_PRIM_CLASS_SURFACE, } GPUPrimClass; -GPUPrimClass GPU_primtype_class(GPUPrimType); -bool GPU_primtype_belongs_to_class(GPUPrimType, GPUPrimClass); +/** + * TODO Improve error checking by validating that the shader is suited for this primitive type. + * GPUPrimClass GPU_primtype_class(GPUPrimType); + * bool GPU_primtype_belongs_to_class(GPUPrimType, GPUPrimClass); + **/ #ifdef __cplusplus } diff --git a/source/blender/gpu/GPU_shader.h b/source/blender/gpu/GPU_shader.h index f782742ae53..33fef266c42 100644 --- a/source/blender/gpu/GPU_shader.h +++ b/source/blender/gpu/GPU_shader.h @@ -27,14 +27,12 @@ extern "C" { #endif -typedef struct GPUShader GPUShader; -struct GPUShaderInterface; struct GPUTexture; -struct GPUUniformBuffer; +struct GPUUniformBuf; +struct GPUVertBuf; -/* GPU Shader - * - only for fragment shaders now - * - must call texture bind before setting a texture as uniform! 
*/ +/** Opaque type hidding blender::gpu::Shader */ +typedef struct GPUShader GPUShader; typedef enum eGPUShaderTFBType { GPU_SHADER_TFB_NONE = 0, /* Transform feedback unsupported. */ @@ -63,17 +61,16 @@ GPUShader *GPU_shader_create_ex(const char *vertexcode, const char **tf_names, const int tf_count, const char *shader_name); -GPUShader *GPU_shader_load_from_binary(const char *binary, - const int binary_format, - const int binary_len, - const char *shname); + struct GPU_ShaderCreateFromArray_Params { const char **vert, **geom, **frag, **defs; }; struct GPUShader *GPU_shader_create_from_arrays_impl( - const struct GPU_ShaderCreateFromArray_Params *params); + const struct GPU_ShaderCreateFromArray_Params *params, const char *func, int line); + #define GPU_shader_create_from_arrays(...) \ - GPU_shader_create_from_arrays_impl(&(const struct GPU_ShaderCreateFromArray_Params)__VA_ARGS__) + GPU_shader_create_from_arrays_impl( \ + &(const struct GPU_ShaderCreateFromArray_Params)__VA_ARGS__, __func__, __LINE__) void GPU_shader_free(GPUShader *shader); @@ -81,12 +78,47 @@ void GPU_shader_bind(GPUShader *shader); void GPU_shader_unbind(void); /* Returns true if transform feedback was successfully enabled. 
*/ -bool GPU_shader_transform_feedback_enable(GPUShader *shader, unsigned int vbo_id); +bool GPU_shader_transform_feedback_enable(GPUShader *shader, struct GPUVertBuf *vertbuf); void GPU_shader_transform_feedback_disable(GPUShader *shader); int GPU_shader_get_program(GPUShader *shader); -void GPU_shader_set_srgb_uniform(const struct GPUShaderInterface *interface); +typedef enum { + GPU_UNIFORM_MODEL = 0, /* mat4 ModelMatrix */ + GPU_UNIFORM_VIEW, /* mat4 ViewMatrix */ + GPU_UNIFORM_MODELVIEW, /* mat4 ModelViewMatrix */ + GPU_UNIFORM_PROJECTION, /* mat4 ProjectionMatrix */ + GPU_UNIFORM_VIEWPROJECTION, /* mat4 ViewProjectionMatrix */ + GPU_UNIFORM_MVP, /* mat4 ModelViewProjectionMatrix */ + + GPU_UNIFORM_MODEL_INV, /* mat4 ModelMatrixInverse */ + GPU_UNIFORM_VIEW_INV, /* mat4 ViewMatrixInverse */ + GPU_UNIFORM_MODELVIEW_INV, /* mat4 ModelViewMatrixInverse */ + GPU_UNIFORM_PROJECTION_INV, /* mat4 ProjectionMatrixInverse */ + GPU_UNIFORM_VIEWPROJECTION_INV, /* mat4 ViewProjectionMatrixInverse */ + + GPU_UNIFORM_NORMAL, /* mat3 NormalMatrix */ + GPU_UNIFORM_ORCO, /* vec4 OrcoTexCoFactors[] */ + GPU_UNIFORM_CLIPPLANES, /* vec4 WorldClipPlanes[] */ + + GPU_UNIFORM_COLOR, /* vec4 color */ + GPU_UNIFORM_BASE_INSTANCE, /* int baseInstance */ + GPU_UNIFORM_RESOURCE_CHUNK, /* int resourceChunk */ + GPU_UNIFORM_RESOURCE_ID, /* int resourceId */ + GPU_UNIFORM_SRGB_TRANSFORM, /* bool srgbTarget */ + + GPU_NUM_UNIFORMS, /* Special value, denotes number of builtin uniforms. */ +} GPUUniformBuiltin; + +typedef enum { + GPU_UNIFORM_BLOCK_VIEW = 0, /* viewBlock */ + GPU_UNIFORM_BLOCK_MODEL, /* modelBlock */ + GPU_UNIFORM_BLOCK_INFO, /* infoBlock */ + + GPU_NUM_UNIFORM_BLOCKS, /* Special value, denotes number of builtin uniforms block. 
*/ +} GPUUniformBlockBuiltin; + +void GPU_shader_set_srgb_uniform(GPUShader *shader); int GPU_shader_get_uniform(GPUShader *shader, const char *name); int GPU_shader_get_builtin_uniform(GPUShader *shader, int builtin); @@ -104,9 +136,20 @@ void GPU_shader_uniform_vector_int( void GPU_shader_uniform_float(GPUShader *shader, int location, float value); void GPU_shader_uniform_int(GPUShader *shader, int location, int value); -int GPU_shader_get_attribute(GPUShader *shader, const char *name); +void GPU_shader_uniform_1i(GPUShader *sh, const char *name, int value); +void GPU_shader_uniform_1b(GPUShader *sh, const char *name, bool value); +void GPU_shader_uniform_1f(GPUShader *sh, const char *name, float value); +void GPU_shader_uniform_2f(GPUShader *sh, const char *name, float x, float y); +void GPU_shader_uniform_3f(GPUShader *sh, const char *name, float x, float y, float z); +void GPU_shader_uniform_4f(GPUShader *sh, const char *name, float x, float y, float z, float w); +void GPU_shader_uniform_2fv(GPUShader *sh, const char *name, const float data[2]); +void GPU_shader_uniform_3fv(GPUShader *sh, const char *name, const float data[3]); +void GPU_shader_uniform_4fv(GPUShader *sh, const char *name, const float data[4]); +void GPU_shader_uniform_mat4(GPUShader *sh, const char *name, const float data[4][4]); +void GPU_shader_uniform_2fv_array(GPUShader *sh, const char *name, int len, const float (*val)[2]); +void GPU_shader_uniform_4fv_array(GPUShader *sh, const char *name, int len, const float (*val)[4]); -char *GPU_shader_get_binary(GPUShader *shader, uint *r_binary_format, int *r_binary_len); +int GPU_shader_get_attribute(GPUShader *shader, const char *name); void GPU_shader_set_framebuffer_srgb_target(int use_srgb_to_linear); diff --git a/source/blender/gpu/GPU_shader_interface.h b/source/blender/gpu/GPU_shader_interface.h deleted file mode 100644 index 8aba1236b65..00000000000 --- a/source/blender/gpu/GPU_shader_interface.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * 
This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * The Original Code is Copyright (C) 2016 by Mike Erwin. - * All rights reserved. - */ - -/** \file - * \ingroup gpu - * - * GPU shader interface (C --> GLSL) - */ - -#pragma once - -#include "GPU_common.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum { - GPU_UNIFORM_MODEL = 0, /* mat4 ModelMatrix */ - GPU_UNIFORM_VIEW, /* mat4 ViewMatrix */ - GPU_UNIFORM_MODELVIEW, /* mat4 ModelViewMatrix */ - GPU_UNIFORM_PROJECTION, /* mat4 ProjectionMatrix */ - GPU_UNIFORM_VIEWPROJECTION, /* mat4 ViewProjectionMatrix */ - GPU_UNIFORM_MVP, /* mat4 ModelViewProjectionMatrix */ - - GPU_UNIFORM_MODEL_INV, /* mat4 ModelMatrixInverse */ - GPU_UNIFORM_VIEW_INV, /* mat4 ViewMatrixInverse */ - GPU_UNIFORM_MODELVIEW_INV, /* mat4 ModelViewMatrixInverse */ - GPU_UNIFORM_PROJECTION_INV, /* mat4 ProjectionMatrixInverse */ - GPU_UNIFORM_VIEWPROJECTION_INV, /* mat4 ViewProjectionMatrixInverse */ - - GPU_UNIFORM_NORMAL, /* mat3 NormalMatrix */ - GPU_UNIFORM_ORCO, /* vec4 OrcoTexCoFactors[] */ - GPU_UNIFORM_CLIPPLANES, /* vec4 WorldClipPlanes[] */ - - GPU_UNIFORM_COLOR, /* vec4 color */ - GPU_UNIFORM_BASE_INSTANCE, /* int baseInstance */ - GPU_UNIFORM_RESOURCE_CHUNK, /* int resourceChunk */ - GPU_UNIFORM_RESOURCE_ID, /* int resourceId */ - 
GPU_UNIFORM_SRGB_TRANSFORM, /* bool srgbTarget */ - - GPU_NUM_UNIFORMS, /* Special value, denotes number of builtin uniforms. */ -} GPUUniformBuiltin; - -typedef enum { - GPU_UNIFORM_BLOCK_VIEW = 0, /* viewBlock */ - GPU_UNIFORM_BLOCK_MODEL, /* modelBlock */ - GPU_UNIFORM_BLOCK_INFO, /* infoBlock */ - - GPU_NUM_UNIFORM_BLOCKS, /* Special value, denotes number of builtin uniforms block. */ -} GPUUniformBlockBuiltin; - -typedef struct GPUShaderInput { - uint32_t name_offset; - uint32_t name_hash; - int32_t location; - /** Defined at interface creation or in shader. Only for Samplers, UBOs and Vertex Attribs. */ - int32_t binding; -} GPUShaderInput; - -#define GPU_SHADERINTERFACE_REF_ALLOC_COUNT 16 - -typedef struct GPUShaderInterface { - /** Buffer containing all inputs names separated by '\0'. */ - char *name_buffer; - /** Reference to GPUBatches using this interface */ - struct GPUBatch **batches; - uint batches_len; - /** Input counts. */ - uint attribute_len; - uint ubo_len; - uint uniform_len; - /** Enabled bindpoints that needs to be fed with data. */ - uint16_t enabled_attr_mask; - uint16_t enabled_ubo_mask; - uint64_t enabled_tex_mask; - /** Opengl Location of builtin uniforms. Fast access, no lookup needed. */ - int32_t builtins[GPU_NUM_UNIFORMS]; - int32_t builtin_blocks[GPU_NUM_UNIFORM_BLOCKS]; - /** Flat array. In this order: Attributes, Ubos, Uniforms. 
*/ - GPUShaderInput inputs[0]; -} GPUShaderInterface; - -GPUShaderInterface *GPU_shaderinterface_create(int32_t program_id); -void GPU_shaderinterface_discard(GPUShaderInterface *); - -const GPUShaderInput *GPU_shaderinterface_uniform(const GPUShaderInterface *, const char *name); -int32_t GPU_shaderinterface_uniform_builtin(const GPUShaderInterface *shaderface, - GPUUniformBuiltin builtin); -int32_t GPU_shaderinterface_block_builtin(const GPUShaderInterface *shaderface, - GPUUniformBlockBuiltin builtin); -const GPUShaderInput *GPU_shaderinterface_ubo(const GPUShaderInterface *, const char *name); -const GPUShaderInput *GPU_shaderinterface_attr(const GPUShaderInterface *, const char *name); - -/* keep track of batches using this interface */ -void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *, struct GPUBatch *); -void GPU_shaderinterface_remove_batch_ref(GPUShaderInterface *, struct GPUBatch *); - -#ifdef __cplusplus -} -#endif diff --git a/source/blender/gpu/GPU_state.h b/source/blender/gpu/GPU_state.h index 4a2c90e241b..253877bcca0 100644 --- a/source/blender/gpu/GPU_state.h +++ b/source/blender/gpu/GPU_state.h @@ -20,90 +20,138 @@ #pragma once -#ifdef __cplusplus -extern "C" { -#endif +#include "BLI_utildefines.h" -/* These map directly to the GL_ blend functions, to minimize API add as needed*/ -typedef enum eGPUBlendFunction { - GPU_ONE, - GPU_SRC_ALPHA, - GPU_ONE_MINUS_SRC_ALPHA, - GPU_DST_COLOR, - GPU_ZERO, -} eGPUBlendFunction; - -/* These map directly to the GL_ filter functions, to minimize API add as needed*/ -typedef enum eGPUFilterFunction { - GPU_NEAREST, - GPU_LINEAR, -} eGPUFilterFunction; - -typedef enum eGPUFaceCull { +typedef enum eGPUWriteMask { + GPU_WRITE_NONE = 0, + GPU_WRITE_RED = (1 << 0), + GPU_WRITE_GREEN = (1 << 1), + GPU_WRITE_BLUE = (1 << 2), + GPU_WRITE_ALPHA = (1 << 3), + GPU_WRITE_DEPTH = (1 << 4), + GPU_WRITE_STENCIL = (1 << 5), + GPU_WRITE_COLOR = (GPU_WRITE_RED | GPU_WRITE_GREEN | GPU_WRITE_BLUE | GPU_WRITE_ALPHA), +} 
eGPUWriteMask; + +ENUM_OPERATORS(eGPUWriteMask) + +/** + * Defines the fixed pipeline blending equation. + * SRC is the output color from the shader. + * DST is the color from the framebuffer. + * The blending equation is : + * (SRC * A) + (DST * B). + * The blend mode will modify the A and B parameters. + */ +typedef enum eGPUBlend { + GPU_BLEND_NONE = 0, + /** Premult variants will _NOT_ multiply rgb output by alpha. */ + GPU_BLEND_ALPHA, + GPU_BLEND_ALPHA_PREMULT, + GPU_BLEND_ADDITIVE, + GPU_BLEND_ADDITIVE_PREMULT, + GPU_BLEND_MULTIPLY, + GPU_BLEND_SUBTRACT, + /** Replace logic op: SRC * (1 - DST) + * NOTE: Does not modify alpha. */ + GPU_BLEND_INVERT, + /** Order independent transparency. + * NOTE: Cannot be used as is. Needs special setup (framebuffer, shader ...). */ + GPU_BLEND_OIT, + /** Special blend to add color under and multiply dst color by src alpha. */ + GPU_BLEND_BACKGROUND, + /** Custom blend parameters using dual source blending : SRC0 + SRC1 * DST + * NOTE: Can only be used with _ONE_ Draw Buffer and shader needs to be specialized. */ + GPU_BLEND_CUSTOM, +} eGPUBlend; + +typedef enum eGPUDepthTest { + GPU_DEPTH_NONE = 0, + GPU_DEPTH_ALWAYS, /* Used to draw to the depth buffer without really testing. */ + GPU_DEPTH_LESS, + GPU_DEPTH_LESS_EQUAL, /* Default. */ + GPU_DEPTH_EQUAL, + GPU_DEPTH_GREATER, + GPU_DEPTH_GREATER_EQUAL, +} eGPUDepthTest; + +typedef enum eGPUStencilTest { + GPU_STENCIL_NONE = 0, + GPU_STENCIL_ALWAYS, + GPU_STENCIL_EQUAL, + GPU_STENCIL_NEQUAL, +} eGPUStencilTest; + +typedef enum eGPUStencilOp { + GPU_STENCIL_OP_NONE = 0, + GPU_STENCIL_OP_REPLACE, + /** Special values for stencil shadows. */ + GPU_STENCIL_OP_COUNT_DEPTH_PASS, + GPU_STENCIL_OP_COUNT_DEPTH_FAIL, +} eGPUStencilOp; + +typedef enum eGPUFaceCullTest { GPU_CULL_NONE = 0, /* Culling disabled. 
*/ GPU_CULL_FRONT, GPU_CULL_BACK, -} eGPUFaceCull; +} eGPUFaceCullTest; typedef enum eGPUProvokingVertex { - GPU_VERTEX_FIRST = 0, - GPU_VERTEX_LAST, /* Default */ + GPU_VERTEX_LAST = 0, /* Default. */ + GPU_VERTEX_FIRST = 1, /* Follow Blender loop order. */ } eGPUProvokingVertex; -/* Initialize - * - sets the default Blender opengl state, if in doubt, check - * the contents of this function - * - this is called when starting Blender, for opengl rendering. */ -void GPU_state_init(void); - -void GPU_blend(bool enable); -void GPU_blend_set_func(eGPUBlendFunction sfactor, eGPUBlendFunction dfactor); -void GPU_blend_set_func_separate(eGPUBlendFunction src_rgb, - eGPUBlendFunction dst_rgb, - eGPUBlendFunction src_alpha, - eGPUBlendFunction dst_alpha); -void GPU_face_culling(eGPUFaceCull culling); -void GPU_front_facing(bool invert); +#ifdef __cplusplus +extern "C" { +#endif + +void GPU_blend(eGPUBlend blend); +void GPU_face_culling(eGPUFaceCullTest culling); +void GPU_depth_test(eGPUDepthTest test); +void GPU_stencil_test(eGPUStencilTest test); void GPU_provoking_vertex(eGPUProvokingVertex vert); +void GPU_front_facing(bool invert); void GPU_depth_range(float near, float far); -void GPU_depth_test(bool enable); -bool GPU_depth_test_enabled(void); void GPU_scissor_test(bool enable); void GPU_line_smooth(bool enable); void GPU_line_width(float width); +void GPU_logic_op_xor_set(bool enable); void GPU_point_size(float size); void GPU_polygon_smooth(bool enable); void GPU_program_point_size(bool enable); void GPU_scissor(int x, int y, int width, int height); -void GPU_scissor_get_f(float coords[4]); -void GPU_scissor_get_i(int coords[4]); +void GPU_scissor_get(int coords[4]); void GPU_viewport(int x, int y, int width, int height); void GPU_viewport_size_get_f(float coords[4]); void GPU_viewport_size_get_i(int coords[4]); +void GPU_write_mask(eGPUWriteMask mask); void GPU_color_mask(bool r, bool g, bool b, bool a); void GPU_depth_mask(bool depth); bool 
GPU_depth_mask_get(void); -void GPU_stencil_mask(uint stencil); void GPU_unpack_row_length_set(uint len); +void GPU_shadow_offset(bool enable); void GPU_clip_distances(int enabled_len); bool GPU_mipmap_enabled(void); +void GPU_state_set(eGPUWriteMask write_mask, + eGPUBlend blend, + eGPUFaceCullTest culling_test, + eGPUDepthTest depth_test, + eGPUStencilTest stencil_test, + eGPUStencilOp stencil_op, + eGPUProvokingVertex provoking_vert); -void GPU_flush(void); -void GPU_finish(void); +void GPU_stencil_reference_set(uint reference); +void GPU_stencil_write_mask_set(uint write_mask); +void GPU_stencil_compare_mask_set(uint compare_mask); -void GPU_logic_op_xor_set(bool enable); +eGPUBlend GPU_blend_get(void); +eGPUDepthTest GPU_depth_test_get(void); +eGPUWriteMask GPU_write_mask_get(void); +uint GPU_stencil_mask_get(void); +eGPUStencilTest GPU_stencil_test_get(void); -/* Attribute push & pop. */ -typedef enum eGPUAttrMask { - GPU_DEPTH_BUFFER_BIT = (1 << 0), - GPU_ENABLE_BIT = (1 << 1), - GPU_SCISSOR_BIT = (1 << 2), - GPU_VIEWPORT_BIT = (1 << 3), - GPU_BLEND_BIT = (1 << 4), -} eGPUAttrMask; - -void gpuPushAttr(eGPUAttrMask mask); -void gpuPopAttr(void); +void GPU_flush(void); +void GPU_finish(void); #ifdef __cplusplus } diff --git a/source/blender/gpu/GPU_texture.h b/source/blender/gpu/GPU_texture.h index 7ee7f8fcdec..93865c098b8 100644 --- a/source/blender/gpu/GPU_texture.h +++ b/source/blender/gpu/GPU_texture.h @@ -275,8 +275,10 @@ void GPU_texture_mipmap_mode(GPUTexture *tex, bool use_mipmap, bool use_filter); void GPU_texture_wrap_mode(GPUTexture *tex, bool use_repeat, bool use_clamp); void GPU_texture_swizzle_set(GPUTexture *tex, const char swizzle[4]); +/* TODO should be private internal functions. 
*/ void GPU_texture_attach_framebuffer(GPUTexture *tex, struct GPUFrameBuffer *fb, int attachment); -int GPU_texture_detach_framebuffer(GPUTexture *tex, struct GPUFrameBuffer *fb); +void GPU_texture_detach_framebuffer(GPUTexture *tex, struct GPUFrameBuffer *fb); +int GPU_texture_framebuffer_attachement_get(GPUTexture *tex, struct GPUFrameBuffer *fb); int GPU_texture_target(const GPUTexture *tex); int GPU_texture_width(const GPUTexture *tex); diff --git a/source/blender/gpu/GPU_uniform_buffer.h b/source/blender/gpu/GPU_uniform_buffer.h new file mode 100644 index 00000000000..4a00dda634d --- /dev/null +++ b/source/blender/gpu/GPU_uniform_buffer.h @@ -0,0 +1,61 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2005 Blender Foundation. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + * + * Uniform buffers API. Used to handle many uniforms update at once. + * Make sure that the data structure is compatible with what the implementation expect. + * (see "7.6.2.2 Standard Uniform Block Layout" from the OpenGL spec for more info about std140 + * layout) + * Rule of thumb: Padding to 16bytes, don't use vec3, don't use arrays of anything that is not vec4 + * aligned . 
+ */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +struct ListBase; + +/** Opaque pointer hiding blender::gpu::UniformBuf. */ +typedef struct GPUUniformBuf { + void *dummy; +} GPUUniformBuf; + +GPUUniformBuf *GPU_uniformbuf_create_ex(size_t size, const void *data, const char *name); +GPUUniformBuf *GPU_uniformbuf_create_from_list(struct ListBase *inputs, const char *name); + +#define GPU_uniformbuf_create(size) GPU_uniformbuf_create_ex(size, NULL, __func__); + +void GPU_uniformbuf_free(GPUUniformBuf *ubo); + +void GPU_uniformbuf_update(GPUUniformBuf *ubo, const void *data); + +void GPU_uniformbuf_bind(GPUUniformBuf *ubo, int number); +void GPU_uniformbuf_unbind(GPUUniformBuf *ubo); +void GPU_uniformbuf_unbind_all(void); + +#define GPU_UBO_BLOCK_NAME "nodeTree" + +#ifdef __cplusplus +} +#endif diff --git a/source/blender/gpu/GPU_uniformbuffer.h b/source/blender/gpu/GPU_uniformbuffer.h deleted file mode 100644 index e2b2a757fb9..00000000000 --- a/source/blender/gpu/GPU_uniformbuffer.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * The Original Code is Copyright (C) 2005 Blender Foundation. - * All rights reserved. 
- */ - -/** \file - * \ingroup gpu - */ - -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -struct ListBase; - -typedef struct GPUUniformBuffer GPUUniformBuffer; - -GPUUniformBuffer *GPU_uniformbuffer_create(int size, const void *data, char err_out[256]); -GPUUniformBuffer *GPU_uniformbuffer_dynamic_create(struct ListBase *inputs, char err_out[256]); - -void GPU_uniformbuffer_free(GPUUniformBuffer *ubo); - -void GPU_uniformbuffer_update(GPUUniformBuffer *ubo, const void *data); -void GPU_uniformbuffer_dynamic_update(GPUUniformBuffer *ubo_); - -void GPU_uniformbuffer_bind(GPUUniformBuffer *ubo, int number); -void GPU_uniformbuffer_unbind(GPUUniformBuffer *ubo); -void GPU_uniformbuffer_unbind_all(void); - -bool GPU_uniformbuffer_is_empty(GPUUniformBuffer *ubo); -bool GPU_uniformbuffer_is_dirty(GPUUniformBuffer *ubo); - -#define GPU_UBO_BLOCK_NAME "nodeTree" - -#ifdef __cplusplus -} -#endif diff --git a/source/blender/gpu/GPU_vertex_buffer.h b/source/blender/gpu/GPU_vertex_buffer.h index 757255496e0..bd1019bb1f5 100644 --- a/source/blender/gpu/GPU_vertex_buffer.h +++ b/source/blender/gpu/GPU_vertex_buffer.h @@ -59,6 +59,8 @@ typedef struct GPUVertBuf { uint32_t vbo_id; /** Usage hint for GL optimisation. */ GPUUsageType usage; + /** This counter will only avoid freeing the GPUVertBuf, not the data. */ + char handle_refcount; /** Data has been touched and need to be reuploaded to GPU. */ bool dirty; uchar *data; /* NULL indicates data in VRAM (unmapped) */ @@ -73,6 +75,10 @@ GPUVertBuf *GPU_vertbuf_create_with_format_ex(const GPUVertFormat *, GPUUsageTyp void GPU_vertbuf_clear(GPUVertBuf *verts); void GPU_vertbuf_discard(GPUVertBuf *); +/* Avoid GPUVertBuf datablock being free but not its data. 
*/ +void GPU_vertbuf_handle_ref_add(GPUVertBuf *verts); +void GPU_vertbuf_handle_ref_remove(GPUVertBuf *verts); + void GPU_vertbuf_init(GPUVertBuf *, GPUUsageType); void GPU_vertbuf_init_with_format_ex(GPUVertBuf *, const GPUVertFormat *, GPUUsageType); diff --git a/source/blender/gpu/GPU_viewport.h b/source/blender/gpu/GPU_viewport.h index 60b78ecd59b..c3e2f1788b4 100644 --- a/source/blender/gpu/GPU_viewport.h +++ b/source/blender/gpu/GPU_viewport.h @@ -55,8 +55,8 @@ typedef struct ViewportMemoryPool { struct BLI_memblock *views; struct BLI_memblock *passes; struct BLI_memblock *images; - struct GPUUniformBuffer **matrices_ubo; - struct GPUUniformBuffer **obinfos_ubo; + struct GPUUniformBuf **matrices_ubo; + struct GPUUniformBuf **obinfos_ubo; uint ubo_len; } ViewportMemoryPool; diff --git a/source/blender/gpu/intern/gpu_attr_binding.cc b/source/blender/gpu/intern/gpu_attr_binding.cc deleted file mode 100644 index 6cb60884620..00000000000 --- a/source/blender/gpu/intern/gpu_attr_binding.cc +++ /dev/null @@ -1,83 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * The Original Code is Copyright (C) 2016 by Mike Erwin. - * All rights reserved. 
- */ - -/** \file - * \ingroup gpu - * - * GPU vertex attribute binding - */ - -#include "GPU_attr_binding.h" -#include "gpu_attr_binding_private.h" -#include <stddef.h> -#include <stdlib.h> - -#if GPU_VERT_ATTR_MAX_LEN != 16 -# error "attribute binding code assumes GPU_VERT_ATTR_MAX_LEN = 16" -#endif - -void AttrBinding_clear(GPUAttrBinding *binding) -{ - binding->loc_bits = 0; - binding->enabled_bits = 0; -} - -uint read_attr_location(const GPUAttrBinding *binding, uint a_idx) -{ -#if TRUST_NO_ONE - assert(a_idx < GPU_VERT_ATTR_MAX_LEN); - assert(binding->enabled_bits & (1 << a_idx)); -#endif - return (binding->loc_bits >> (4 * a_idx)) & 0xF; -} - -static void write_attr_location(GPUAttrBinding *binding, uint a_idx, uint location) -{ -#if TRUST_NO_ONE - assert(a_idx < GPU_VERT_ATTR_MAX_LEN); - assert(location < GPU_VERT_ATTR_MAX_LEN); -#endif - const uint shift = 4 * a_idx; - const uint64_t mask = ((uint64_t)0xF) << shift; - /* overwrite this attr's previous location */ - binding->loc_bits = (binding->loc_bits & ~mask) | (location << shift); - /* mark this attr as enabled */ - binding->enabled_bits |= 1 << a_idx; -} - -void get_attr_locations(const GPUVertFormat *format, - GPUAttrBinding *binding, - const GPUShaderInterface *shaderface) -{ - AttrBinding_clear(binding); - - for (uint a_idx = 0; a_idx < format->attr_len; a_idx++) { - const GPUVertAttr *a = &format->attrs[a_idx]; - for (uint n_idx = 0; n_idx < a->name_len; n_idx++) { - const char *name = GPU_vertformat_attr_name_get(format, a, n_idx); - const GPUShaderInput *input = GPU_shaderinterface_attr(shaderface, name); -#if TRUST_NO_ONE - assert(input != NULL); - /* TODO: make this a recoverable runtime error? - * indicates mismatch between vertex format and program. 
*/ -#endif - write_attr_location(binding, a_idx, input->location); - } - } -} diff --git a/source/blender/gpu/intern/gpu_backend.hh b/source/blender/gpu/intern/gpu_backend.hh index 24f592f214f..ec60e6b5704 100644 --- a/source/blender/gpu/intern/gpu_backend.hh +++ b/source/blender/gpu/intern/gpu_backend.hh @@ -27,11 +27,30 @@ struct GPUContext; +namespace blender { +namespace gpu { + +class Batch; +class DrawList; +class FrameBuffer; +class Shader; +class UniformBuf; + class GPUBackend { public: virtual ~GPUBackend(){}; + static GPUBackend *get(void); + virtual GPUContext *context_alloc(void *ghost_window) = 0; + + virtual Batch *batch_alloc(void) = 0; + virtual DrawList *drawlist_alloc(int list_length) = 0; + virtual FrameBuffer *framebuffer_alloc(const char *name) = 0; + virtual Shader *shader_alloc(const char *name) = 0; + // virtual Texture *texture_alloc(void) = 0; + virtual UniformBuf *uniformbuf_alloc(int size, const char *name) = 0; }; -GPUBackend *gpu_backend_get(void); +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/intern/gpu_batch.cc b/source/blender/gpu/intern/gpu_batch.cc index a6ba4d3d89a..0b0c88a42e2 100644 --- a/source/blender/gpu/intern/gpu_batch.cc +++ b/source/blender/gpu/intern/gpu_batch.cc @@ -26,6 +26,8 @@ #include "MEM_guardedalloc.h" +#include "BLI_math_base.h" + #include "GPU_batch.h" #include "GPU_batch_presets.h" #include "GPU_extensions.h" @@ -33,79 +35,50 @@ #include "GPU_platform.h" #include "GPU_shader.h" -#include "gpu_batch_private.h" +#include "gpu_backend.hh" +#include "gpu_batch_private.hh" #include "gpu_context_private.hh" -#include "gpu_primitive_private.h" -#include "gpu_shader_private.h" +#include "gpu_shader_private.hh" #include "gpu_vertex_format_private.h" +#include "gl_primitive.hh" /* TODO remove */ + #include <limits.h> #include <stdlib.h> #include <string.h> -static GLuint g_default_attr_vbo = 0; - -static void batch_update_program_bindings(GPUBatch *batch, uint i_first); +using namespace 
blender::gpu; -void GPU_batch_vao_cache_clear(GPUBatch *batch) -{ - if (batch->context == NULL) { - return; - } - if (batch->is_dynamic_vao_count) { - for (int i = 0; i < batch->dynamic_vaos.count; i++) { - if (batch->dynamic_vaos.vao_ids[i]) { - GPU_vao_free(batch->dynamic_vaos.vao_ids[i], batch->context); - } - if (batch->dynamic_vaos.interfaces[i]) { - GPU_shaderinterface_remove_batch_ref( - (GPUShaderInterface *)batch->dynamic_vaos.interfaces[i], batch); - } - } - MEM_freeN((void *)batch->dynamic_vaos.interfaces); - MEM_freeN(batch->dynamic_vaos.vao_ids); - } - else { - for (int i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) { - if (batch->static_vaos.vao_ids[i]) { - GPU_vao_free(batch->static_vaos.vao_ids[i], batch->context); - } - if (batch->static_vaos.interfaces[i]) { - GPU_shaderinterface_remove_batch_ref( - (GPUShaderInterface *)batch->static_vaos.interfaces[i], batch); - } - } - } - batch->is_dynamic_vao_count = false; - for (int i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) { - batch->static_vaos.vao_ids[i] = 0; - batch->static_vaos.interfaces[i] = NULL; - } - gpu_context_remove_batch(batch->context, batch); - batch->context = NULL; -} +/* -------------------------------------------------------------------- */ +/** \name Creation & Deletion + * \{ */ -GPUBatch *GPU_batch_calloc(uint count) +GPUBatch *GPU_batch_calloc(void) { - return (GPUBatch *)MEM_callocN(sizeof(GPUBatch) * count, "GPUBatch"); + GPUBatch *batch = GPUBackend::get()->batch_alloc(); + memset(batch, 0, sizeof(*batch)); + return batch; } GPUBatch *GPU_batch_create_ex(GPUPrimType prim_type, GPUVertBuf *verts, GPUIndexBuf *elem, - uint owns_flag) + eGPUBatchFlag owns_flag) { - GPUBatch *batch = GPU_batch_calloc(1); + GPUBatch *batch = GPU_batch_calloc(); GPU_batch_init_ex(batch, prim_type, verts, elem, owns_flag); return batch; } -void GPU_batch_init_ex( - GPUBatch *batch, GPUPrimType prim_type, GPUVertBuf *verts, GPUIndexBuf *elem, uint owns_flag) +void GPU_batch_init_ex(GPUBatch *batch, + 
GPUPrimType prim_type, + GPUVertBuf *verts, + GPUIndexBuf *elem, + eGPUBatchFlag owns_flag) { -#if TRUST_NO_ONE - assert(verts != NULL); -#endif + BLI_assert(verts != NULL); + /* Do not pass any other flag */ + BLI_assert((owns_flag & ~(GPU_BATCH_OWNS_VBO | GPU_BATCH_OWNS_INDEX)) == 0); batch->verts[0] = verts; for (int v = 1; v < GPU_BATCH_VBO_MAX_LEN; v++) { @@ -115,19 +88,18 @@ void GPU_batch_init_ex( batch->inst[v] = NULL; } batch->elem = elem; - batch->gl_prim_type = convert_prim_type_to_gl(prim_type); - batch->phase = GPU_BATCH_READY_TO_DRAW; - batch->is_dynamic_vao_count = false; - batch->owns_flag = owns_flag; - batch->free_callback = NULL; + batch->prim_type = prim_type; + batch->flag = owns_flag | GPU_BATCH_INIT | GPU_BATCH_DIRTY; + batch->shader = NULL; } /* This will share the VBOs with the new batch. */ void GPU_batch_copy(GPUBatch *batch_dst, GPUBatch *batch_src) { - GPU_batch_init_ex(batch_dst, GPU_PRIM_POINTS, batch_src->verts[0], batch_src->elem, 0); + GPU_batch_init_ex( + batch_dst, GPU_PRIM_POINTS, batch_src->verts[0], batch_src->elem, GPU_BATCH_INVALID); - batch_dst->gl_prim_type = batch_src->gl_prim_type; + batch_dst->prim_type = batch_src->prim_type; for (int v = 1; v < GPU_BATCH_VBO_MAX_LEN; v++) { batch_dst->verts[v] = batch_src->verts[v]; } @@ -135,563 +107,159 @@ void GPU_batch_copy(GPUBatch *batch_dst, GPUBatch *batch_src) void GPU_batch_clear(GPUBatch *batch) { - if (batch->owns_flag & GPU_BATCH_OWNS_INDEX) { + if (batch->flag & GPU_BATCH_OWNS_INDEX) { GPU_indexbuf_discard(batch->elem); } - if (batch->owns_flag & GPU_BATCH_OWNS_INSTANCES) { - GPU_vertbuf_discard(batch->inst[0]); - GPU_VERTBUF_DISCARD_SAFE(batch->inst[1]); - } - if ((batch->owns_flag & ~GPU_BATCH_OWNS_INDEX) != 0) { - for (int v = 0; v < GPU_BATCH_VBO_MAX_LEN; v++) { - if (batch->verts[v] == NULL) { - break; + if (batch->flag & GPU_BATCH_OWNS_VBO_ANY) { + for (int v = 0; (v < GPU_BATCH_VBO_MAX_LEN) && batch->verts[v]; v++) { + if (batch->flag & (GPU_BATCH_OWNS_VBO << v)) 
{ + GPU_VERTBUF_DISCARD_SAFE(batch->verts[v]); } - if (batch->owns_flag & (1 << v)) { - GPU_vertbuf_discard(batch->verts[v]); + } + } + if (batch->flag & GPU_BATCH_OWNS_INST_VBO_ANY) { + for (int v = 0; (v < GPU_BATCH_INST_VBO_MAX_LEN) && batch->inst[v]; v++) { + if (batch->flag & (GPU_BATCH_OWNS_INST_VBO << v)) { + GPU_VERTBUF_DISCARD_SAFE(batch->inst[v]); } } } - GPU_batch_vao_cache_clear(batch); - batch->phase = GPU_BATCH_UNUSED; + batch->flag = GPU_BATCH_INVALID; } void GPU_batch_discard(GPUBatch *batch) { - if (batch->free_callback) { - batch->free_callback(batch, batch->callback_data); - } - GPU_batch_clear(batch); - MEM_freeN(batch); -} -void GPU_batch_callback_free_set(GPUBatch *batch, - void (*callback)(GPUBatch *, void *), - void *user_data) -{ - batch->free_callback = callback; - batch->callback_data = user_data; + delete static_cast<Batch *>(batch); } +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Buffers Management + * \{ */ + +/* NOTE: Override ONLY the first instance vbo (and free them if owned). */ void GPU_batch_instbuf_set(GPUBatch *batch, GPUVertBuf *inst, bool own_vbo) { -#if TRUST_NO_ONE - assert(inst != NULL); -#endif - /* redo the bindings */ - GPU_batch_vao_cache_clear(batch); + BLI_assert(inst); + batch->flag |= GPU_BATCH_DIRTY; - if (batch->inst[0] != NULL && (batch->owns_flag & GPU_BATCH_OWNS_INSTANCES)) { + if (batch->inst[0] && (batch->flag & GPU_BATCH_OWNS_INST_VBO)) { GPU_vertbuf_discard(batch->inst[0]); - GPU_VERTBUF_DISCARD_SAFE(batch->inst[1]); } batch->inst[0] = inst; - if (own_vbo) { - batch->owns_flag |= GPU_BATCH_OWNS_INSTANCES; - } - else { - batch->owns_flag &= ~GPU_BATCH_OWNS_INSTANCES; - } + SET_FLAG_FROM_TEST(batch->flag, own_vbo, GPU_BATCH_OWNS_INST_VBO); } +/* NOTE: Override any previously assigned elem (and free it if owned). 
*/ void GPU_batch_elembuf_set(GPUBatch *batch, GPUIndexBuf *elem, bool own_ibo) { - BLI_assert(elem != NULL); - /* redo the bindings */ - GPU_batch_vao_cache_clear(batch); + BLI_assert(elem); + batch->flag |= GPU_BATCH_DIRTY; - if (batch->elem != NULL && (batch->owns_flag & GPU_BATCH_OWNS_INDEX)) { + if (batch->elem && (batch->flag & GPU_BATCH_OWNS_INDEX)) { GPU_indexbuf_discard(batch->elem); } batch->elem = elem; - if (own_ibo) { - batch->owns_flag |= GPU_BATCH_OWNS_INDEX; - } - else { - batch->owns_flag &= ~GPU_BATCH_OWNS_INDEX; - } + SET_FLAG_FROM_TEST(batch->flag, own_ibo, GPU_BATCH_OWNS_INDEX); } -/* A bit of a quick hack. Should be streamlined as the vbos handling */ int GPU_batch_instbuf_add_ex(GPUBatch *batch, GPUVertBuf *insts, bool own_vbo) { - /* redo the bindings */ - GPU_batch_vao_cache_clear(batch); + BLI_assert(insts); + batch->flag |= GPU_BATCH_DIRTY; for (uint v = 0; v < GPU_BATCH_INST_VBO_MAX_LEN; v++) { if (batch->inst[v] == NULL) { -#if TRUST_NO_ONE /* for now all VertexBuffers must have same vertex_len */ - if (batch->inst[0] != NULL) { - /* Allow for different size of vertex buf (will choose the smallest number of verts). */ - // assert(insts->vertex_len == batch->inst[0]->vertex_len); - assert(own_vbo == ((batch->owns_flag & GPU_BATCH_OWNS_INSTANCES) != 0)); + if (batch->inst[0]) { + /* Allow for different size of vertex buffer (will choose the smallest number of verts). */ + // BLI_assert(insts->vertex_len == batch->inst[0]->vertex_len); } -#endif + batch->inst[v] = insts; - if (own_vbo) { - batch->owns_flag |= GPU_BATCH_OWNS_INSTANCES; - } + SET_FLAG_FROM_TEST(batch->flag, own_vbo, (eGPUBatchFlag)(GPU_BATCH_OWNS_INST_VBO << v)); return v; } } - /* we only make it this far if there is no room for another GPUVertBuf */ -#if TRUST_NO_ONE - assert(false); -#endif + BLI_assert(0 && "Not enough Instance VBO slot in batch"); return -1; } /* Returns the index of verts in the batch. 
*/ int GPU_batch_vertbuf_add_ex(GPUBatch *batch, GPUVertBuf *verts, bool own_vbo) { - /* redo the bindings */ - GPU_batch_vao_cache_clear(batch); + BLI_assert(verts); + batch->flag |= GPU_BATCH_DIRTY; for (uint v = 0; v < GPU_BATCH_VBO_MAX_LEN; v++) { if (batch->verts[v] == NULL) { -#if TRUST_NO_ONE /* for now all VertexBuffers must have same vertex_len */ if (batch->verts[0] != NULL) { - assert(verts->vertex_len == batch->verts[0]->vertex_len); + BLI_assert(verts->vertex_len == batch->verts[0]->vertex_len); } -#endif batch->verts[v] = verts; - /* TODO: mark dirty so we can keep attribute bindings up-to-date */ - if (own_vbo) { - batch->owns_flag |= (1 << v); - } + SET_FLAG_FROM_TEST(batch->flag, own_vbo, (eGPUBatchFlag)(GPU_BATCH_OWNS_VBO << v)); return v; } } - /* we only make it this far if there is no room for another GPUVertBuf */ -#if TRUST_NO_ONE - assert(false); -#endif + BLI_assert(0 && "Not enough VBO slot in batch"); return -1; } -static GLuint batch_vao_get(GPUBatch *batch) -{ - /* Search through cache */ - if (batch->is_dynamic_vao_count) { - for (int i = 0; i < batch->dynamic_vaos.count; i++) { - if (batch->dynamic_vaos.interfaces[i] == batch->interface) { - return batch->dynamic_vaos.vao_ids[i]; - } - } - } - else { - for (int i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) { - if (batch->static_vaos.interfaces[i] == batch->interface) { - return batch->static_vaos.vao_ids[i]; - } - } - } - - /* Set context of this batch. - * It will be bound to it until GPU_batch_vao_cache_clear is called. - * Until then it can only be drawn with this context. */ - if (batch->context == NULL) { - batch->context = GPU_context_active_get(); - gpu_context_add_batch(batch->context, batch); - } -#if TRUST_NO_ONE - else { - /* Make sure you are not trying to draw this batch in another context. */ - assert(batch->context == GPU_context_active_get()); - } -#endif - - /* Cache miss, time to add a new entry! 
*/ - GLuint new_vao = 0; - if (!batch->is_dynamic_vao_count) { - int i; /* find first unused slot */ - for (i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) { - if (batch->static_vaos.vao_ids[i] == 0) { - break; - } - } - - if (i < GPU_BATCH_VAO_STATIC_LEN) { - batch->static_vaos.interfaces[i] = batch->interface; - batch->static_vaos.vao_ids[i] = new_vao = GPU_vao_alloc(); - } - else { - /* Not enough place switch to dynamic. */ - batch->is_dynamic_vao_count = true; - /* Erase previous entries, they will be added back if drawn again. */ - for (int j = 0; j < GPU_BATCH_VAO_STATIC_LEN; j++) { - GPU_shaderinterface_remove_batch_ref( - (GPUShaderInterface *)batch->static_vaos.interfaces[j], batch); - GPU_vao_free(batch->static_vaos.vao_ids[j], batch->context); - } - /* Init dynamic arrays and let the branch below set the values. */ - batch->dynamic_vaos.count = GPU_BATCH_VAO_DYN_ALLOC_COUNT; - batch->dynamic_vaos.interfaces = (const GPUShaderInterface **)MEM_callocN( - batch->dynamic_vaos.count * sizeof(GPUShaderInterface *), "dyn vaos interfaces"); - batch->dynamic_vaos.vao_ids = (GLuint *)MEM_callocN( - batch->dynamic_vaos.count * sizeof(GLuint), "dyn vaos ids"); - } - } - - if (batch->is_dynamic_vao_count) { - int i; /* find first unused slot */ - for (i = 0; i < batch->dynamic_vaos.count; i++) { - if (batch->dynamic_vaos.vao_ids[i] == 0) { - break; - } - } - - if (i == batch->dynamic_vaos.count) { - /* Not enough place, realloc the array. 
*/ - i = batch->dynamic_vaos.count; - batch->dynamic_vaos.count += GPU_BATCH_VAO_DYN_ALLOC_COUNT; - batch->dynamic_vaos.interfaces = (const GPUShaderInterface **)MEM_recallocN( - (void *)batch->dynamic_vaos.interfaces, - sizeof(GPUShaderInterface *) * batch->dynamic_vaos.count); - batch->dynamic_vaos.vao_ids = (GLuint *)MEM_recallocN( - batch->dynamic_vaos.vao_ids, sizeof(GLuint) * batch->dynamic_vaos.count); - } - batch->dynamic_vaos.interfaces[i] = batch->interface; - batch->dynamic_vaos.vao_ids[i] = new_vao = GPU_vao_alloc(); - } - - GPU_shaderinterface_add_batch_ref((GPUShaderInterface *)batch->interface, batch); - -#if TRUST_NO_ONE - assert(new_vao != 0); -#endif - - /* We just got a fresh VAO we need to initialize it. */ - glBindVertexArray(new_vao); - batch_update_program_bindings(batch, 0); - glBindVertexArray(0); - - return new_vao; -} +/** \} */ -void GPU_batch_set_shader_no_bind(GPUBatch *batch, GPUShader *shader) -{ -#if TRUST_NO_ONE - assert(glIsProgram(shader->program)); - assert(batch->program_in_use == 0); -#endif - batch->interface = shader->interface; - batch->program = shader->program; - batch->vao_id = batch_vao_get(batch); -} +/* -------------------------------------------------------------------- */ +/** \name Uniform setters + * + * TODO(fclem) port this to GPUShader. + * \{ */ void GPU_batch_set_shader(GPUBatch *batch, GPUShader *shader) { - GPU_batch_set_shader_no_bind(batch, shader); - GPU_batch_program_use_begin(batch); /* hack! 
to make Batch_Uniform* simpler */ -} - -void gpu_batch_remove_interface_ref(GPUBatch *batch, const GPUShaderInterface *interface) -{ - if (batch->is_dynamic_vao_count) { - for (int i = 0; i < batch->dynamic_vaos.count; i++) { - if (batch->dynamic_vaos.interfaces[i] == interface) { - GPU_vao_free(batch->dynamic_vaos.vao_ids[i], batch->context); - batch->dynamic_vaos.vao_ids[i] = 0; - batch->dynamic_vaos.interfaces[i] = NULL; - break; /* cannot have duplicates */ - } - } - } - else { - int i; - for (i = 0; i < GPU_BATCH_VAO_STATIC_LEN; i++) { - if (batch->static_vaos.interfaces[i] == interface) { - GPU_vao_free(batch->static_vaos.vao_ids[i], batch->context); - batch->static_vaos.vao_ids[i] = 0; - batch->static_vaos.interfaces[i] = NULL; - break; /* cannot have duplicates */ - } - } - } -} - -static void create_bindings(GPUVertBuf *verts, - const GPUShaderInterface *interface, - uint16_t *attr_mask, - uint v_first, - const bool use_instancing) -{ - const GPUVertFormat *format = &verts->format; - - const uint attr_len = format->attr_len; - uint stride = format->stride; - uint offset = 0; - - GPU_vertbuf_use(verts); - - for (uint a_idx = 0; a_idx < attr_len; a_idx++) { - const GPUVertAttr *a = &format->attrs[a_idx]; - - if (format->deinterleaved) { - offset += ((a_idx == 0) ? 
0 : format->attrs[a_idx - 1].sz) * verts->vertex_len; - stride = a->sz; - } - else { - offset = a->offset; - } - - const GLvoid *pointer = (const GLubyte *)0 + offset + v_first * stride; - const GLenum type = convert_comp_type_to_gl(static_cast<GPUVertCompType>(a->comp_type)); - - for (uint n_idx = 0; n_idx < a->name_len; n_idx++) { - const char *name = GPU_vertformat_attr_name_get(format, a, n_idx); - const GPUShaderInput *input = GPU_shaderinterface_attr(interface, name); - - if (input == NULL) { - continue; - } - - *attr_mask &= ~(1 << input->location); - - if (a->comp_len == 16 || a->comp_len == 12 || a->comp_len == 8) { - BLI_assert(a->fetch_mode == GPU_FETCH_FLOAT); - BLI_assert(a->comp_type == GPU_COMP_F32); - for (int i = 0; i < a->comp_len / 4; i++) { - glEnableVertexAttribArray(input->location + i); - glVertexAttribDivisor(input->location + i, (use_instancing) ? 1 : 0); - glVertexAttribPointer( - input->location + i, 4, type, GL_FALSE, stride, (const GLubyte *)pointer + i * 16); - } - } - else { - glEnableVertexAttribArray(input->location); - glVertexAttribDivisor(input->location, (use_instancing) ? 1 : 0); - - switch (a->fetch_mode) { - case GPU_FETCH_FLOAT: - case GPU_FETCH_INT_TO_FLOAT: - glVertexAttribPointer(input->location, a->comp_len, type, GL_FALSE, stride, pointer); - break; - case GPU_FETCH_INT_TO_FLOAT_UNIT: - glVertexAttribPointer(input->location, a->comp_len, type, GL_TRUE, stride, pointer); - break; - case GPU_FETCH_INT: - glVertexAttribIPointer(input->location, a->comp_len, type, stride, pointer); - break; - } - } - } - } -} - -static void batch_update_program_bindings(GPUBatch *batch, uint i_first) -{ - uint16_t attr_mask = batch->interface->enabled_attr_mask; - - /* Reverse order so first VBO'S have more prevalence (in term of attribute override). 
*/ - for (int v = GPU_BATCH_VBO_MAX_LEN - 1; v > -1; v--) { - if (batch->verts[v] != NULL) { - create_bindings(batch->verts[v], batch->interface, &attr_mask, 0, false); - } - } - - for (int v = GPU_BATCH_INST_VBO_MAX_LEN - 1; v > -1; v--) { - if (batch->inst[v]) { - create_bindings(batch->inst[v], batch->interface, &attr_mask, i_first, true); - } - } - - if (attr_mask != 0 && GLEW_ARB_vertex_attrib_binding) { - for (uint16_t mask = 1, a = 0; a < 16; a++, mask <<= 1) { - if (attr_mask & mask) { - /* This replaces glVertexAttrib4f(a, 0.0f, 0.0f, 0.0f, 1.0f); with a more modern style. - * Fix issues for some drivers (see T75069). */ - glBindVertexBuffer(a, g_default_attr_vbo, (intptr_t)0, (intptr_t)0); - - glEnableVertexAttribArray(a); - glVertexAttribFormat(a, 4, GL_FLOAT, GL_FALSE, 0); - glVertexAttribBinding(a, a); - } - } - } - - if (batch->elem) { - GPU_indexbuf_use(batch->elem); - } -} - -void GPU_batch_program_use_begin(GPUBatch *batch) -{ - /* NOTE: use_program & done_using_program are fragile, depend on staying in sync with - * the GL context's active program. - * use_program doesn't mark other programs as "not used". 
*/ - /* TODO: make not fragile (somehow) */ - - if (!batch->program_in_use) { - glUseProgram(batch->program); - batch->program_in_use = true; - } -} - -void GPU_batch_program_use_end(GPUBatch *batch) -{ - if (batch->program_in_use) { -#if PROGRAM_NO_OPTI - glUseProgram(0); -#endif - batch->program_in_use = false; - } -} - -#if TRUST_NO_ONE -# define GET_UNIFORM \ - const GPUShaderInput *uniform = GPU_shaderinterface_uniform(batch->interface, name); \ - assert(uniform); -#else -# define GET_UNIFORM \ - const GPUShaderInput *uniform = GPU_shaderinterface_uniform(batch->interface, name); -#endif - -void GPU_batch_uniform_1ui(GPUBatch *batch, const char *name, uint value) -{ - GET_UNIFORM - glUniform1ui(uniform->location, value); -} - -void GPU_batch_uniform_1i(GPUBatch *batch, const char *name, int value) -{ - GET_UNIFORM - glUniform1i(uniform->location, value); -} - -void GPU_batch_uniform_1b(GPUBatch *batch, const char *name, bool value) -{ - GET_UNIFORM - glUniform1i(uniform->location, value ? 
GL_TRUE : GL_FALSE); -} - -void GPU_batch_uniform_2f(GPUBatch *batch, const char *name, float x, float y) -{ - GET_UNIFORM - glUniform2f(uniform->location, x, y); -} - -void GPU_batch_uniform_3f(GPUBatch *batch, const char *name, float x, float y, float z) -{ - GET_UNIFORM - glUniform3f(uniform->location, x, y, z); -} - -void GPU_batch_uniform_4f(GPUBatch *batch, const char *name, float x, float y, float z, float w) -{ - GET_UNIFORM - glUniform4f(uniform->location, x, y, z, w); -} - -void GPU_batch_uniform_1f(GPUBatch *batch, const char *name, float x) -{ - GET_UNIFORM - glUniform1f(uniform->location, x); -} - -void GPU_batch_uniform_2fv(GPUBatch *batch, const char *name, const float data[2]) -{ - GET_UNIFORM - glUniform2fv(uniform->location, 1, data); -} - -void GPU_batch_uniform_3fv(GPUBatch *batch, const char *name, const float data[3]) -{ - GET_UNIFORM - glUniform3fv(uniform->location, 1, data); -} - -void GPU_batch_uniform_4fv(GPUBatch *batch, const char *name, const float data[4]) -{ - GET_UNIFORM - glUniform4fv(uniform->location, 1, data); -} - -void GPU_batch_uniform_2fv_array(GPUBatch *batch, - const char *name, - const int len, - const float *data) -{ - GET_UNIFORM - glUniform2fv(uniform->location, len, data); + batch->shader = shader; + GPU_shader_bind(batch->shader); } -void GPU_batch_uniform_4fv_array(GPUBatch *batch, - const char *name, - const int len, - const float *data) -{ - GET_UNIFORM - glUniform4fv(uniform->location, len, data); -} +/** \} */ -void GPU_batch_uniform_mat4(GPUBatch *batch, const char *name, const float data[4][4]) -{ - GET_UNIFORM - glUniformMatrix4fv(uniform->location, 1, GL_FALSE, (const float *)data); -} +/* -------------------------------------------------------------------- */ +/** \name Drawing / Drawcall functions + * \{ */ -static void *elem_offset(const GPUIndexBuf *el, int v_first) +void GPU_batch_draw(GPUBatch *batch) { -#if GPU_TRACK_INDEX_RANGE - if (el->index_type == GPU_INDEX_U16) { - return (GLushort *)0 + v_first 
+ el->index_start; - } -#endif - return (GLuint *)0 + v_first + el->index_start; + GPU_shader_bind(batch->shader); + GPU_batch_draw_advanced(batch, 0, 0, 0, 0); + GPU_shader_unbind(); } -/* Use when drawing with GPU_batch_draw_advanced */ -void GPU_batch_bind(GPUBatch *batch) +void GPU_batch_draw_range(GPUBatch *batch, int v_first, int v_count) { - glBindVertexArray(batch->vao_id); - -#if GPU_TRACK_INDEX_RANGE - /* Can be removed if GL 4.3 is required. */ - if (!GLEW_ARB_ES3_compatibility && batch->elem != NULL) { - GLuint restart_index = (batch->elem->index_type == GPU_INDEX_U16) ? (GLuint)0xFFFF : - (GLuint)0xFFFFFFFF; - glPrimitiveRestartIndex(restart_index); - } -#endif + GPU_shader_bind(batch->shader); + GPU_batch_draw_advanced(batch, v_first, v_count, 0, 0); + GPU_shader_unbind(); } -void GPU_batch_draw(GPUBatch *batch) +/* Draw multiple instance of a batch without having any instance attributes. */ +void GPU_batch_draw_instanced(GPUBatch *batch, int i_count) { -#if TRUST_NO_ONE - assert(batch->phase == GPU_BATCH_READY_TO_DRAW); - assert(batch->verts[0]->vbo_id != 0); -#endif - GPU_batch_program_use_begin(batch); - GPU_matrix_bind(batch->interface); // external call. - GPU_shader_set_srgb_uniform(batch->interface); + BLI_assert(batch->inst[0] == NULL); - GPU_batch_bind(batch); - GPU_batch_draw_advanced(batch, 0, 0, 0, 0); - - GPU_batch_program_use_end(batch); + GPU_shader_bind(batch->shader); + GPU_batch_draw_advanced(batch, 0, 0, 0, i_count); + GPU_shader_unbind(); } -#if GPU_TRACK_INDEX_RANGE -# define BASE_INDEX(el) ((el)->base_index) -# define INDEX_TYPE(el) ((el)->gl_index_type) -#else -# define BASE_INDEX(el) 0 -# define INDEX_TYPE(el) GL_UNSIGNED_INT -#endif - void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_first, int i_count) { - BLI_assert(batch->program_in_use); - /* TODO could assert that VAO is bound. */ + BLI_assert(GPU_context_active_get()->shader != NULL); if (v_count == 0) { v_count = (batch->elem) ? 
batch->elem->index_len : batch->verts[0]->vertex_len; @@ -699,8 +267,8 @@ void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_fi if (i_count == 0) { i_count = (batch->inst[0]) ? batch->inst[0]->vertex_len : 1; /* Meh. This is to be able to use different numbers of verts in instance vbos. */ - if (batch->inst[1] && i_count > batch->inst[1]->vertex_len) { - i_count = batch->inst[1]->vertex_len; + if (batch->inst[1] != NULL) { + i_count = min_ii(i_count, batch->inst[1]->vertex_len); } } @@ -709,275 +277,7 @@ void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_fi return; } - /* Verify there is enough data do draw. */ - /* TODO(fclem) Nice to have but this is invalid when using procedural draw-calls. - * The right assert would be to check if there is an enabled attribute from each VBO - * and check their length. */ - // BLI_assert(i_first + i_count <= (batch->inst ? batch->inst->vertex_len : INT_MAX)); - // BLI_assert(v_first + v_count <= - // (batch->elem ? batch->elem->index_len : batch->verts[0]->vertex_len)); - -#ifdef __APPLE__ - GLuint vao = 0; -#endif - - if (!GPU_arb_base_instance_is_supported()) { - if (i_first > 0) { -#ifdef __APPLE__ - /** - * There seems to be a nasty bug when drawing using the same VAO reconfiguring. (see T71147) - * We just use a throwaway VAO for that. Note that this is likely to degrade performance. - **/ - glGenVertexArrays(1, &vao); - glBindVertexArray(vao); -#else - /* If using offset drawing with instancing, we must - * use the default VAO and redo bindings. */ - glBindVertexArray(GPU_vao_default()); -#endif - batch_update_program_bindings(batch, i_first); - } - else { - /* Previous call could have bind the default vao - * see above. 
*/ - glBindVertexArray(batch->vao_id); - } - } - - if (batch->elem) { - const GPUIndexBuf *el = batch->elem; - GLenum index_type = INDEX_TYPE(el); - GLint base_index = BASE_INDEX(el); - void *v_first_ofs = elem_offset(el, v_first); - - if (GPU_arb_base_instance_is_supported()) { - glDrawElementsInstancedBaseVertexBaseInstance( - batch->gl_prim_type, v_count, index_type, v_first_ofs, i_count, base_index, i_first); - } - else { - glDrawElementsInstancedBaseVertex( - batch->gl_prim_type, v_count, index_type, v_first_ofs, i_count, base_index); - } - } - else { -#ifdef __APPLE__ - glDisable(GL_PRIMITIVE_RESTART); -#endif - if (GPU_arb_base_instance_is_supported()) { - glDrawArraysInstancedBaseInstance(batch->gl_prim_type, v_first, v_count, i_count, i_first); - } - else { - glDrawArraysInstanced(batch->gl_prim_type, v_first, v_count, i_count); - } -#ifdef __APPLE__ - glEnable(GL_PRIMITIVE_RESTART); -#endif - } - -#ifdef __APPLE__ - if (vao != 0) { - glDeleteVertexArrays(1, &vao); - } -#endif -} - -/* just draw some vertices and let shader place them where we want. */ -void GPU_draw_primitive(GPUPrimType prim_type, int v_count) -{ - /* we cannot draw without vao ... annoying ... */ - glBindVertexArray(GPU_vao_default()); - - GLenum type = convert_prim_type_to_gl(prim_type); - glDrawArrays(type, 0, v_count); - - /* Performance hog if you are drawing with the same vao multiple time. 
- * Only activate for debugging.*/ - // glBindVertexArray(0); -} - -/* -------------------------------------------------------------------- */ -/** \name Indirect Draw Calls - * \{ */ - -#if 0 -# define USE_MULTI_DRAW_INDIRECT 0 -#else -# define USE_MULTI_DRAW_INDIRECT \ - (GL_ARB_multi_draw_indirect && GPU_arb_base_instance_is_supported()) -#endif - -typedef struct GPUDrawCommand { - uint v_count; - uint i_count; - uint v_first; - uint i_first; -} GPUDrawCommand; - -typedef struct GPUDrawCommandIndexed { - uint v_count; - uint i_count; - uint v_first; - uint base_index; - uint i_first; -} GPUDrawCommandIndexed; - -struct GPUDrawList { - GPUBatch *batch; - uint base_index; /* Avoid dereferencing batch. */ - uint cmd_offset; /* in bytes, offset inside indirect command buffer. */ - uint cmd_len; /* Number of used command for the next call. */ - uint buffer_size; /* in bytes, size of indirect command buffer. */ - GLuint buffer_id; /* Draw Indirect Buffer id */ - union { - GPUDrawCommand *commands; - GPUDrawCommandIndexed *commands_indexed; - }; -}; - -GPUDrawList *GPU_draw_list_create(int length) -{ - GPUDrawList *list = (GPUDrawList *)MEM_callocN(sizeof(GPUDrawList), "GPUDrawList"); - /* Alloc the biggest possible command list which is indexed. 
*/ - list->buffer_size = sizeof(GPUDrawCommandIndexed) * length; - if (USE_MULTI_DRAW_INDIRECT) { - list->buffer_id = GPU_buf_alloc(); - glBindBuffer(GL_DRAW_INDIRECT_BUFFER, list->buffer_id); - glBufferData(GL_DRAW_INDIRECT_BUFFER, list->buffer_size, NULL, GL_DYNAMIC_DRAW); - } - else { - list->commands = (GPUDrawCommand *)MEM_mallocN(list->buffer_size, "GPUDrawList data"); - } - return list; -} - -void GPU_draw_list_discard(GPUDrawList *list) -{ - if (list->buffer_id) { - GPU_buf_free(list->buffer_id); - } - else { - MEM_SAFE_FREE(list->commands); - } - MEM_freeN(list); -} - -void GPU_draw_list_init(GPUDrawList *list, GPUBatch *batch) -{ - BLI_assert(batch->phase == GPU_BATCH_READY_TO_DRAW); - list->batch = batch; - list->base_index = batch->elem ? BASE_INDEX(batch->elem) : UINT_MAX; - list->cmd_len = 0; - - if (USE_MULTI_DRAW_INDIRECT) { - if (list->commands == NULL) { - glBindBuffer(GL_DRAW_INDIRECT_BUFFER, list->buffer_id); - if (list->cmd_offset >= list->buffer_size) { - /* Orphan buffer data and start fresh. 
*/ - glBufferData(GL_DRAW_INDIRECT_BUFFER, list->buffer_size, NULL, GL_DYNAMIC_DRAW); - list->cmd_offset = 0; - } - GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_FLUSH_EXPLICIT_BIT; - list->commands = (GPUDrawCommand *)glMapBufferRange( - GL_DRAW_INDIRECT_BUFFER, list->cmd_offset, list->buffer_size - list->cmd_offset, flags); - } - } - else { - list->cmd_offset = 0; - } -} - -void GPU_draw_list_command_add( - GPUDrawList *list, int v_first, int v_count, int i_first, int i_count) -{ - BLI_assert(list->commands); - - if (v_count == 0 || i_count == 0) { - return; - } - - if (list->base_index != UINT_MAX) { - GPUDrawCommandIndexed *cmd = list->commands_indexed + list->cmd_len; - cmd->v_first = v_first; - cmd->v_count = v_count; - cmd->i_count = i_count; - cmd->base_index = list->base_index; - cmd->i_first = i_first; - } - else { - GPUDrawCommand *cmd = list->commands + list->cmd_len; - cmd->v_first = v_first; - cmd->v_count = v_count; - cmd->i_count = i_count; - cmd->i_first = i_first; - } - - list->cmd_len++; - uint offset = list->cmd_offset + list->cmd_len * sizeof(GPUDrawCommandIndexed); - - if (offset == list->buffer_size) { - GPU_draw_list_submit(list); - GPU_draw_list_init(list, list->batch); - } -} - -void GPU_draw_list_submit(GPUDrawList *list) -{ - GPUBatch *batch = list->batch; - - if (list->cmd_len == 0) { - return; - } - - BLI_assert(list->commands); - BLI_assert(batch->program_in_use); - /* TODO could assert that VAO is bound. */ - - /* TODO We loose a bit of memory here if we only draw arrays. Fix that. */ - uintptr_t offset = list->cmd_offset; - uint cmd_len = list->cmd_len; - size_t bytes_used = cmd_len * sizeof(GPUDrawCommandIndexed); - list->cmd_len = 0; /* Avoid reuse. */ - - /* Only do multi-draw indirect if doing more than 2 drawcall. - * This avoids the overhead of buffer mapping if scene is - * not very instance friendly. 
- * BUT we also need to take into account the case where only - * a few instances are needed to finish filling a call buffer. */ - const bool do_mdi = (cmd_len > 2) || (list->cmd_offset + bytes_used == list->buffer_size); - - if (USE_MULTI_DRAW_INDIRECT && do_mdi) { - GLenum prim = batch->gl_prim_type; - - glBindBuffer(GL_DRAW_INDIRECT_BUFFER, list->buffer_id); - glFlushMappedBufferRange(GL_DRAW_INDIRECT_BUFFER, 0, bytes_used); - glUnmapBuffer(GL_DRAW_INDIRECT_BUFFER); - list->commands = NULL; /* Unmapped */ - list->cmd_offset += bytes_used; - - if (batch->elem) { - glMultiDrawElementsIndirect(prim, INDEX_TYPE(batch->elem), (void *)offset, cmd_len, 0); - } - else { - glMultiDrawArraysIndirect(prim, (void *)offset, cmd_len, 0); - } - } - else { - /* Fallback */ - if (batch->elem) { - GPUDrawCommandIndexed *cmd = list->commands_indexed; - for (int i = 0; i < cmd_len; i++, cmd++) { - /* Index start was added by Draw manager. Avoid counting it twice. */ - cmd->v_first -= batch->elem->index_start; - GPU_batch_draw_advanced(batch, cmd->v_first, cmd->v_count, cmd->i_first, cmd->i_count); - } - } - else { - GPUDrawCommand *cmd = list->commands; - for (int i = 0; i < cmd_len; i++, cmd++) { - GPU_batch_draw_advanced(batch, cmd->v_first, cmd->v_count, cmd->i_first, cmd->i_count); - } - } - } + static_cast<Batch *>(batch)->draw(v_first, v_count, i_first, i_count); } /** \} */ @@ -1015,23 +315,11 @@ void GPU_batch_program_set_imm_shader(GPUBatch *batch) void gpu_batch_init(void) { - if (g_default_attr_vbo == 0) { - g_default_attr_vbo = GPU_buf_alloc(); - - float default_attrib_data[4] = {0.0f, 0.0f, 0.0f, 1.0f}; - glBindBuffer(GL_ARRAY_BUFFER, g_default_attr_vbo); - glBufferData(GL_ARRAY_BUFFER, sizeof(float[4]), default_attrib_data, GL_STATIC_DRAW); - glBindBuffer(GL_ARRAY_BUFFER, 0); - } - gpu_batch_presets_init(); } void gpu_batch_exit(void) { - GPU_buf_free(g_default_attr_vbo); - g_default_attr_vbo = 0; - gpu_batch_presets_exit(); } diff --git 
a/source/blender/gpu/intern/gpu_batch_presets.c b/source/blender/gpu/intern/gpu_batch_presets.c index 3d9b4326c7e..6a1645a71d8 100644 --- a/source/blender/gpu/intern/gpu_batch_presets.c +++ b/source/blender/gpu/intern/gpu_batch_presets.c @@ -35,7 +35,6 @@ #include "GPU_batch.h" #include "GPU_batch_presets.h" /* own include */ #include "GPU_batch_utils.h" -#include "gpu_shader_private.h" /* -------------------------------------------------------------------- */ /** \name Local Structures @@ -63,6 +62,7 @@ static struct { static struct { struct { GPUBatch *panel_drag_widget; + GPUBatch *quad; } batch; float panel_drag_widget_pixelsize; @@ -331,6 +331,24 @@ GPUBatch *GPU_batch_preset_panel_drag_widget(const float pixelsize, return g_presets_2d.batch.panel_drag_widget; } +/* To be used with procedural placement inside shader. */ +GPUBatch *GPU_batch_preset_quad(void) +{ + if (!g_presets_2d.batch.quad) { + GPUVertBuf *vbo = GPU_vertbuf_create_with_format(preset_2d_format()); + GPU_vertbuf_data_alloc(vbo, 4); + + float pos_data[4][2] = {{0.0f, 0.0f}, {0.0f, 1.0f}, {1.0f, 1.0f}, {1.0f, 0.0f}}; + GPU_vertbuf_attr_fill(vbo, g_presets_2d.attr_id.pos, pos_data); + /* Don't fill the color. */ + + g_presets_2d.batch.quad = GPU_batch_create_ex(GPU_PRIM_TRI_FAN, vbo, NULL, GPU_BATCH_OWNS_VBO); + + gpu_batch_presets_register(g_presets_2d.batch.quad); + } + return g_presets_2d.batch.quad; +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -380,18 +398,6 @@ bool gpu_batch_presets_unregister(GPUBatch *preset_batch) return false; } -void gpu_batch_presets_reset(void) -{ - BLI_mutex_lock(&g_presets_3d.mutex); - /* Reset vao caches for these every time we switch opengl context. - * This way they will draw correctly for each window. 
*/ - LISTBASE_FOREACH (LinkData *, link, &presets_list) { - GPUBatch *preset = link->data; - GPU_batch_vao_cache_clear(preset); - } - BLI_mutex_unlock(&g_presets_3d.mutex); -} - void gpu_batch_presets_exit(void) { LinkData *link; @@ -404,17 +410,4 @@ void gpu_batch_presets_exit(void) BLI_mutex_end(&g_presets_3d.mutex); } -/** - * This function only needs to be accessed externally because - * we are drawing UI batches with the DRW old context. - * - * And now we use it for drawing the entire area. - * - * XXX (Clément) - to cleanup in the upcoming 2.91 refactor. - **/ -void GPU_batch_presets_reset() -{ - gpu_batch_presets_reset(); -} - /** \} */ diff --git a/source/blender/gpu/intern/gpu_batch_private.h b/source/blender/gpu/intern/gpu_batch_private.hh index 93745b9ca9b..c0444647fe1 100644 --- a/source/blender/gpu/intern/gpu_batch_private.h +++ b/source/blender/gpu/intern/gpu_batch_private.hh @@ -28,14 +28,21 @@ #include "GPU_batch.h" #include "GPU_context.h" -#include "GPU_shader_interface.h" -#ifdef __cplusplus -extern "C" { -#endif +namespace blender { +namespace gpu { -void gpu_batch_remove_interface_ref(GPUBatch *batch, const GPUShaderInterface *interface); +/** + * Base class which is then specialized for each implementation (GL, VK, ...). + * NOTE: Extends GPUBatch as we still needs to expose some of the internals to the outside C code. 
+ **/ +class Batch : public GPUBatch { + public: + Batch(){}; + virtual ~Batch(){}; -#ifdef __cplusplus -} -#endif + virtual void draw(int v_first, int v_count, int i_first, int i_count) = 0; +}; + +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/intern/gpu_batch_utils.c b/source/blender/gpu/intern/gpu_batch_utils.c index 0660d4a1724..e2d03d27035 100644 --- a/source/blender/gpu/intern/gpu_batch_utils.c +++ b/source/blender/gpu/intern/gpu_batch_utils.c @@ -28,7 +28,6 @@ #include "GPU_batch.h" #include "GPU_batch_utils.h" /* own include */ -#include "gpu_shader_private.h" /* -------------------------------------------------------------------- */ /** \name Polygon Creation (2D) diff --git a/source/blender/gpu/intern/gpu_codegen.c b/source/blender/gpu/intern/gpu_codegen.c index b051d4fe59a..d67ce0be310 100644 --- a/source/blender/gpu/intern/gpu_codegen.c +++ b/source/blender/gpu/intern/gpu_codegen.c @@ -43,7 +43,7 @@ #include "GPU_extensions.h" #include "GPU_material.h" #include "GPU_shader.h" -#include "GPU_uniformbuffer.h" +#include "GPU_uniform_buffer.h" #include "GPU_vertex_format.h" #include "BLI_sys_types.h" /* for intptr_t support */ @@ -686,7 +686,7 @@ static char *code_generate_vertex(GPUNodeGraph *graph, BLI_dynstr_append(ds, "#define USE_ATTR\n\n"); BLI_dynstr_append(ds, vert_code); - BLI_dynstr_append(ds, "\n"); + BLI_dynstr_append(ds, "\n\n"); BLI_dynstr_append(ds, "void pass_attr(vec3 position, mat3 normalmat, mat4 modelmatinv) {\n"); @@ -755,15 +755,16 @@ static char *code_generate_geometry(GPUNodeGraph *graph, /* Generate varying assignments. */ BLI_dynstr_append(ds, "#define USE_ATTR\n"); - BLI_dynstr_append(ds, "void pass_attr(const int vert) {\n"); + /* This needs to be a define. Some drivers don't like variable vert index inside dataAttrIn. 
*/ + BLI_dynstr_append(ds, "#define pass_attr(vert) {\\\n"); if (builtins & GPU_BARYCENTRIC_TEXCO) { - BLI_dynstr_append(ds, " dataAttrOut.barycentricTexCo = calc_barycentric_co(vert);\n"); + BLI_dynstr_append(ds, "dataAttrOut.barycentricTexCo = calc_barycentric_co(vert);\\\n"); } LISTBASE_FOREACH (GPUMaterialAttribute *, attr, &graph->attributes) { /* TODO let shader choose what to do depending on what the attribute is. */ - BLI_dynstr_appendf(ds, " dataAttrOut.var%d = dataAttrIn[vert].var%d;\n", attr->id, attr->id); + BLI_dynstr_appendf(ds, "dataAttrOut.var%d = dataAttrIn[vert].var%d;\\\n", attr->id, attr->id); } BLI_dynstr_append(ds, "}\n\n"); diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc index 283784aec20..85e7dffe3e7 100644 --- a/source/blender/gpu/intern/gpu_context.cc +++ b/source/blender/gpu/intern/gpu_context.cc @@ -40,7 +40,7 @@ #include "GHOST_C-api.h" #include "gpu_backend.hh" -#include "gpu_batch_private.h" +#include "gpu_batch_private.hh" #include "gpu_context_private.hh" #include "gpu_matrix_private.h" @@ -70,6 +70,12 @@ GPUContext::GPUContext() GPUContext::~GPUContext() { GPU_matrix_state_discard(matrix_state); + delete state_manager; + delete front_left; + delete back_left; + delete front_right; + delete back_right; + delete imm; } bool GPUContext::is_active_on_thread(void) @@ -83,12 +89,12 @@ bool GPUContext::is_active_on_thread(void) GPUContext *GPU_context_create(void *ghost_window) { - if (gpu_backend_get() == NULL) { + if (GPUBackend::get() == NULL) { /* TODO move where it make sense. 
*/ GPU_backend_init(GPU_BACKEND_OPENGL); } - GPUContext *ctx = gpu_backend_get()->context_alloc(ghost_window); + GPUContext *ctx = GPUBackend::get()->context_alloc(ghost_window); GPU_context_active_set(ctx); return ctx; @@ -120,18 +126,6 @@ GPUContext *GPU_context_active_get(void) return active_ctx; } -GLuint GPU_vao_default(void) -{ - BLI_assert(active_ctx); /* need at least an active context */ - return static_cast<GLContext *>(active_ctx)->default_vao_; -} - -GLuint GPU_framebuffer_default(void) -{ - BLI_assert(active_ctx); /* need at least an active context */ - return static_cast<GLContext *>(active_ctx)->default_framebuffer_; -} - GLuint GPU_vao_alloc(void) { GLuint new_vao_id = 0; @@ -173,63 +167,17 @@ void GPU_fbo_free(GLuint fbo_id, GPUContext *ctx) void GPU_buf_free(GLuint buf_id) { /* TODO avoid using backend */ - GPUBackend *backend = gpu_backend_get(); + GPUBackend *backend = GPUBackend::get(); static_cast<GLBackend *>(backend)->buf_free(buf_id); } void GPU_tex_free(GLuint tex_id) { /* TODO avoid using backend */ - GPUBackend *backend = gpu_backend_get(); + GPUBackend *backend = GPUBackend::get(); static_cast<GLBackend *>(backend)->tex_free(tex_id); } -/* GPUBatch & GPUFrameBuffer contains respectively VAO & FBO indices - * which are not shared across contexts. So we need to keep track of - * ownership. 
*/ - -void gpu_context_add_batch(GPUContext *ctx, GPUBatch *batch) -{ - BLI_assert(ctx); - static_cast<GLContext *>(ctx)->batch_register(batch); -} - -void gpu_context_remove_batch(GPUContext *ctx, GPUBatch *batch) -{ - BLI_assert(ctx); - static_cast<GLContext *>(ctx)->batch_unregister(batch); -} - -void gpu_context_add_framebuffer(GPUContext *ctx, GPUFrameBuffer *fb) -{ -#ifdef DEBUG - BLI_assert(ctx); - static_cast<GLContext *>(ctx)->framebuffer_register(fb); -#else - UNUSED_VARS(ctx, fb); -#endif -} - -void gpu_context_remove_framebuffer(GPUContext *ctx, GPUFrameBuffer *fb) -{ -#ifdef DEBUG - BLI_assert(ctx); - static_cast<GLContext *>(ctx)->framebuffer_unregister(fb); -#else - UNUSED_VARS(ctx, fb); -#endif -} - -void gpu_context_active_framebuffer_set(GPUContext *ctx, GPUFrameBuffer *fb) -{ - ctx->current_fbo = fb; -} - -GPUFrameBuffer *gpu_context_active_framebuffer_get(GPUContext *ctx) -{ - return ctx->current_fbo; -} - struct GPUMatrixState *gpu_context_active_matrix_state_get() { BLI_assert(active_ctx); @@ -285,7 +233,7 @@ void GPU_backend_exit(void) delete g_backend; } -GPUBackend *gpu_backend_get(void) +GPUBackend *GPUBackend::get(void) { return g_backend; } diff --git a/source/blender/gpu/intern/gpu_context_private.hh b/source/blender/gpu/intern/gpu_context_private.hh index d369dbe7402..b72eee13105 100644 --- a/source/blender/gpu/intern/gpu_context_private.hh +++ b/source/blender/gpu/intern/gpu_context_private.hh @@ -29,25 +29,46 @@ #include "GPU_context.h" +#include "gpu_framebuffer_private.hh" +#include "gpu_immediate_private.hh" +#include "gpu_shader_private.hh" +#include "gpu_state_private.hh" + #include <mutex> #include <pthread.h> #include <string.h> #include <unordered_set> #include <vector> -struct GPUFrameBuffer; struct GPUMatrixState; struct GPUContext { public: /** State managment */ - GPUFrameBuffer *current_fbo = NULL; + blender::gpu::Shader *shader = NULL; + blender::gpu::FrameBuffer *active_fb = NULL; GPUMatrixState *matrix_state = NULL; + 
blender::gpu::GPUStateManager *state_manager = NULL; + blender::gpu::Immediate *imm = NULL; + + /** + * All 4 window framebuffers. + * None of them are valid in an offscreen context. + * Right framebuffers are only available if using stereo rendering. + * Front framebuffers contains (in principle, but not always) the last frame color. + * Default framebuffer is back_left. + */ + blender::gpu::FrameBuffer *back_left = NULL; + blender::gpu::FrameBuffer *front_left = NULL; + blender::gpu::FrameBuffer *back_right = NULL; + blender::gpu::FrameBuffer *front_right = NULL; protected: /** Thread on which this context is active. */ pthread_t thread_; bool is_active_; + /** Avoid including GHOST headers. Can be NULL for offscreen contexts. */ + void *ghost_window_; public: GPUContext(); @@ -61,9 +82,6 @@ struct GPUContext { MEM_CXX_CLASS_ALLOC_FUNCS("GPUContext") }; -GLuint GPU_vao_default(void); -GLuint GPU_framebuffer_default(void); - /* These require a gl ctx bound. */ GLuint GPU_buf_alloc(void); GLuint GPU_tex_alloc(void); @@ -77,12 +95,6 @@ void GPU_tex_free(GLuint tex_id); void GPU_vao_free(GLuint vao_id, GPUContext *ctx); void GPU_fbo_free(GLuint fbo_id, GPUContext *ctx); -void gpu_context_add_batch(GPUContext *ctx, GPUBatch *batch); -void gpu_context_remove_batch(GPUContext *ctx, GPUBatch *batch); - -void gpu_context_add_framebuffer(GPUContext *ctx, struct GPUFrameBuffer *fb); -void gpu_context_remove_framebuffer(GPUContext *ctx, struct GPUFrameBuffer *fb); - void gpu_context_active_framebuffer_set(GPUContext *ctx, struct GPUFrameBuffer *fb); struct GPUFrameBuffer *gpu_context_active_framebuffer_get(GPUContext *ctx); diff --git a/source/blender/gpu/intern/gpu_debug.cc b/source/blender/gpu/intern/gpu_debug.cc index f7d6236071d..f179a241926 100644 --- a/source/blender/gpu/intern/gpu_debug.cc +++ b/source/blender/gpu/intern/gpu_debug.cc @@ -36,226 +36,6 @@ #include <stdlib.h> #include <string.h> -#ifndef __APPLE__ /* only non-Apple systems implement OpenGL debug 
callbacks */ - -/* control whether we use older AMD_debug_output extension - * some supported GPU + OS combos do not have the newer extensions */ -# define LEGACY_DEBUG 1 - -/* Debug callbacks need the same calling convention as OpenGL functions. */ -# if defined(_WIN32) -# define APIENTRY __stdcall -# else -# define APIENTRY -# endif - -static const char *source_name(GLenum source) -{ - switch (source) { - case GL_DEBUG_SOURCE_API: - return "API"; - case GL_DEBUG_SOURCE_WINDOW_SYSTEM: - return "window system"; - case GL_DEBUG_SOURCE_SHADER_COMPILER: - return "shader compiler"; - case GL_DEBUG_SOURCE_THIRD_PARTY: - return "3rd party"; - case GL_DEBUG_SOURCE_APPLICATION: - return "application"; - case GL_DEBUG_SOURCE_OTHER: - return "other"; - default: - return "???"; - } -} - -static const char *message_type_name(GLenum message) -{ - switch (message) { - case GL_DEBUG_TYPE_ERROR: - return "error"; - case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR: - return "deprecated behavior"; - case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR: - return "undefined behavior"; - case GL_DEBUG_TYPE_PORTABILITY: - return "portability"; - case GL_DEBUG_TYPE_PERFORMANCE: - return "performance"; - case GL_DEBUG_TYPE_OTHER: - return "other"; - case GL_DEBUG_TYPE_MARKER: - return "marker"; /* KHR has this, ARB does not */ - default: - return "???"; - } -} - -static void APIENTRY gpu_debug_proc(GLenum source, - GLenum type, - GLuint UNUSED(id), - GLenum severity, - GLsizei UNUSED(length), - const GLchar *message, - const GLvoid *UNUSED(userParm)) -{ - bool backtrace = false; - - switch (severity) { - case GL_DEBUG_SEVERITY_HIGH: - backtrace = true; - ATTR_FALLTHROUGH; - case GL_DEBUG_SEVERITY_MEDIUM: - case GL_DEBUG_SEVERITY_LOW: - case GL_DEBUG_SEVERITY_NOTIFICATION: /* KHR has this, ARB does not */ - fprintf(stderr, "GL %s %s: %s\n", source_name(source), message_type_name(type), message); - } - - if (backtrace) { - BLI_system_backtrace(stderr); - fflush(stderr); - } -} - -# if LEGACY_DEBUG - -static const 
char *category_name_amd(GLenum category) -{ - switch (category) { - case GL_DEBUG_CATEGORY_API_ERROR_AMD: - return "API error"; - case GL_DEBUG_CATEGORY_WINDOW_SYSTEM_AMD: - return "window system"; - case GL_DEBUG_CATEGORY_DEPRECATION_AMD: - return "deprecated behavior"; - case GL_DEBUG_CATEGORY_UNDEFINED_BEHAVIOR_AMD: - return "undefined behavior"; - case GL_DEBUG_CATEGORY_PERFORMANCE_AMD: - return "performance"; - case GL_DEBUG_CATEGORY_SHADER_COMPILER_AMD: - return "shader compiler"; - case GL_DEBUG_CATEGORY_APPLICATION_AMD: - return "application"; - case GL_DEBUG_CATEGORY_OTHER_AMD: - return "other"; - default: - return "???"; - } -} - -static void APIENTRY gpu_debug_proc_amd(GLuint UNUSED(id), - GLenum category, - GLenum severity, - GLsizei UNUSED(length), - const GLchar *message, - GLvoid *UNUSED(userParm)) -{ - bool backtrace = false; - - switch (severity) { - case GL_DEBUG_SEVERITY_HIGH: - backtrace = true; - ATTR_FALLTHROUGH; - case GL_DEBUG_SEVERITY_MEDIUM: - case GL_DEBUG_SEVERITY_LOW: - fprintf(stderr, "GL %s: %s\n", category_name_amd(category), message); - } - - if (backtrace) { - BLI_system_backtrace(stderr); - fflush(stderr); - } -} -# endif /* LEGACY_DEBUG */ - -# undef APIENTRY -#endif /* not Apple */ - -void gpu_debug_init(void) -{ -#ifdef __APPLE__ - fprintf(stderr, "OpenGL debug callback is not available on Apple.\n"); -#else /* not Apple */ - const char success[] = "Successfully hooked OpenGL debug callback."; - - if (GLEW_VERSION_4_3 || GLEW_KHR_debug) { - fprintf(stderr, - "Using %s\n", - GLEW_VERSION_4_3 ? 
"OpenGL 4.3 debug facilities" : "KHR_debug extension"); - glEnable(GL_DEBUG_OUTPUT); - glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); - glDebugMessageCallback((GLDEBUGPROC)gpu_debug_proc, NULL); - glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, GL_TRUE); - GPU_string_marker(success); - } - else if (GLEW_ARB_debug_output) { - fprintf(stderr, "Using ARB_debug_output extension\n"); - glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); - glDebugMessageCallbackARB((GLDEBUGPROCARB)gpu_debug_proc, NULL); - glDebugMessageControlARB(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, GL_TRUE); - GPU_string_marker(success); - } -# if LEGACY_DEBUG - else if (GLEW_AMD_debug_output) { - fprintf(stderr, "Using AMD_debug_output extension\n"); - glDebugMessageCallbackAMD(gpu_debug_proc_amd, NULL); - glDebugMessageEnableAMD(GL_DONT_CARE, GL_DONT_CARE, 0, NULL, GL_TRUE); - GPU_string_marker(success); - } -# endif - else { - fprintf(stderr, "Failed to hook OpenGL debug callback.\n"); - } -#endif /* not Apple */ -} - -void gpu_debug_exit(void) -{ -#ifndef __APPLE__ - if (GLEW_VERSION_4_3 || GLEW_KHR_debug) { - glDebugMessageCallback(NULL, NULL); - } - else if (GLEW_ARB_debug_output) { - glDebugMessageCallbackARB(NULL, NULL); - } -# if LEGACY_DEBUG - else if (GLEW_AMD_debug_output) { - glDebugMessageCallbackAMD(NULL, NULL); - } -# endif -#endif -} - -void GPU_string_marker(const char *buf) -{ -#ifdef __APPLE__ - UNUSED_VARS(buf); -#else /* not Apple */ - if (GLEW_VERSION_4_3 || GLEW_KHR_debug) { - glDebugMessageInsert(GL_DEBUG_SOURCE_APPLICATION, - GL_DEBUG_TYPE_MARKER, - 0, - GL_DEBUG_SEVERITY_NOTIFICATION, - -1, - buf); - } - else if (GLEW_ARB_debug_output) { - glDebugMessageInsertARB(GL_DEBUG_SOURCE_APPLICATION_ARB, - GL_DEBUG_TYPE_OTHER_ARB, - 0, - GL_DEBUG_SEVERITY_LOW_ARB, - -1, - buf); - } -# if LEGACY_DEBUG - else if (GLEW_AMD_debug_output) { - glDebugMessageInsertAMD( - GL_DEBUG_CATEGORY_APPLICATION_AMD, GL_DEBUG_SEVERITY_LOW_AMD, 0, 0, buf); - } -# endif -#endif /* not 
Apple */ -} - void GPU_print_error_debug(const char *str) { if (G.debug & G_DEBUG) { diff --git a/source/blender/gpu/intern/gpu_primitive.c b/source/blender/gpu/intern/gpu_drawlist.cc index 3b11b38db87..7b807a2fa80 100644 --- a/source/blender/gpu/intern/gpu_primitive.c +++ b/source/blender/gpu/intern/gpu_drawlist.cc @@ -20,30 +20,40 @@ /** \file * \ingroup gpu * - * GPU geometric primitives + * Implementation of Multi Draw Indirect. */ -#include "GPU_primitive.h" -#include "gpu_primitive_private.h" +#include "MEM_guardedalloc.h" -GLenum convert_prim_type_to_gl(GPUPrimType prim_type) +#include "GPU_batch.h" +#include "GPU_drawlist.h" + +#include "gpu_backend.hh" + +#include "gpu_drawlist_private.hh" + +using namespace blender::gpu; + +GPUDrawList GPU_draw_list_create(int list_length) +{ + DrawList *list_ptr = GPUBackend::get()->drawlist_alloc(list_length); + return reinterpret_cast<DrawList *>(list_ptr); +} + +void GPU_draw_list_discard(GPUDrawList list) +{ + DrawList *list_ptr = reinterpret_cast<DrawList *>(list); + delete list_ptr; +} + +void GPU_draw_list_append(GPUDrawList list, GPUBatch *batch, int i_first, int i_count) +{ + DrawList *list_ptr = reinterpret_cast<DrawList *>(list); + list_ptr->append(batch, i_first, i_count); +} + +void GPU_draw_list_submit(GPUDrawList list) { -#if TRUST_NO_ONE - assert(prim_type != GPU_PRIM_NONE); -#endif - static const GLenum table[] = { - [GPU_PRIM_POINTS] = GL_POINTS, - [GPU_PRIM_LINES] = GL_LINES, - [GPU_PRIM_LINE_STRIP] = GL_LINE_STRIP, - [GPU_PRIM_LINE_LOOP] = GL_LINE_LOOP, - [GPU_PRIM_TRIS] = GL_TRIANGLES, - [GPU_PRIM_TRI_STRIP] = GL_TRIANGLE_STRIP, - [GPU_PRIM_TRI_FAN] = GL_TRIANGLE_FAN, - - [GPU_PRIM_LINES_ADJ] = GL_LINES_ADJACENCY, - [GPU_PRIM_LINE_STRIP_ADJ] = GL_LINE_STRIP_ADJACENCY, - [GPU_PRIM_TRIS_ADJ] = GL_TRIANGLES_ADJACENCY, - }; - - return table[prim_type]; + DrawList *list_ptr = reinterpret_cast<DrawList *>(list); + list_ptr->submit(); } diff --git a/source/blender/gpu/GPU_attr_binding.h 
b/source/blender/gpu/intern/gpu_drawlist_private.hh index e7c3dcbce05..ddb09fb0c89 100644 --- a/source/blender/gpu/GPU_attr_binding.h +++ b/source/blender/gpu/intern/gpu_drawlist_private.hh @@ -13,31 +13,32 @@ * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * The Original Code is Copyright (C) 2016 by Mike Erwin. + * The Original Code is Copyright (C) 2020 Blender Foundation. * All rights reserved. */ /** \file * \ingroup gpu - * - * GPU vertex attribute binding */ #pragma once -#include "GPU_common.h" +#include "MEM_guardedalloc.h" + +namespace blender { +namespace gpu { -#ifdef __cplusplus -extern "C" { -#endif +/** + * Implementation of Multi Draw Indirect. + * Base class which is then specialized for each implementation (GL, VK, ...). + **/ +class DrawList { + public: + virtual ~DrawList(){}; -typedef struct GPUAttrBinding { - /** Store 4 bits for each of the 16 attributes. */ - uint64_t loc_bits; - /** 1 bit for each attribute. 
*/ - uint16_t enabled_bits; -} GPUAttrBinding; + virtual void append(GPUBatch *batch, int i_first, int i_count) = 0; + virtual void submit() = 0; +}; -#ifdef __cplusplus -} -#endif +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/intern/gpu_element.cc b/source/blender/gpu/intern/gpu_element.cc index cf7cc1d214c..29c95c725fd 100644 --- a/source/blender/gpu/intern/gpu_element.cc +++ b/source/blender/gpu/intern/gpu_element.cc @@ -326,6 +326,11 @@ static void squeeze_indices_short(GPUIndexBufBuilder *builder, #endif /* GPU_TRACK_INDEX_RANGE */ +GPUIndexBuf *GPU_indexbuf_calloc(void) +{ + return (GPUIndexBuf *)MEM_callocN(sizeof(GPUIndexBuf), __func__); +} + GPUIndexBuf *GPU_indexbuf_build(GPUIndexBufBuilder *builder) { GPUIndexBuf *elem = (GPUIndexBuf *)MEM_callocN(sizeof(GPUIndexBuf), "GPUIndexBuf"); diff --git a/source/blender/gpu/intern/gpu_extensions.cc b/source/blender/gpu/intern/gpu_extensions.cc index 8074e4b64f0..6fe08d81cda 100644 --- a/source/blender/gpu/intern/gpu_extensions.cc +++ b/source/blender/gpu/intern/gpu_extensions.cc @@ -71,12 +71,10 @@ static struct GPUGlobal { GLint maxubosize; GLint maxubobinds; int samples_color_texture_max; - float line_width_range[2]; /* workaround for different calculation of dfdy factors on GPUs. Some GPUs/drivers * calculate dfdy in shader differently when drawing to an off-screen buffer. First * number is factor on screen and second is off-screen */ float dfdyfactors[2]; - float max_anisotropy; /* Some Intel drivers have limited support for `GLEW_ARB_base_instance` so in * these cases it is best to indicate that it is not supported. 
See T67951 */ bool glew_arb_base_instance_is_supported; @@ -118,7 +116,7 @@ static void gpu_detect_mip_render_workaround(void) glTexParameteri(GPU_texture_target(tex), GL_TEXTURE_MAX_LEVEL, 0); GPU_texture_unbind(tex); - GPUFrameBuffer *fb = GPU_framebuffer_create(); + GPUFrameBuffer *fb = GPU_framebuffer_create(__func__); GPU_framebuffer_texture_attach(fb, tex, 0, 1); GPU_framebuffer_bind(fb); GPU_framebuffer_clear_color(fb, clear_color); @@ -164,11 +162,6 @@ int GPU_max_textures_vert(void) return GG.maxtexturesvert; } -float GPU_max_texture_anisotropy(void) -{ - return GG.max_anisotropy; -} - int GPU_max_color_texture_samples(void) { return GG.samples_color_texture_max; @@ -189,11 +182,6 @@ int GPU_max_ubo_size(void) return GG.maxubosize; } -float GPU_max_line_width(void) -{ - return GG.line_width_range[1]; -} - void GPU_get_dfdy_factors(float fac[2]) { copy_v2_v2(fac, GG.dfdyfactors); @@ -264,18 +252,9 @@ void gpu_extensions_init(void) glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &GG.maxtexlayers); glGetIntegerv(GL_MAX_CUBE_MAP_TEXTURE_SIZE, &GG.maxcubemapsize); - if (GLEW_EXT_texture_filter_anisotropic) { - glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &GG.max_anisotropy); - } - else { - GG.max_anisotropy = 1.0f; - } - glGetIntegerv(GL_MAX_FRAGMENT_UNIFORM_BLOCKS, &GG.maxubobinds); glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &GG.maxubosize); - glGetFloatv(GL_ALIASED_LINE_WIDTH_RANGE, GG.line_width_range); - glGetIntegerv(GL_MAX_COLOR_TEXTURE_SAMPLES, &GG.samples_color_texture_max); const char *vendor = (const char *)glGetString(GL_VENDOR); diff --git a/source/blender/gpu/intern/gpu_framebuffer.cc b/source/blender/gpu/intern/gpu_framebuffer.cc index 5f3089b2ffb..1b6fea56028 100644 --- a/source/blender/gpu/intern/gpu_framebuffer.cc +++ b/source/blender/gpu/intern/gpu_framebuffer.cc @@ -29,681 +29,396 @@ #include "GPU_batch.h" #include "GPU_extensions.h" -#include "GPU_framebuffer.h" #include "GPU_shader.h" #include "GPU_texture.h" +#include "gpu_backend.hh" #include 
"gpu_context_private.hh" #include "gpu_private.h" +#include "gpu_texture_private.hh" -typedef enum { - GPU_FB_DEPTH_ATTACHMENT = 0, - GPU_FB_DEPTH_STENCIL_ATTACHMENT, - GPU_FB_COLOR_ATTACHMENT0, - GPU_FB_COLOR_ATTACHMENT1, - GPU_FB_COLOR_ATTACHMENT2, - GPU_FB_COLOR_ATTACHMENT3, - GPU_FB_COLOR_ATTACHMENT4, - GPU_FB_COLOR_ATTACHMENT5, - /* Number of maximum output slots. - * We support 6 outputs for now (usually we wouldn't need more to preserve fill rate). */ - /* Keep in mind that GL max is GL_MAX_DRAW_BUFFERS and is at least 8, corresponding to - * the maximum number of COLOR attachments specified by glDrawBuffers. */ - GPU_FB_MAX_ATTACHEMENT, -} GPUAttachmentType; - -#define FOREACH_ATTACHMENT_RANGE(att, _start, _end) \ - for (GPUAttachmentType att = static_cast<GPUAttachmentType>(_start); att < _end; \ - att = static_cast<GPUAttachmentType>(att + 1)) - -#define GPU_FB_MAX_COLOR_ATTACHMENT (GPU_FB_MAX_ATTACHEMENT - GPU_FB_COLOR_ATTACHMENT0) - -#define GPU_FB_DIRTY_DRAWBUFFER (1 << 15) - -#define GPU_FB_ATTACHEMENT_IS_DIRTY(flag, type) ((flag & (1 << type)) != 0) -#define GPU_FB_ATTACHEMENT_SET_DIRTY(flag, type) (flag |= (1 << type)) - -struct GPUFrameBuffer { - GPUContext *ctx; - GLuint object; - GPUAttachment attachments[GPU_FB_MAX_ATTACHEMENT]; - uint16_t dirty_flag; - int width, height; - bool multisample; - /* TODO Check that we always use the right context when binding - * (FBOs are not shared across ogl contexts). 
*/ - // void *ctx; -}; +#include "gpu_framebuffer_private.hh" + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Constructor / Destructor + * \{ */ -static GLenum convert_attachment_type_to_gl(GPUAttachmentType type) +FrameBuffer::FrameBuffer(const char *name) { -#define ATTACHMENT(type) \ - case GPU_FB_##type: { \ - return GL_##type; \ - } \ - ((void)0) - - switch (type) { - ATTACHMENT(DEPTH_ATTACHMENT); - ATTACHMENT(DEPTH_STENCIL_ATTACHMENT); - ATTACHMENT(COLOR_ATTACHMENT0); - ATTACHMENT(COLOR_ATTACHMENT1); - ATTACHMENT(COLOR_ATTACHMENT2); - ATTACHMENT(COLOR_ATTACHMENT3); - ATTACHMENT(COLOR_ATTACHMENT4); - ATTACHMENT(COLOR_ATTACHMENT5); - default: - BLI_assert(0); - return GL_COLOR_ATTACHMENT0; + if (name) { + BLI_strncpy(name_, name, sizeof(name_)); } -} + else { + name_[0] = '\0'; + } + /* Force config on first use. */ + dirty_attachments_ = true; + dirty_state_ = true; -static GPUAttachmentType attachment_type_from_tex(GPUTexture *tex, int slot) -{ - switch (GPU_texture_format(tex)) { - case GPU_DEPTH_COMPONENT32F: - case GPU_DEPTH_COMPONENT24: - case GPU_DEPTH_COMPONENT16: - return GPU_FB_DEPTH_ATTACHMENT; - case GPU_DEPTH24_STENCIL8: - case GPU_DEPTH32F_STENCIL8: - return GPU_FB_DEPTH_STENCIL_ATTACHMENT; - default: - return static_cast<GPUAttachmentType>(GPU_FB_COLOR_ATTACHMENT0 + slot); + for (int i = 0; i < ARRAY_SIZE(attachments_); i++) { + attachments_[i].tex = NULL; + attachments_[i].mip = -1; + attachments_[i].layer = -1; } } -static GLenum convert_buffer_bits_to_gl(eGPUFrameBufferBits bits) +FrameBuffer::~FrameBuffer() { - GLbitfield mask = 0; - mask |= (bits & GPU_DEPTH_BIT) ? GL_DEPTH_BUFFER_BIT : 0; - mask |= (bits & GPU_STENCIL_BIT) ? GL_STENCIL_BUFFER_BIT : 0; - mask |= (bits & GPU_COLOR_BIT) ? 
GL_COLOR_BUFFER_BIT : 0; - return mask; + GPUFrameBuffer *gpu_fb = reinterpret_cast<GPUFrameBuffer *>(this); + for (int i = 0; i < ARRAY_SIZE(attachments_); i++) { + if (attachments_[i].tex != NULL) { + GPU_texture_detach_framebuffer(attachments_[i].tex, gpu_fb); + } + } } -static GPUTexture *framebuffer_get_depth_tex(GPUFrameBuffer *fb) +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Attachments managment + * \{ */ + +void FrameBuffer::attachment_set(GPUAttachmentType type, const GPUAttachment &new_attachment) { - if (fb->attachments[GPU_FB_DEPTH_ATTACHMENT].tex) { - return fb->attachments[GPU_FB_DEPTH_ATTACHMENT].tex; + if (new_attachment.mip == -1) { + return; /* GPU_ATTACHMENT_LEAVE */ } - return fb->attachments[GPU_FB_DEPTH_STENCIL_ATTACHMENT].tex; -} + if (type >= GPU_FB_MAX_ATTACHEMENT) { + fprintf(stderr, + "GPUFramebuffer: Error: Trying to attach texture to type %d but maximum slot is %d.\n", + type - GPU_FB_COLOR_ATTACHMENT0, + GPU_FB_MAX_COLOR_ATTACHMENT); + return; + } -static GPUTexture *framebuffer_get_color_tex(GPUFrameBuffer *fb, int slot) -{ - return fb->attachments[GPU_FB_COLOR_ATTACHMENT0 + slot].tex; -} + if (new_attachment.tex) { + if (new_attachment.layer > 0) { + BLI_assert(ELEM(GPU_texture_target(new_attachment.tex), + GL_TEXTURE_2D_ARRAY, + GL_TEXTURE_CUBE_MAP, + GL_TEXTURE_CUBE_MAP_ARRAY_ARB)); + } + if (GPU_texture_stencil(new_attachment.tex)) { + BLI_assert(ELEM(type, GPU_FB_DEPTH_STENCIL_ATTACHMENT)); + } + else if (GPU_texture_depth(new_attachment.tex)) { + BLI_assert(ELEM(type, GPU_FB_DEPTH_ATTACHMENT)); + } + } -static void gpu_print_framebuffer_error(GLenum status, char err_out[256]) -{ - const char *format = "GPUFrameBuffer: framebuffer status %s\n"; - const char *err = "unknown"; - -#define FORMAT_STATUS(X) \ - case GL_FRAMEBUFFER_##X: { \ - err = "GL_FRAMEBUFFER_" #X; \ - break; \ - } \ - ((void)0) - - switch (status) { - /* success */ - FORMAT_STATUS(COMPLETE); - /* errors 
shared by OpenGL desktop & ES */ - FORMAT_STATUS(INCOMPLETE_ATTACHMENT); - FORMAT_STATUS(INCOMPLETE_MISSING_ATTACHMENT); - FORMAT_STATUS(UNSUPPORTED); -#if 0 /* for OpenGL ES only */ - FORMAT_STATUS(INCOMPLETE_DIMENSIONS); -#else /* for desktop GL only */ - FORMAT_STATUS(INCOMPLETE_DRAW_BUFFER); - FORMAT_STATUS(INCOMPLETE_READ_BUFFER); - FORMAT_STATUS(INCOMPLETE_MULTISAMPLE); - FORMAT_STATUS(UNDEFINED); -#endif + GPUAttachment &attachment = attachments_[type]; + + if (attachment.tex == new_attachment.tex && attachment.layer == new_attachment.layer && + attachment.mip == new_attachment.mip) { + return; /* Exact same texture already bound here. */ + } + /* Unbind previous and bind new. */ + /* TODO(fclem) cleanup the casts. */ + if (attachment.tex) { + GPU_texture_detach_framebuffer(attachment.tex, reinterpret_cast<GPUFrameBuffer *>(this)); } -#undef FORMAT_STATUS + attachment = new_attachment; - if (err_out) { - BLI_snprintf(err_out, 256, format, err); + /* Might be null if this is for unbinding. */ + if (attachment.tex) { + GPU_texture_attach_framebuffer(attachment.tex, reinterpret_cast<GPUFrameBuffer *>(this), type); } else { - fprintf(stderr, format, err); + /* GPU_ATTACHMENT_NONE */ } -} - -void gpu_framebuffer_module_init(void) -{ -} -void gpu_framebuffer_module_exit(void) -{ + dirty_attachments_ = true; } -GPUFrameBuffer *GPU_framebuffer_active_get(void) +void FrameBuffer::recursive_downsample(int max_lvl, + void (*callback)(void *userData, int level), + void *userData) { GPUContext *ctx = GPU_context_active_get(); - if (ctx) { - return gpu_context_active_framebuffer_get(ctx); + /* Bind to make sure the framebuffer is up to date. */ + this->bind(true); + + if (width_ == 1 && height_ == 1) { + return; } + /* HACK: Make the framebuffer appear not bound to avoid assert in GPU_texture_bind. 
*/ + ctx->active_fb = NULL; - return 0; -} + int levels = floor(log2(max_ii(width_, height_))); + max_lvl = min_ii(max_lvl, levels); -static void gpu_framebuffer_current_set(GPUFrameBuffer *fb) -{ - GPUContext *ctx = GPU_context_active_get(); - if (ctx) { - gpu_context_active_framebuffer_set(ctx, fb); + int current_dim[2] = {width_, height_}; + int mip_lvl; + for (mip_lvl = 1; mip_lvl < max_lvl + 1; mip_lvl++) { + /* calculate next viewport size */ + current_dim[0] = max_ii(current_dim[0] / 2, 1); + current_dim[1] = max_ii(current_dim[1] / 2, 1); + /* Replace attaached miplevel for each attachement. */ + for (int att = 0; att < ARRAY_SIZE(attachments_); att++) { + GPUTexture *tex = attachments_[att].tex; + if (tex != NULL) { + /* Some Intel HDXXX have issue with rendering to a mipmap that is below + * the texture GL_TEXTURE_MAX_LEVEL. So even if it not correct, in this case + * we allow GL_TEXTURE_MAX_LEVEL to be one level lower. In practice it does work! */ + int map_lvl = (GPU_mip_render_workaround()) ? mip_lvl : (mip_lvl - 1); + /* Restrict fetches only to previous level. */ + GPU_texture_bind(tex, 0); + glTexParameteri(GPU_texture_target(tex), GL_TEXTURE_BASE_LEVEL, mip_lvl - 1); + glTexParameteri(GPU_texture_target(tex), GL_TEXTURE_MAX_LEVEL, map_lvl); + GPU_texture_unbind(tex); + /* Bind next level. */ + attachments_[att].mip = mip_lvl; + } + } + /* Update the internal attachments and viewport size. */ + dirty_attachments_ = true; + this->bind(true); + /* HACK: Make the framebuffer appear not bound to avoid assert in GPU_texture_bind. */ + ctx->active_fb = NULL; + + callback(userData, mip_lvl); + + /* This is the last mipmap level. Exit loop without incrementing mip_lvl. */ + if (current_dim[0] == 1 && current_dim[1] == 1) { + break; + } + } + + for (int att = 0; att < ARRAY_SIZE(attachments_); att++) { + if (attachments_[att].tex != NULL) { + /* Reset mipmap level range. 
*/ + GPUTexture *tex = attachments_[att].tex; + GPU_texture_bind(tex, 0); + glTexParameteri(GPU_texture_target(tex), GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GPU_texture_target(tex), GL_TEXTURE_MAX_LEVEL, mip_lvl); + GPU_texture_unbind(tex); + /* Reset base level. NOTE: might not be the one bound at the start of this function. */ + attachments_[att].mip = 0; + } } + dirty_attachments_ = true; } -/* GPUFrameBuffer */ +/** \} */ + +} // namespace blender::gpu -GPUFrameBuffer *GPU_framebuffer_create(void) +/* -------------------------------------------------------------------- */ +/** \name C-API + * \{ */ + +using namespace blender; +using namespace blender::gpu; + +GPUFrameBuffer *GPU_framebuffer_create(const char *name) { /* We generate the FB object later at first use in order to * create the framebuffer in the right opengl context. */ - return (GPUFrameBuffer *)MEM_callocN(sizeof(GPUFrameBuffer), "GPUFrameBuffer"); + return (GPUFrameBuffer *)GPUBackend::get()->framebuffer_alloc(name); } -static void gpu_framebuffer_init(GPUFrameBuffer *fb) +void GPU_framebuffer_free(GPUFrameBuffer *gpu_fb) { - fb->object = GPU_fbo_alloc(); - fb->ctx = GPU_context_active_get(); - gpu_context_add_framebuffer(fb->ctx, fb); + delete reinterpret_cast<FrameBuffer *>(gpu_fb); } -void GPU_framebuffer_free(GPUFrameBuffer *fb) -{ - for (int i_type = 0; i_type < GPU_FB_MAX_ATTACHEMENT; i_type++) { - GPUAttachmentType type = static_cast<GPUAttachmentType>(i_type); - if (fb->attachments[type].tex != NULL) { - GPU_framebuffer_texture_detach(fb, fb->attachments[type].tex); - } - } - - if (fb->object != 0) { - /* This restores the framebuffer if it was bound */ - GPU_fbo_free(fb->object, fb->ctx); - gpu_context_remove_framebuffer(fb->ctx, fb); - } +/* ---------- Binding ----------- */ - if (GPU_framebuffer_active_get() == fb) { - gpu_framebuffer_current_set(NULL); - } - - MEM_freeN(fb); +void GPU_framebuffer_bind(GPUFrameBuffer *gpu_fb) +{ + FrameBuffer *fb = reinterpret_cast<FrameBuffer 
*>(gpu_fb); + const bool enable_srgb = true; + fb->bind(enable_srgb); } -/* ---------- Attach ----------- */ - -static void gpu_framebuffer_texture_attach_ex( - GPUFrameBuffer *fb, GPUTexture *tex, int slot, int layer, int mip) +/* Workaround for binding a srgb framebuffer without doing the srgb transform. */ +void GPU_framebuffer_bind_no_srgb(GPUFrameBuffer *gpu_fb) { - if (slot >= GPU_FB_MAX_COLOR_ATTACHMENT) { - fprintf(stderr, - "Attaching to index %d framebuffer slot unsupported. " - "Use at most %d\n", - slot, - GPU_FB_MAX_COLOR_ATTACHMENT); - return; - } + FrameBuffer *fb = reinterpret_cast<FrameBuffer *>(gpu_fb); + const bool enable_srgb = false; + fb->bind(enable_srgb); +} - GPUAttachmentType type = attachment_type_from_tex(tex, slot); - GPUAttachment *attachment = &fb->attachments[type]; +/* For stereo rendering. */ +void GPU_backbuffer_bind(eGPUBackBuffer buffer) +{ + GPUContext *ctx = GPU_context_active_get(); - if ((attachment->tex == tex) && (attachment->mip == mip) && (attachment->layer == layer)) { - return; /* Exact same texture already bound here. */ - } - if (attachment->tex != NULL) { - GPU_framebuffer_texture_detach(fb, attachment->tex); + if (buffer == GPU_BACKBUFFER_LEFT) { + ctx->back_left->bind(false); } - - if (attachment->tex == NULL) { - GPU_texture_attach_framebuffer(tex, fb, type); + else { + ctx->back_right->bind(false); } - - attachment->tex = tex; - attachment->mip = mip; - attachment->layer = layer; - GPU_FB_ATTACHEMENT_SET_DIRTY(fb->dirty_flag, type); } -void GPU_framebuffer_texture_attach(GPUFrameBuffer *fb, GPUTexture *tex, int slot, int mip) +void GPU_framebuffer_restore(void) { - gpu_framebuffer_texture_attach_ex(fb, tex, slot, -1, mip); + GPU_context_active_get()->back_left->bind(false); } -void GPU_framebuffer_texture_layer_attach( - GPUFrameBuffer *fb, GPUTexture *tex, int slot, int layer, int mip) +GPUFrameBuffer *GPU_framebuffer_active_get(void) { - /* NOTE: We could support 1D ARRAY texture. 
*/ - BLI_assert(GPU_texture_target(tex) == GL_TEXTURE_2D_ARRAY); - gpu_framebuffer_texture_attach_ex(fb, tex, slot, layer, mip); + GPUContext *ctx = GPU_context_active_get(); + return reinterpret_cast<GPUFrameBuffer *>(ctx ? ctx->active_fb : NULL); } -void GPU_framebuffer_texture_cubeface_attach( - GPUFrameBuffer *fb, GPUTexture *tex, int slot, int face, int mip) +/* Returns the default framebuffer. Will always exists even if it's just a dummy. */ +GPUFrameBuffer *GPU_framebuffer_back_get(void) { - BLI_assert(GPU_texture_cube(tex)); - gpu_framebuffer_texture_attach_ex(fb, tex, slot, face, mip); + GPUContext *ctx = GPU_context_active_get(); + return reinterpret_cast<GPUFrameBuffer *>(ctx ? ctx->back_left : NULL); } -/* ---------- Detach ----------- */ - -void GPU_framebuffer_texture_detach_slot(GPUFrameBuffer *fb, GPUTexture *tex, int type) +bool GPU_framebuffer_bound(GPUFrameBuffer *gpu_fb) { - GPUAttachment *attachment = &fb->attachments[type]; + return (gpu_fb == GPU_framebuffer_active_get()); +} - if (attachment->tex != tex) { - fprintf(stderr, - "Warning, attempting to detach Texture %p from framebuffer %p " - "but texture is not attached.\n", - tex, - fb); - return; - } +/* ---------- Attachment Management ----------- */ - attachment->tex = NULL; - GPU_FB_ATTACHEMENT_SET_DIRTY(fb->dirty_flag, type); +bool GPU_framebuffer_check_valid(GPUFrameBuffer *gpu_fb, char err_out[256]) +{ + return reinterpret_cast<FrameBuffer *>(gpu_fb)->check(err_out); } -void GPU_framebuffer_texture_detach(GPUFrameBuffer *fb, GPUTexture *tex) +void GPU_framebuffer_texture_attach_ex(GPUFrameBuffer *gpu_fb, GPUAttachment attachement, int slot) { - GPUAttachmentType type = (GPUAttachmentType)GPU_texture_detach_framebuffer(tex, fb); - GPU_framebuffer_texture_detach_slot(fb, tex, type); + GPUAttachmentType type = blender::gpu::Texture::attachment_type(attachement.tex, slot); + reinterpret_cast<FrameBuffer *>(gpu_fb)->attachment_set(type, attachement); } -/* ---------- Config (Attach & 
Detach) ----------- */ - -/** - * First GPUAttachment in *config is always the depth/depth_stencil buffer. - * Following GPUAttachments are color buffers. - * Setting GPUAttachment.mip to -1 will leave the texture in this slot. - * Setting GPUAttachment.tex to NULL will detach the texture in this slot. - */ -void GPU_framebuffer_config_array(GPUFrameBuffer *fb, const GPUAttachment *config, int config_len) +void GPU_framebuffer_texture_attach(GPUFrameBuffer *fb, GPUTexture *tex, int slot, int mip) { - if (config[0].tex) { - BLI_assert(GPU_texture_depth(config[0].tex)); - gpu_framebuffer_texture_attach_ex(fb, config[0].tex, 0, config[0].layer, config[0].mip); - } - else if (config[0].mip == -1) { - /* Leave texture attached */ - } - else if (fb->attachments[GPU_FB_DEPTH_ATTACHMENT].tex != NULL) { - GPU_framebuffer_texture_detach(fb, fb->attachments[GPU_FB_DEPTH_ATTACHMENT].tex); - } - else if (fb->attachments[GPU_FB_DEPTH_STENCIL_ATTACHMENT].tex != NULL) { - GPU_framebuffer_texture_detach(fb, fb->attachments[GPU_FB_DEPTH_STENCIL_ATTACHMENT].tex); - } - - int slot = 0; - for (int i = 1; i < config_len; i++, slot++) { - if (config[i].tex != NULL) { - BLI_assert(GPU_texture_depth(config[i].tex) == false); - gpu_framebuffer_texture_attach_ex(fb, config[i].tex, slot, config[i].layer, config[i].mip); - } - else if (config[i].mip != -1) { - GPUTexture *tex = framebuffer_get_color_tex(fb, slot); - if (tex != NULL) { - GPU_framebuffer_texture_detach(fb, tex); - } - } - } + GPUAttachment attachement = GPU_ATTACHMENT_TEXTURE_MIP(tex, mip); + GPU_framebuffer_texture_attach_ex(fb, attachement, slot); } -/* ---------- Bind / Restore ----------- */ - -static void gpu_framebuffer_attachment_attach(GPUAttachment *attach, GPUAttachmentType attach_type) +void GPU_framebuffer_texture_layer_attach( + GPUFrameBuffer *fb, GPUTexture *tex, int slot, int layer, int mip) { - int tex_bind = GPU_texture_opengl_bindcode(attach->tex); - GLenum gl_attachment = 
convert_attachment_type_to_gl(attach_type); - - if (attach->layer > -1) { - if (GPU_texture_cube(attach->tex)) { - glFramebufferTexture2D(GL_FRAMEBUFFER, - gl_attachment, - GL_TEXTURE_CUBE_MAP_POSITIVE_X + attach->layer, - tex_bind, - attach->mip); - } - else { - glFramebufferTextureLayer( - GL_FRAMEBUFFER, gl_attachment, tex_bind, attach->mip, attach->layer); - } - } - else { - glFramebufferTexture(GL_FRAMEBUFFER, gl_attachment, tex_bind, attach->mip); - } + GPUAttachment attachement = GPU_ATTACHMENT_TEXTURE_LAYER_MIP(tex, layer, mip); + GPU_framebuffer_texture_attach_ex(fb, attachement, slot); } -static void gpu_framebuffer_attachment_detach(GPUAttachment *UNUSED(attachment), - GPUAttachmentType attach_type) +void GPU_framebuffer_texture_cubeface_attach( + GPUFrameBuffer *fb, GPUTexture *tex, int slot, int face, int mip) { - GLenum gl_attachment = convert_attachment_type_to_gl(attach_type); - glFramebufferTexture(GL_FRAMEBUFFER, gl_attachment, 0, 0); + GPUAttachment attachement = GPU_ATTACHMENT_TEXTURE_CUBEFACE_MIP(tex, face, mip); + GPU_framebuffer_texture_attach_ex(fb, attachement, slot); } -static void gpu_framebuffer_update_attachments(GPUFrameBuffer *fb) +void GPU_framebuffer_texture_detach(GPUFrameBuffer *gpu_fb, GPUTexture *tex) { - GLenum gl_attachments[GPU_FB_MAX_COLOR_ATTACHMENT]; - int numslots = 0; - - BLI_assert(GPU_framebuffer_active_get() == fb); - - /* Update attachments */ - FOREACH_ATTACHMENT_RANGE(type, 0, GPU_FB_MAX_ATTACHEMENT) - { - if (type >= GPU_FB_COLOR_ATTACHMENT0) { - if (fb->attachments[type].tex) { - gl_attachments[numslots] = convert_attachment_type_to_gl(type); - } - else { - gl_attachments[numslots] = GL_NONE; - } - numslots++; - } - - if (GPU_FB_ATTACHEMENT_IS_DIRTY(fb->dirty_flag, type) == false) { - continue; - } - if (fb->attachments[type].tex != NULL) { - gpu_framebuffer_attachment_attach(&fb->attachments[type], type); - - fb->multisample = (GPU_texture_samples(fb->attachments[type].tex) > 0); - fb->width = 
GPU_texture_width(fb->attachments[type].tex); - fb->height = GPU_texture_height(fb->attachments[type].tex); - } - else { - gpu_framebuffer_attachment_detach(&fb->attachments[type], type); - } - } - fb->dirty_flag = 0; - - /* Update draw buffers (color targets) - * This state is saved in the FBO */ - if (numslots) { - glDrawBuffers(numslots, gl_attachments); + GPUAttachment attachement = GPU_ATTACHMENT_NONE; + int type = GPU_texture_framebuffer_attachement_get(tex, gpu_fb); + if (type != -1) { + reinterpret_cast<FrameBuffer *>(gpu_fb)->attachment_set((GPUAttachmentType)type, attachement); } else { - glDrawBuffer(GL_NONE); + BLI_assert(!"Error: Texture: Framebuffer is not attached"); } } /** - * Hack to solve the problem of some bugged AMD GPUs (see `GPU_unused_fb_slot_workaround`). - * If there is an empty color slot between the color slots, - * all textures after this slot are apparently skipped/discarded. + * First GPUAttachment in *config is always the depth/depth_stencil buffer. + * Following GPUAttachments are color buffers. + * Setting GPUAttachment.mip to -1 will leave the texture in this slot. + * Setting GPUAttachment.tex to NULL will detach the texture in this slot. 
*/ -static void gpu_framebuffer_update_attachments_and_fill_empty_slots(GPUFrameBuffer *fb) -{ - GLenum gl_attachments[GPU_FB_MAX_COLOR_ATTACHMENT]; - int dummy_tex = 0; - - BLI_assert(GPU_framebuffer_active_get() == fb); - - /* Update attachments */ - for (int i_type = GPU_FB_MAX_ATTACHEMENT - 1; i_type >= 0; --i_type) { - GPUAttachmentType type = static_cast<GPUAttachmentType>(i_type); - GPUTexture *tex = fb->attachments[type].tex; - - if (type >= GPU_FB_COLOR_ATTACHMENT0) { - int slot = type - GPU_FB_COLOR_ATTACHMENT0; - if (tex != NULL || (dummy_tex != 0)) { - gl_attachments[slot] = convert_attachment_type_to_gl(type); - - if (dummy_tex == 0) { - dummy_tex = GPU_texture_opengl_bindcode(tex); - } - } - else { - gl_attachments[slot] = GL_NONE; - } - } - else { - dummy_tex = 0; - } - - if ((dummy_tex != 0) && tex == NULL) { - /* Fill empty slot */ - glFramebufferTexture(GL_FRAMEBUFFER, convert_attachment_type_to_gl(type), dummy_tex, 0); - } - else if (GPU_FB_ATTACHEMENT_IS_DIRTY(fb->dirty_flag, type)) { - if (tex != NULL) { - gpu_framebuffer_attachment_attach(&fb->attachments[type], type); - - fb->multisample = (GPU_texture_samples(tex) > 0); - fb->width = GPU_texture_width(tex); - fb->height = GPU_texture_height(tex); - } - else { - gpu_framebuffer_attachment_detach(&fb->attachments[type], type); - } - } - } - fb->dirty_flag = 0; - - /* Update draw buffers (color targets) - * This state is saved in the FBO */ - glDrawBuffers(GPU_FB_MAX_COLOR_ATTACHMENT, gl_attachments); -} - -#define FRAMEBUFFER_STACK_DEPTH 16 - -static struct { - GPUFrameBuffer *framebuffers[FRAMEBUFFER_STACK_DEPTH]; - uint top; -} FrameBufferStack = {{0}}; - -static void gpuPushFrameBuffer(GPUFrameBuffer *fbo) +void GPU_framebuffer_config_array(GPUFrameBuffer *gpu_fb, + const GPUAttachment *config, + int config_len) { - BLI_assert(FrameBufferStack.top < FRAMEBUFFER_STACK_DEPTH); - FrameBufferStack.framebuffers[FrameBufferStack.top] = fbo; - FrameBufferStack.top++; -} - -static GPUFrameBuffer 
*gpuPopFrameBuffer(void) -{ - BLI_assert(FrameBufferStack.top > 0); - FrameBufferStack.top--; - return FrameBufferStack.framebuffers[FrameBufferStack.top]; -} + FrameBuffer *fb = reinterpret_cast<FrameBuffer *>(gpu_fb); -#undef FRAMEBUFFER_STACK_DEPTH + const GPUAttachment &depth_attachement = config[0]; + Span<GPUAttachment> color_attachments(config + 1, config_len - 1); -void GPU_framebuffer_bind(GPUFrameBuffer *fb) -{ - if (fb->object == 0) { - gpu_framebuffer_init(fb); + if (depth_attachement.mip == -1) { + /* GPU_ATTACHMENT_LEAVE */ } - - if (GPU_framebuffer_active_get() != fb) { - glBindFramebuffer(GL_FRAMEBUFFER, fb->object); - glEnable(GL_FRAMEBUFFER_SRGB); - - GPUTexture *first_target = fb->attachments[GPU_FB_COLOR_ATTACHMENT0].tex; - const bool is_srgb_target = (first_target && - (GPU_texture_format(first_target) == GPU_SRGB8_A8)); - GPU_shader_set_framebuffer_srgb_target(is_srgb_target); + else if (depth_attachement.tex == NULL) { + /* GPU_ATTACHMENT_NONE: Need to clear both targets. */ + fb->attachment_set(GPU_FB_DEPTH_STENCIL_ATTACHMENT, depth_attachement); + fb->attachment_set(GPU_FB_DEPTH_ATTACHMENT, depth_attachement); } - - gpu_framebuffer_current_set(fb); - - if (fb->dirty_flag != 0) { - if (GPU_unused_fb_slot_workaround()) { - /* XXX: Please AMD, fix this. */ - gpu_framebuffer_update_attachments_and_fill_empty_slots(fb); - } - else { - gpu_framebuffer_update_attachments(fb); - } + else { + GPUAttachmentType type = GPU_texture_stencil(depth_attachement.tex) ? + GPU_FB_DEPTH_STENCIL_ATTACHMENT : + GPU_FB_DEPTH_ATTACHMENT; + fb->attachment_set(type, depth_attachement); } - /* TODO manually check for errors? 
*/ -#if 0 - char err_out[256]; - if (!GPU_framebuffer_check_valid(fb, err_out)) { - printf("Invalid %s\n", err_out); + GPUAttachmentType type = GPU_FB_COLOR_ATTACHMENT0; + for (const GPUAttachment &attachement : color_attachments) { + fb->attachment_set(type, attachement); + ++type; } -#endif - - glViewport(0, 0, fb->width, fb->height); } -void GPU_framebuffer_restore(void) +/* ---------- Viewport & Scissor Region ----------- */ + +/* Viewport and scissor size is stored per framebuffer. + * It is only reset to its original dimensions explicitely OR when binding the framebuffer after + * modifiying its attachments. */ +void GPU_framebuffer_viewport_set(GPUFrameBuffer *gpu_fb, int x, int y, int width, int height) { - if (GPU_framebuffer_active_get() != NULL) { - glBindFramebuffer(GL_FRAMEBUFFER, GPU_framebuffer_default()); - gpu_framebuffer_current_set(NULL); - glDisable(GL_FRAMEBUFFER_SRGB); - GPU_shader_set_framebuffer_srgb_target(false); - } + int viewport_rect[4] = {x, y, width, height}; + reinterpret_cast<FrameBuffer *>(gpu_fb)->viewport_set(viewport_rect); } -bool GPU_framebuffer_bound(GPUFrameBuffer *fb) +void GPU_framebuffer_viewport_get(GPUFrameBuffer *gpu_fb, int r_viewport[4]) { - return (fb == GPU_framebuffer_active_get()) && (fb->object != 0); + reinterpret_cast<FrameBuffer *>(gpu_fb)->viewport_get(r_viewport); } -bool GPU_framebuffer_check_valid(GPUFrameBuffer *fb, char err_out[256]) +/* Reset to its attachement(s) size. 
*/ +void GPU_framebuffer_viewport_reset(GPUFrameBuffer *gpu_fb) { - if (!GPU_framebuffer_bound(fb)) { - GPU_framebuffer_bind(fb); - } - - GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); - - if (status != GL_FRAMEBUFFER_COMPLETE) { - GPU_framebuffer_restore(); - gpu_print_framebuffer_error(status, err_out); - return false; - } - - return true; + reinterpret_cast<FrameBuffer *>(gpu_fb)->viewport_reset(); } /* ---------- Framebuffer Operations ----------- */ -#define CHECK_FRAMEBUFFER_IS_BOUND(_fb) \ - BLI_assert(GPU_framebuffer_bound(_fb)); \ - UNUSED_VARS_NDEBUG(_fb); \ - ((void)0) - -/* Needs to be done after binding. */ -void GPU_framebuffer_viewport_set(GPUFrameBuffer *fb, int x, int y, int w, int h) -{ - CHECK_FRAMEBUFFER_IS_BOUND(fb); - - glViewport(x, y, w, h); -} - -void GPU_framebuffer_clear(GPUFrameBuffer *fb, +void GPU_framebuffer_clear(GPUFrameBuffer *gpu_fb, eGPUFrameBufferBits buffers, const float clear_col[4], float clear_depth, uint clear_stencil) { - CHECK_FRAMEBUFFER_IS_BOUND(fb); - - if (buffers & GPU_COLOR_BIT) { - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - glClearColor(clear_col[0], clear_col[1], clear_col[2], clear_col[3]); - } - if (buffers & GPU_DEPTH_BIT) { - glDepthMask(GL_TRUE); - glClearDepth(clear_depth); - } - if (buffers & GPU_STENCIL_BIT) { - glStencilMask(0xFF); - glClearStencil(clear_stencil); - } - - GLbitfield mask = convert_buffer_bits_to_gl(buffers); - glClear(mask); + reinterpret_cast<FrameBuffer *>(gpu_fb)->clear(buffers, clear_col, clear_depth, clear_stencil); } -/* Clear all textures bound to this framebuffer with a different color. */ -void GPU_framebuffer_multi_clear(GPUFrameBuffer *fb, const float (*clear_cols)[4]) +/* Clear all textures attached to this framebuffer with a different color. 
*/ +void GPU_framebuffer_multi_clear(GPUFrameBuffer *gpu_fb, const float (*clear_cols)[4]) { - CHECK_FRAMEBUFFER_IS_BOUND(fb); - - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - - int i_type = GPU_FB_COLOR_ATTACHMENT0; - for (int i = 0; i_type < GPU_FB_MAX_ATTACHEMENT; i++, i_type++) { - GPUAttachmentType type = static_cast<GPUAttachmentType>(i_type); - if (fb->attachments[type].tex != NULL) { - glClearBufferfv(GL_COLOR, i, clear_cols[i]); - } - } -} - -void GPU_framebuffer_read_depth(GPUFrameBuffer *fb, int x, int y, int w, int h, float *data) -{ - CHECK_FRAMEBUFFER_IS_BOUND(fb); - - GLenum type = GL_DEPTH_COMPONENT; - glReadBuffer(GL_COLOR_ATTACHMENT0); /* This is OK! */ - glReadPixels(x, y, w, h, type, GL_FLOAT, data); + reinterpret_cast<FrameBuffer *>(gpu_fb)->clear_multi(clear_cols); } -static GLenum gpu_get_gl_datatype(eGPUDataFormat format) +void GPU_clear_color(float red, float green, float blue, float alpha) { - switch (format) { - case GPU_DATA_FLOAT: - return GL_FLOAT; - case GPU_DATA_INT: - return GL_INT; - case GPU_DATA_UNSIGNED_INT: - return GL_UNSIGNED_INT; - case GPU_DATA_UNSIGNED_BYTE: - return GL_UNSIGNED_BYTE; - case GPU_DATA_UNSIGNED_INT_24_8: - return GL_UNSIGNED_INT_24_8; - case GPU_DATA_10_11_11_REV: - return GL_UNSIGNED_INT_10F_11F_11F_REV; - default: - BLI_assert(!"Unhandled data format"); - return GL_FLOAT; - } + float clear_col[4] = {red, green, blue, alpha}; + GPU_context_active_get()->active_fb->clear(GPU_COLOR_BIT, clear_col, 0.0f, 0x0); } -static GLenum gpu_get_gl_channel_type(int channels) +void GPU_clear_depth(float depth) { - switch (channels) { - case 1: - return GL_RED; - case 2: - return GL_RG; - case 3: - return GL_RGB; - case 4: - return GL_RGBA; - default: - BLI_assert(!"Wrong number of read channels"); - return GL_RED; - } + float clear_col[4] = {0}; + GPU_context_active_get()->active_fb->clear(GPU_DEPTH_BIT, clear_col, depth, 0x0); } -static void gpu_framebuffer_read_color_ex( - int x, int y, int w, int h, int channels, 
GLenum readfb, eGPUDataFormat format, float *data) +void GPU_framebuffer_read_depth(GPUFrameBuffer *gpu_fb, int x, int y, int w, int h, float *data) { - GLenum type = gpu_get_gl_channel_type(channels); - GLenum gl_format = gpu_get_gl_datatype(format); - /* TODO: needed for selection buffers to work properly, this should be handled better. */ - if (type == GL_RED && gl_format == GL_UNSIGNED_INT) { - type = GL_RED_INTEGER; - } - glReadBuffer(readfb); - glReadPixels(x, y, w, h, type, gl_format, data); + int rect[4] = {x, y, w, h}; + reinterpret_cast<FrameBuffer *>(gpu_fb)->read(GPU_DEPTH_BIT, GPU_DATA_FLOAT, rect, 1, 1, data); } -void GPU_framebuffer_read_color(GPUFrameBuffer *fb, +void GPU_framebuffer_read_color(GPUFrameBuffer *gpu_fb, int x, int y, int w, @@ -713,90 +428,62 @@ void GPU_framebuffer_read_color(GPUFrameBuffer *fb, eGPUDataFormat format, void *data) { - CHECK_FRAMEBUFFER_IS_BOUND(fb); - gpu_framebuffer_read_color_ex( - x, y, w, h, channels, GL_COLOR_ATTACHMENT0 + slot, format, (float *)data); + int rect[4] = {x, y, w, h}; + reinterpret_cast<FrameBuffer *>(gpu_fb)->read(GPU_COLOR_BIT, format, rect, channels, slot, data); +} + +/* TODO(fclem) rename to read_color. */ +void GPU_frontbuffer_read_pixels( + int x, int y, int w, int h, int channels, eGPUDataFormat format, void *data) +{ + int rect[4] = {x, y, w, h}; + GPU_context_active_get()->front_left->read(GPU_COLOR_BIT, format, rect, channels, 0, data); } /* read_slot and write_slot are only used for color buffers. */ -void GPU_framebuffer_blit(GPUFrameBuffer *fb_read, +/* TODO(fclem) port as texture operation. 
*/ +void GPU_framebuffer_blit(GPUFrameBuffer *gpufb_read, int read_slot, - GPUFrameBuffer *fb_write, + GPUFrameBuffer *gpufb_write, int write_slot, eGPUFrameBufferBits blit_buffers) { + FrameBuffer *fb_read = reinterpret_cast<FrameBuffer *>(gpufb_read); + FrameBuffer *fb_write = reinterpret_cast<FrameBuffer *>(gpufb_write); BLI_assert(blit_buffers != 0); - GPUFrameBuffer *prev_fb = GPU_framebuffer_active_get(); + FrameBuffer *prev_fb = GPU_context_active_get()->active_fb; - /* Framebuffers must be up to date. This simplify this function. */ - if (fb_read->dirty_flag != 0 || fb_read->object == 0) { - GPU_framebuffer_bind(fb_read); +#ifndef NDEBUG + GPUTexture *read_tex, *write_tex; + if (blit_buffers & (GPU_DEPTH_BIT | GPU_STENCIL_BIT)) { + read_tex = fb_read->depth_tex(); + write_tex = fb_write->depth_tex(); } - if (fb_write->dirty_flag != 0 || fb_write->object == 0) { - GPU_framebuffer_bind(fb_write); + else { + read_tex = fb_read->color_tex(read_slot); + write_tex = fb_write->color_tex(write_slot); } - const bool do_color = (blit_buffers & GPU_COLOR_BIT); - const bool do_depth = (blit_buffers & GPU_DEPTH_BIT); - const bool do_stencil = (blit_buffers & GPU_STENCIL_BIT); - - GPUTexture *read_tex = ((do_depth || do_stencil) ? - framebuffer_get_depth_tex(fb_read) : - framebuffer_get_color_tex(fb_read, read_slot)); - GPUTexture *write_tex = ((do_depth || do_stencil) ? 
- framebuffer_get_depth_tex(fb_write) : - framebuffer_get_color_tex(fb_write, read_slot)); - - if (do_depth) { + if (blit_buffers & GPU_DEPTH_BIT) { BLI_assert(GPU_texture_depth(read_tex) && GPU_texture_depth(write_tex)); BLI_assert(GPU_texture_format(read_tex) == GPU_texture_format(write_tex)); } - if (do_stencil) { + if (blit_buffers & GPU_STENCIL_BIT) { BLI_assert(GPU_texture_stencil(read_tex) && GPU_texture_stencil(write_tex)); BLI_assert(GPU_texture_format(read_tex) == GPU_texture_format(write_tex)); } if (GPU_texture_samples(write_tex) != 0 || GPU_texture_samples(read_tex) != 0) { /* Can only blit multisample textures to another texture of the same size. */ - BLI_assert((fb_read->width == fb_write->width) && (fb_read->height == fb_write->height)); + BLI_assert((GPU_texture_width(write_tex) == GPU_texture_width(read_tex)) && + (GPU_texture_height(write_tex) == GPU_texture_height(read_tex))); } +#endif - glBindFramebuffer(GL_READ_FRAMEBUFFER, fb_read->object); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fb_write->object); + fb_read->blit_to(blit_buffers, read_slot, fb_write, write_slot, 0, 0); - if (do_color) { - glReadBuffer(GL_COLOR_ATTACHMENT0 + read_slot); - glDrawBuffer(GL_COLOR_ATTACHMENT0 + write_slot); - /* XXX we messed with the glDrawBuffer, this will reset the - * glDrawBuffers the next time we bind fb_write. 
*/ - fb_write->dirty_flag = GPU_FB_DIRTY_DRAWBUFFER; - } - - GLbitfield mask = convert_buffer_bits_to_gl(blit_buffers); - - glBlitFramebuffer(0, - 0, - fb_read->width, - fb_read->height, - 0, - 0, - fb_write->width, - fb_write->height, - mask, - GL_NEAREST); - - /* Restore previous framebuffer */ - if (fb_write == prev_fb) { - GPU_framebuffer_bind(fb_write); /* To update drawbuffers */ - } - else if (prev_fb) { - glBindFramebuffer(GL_FRAMEBUFFER, prev_fb->object); - gpu_framebuffer_current_set(prev_fb); - } - else { - glBindFramebuffer(GL_FRAMEBUFFER, GPU_framebuffer_default()); - gpu_framebuffer_current_set(NULL); - } + /* FIXME(fclem) sRGB is not saved. */ + prev_fb->bind(true); } /** @@ -804,80 +491,45 @@ void GPU_framebuffer_blit(GPUFrameBuffer *fb_read, * input. This function only takes care of the correct texture handling. It execute the callback * for each texture level. */ -void GPU_framebuffer_recursive_downsample(GPUFrameBuffer *fb, +void GPU_framebuffer_recursive_downsample(GPUFrameBuffer *gpu_fb, int max_lvl, void (*callback)(void *userData, int level), void *userData) { - /* Framebuffer must be up to date and bound. This simplify this function. */ - if (GPU_framebuffer_active_get() != fb || fb->dirty_flag != 0 || fb->object == 0) { - GPU_framebuffer_bind(fb); - } - /* HACK: We make the framebuffer appear not bound in order to - * not trigger any error in GPU_texture_bind(). 
*/ - GPUFrameBuffer *prev_fb = GPU_framebuffer_active_get(); - gpu_framebuffer_current_set(NULL); - - int levels = floor(log2(max_ii(fb->width, fb->height))); - max_lvl = min_ii(max_lvl, levels); + reinterpret_cast<FrameBuffer *>(gpu_fb)->recursive_downsample(max_lvl, callback, userData); +} - int i; - int current_dim[2] = {fb->width, fb->height}; - for (i = 1; i < max_lvl + 1; i++) { - /* calculate next viewport size */ - current_dim[0] = max_ii(current_dim[0] / 2, 1); - current_dim[1] = max_ii(current_dim[1] / 2, 1); +/** \} */ - for (int i_type = 0; i_type < GPU_FB_MAX_ATTACHEMENT; i_type++) { - GPUAttachmentType type = static_cast<GPUAttachmentType>(i_type); - if (fb->attachments[type].tex != NULL) { - /* Some Intel HDXXX have issue with rendering to a mipmap that is below - * the texture GL_TEXTURE_MAX_LEVEL. So even if it not correct, in this case - * we allow GL_TEXTURE_MAX_LEVEL to be one level lower. In practice it does work! */ - int next_lvl = (GPU_mip_render_workaround()) ? i : i - 1; - /* bind next level for rendering but first restrict fetches only to previous level */ - GPUTexture *tex = fb->attachments[type].tex; - GPU_texture_bind(tex, 0); - glTexParameteri(GPU_texture_target(tex), GL_TEXTURE_BASE_LEVEL, i - 1); - glTexParameteri(GPU_texture_target(tex), GL_TEXTURE_MAX_LEVEL, next_lvl); - GPU_texture_unbind(tex); - /* copy attachment and replace miplevel. */ - GPUAttachment attachment = fb->attachments[type]; - attachment.mip = i; - gpu_framebuffer_attachment_attach(&attachment, type); - } - } - - BLI_assert(GL_FRAMEBUFFER_COMPLETE == glCheckFramebufferStatus(GL_FRAMEBUFFER)); +/* -------------------------------------------------------------------- */ +/** \name GPUOffScreen + * + * Container that holds a framebuffer and its textures. + * Might be bound to multiple contexts. 
+ * \{ */ - glViewport(0, 0, current_dim[0], current_dim[1]); - callback(userData, i); +#define FRAMEBUFFER_STACK_DEPTH 16 - if (current_dim[0] == 1 && current_dim[1] == 1) { - break; - } - } +static struct { + GPUFrameBuffer *framebuffers[FRAMEBUFFER_STACK_DEPTH]; + uint top; +} FrameBufferStack = {{0}}; - for (int i_type = 0; i_type < GPU_FB_MAX_ATTACHEMENT; i_type++) { - GPUAttachmentType type = static_cast<GPUAttachmentType>(i_type); - if (fb->attachments[type].tex != NULL) { - /* reset mipmap level range */ - GPUTexture *tex = fb->attachments[type].tex; - GPU_texture_bind(tex, 0); - glTexParameteri(GPU_texture_target(tex), GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GPU_texture_target(tex), GL_TEXTURE_MAX_LEVEL, i - 1); - GPU_texture_unbind(tex); - /* Reattach original level */ - /* NOTE: This is not necessary but this makes the FBO config - * remain in sync with the GPUFrameBuffer config. */ - gpu_framebuffer_attachment_attach(&fb->attachments[type], type); - } - } +static void gpuPushFrameBuffer(GPUFrameBuffer *fb) +{ + BLI_assert(FrameBufferStack.top < FRAMEBUFFER_STACK_DEPTH); + FrameBufferStack.framebuffers[FrameBufferStack.top] = fb; + FrameBufferStack.top++; +} - gpu_framebuffer_current_set(prev_fb); +static GPUFrameBuffer *gpuPopFrameBuffer(void) +{ + BLI_assert(FrameBufferStack.top > 0); + FrameBufferStack.top--; + return FrameBufferStack.framebuffers[FrameBufferStack.top]; } -/* GPUOffScreen */ +#undef FRAMEBUFFER_STACK_DEPTH #define MAX_CTX_FB_LEN 3 @@ -952,21 +604,14 @@ GPUOffScreen *GPU_offscreen_create( return NULL; } - gpuPushAttr(GPU_VIEWPORT_BIT); - GPUFrameBuffer *fb = gpu_offscreen_fb_get(ofs); /* check validity at the very end! 
*/ if (!GPU_framebuffer_check_valid(fb, err_out)) { GPU_offscreen_free(ofs); - gpuPopAttr(); return NULL; } - GPU_framebuffer_restore(); - - gpuPopAttr(); - return ofs; } @@ -990,23 +635,16 @@ void GPU_offscreen_free(GPUOffScreen *ofs) void GPU_offscreen_bind(GPUOffScreen *ofs, bool save) { if (save) { - gpuPushAttr((eGPUAttrMask)(GPU_SCISSOR_BIT | GPU_VIEWPORT_BIT)); GPUFrameBuffer *fb = GPU_framebuffer_active_get(); - gpuPushFrameBuffer(fb); + gpuPushFrameBuffer(reinterpret_cast<GPUFrameBuffer *>(fb)); } - GPUFrameBuffer *ofs_fb = gpu_offscreen_fb_get(ofs); - GPU_framebuffer_bind(ofs_fb); - glDisable(GL_FRAMEBUFFER_SRGB); - GPU_scissor_test(false); - GPU_shader_set_framebuffer_srgb_target(false); + reinterpret_cast<FrameBuffer *>(gpu_offscreen_fb_get(ofs))->bind(false); } void GPU_offscreen_unbind(GPUOffScreen *UNUSED(ofs), bool restore) { GPUFrameBuffer *fb = NULL; - if (restore) { - gpuPopAttr(); fb = gpuPopFrameBuffer(); } @@ -1020,33 +658,20 @@ void GPU_offscreen_unbind(GPUOffScreen *UNUSED(ofs), bool restore) void GPU_offscreen_draw_to_screen(GPUOffScreen *ofs, int x, int y) { - const int w = GPU_texture_width(ofs->color); - const int h = GPU_texture_height(ofs->color); - - GPUFrameBuffer *ofs_fb = gpu_offscreen_fb_get(ofs); - - glBindFramebuffer(GL_READ_FRAMEBUFFER, ofs_fb->object); - GLenum status = glCheckFramebufferStatus(GL_READ_FRAMEBUFFER); - - if (status == GL_FRAMEBUFFER_COMPLETE) { - glBlitFramebuffer(0, 0, w, h, x, y, x + w, y + h, GL_COLOR_BUFFER_BIT, GL_NEAREST); - } - else { - gpu_print_framebuffer_error(status, NULL); - } - - glBindFramebuffer(GL_READ_FRAMEBUFFER, GPU_framebuffer_default()); + GPUContext *ctx = GPU_context_active_get(); + FrameBuffer *ofs_fb = reinterpret_cast<FrameBuffer *>(gpu_offscreen_fb_get(ofs)); + ofs_fb->blit_to(GPU_COLOR_BIT, 0, ctx->active_fb, 0, x, y); } -void GPU_offscreen_read_pixels(GPUOffScreen *ofs, eGPUDataFormat type, void *pixels) +void GPU_offscreen_read_pixels(GPUOffScreen *ofs, eGPUDataFormat format, void 
*pixels) { + BLI_assert(ELEM(format, GPU_DATA_UNSIGNED_BYTE, GPU_DATA_FLOAT)); + const int w = GPU_texture_width(ofs->color); const int h = GPU_texture_height(ofs->color); - BLI_assert(ELEM(type, GPU_DATA_UNSIGNED_BYTE, GPU_DATA_FLOAT)); - GLenum gl_type = (type == GPU_DATA_FLOAT) ? GL_FLOAT : GL_UNSIGNED_BYTE; - - glReadPixels(0, 0, w, h, GL_RGBA, gl_type, pixels); + GPUFrameBuffer *ofs_fb = gpu_offscreen_fb_get(ofs); + GPU_framebuffer_read_color(ofs_fb, 0, 0, w, h, 4, 0, format, pixels); } int GPU_offscreen_width(const GPUOffScreen *ofs) @@ -1075,37 +700,4 @@ void GPU_offscreen_viewport_data_get(GPUOffScreen *ofs, *r_depth = ofs->depth; } -void GPU_clear_color(float red, float green, float blue, float alpha) -{ - glClearColor(red, green, blue, alpha); -} - -void GPU_clear_depth(float depth) -{ - glClearDepth(depth); -} - -void GPU_clear(eGPUFrameBufferBits flags) -{ - glClear(convert_buffer_bits_to_gl(flags)); -} - -void GPU_frontbuffer_read_pixels( - int x, int y, int w, int h, int channels, eGPUDataFormat format, void *data) -{ - gpu_framebuffer_read_color_ex(x, y, w, h, channels, GL_FRONT, format, (float *)data); -} - -/* For stereo rendering. */ -void GPU_backbuffer_bind(eGPUBackBuffer buffer) -{ - if (buffer == GPU_BACKBUFFER) { - glDrawBuffer(GL_BACK); - } - else if (buffer == GPU_BACKBUFFER_LEFT) { - glDrawBuffer(GL_BACK_LEFT); - } - else if (buffer == GPU_BACKBUFFER_RIGHT) { - glDrawBuffer(GL_BACK_RIGHT); - } -} +/** \} */
\ No newline at end of file diff --git a/source/blender/gpu/intern/gpu_framebuffer_private.hh b/source/blender/gpu/intern/gpu_framebuffer_private.hh new file mode 100644 index 00000000000..3fba0c8de92 --- /dev/null +++ b/source/blender/gpu/intern/gpu_framebuffer_private.hh @@ -0,0 +1,211 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2020 Blender Foundation. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + * + * GPU Framebuffer + * - this is a wrapper for an OpenGL framebuffer object (FBO). in practice + * multiple FBO's may be created. + * - actual FBO creation & config is deferred until GPU_framebuffer_bind or + * GPU_framebuffer_check_valid to allow creation & config while another + * opengl context is bound (since FBOs are not shared between ogl contexts). + */ + +#pragma once + +#include "BLI_math_vector.h" +#include "BLI_span.hh" + +#include "MEM_guardedalloc.h" + +#include "GPU_framebuffer.h" + +struct GPUTexture; + +typedef enum GPUAttachmentType : int { + GPU_FB_DEPTH_ATTACHMENT = 0, + GPU_FB_DEPTH_STENCIL_ATTACHMENT, + GPU_FB_COLOR_ATTACHMENT0, + GPU_FB_COLOR_ATTACHMENT1, + GPU_FB_COLOR_ATTACHMENT2, + GPU_FB_COLOR_ATTACHMENT3, + GPU_FB_COLOR_ATTACHMENT4, + GPU_FB_COLOR_ATTACHMENT5, + /* Number of maximum output slots. 
+ * We support 6 outputs for now (usually we wouldn't need more to preserve fill rate). */ + /* Keep in mind that GL max is GL_MAX_DRAW_BUFFERS and is at least 8, corresponding to + * the maximum number of COLOR attachments specified by glDrawBuffers. */ + GPU_FB_MAX_ATTACHEMENT, + + GPU_FB_MAX_COLOR_ATTACHMENT = (GPU_FB_MAX_ATTACHEMENT - GPU_FB_COLOR_ATTACHMENT0), +} GPUAttachmentType; + +inline constexpr GPUAttachmentType operator-(GPUAttachmentType a, int b) +{ + return static_cast<GPUAttachmentType>(static_cast<int>(a) - b); +} + +inline constexpr GPUAttachmentType operator+(GPUAttachmentType a, int b) +{ + return static_cast<GPUAttachmentType>(static_cast<int>(a) + b); +} + +inline GPUAttachmentType &operator++(GPUAttachmentType &a) +{ + a = a + 1; + return a; +} + +inline GPUAttachmentType &operator--(GPUAttachmentType &a) +{ + a = a - 1; + return a; +} + +namespace blender { +namespace gpu { + +#ifdef DEBUG +# define DEBUG_NAME_LEN 64 +#else +# define DEBUG_NAME_LEN 16 +#endif + +class FrameBuffer { + protected: + /** Set of texture attachements to render to. DEPTH and DEPTH_STENCIL are mutualy exclusive. */ + GPUAttachment attachments_[GPU_FB_MAX_ATTACHEMENT]; + /** Is true if internal representation need to be updated. */ + bool dirty_attachments_; + /** Size of attachement textures. */ + int width_, height_; + /** Debug name. */ + char name_[DEBUG_NAME_LEN]; + /** Framebuffer state. 
*/ + int viewport_[4]; + int scissor_[4]; + bool scissor_test_ = false; + bool dirty_state_; + + public: + FrameBuffer(const char *name); + virtual ~FrameBuffer(); + + virtual void bind(bool enabled_srgb) = 0; + virtual bool check(char err_out[256]) = 0; + virtual void clear(eGPUFrameBufferBits buffers, + const float clear_col[4], + float clear_depth, + uint clear_stencil) = 0; + virtual void clear_multi(const float (*clear_col)[4]) = 0; + + virtual void read(eGPUFrameBufferBits planes, + eGPUDataFormat format, + const int area[4], + int channel_len, + int slot, + void *r_data) = 0; + + virtual void blit_to(eGPUFrameBufferBits planes, + int src_slot, + FrameBuffer *dst, + int dst_slot, + int dst_offset_x, + int dst_offset_y) = 0; + + void attachment_set(GPUAttachmentType type, const GPUAttachment &new_attachment); + + void recursive_downsample(int max_lvl, + void (*callback)(void *userData, int level), + void *userData); + + inline void size_set(int width, int height) + { + width_ = width; + height_ = height; + dirty_state_ = true; + } + + inline void viewport_set(const int viewport[4]) + { + if (!equals_v4v4_int(viewport_, viewport)) { + copy_v4_v4_int(viewport_, viewport); + dirty_state_ = true; + } + } + + inline void scissor_set(const int scissor[4]) + { + if (!equals_v4v4_int(scissor_, scissor)) { + copy_v4_v4_int(scissor_, scissor); + dirty_state_ = true; + } + } + + inline void scissor_test_set(bool test) + { + scissor_test_ = test; + } + + inline void viewport_get(int r_viewport[4]) const + { + copy_v4_v4_int(r_viewport, viewport_); + } + + inline void scissor_get(int r_scissor[4]) const + { + copy_v4_v4_int(r_scissor, scissor_); + } + + inline bool scissor_test_get(void) const + { + return scissor_test_; + } + + inline void viewport_reset(void) + { + int viewport_rect[4] = {0, 0, width_, height_}; + viewport_set(viewport_rect); + } + + inline void scissor_reset(void) + { + int scissor_rect[4] = {0, 0, width_, height_}; + scissor_set(scissor_rect); + } + + 
inline GPUTexture *depth_tex(void) const + { + if (attachments_[GPU_FB_DEPTH_ATTACHMENT].tex) { + return attachments_[GPU_FB_DEPTH_ATTACHMENT].tex; + } + return attachments_[GPU_FB_DEPTH_STENCIL_ATTACHMENT].tex; + }; + + inline GPUTexture *color_tex(int slot) const + { + return attachments_[GPU_FB_COLOR_ATTACHMENT0 + slot].tex; + }; +}; + +#undef DEBUG_NAME_LEN + +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/intern/gpu_immediate.cc b/source/blender/gpu/intern/gpu_immediate.cc index 9cededa54f7..c5dd84ddbd0 100644 --- a/source/blender/gpu/intern/gpu_immediate.cc +++ b/source/blender/gpu/intern/gpu_immediate.cc @@ -20,147 +20,66 @@ /** \file * \ingroup gpu * - * GPU immediate mode work-alike + * Mimics old style opengl immediate mode drawing. */ #ifndef GPU_STANDALONE # include "UI_resources.h" #endif -#include "GPU_attr_binding.h" #include "GPU_immediate.h" #include "GPU_matrix.h" #include "GPU_texture.h" -#include "gpu_attr_binding_private.h" #include "gpu_context_private.hh" -#include "gpu_primitive_private.h" -#include "gpu_shader_private.h" +#include "gpu_immediate_private.hh" +#include "gpu_shader_private.hh" #include "gpu_vertex_format_private.h" -#include <stdlib.h> -#include <string.h> +using namespace blender::gpu; -typedef struct ImmediateDrawBuffer { - GLuint vbo_id; - GLubyte *buffer_data; - uint buffer_offset; - uint buffer_size; -} ImmediateDrawBuffer; - -typedef struct { - /* TODO: organize this struct by frequency of change (run-time) */ - - GPUBatch *batch; - GPUContext *context; - - /* current draw call */ - bool strict_vertex_len; - uint vertex_len; - uint buffer_bytes_mapped; - ImmediateDrawBuffer *active_buffer; - GPUPrimType prim_type; - GPUVertFormat vertex_format; - ImmediateDrawBuffer draw_buffer; - ImmediateDrawBuffer draw_buffer_strict; - - /* current vertex */ - uint vertex_idx; - GLubyte *vertex_data; - uint16_t - unassigned_attr_bits; /* which attributes of current vertex have not been given values? 
*/ - - GLuint vao_id; - - GPUShader *bound_program; - const GPUShaderInterface *shader_interface; - GPUAttrBinding attr_binding; - uint16_t prev_enabled_attr_bits; /* <-- only affects this VAO, so we're ok */ -} Immediate; - -/* size of internal buffer */ -#define DEFAULT_INTERNAL_BUFFER_SIZE (4 * 1024 * 1024) - -static bool initialized = false; -static Immediate imm; +static Immediate *imm = NULL; void immInit(void) { -#if TRUST_NO_ONE - assert(!initialized); -#endif - memset(&imm, 0, sizeof(Immediate)); - - imm.draw_buffer.vbo_id = GPU_buf_alloc(); - imm.draw_buffer.buffer_size = DEFAULT_INTERNAL_BUFFER_SIZE; - glBindBuffer(GL_ARRAY_BUFFER, imm.draw_buffer.vbo_id); - glBufferData(GL_ARRAY_BUFFER, imm.draw_buffer.buffer_size, NULL, GL_DYNAMIC_DRAW); - imm.draw_buffer_strict.vbo_id = GPU_buf_alloc(); - imm.draw_buffer_strict.buffer_size = DEFAULT_INTERNAL_BUFFER_SIZE; - glBindBuffer(GL_ARRAY_BUFFER, imm.draw_buffer_strict.vbo_id); - glBufferData(GL_ARRAY_BUFFER, imm.draw_buffer_strict.buffer_size, NULL, GL_DYNAMIC_DRAW); - - imm.prim_type = GPU_PRIM_NONE; - imm.strict_vertex_len = true; - - glBindBuffer(GL_ARRAY_BUFFER, 0); - initialized = true; + /* TODO Remove */ } void immActivate(void) { -#if TRUST_NO_ONE - assert(initialized); - assert(imm.prim_type == GPU_PRIM_NONE); /* make sure we're not between a Begin/End pair */ - assert(imm.vao_id == 0); -#endif - imm.vao_id = GPU_vao_alloc(); - imm.context = GPU_context_active_get(); + imm = GPU_context_active_get()->imm; } void immDeactivate(void) { -#if TRUST_NO_ONE - assert(initialized); - assert(imm.prim_type == GPU_PRIM_NONE); /* make sure we're not between a Begin/End pair */ - assert(imm.vao_id != 0); -#endif - GPU_vao_free(imm.vao_id, imm.context); - imm.vao_id = 0; - imm.prev_enabled_attr_bits = 0; + imm = NULL; } void immDestroy(void) { - GPU_buf_free(imm.draw_buffer.vbo_id); - GPU_buf_free(imm.draw_buffer_strict.vbo_id); - initialized = false; + /* TODO Remove */ } GPUVertFormat *immVertexFormat(void) { - 
GPU_vertformat_clear(&imm.vertex_format); - return &imm.vertex_format; + GPU_vertformat_clear(&imm->vertex_format); + return &imm->vertex_format; } void immBindShader(GPUShader *shader) { -#if TRUST_NO_ONE - assert(imm.bound_program == NULL); - assert(glIsProgram(shader->program)); -#endif + BLI_assert(imm->shader == NULL); - imm.bound_program = shader; - imm.shader_interface = shader->interface; + imm->shader = shader; - if (!imm.vertex_format.packed) { - VertexFormat_pack(&imm.vertex_format); + if (!imm->vertex_format.packed) { + VertexFormat_pack(&imm->vertex_format); + imm->enabled_attr_bits = 0xFFFFu & ~(0xFFFFu << imm->vertex_format.attr_len); } GPU_shader_bind(shader); - get_attr_locations(&imm.vertex_format, &imm.attr_binding, imm.shader_interface); - GPU_matrix_bind(imm.shader_interface); - GPU_shader_set_srgb_uniform(imm.shader_interface); + GPU_matrix_bind(shader); + GPU_shader_set_srgb_uniform(shader); } void immBindBuiltinProgram(eGPUBuiltinShader shader_id) @@ -171,22 +90,19 @@ void immBindBuiltinProgram(eGPUBuiltinShader shader_id) void immUnbindProgram(void) { -#if TRUST_NO_ONE - assert(imm.bound_program != NULL); -#endif -#if PROGRAM_NO_OPTI - glUseProgram(0); -#endif - imm.bound_program = NULL; + BLI_assert(imm->shader != NULL); + + GPU_shader_unbind(); + imm->shader = NULL; } /* XXX do not use it. Special hack to use OCIO with batch API. */ GPUShader *immGetShader(void) { - return imm.bound_program; + return imm->shader; } -#if TRUST_NO_ONE +#ifndef NDEBUG static bool vertex_count_makes_sense_for_primitive(uint vertex_len, GPUPrimType prim_type) { /* does vertex_len make sense for this primitive type? 
*/ @@ -217,285 +133,122 @@ static bool vertex_count_makes_sense_for_primitive(uint vertex_len, GPUPrimType void immBegin(GPUPrimType prim_type, uint vertex_len) { -#if TRUST_NO_ONE - assert(initialized); - assert(imm.prim_type == GPU_PRIM_NONE); /* make sure we haven't already begun */ - assert(vertex_count_makes_sense_for_primitive(vertex_len, prim_type)); - assert(imm.active_buffer == NULL); -#endif - imm.prim_type = prim_type; - imm.vertex_len = vertex_len; - imm.vertex_idx = 0; - imm.unassigned_attr_bits = imm.attr_binding.enabled_bits; - - /* how many bytes do we need for this draw call? */ - const uint bytes_needed = vertex_buffer_size(&imm.vertex_format, vertex_len); - ImmediateDrawBuffer *active_buffer = imm.strict_vertex_len ? &imm.draw_buffer_strict : - &imm.draw_buffer; - imm.active_buffer = active_buffer; - - glBindBuffer(GL_ARRAY_BUFFER, active_buffer->vbo_id); - - /* does the current buffer have enough room? */ - const uint available_bytes = active_buffer->buffer_size - active_buffer->buffer_offset; - - bool recreate_buffer = false; - if (bytes_needed > active_buffer->buffer_size) { - /* expand the internal buffer */ - active_buffer->buffer_size = bytes_needed; - recreate_buffer = true; - } - else if (bytes_needed < DEFAULT_INTERNAL_BUFFER_SIZE && - active_buffer->buffer_size > DEFAULT_INTERNAL_BUFFER_SIZE) { - /* shrink the internal buffer */ - active_buffer->buffer_size = DEFAULT_INTERNAL_BUFFER_SIZE; - recreate_buffer = true; - } - - /* ensure vertex data is aligned */ - /* Might waste a little space, but it's safe. 
*/ - const uint pre_padding = padding(active_buffer->buffer_offset, imm.vertex_format.stride); - - if (!recreate_buffer && ((bytes_needed + pre_padding) <= available_bytes)) { - active_buffer->buffer_offset += pre_padding; - } - else { - /* orphan this buffer & start with a fresh one */ - /* this method works on all platforms, old & new */ - glBufferData(GL_ARRAY_BUFFER, active_buffer->buffer_size, NULL, GL_DYNAMIC_DRAW); - - active_buffer->buffer_offset = 0; - } - - /* printf("mapping %u to %u\n", imm.buffer_offset, imm.buffer_offset + bytes_needed - 1); */ - -#if TRUST_NO_ONE - { - GLint bufsize; - glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, &bufsize); - assert(active_buffer->buffer_offset + bytes_needed <= bufsize); - } -#endif - - active_buffer->buffer_data = (GLubyte *)glMapBufferRange( - GL_ARRAY_BUFFER, - active_buffer->buffer_offset, - bytes_needed, - GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | - (imm.strict_vertex_len ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)); + BLI_assert(imm->prim_type == GPU_PRIM_NONE); /* Make sure we haven't already begun. 
*/ + BLI_assert(vertex_count_makes_sense_for_primitive(vertex_len, prim_type)); -#if TRUST_NO_ONE - assert(active_buffer->buffer_data != NULL); -#endif + imm->prim_type = prim_type; + imm->vertex_len = vertex_len; + imm->vertex_idx = 0; + imm->unassigned_attr_bits = imm->enabled_attr_bits; - imm.buffer_bytes_mapped = bytes_needed; - imm.vertex_data = active_buffer->buffer_data; + imm->vertex_data = imm->begin(); } void immBeginAtMost(GPUPrimType prim_type, uint vertex_len) { -#if TRUST_NO_ONE - assert(vertex_len > 0); -#endif - - imm.strict_vertex_len = false; + BLI_assert(vertex_len > 0); + imm->strict_vertex_len = false; immBegin(prim_type, vertex_len); } GPUBatch *immBeginBatch(GPUPrimType prim_type, uint vertex_len) { -#if TRUST_NO_ONE - assert(initialized); - assert(imm.prim_type == GPU_PRIM_NONE); /* make sure we haven't already begun */ - assert(vertex_count_makes_sense_for_primitive(vertex_len, prim_type)); -#endif - imm.prim_type = prim_type; - imm.vertex_len = vertex_len; - imm.vertex_idx = 0; - imm.unassigned_attr_bits = imm.attr_binding.enabled_bits; + BLI_assert(imm->prim_type == GPU_PRIM_NONE); /* Make sure we haven't already begun. 
*/ + BLI_assert(vertex_count_makes_sense_for_primitive(vertex_len, prim_type)); + + imm->prim_type = prim_type; + imm->vertex_len = vertex_len; + imm->vertex_idx = 0; + imm->unassigned_attr_bits = imm->enabled_attr_bits; - GPUVertBuf *verts = GPU_vertbuf_create_with_format(&imm.vertex_format); + GPUVertBuf *verts = GPU_vertbuf_create_with_format(&imm->vertex_format); GPU_vertbuf_data_alloc(verts, vertex_len); - imm.buffer_bytes_mapped = GPU_vertbuf_size_get(verts); - imm.vertex_data = verts->data; + imm->vertex_data = verts->data; - imm.batch = GPU_batch_create_ex(prim_type, verts, NULL, GPU_BATCH_OWNS_VBO); - imm.batch->phase = GPU_BATCH_BUILDING; + imm->batch = GPU_batch_create_ex(prim_type, verts, NULL, GPU_BATCH_OWNS_VBO); + imm->batch->flag |= GPU_BATCH_BUILDING; - return imm.batch; + return imm->batch; } GPUBatch *immBeginBatchAtMost(GPUPrimType prim_type, uint vertex_len) { - imm.strict_vertex_len = false; + BLI_assert(vertex_len > 0); + imm->strict_vertex_len = false; return immBeginBatch(prim_type, vertex_len); } -static void immDrawSetup(void) -{ - /* set up VAO -- can be done during Begin or End really */ - glBindVertexArray(imm.vao_id); - - /* Enable/Disable vertex attributes as needed. 
*/ - if (imm.attr_binding.enabled_bits != imm.prev_enabled_attr_bits) { - for (uint loc = 0; loc < GPU_VERT_ATTR_MAX_LEN; loc++) { - bool is_enabled = imm.attr_binding.enabled_bits & (1 << loc); - bool was_enabled = imm.prev_enabled_attr_bits & (1 << loc); - - if (is_enabled && !was_enabled) { - glEnableVertexAttribArray(loc); - } - else if (was_enabled && !is_enabled) { - glDisableVertexAttribArray(loc); - } - } - - imm.prev_enabled_attr_bits = imm.attr_binding.enabled_bits; - } - - const uint stride = imm.vertex_format.stride; - - for (uint a_idx = 0; a_idx < imm.vertex_format.attr_len; a_idx++) { - const GPUVertAttr *a = &imm.vertex_format.attrs[a_idx]; - - const uint offset = imm.active_buffer->buffer_offset + a->offset; - const GLvoid *pointer = (const GLubyte *)0 + offset; - - const uint loc = read_attr_location(&imm.attr_binding, a_idx); - const GLenum type = convert_comp_type_to_gl(static_cast<GPUVertCompType>(a->comp_type)); - - switch (a->fetch_mode) { - case GPU_FETCH_FLOAT: - case GPU_FETCH_INT_TO_FLOAT: - glVertexAttribPointer(loc, a->comp_len, type, GL_FALSE, stride, pointer); - break; - case GPU_FETCH_INT_TO_FLOAT_UNIT: - glVertexAttribPointer(loc, a->comp_len, type, GL_TRUE, stride, pointer); - break; - case GPU_FETCH_INT: - glVertexAttribIPointer(loc, a->comp_len, type, stride, pointer); - } - } - - if (GPU_matrix_dirty_get()) { - GPU_matrix_bind(imm.shader_interface); - } -} - void immEnd(void) { -#if TRUST_NO_ONE - assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ - assert(imm.active_buffer || imm.batch); -#endif + BLI_assert(imm->prim_type != GPU_PRIM_NONE); /* Make sure we're between a Begin/End pair. 
*/ + BLI_assert(imm->vertex_data || imm->batch); - uint buffer_bytes_used; - if (imm.strict_vertex_len) { -#if TRUST_NO_ONE - assert(imm.vertex_idx == imm.vertex_len); /* with all vertices defined */ -#endif - buffer_bytes_used = imm.buffer_bytes_mapped; + if (imm->strict_vertex_len) { + BLI_assert(imm->vertex_idx == imm->vertex_len); /* With all vertices defined. */ } else { -#if TRUST_NO_ONE - assert(imm.vertex_idx <= imm.vertex_len); -#endif - if (imm.vertex_idx == imm.vertex_len) { - buffer_bytes_used = imm.buffer_bytes_mapped; - } - else { -#if TRUST_NO_ONE - assert(imm.vertex_idx == 0 || - vertex_count_makes_sense_for_primitive(imm.vertex_idx, imm.prim_type)); -#endif - imm.vertex_len = imm.vertex_idx; - buffer_bytes_used = vertex_buffer_size(&imm.vertex_format, imm.vertex_len); - /* unused buffer bytes are available to the next immBegin */ - } - /* tell OpenGL what range was modified so it doesn't copy the whole mapped range */ - glFlushMappedBufferRange(GL_ARRAY_BUFFER, 0, buffer_bytes_used); + BLI_assert(imm->vertex_idx <= imm->vertex_len); + BLI_assert(imm->vertex_idx == 0 || + vertex_count_makes_sense_for_primitive(imm->vertex_idx, imm->prim_type)); } - if (imm.batch) { - if (buffer_bytes_used != imm.buffer_bytes_mapped) { - GPU_vertbuf_data_resize(imm.batch->verts[0], imm.vertex_len); + if (imm->batch) { + if (imm->vertex_idx < imm->vertex_len) { + GPU_vertbuf_data_resize(imm->batch->verts[0], imm->vertex_len); /* TODO: resize only if vertex count is much smaller */ } - GPU_batch_set_shader(imm.batch, imm.bound_program); - imm.batch->phase = GPU_BATCH_READY_TO_DRAW; - imm.batch = NULL; /* don't free, batch belongs to caller */ + GPU_batch_set_shader(imm->batch, imm->shader); + imm->batch->flag &= ~GPU_BATCH_BUILDING; + imm->batch = NULL; /* don't free, batch belongs to caller */ } else { - glUnmapBuffer(GL_ARRAY_BUFFER); - - if (imm.vertex_len > 0) { - immDrawSetup(); -#ifdef __APPLE__ - glDisable(GL_PRIMITIVE_RESTART); -#endif - 
glDrawArrays(convert_prim_type_to_gl(imm.prim_type), 0, imm.vertex_len); -#ifdef __APPLE__ - glEnable(GL_PRIMITIVE_RESTART); -#endif - } - /* These lines are causing crash on startup on some old GPU + drivers. - * They are not required so just comment them. (T55722) */ - // glBindBuffer(GL_ARRAY_BUFFER, 0); - // glBindVertexArray(0); - /* prep for next immBegin */ - imm.active_buffer->buffer_offset += buffer_bytes_used; + imm->end(); } - /* prep for next immBegin */ - imm.prim_type = GPU_PRIM_NONE; - imm.strict_vertex_len = true; - imm.active_buffer = NULL; + /* Prepare for next immBegin. */ + imm->prim_type = GPU_PRIM_NONE; + imm->strict_vertex_len = true; + imm->vertex_data = NULL; } static void setAttrValueBit(uint attr_id) { uint16_t mask = 1 << attr_id; -#if TRUST_NO_ONE - assert(imm.unassigned_attr_bits & mask); /* not already set */ -#endif - imm.unassigned_attr_bits &= ~mask; + BLI_assert(imm->unassigned_attr_bits & mask); /* not already set */ + imm->unassigned_attr_bits &= ~mask; } /* --- generic attribute functions --- */ void immAttr1f(uint attr_id, float x) { - GPUVertAttr *attr = &imm.vertex_format.attrs[attr_id]; -#if TRUST_NO_ONE - assert(attr_id < imm.vertex_format.attr_len); - assert(attr->comp_type == GPU_COMP_F32); - assert(attr->comp_len == 1); - assert(imm.vertex_idx < imm.vertex_len); - assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ -#endif + GPUVertAttr *attr = &imm->vertex_format.attrs[attr_id]; + BLI_assert(attr_id < imm->vertex_format.attr_len); + BLI_assert(attr->comp_type == GPU_COMP_F32); + BLI_assert(attr->comp_len == 1); + BLI_assert(imm->vertex_idx < imm->vertex_len); + BLI_assert(imm->prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ setAttrValueBit(attr_id); - float *data = (float *)(imm.vertex_data + attr->offset); - /* printf("%s %td %p\n", __FUNCTION__, (GLubyte*)data - imm.buffer_data, data); */ + float *data = (float *)(imm->vertex_data + attr->offset); + /* 
printf("%s %td %p\n", __FUNCTION__, (GLubyte*)data - imm->buffer_data, data); */ data[0] = x; } void immAttr2f(uint attr_id, float x, float y) { - GPUVertAttr *attr = &imm.vertex_format.attrs[attr_id]; -#if TRUST_NO_ONE - assert(attr_id < imm.vertex_format.attr_len); - assert(attr->comp_type == GPU_COMP_F32); - assert(attr->comp_len == 2); - assert(imm.vertex_idx < imm.vertex_len); - assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ -#endif + GPUVertAttr *attr = &imm->vertex_format.attrs[attr_id]; + BLI_assert(attr_id < imm->vertex_format.attr_len); + BLI_assert(attr->comp_type == GPU_COMP_F32); + BLI_assert(attr->comp_len == 2); + BLI_assert(imm->vertex_idx < imm->vertex_len); + BLI_assert(imm->prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ setAttrValueBit(attr_id); - float *data = (float *)(imm.vertex_data + attr->offset); - /* printf("%s %td %p\n", __FUNCTION__, (GLubyte*)data - imm.buffer_data, data); */ + float *data = (float *)(imm->vertex_data + attr->offset); + /* printf("%s %td %p\n", __FUNCTION__, (GLubyte*)data - imm->buffer_data, data); */ data[0] = x; data[1] = y; @@ -503,18 +256,16 @@ void immAttr2f(uint attr_id, float x, float y) void immAttr3f(uint attr_id, float x, float y, float z) { - GPUVertAttr *attr = &imm.vertex_format.attrs[attr_id]; -#if TRUST_NO_ONE - assert(attr_id < imm.vertex_format.attr_len); - assert(attr->comp_type == GPU_COMP_F32); - assert(attr->comp_len == 3); - assert(imm.vertex_idx < imm.vertex_len); - assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ -#endif + GPUVertAttr *attr = &imm->vertex_format.attrs[attr_id]; + BLI_assert(attr_id < imm->vertex_format.attr_len); + BLI_assert(attr->comp_type == GPU_COMP_F32); + BLI_assert(attr->comp_len == 3); + BLI_assert(imm->vertex_idx < imm->vertex_len); + BLI_assert(imm->prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ setAttrValueBit(attr_id); - float *data 
= (float *)(imm.vertex_data + attr->offset); - /* printf("%s %td %p\n", __FUNCTION__, (GLubyte*)data - imm.buffer_data, data); */ + float *data = (float *)(imm->vertex_data + attr->offset); + /* printf("%s %td %p\n", __FUNCTION__, (GLubyte*)data - imm->buffer_data, data); */ data[0] = x; data[1] = y; @@ -523,18 +274,16 @@ void immAttr3f(uint attr_id, float x, float y, float z) void immAttr4f(uint attr_id, float x, float y, float z, float w) { - GPUVertAttr *attr = &imm.vertex_format.attrs[attr_id]; -#if TRUST_NO_ONE - assert(attr_id < imm.vertex_format.attr_len); - assert(attr->comp_type == GPU_COMP_F32); - assert(attr->comp_len == 4); - assert(imm.vertex_idx < imm.vertex_len); - assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ -#endif + GPUVertAttr *attr = &imm->vertex_format.attrs[attr_id]; + BLI_assert(attr_id < imm->vertex_format.attr_len); + BLI_assert(attr->comp_type == GPU_COMP_F32); + BLI_assert(attr->comp_len == 4); + BLI_assert(imm->vertex_idx < imm->vertex_len); + BLI_assert(imm->prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ setAttrValueBit(attr_id); - float *data = (float *)(imm.vertex_data + attr->offset); - /* printf("%s %td %p\n", __FUNCTION__, (GLubyte*)data - imm.buffer_data, data); */ + float *data = (float *)(imm->vertex_data + attr->offset); + /* printf("%s %td %p\n", __FUNCTION__, (GLubyte*)data - imm->buffer_data, data); */ data[0] = x; data[1] = y; @@ -544,34 +293,30 @@ void immAttr4f(uint attr_id, float x, float y, float z, float w) void immAttr1u(uint attr_id, uint x) { - GPUVertAttr *attr = &imm.vertex_format.attrs[attr_id]; -#if TRUST_NO_ONE - assert(attr_id < imm.vertex_format.attr_len); - assert(attr->comp_type == GPU_COMP_U32); - assert(attr->comp_len == 1); - assert(imm.vertex_idx < imm.vertex_len); - assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ -#endif + GPUVertAttr *attr = &imm->vertex_format.attrs[attr_id]; + 
BLI_assert(attr_id < imm->vertex_format.attr_len); + BLI_assert(attr->comp_type == GPU_COMP_U32); + BLI_assert(attr->comp_len == 1); + BLI_assert(imm->vertex_idx < imm->vertex_len); + BLI_assert(imm->prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ setAttrValueBit(attr_id); - uint *data = (uint *)(imm.vertex_data + attr->offset); + uint *data = (uint *)(imm->vertex_data + attr->offset); data[0] = x; } void immAttr2i(uint attr_id, int x, int y) { - GPUVertAttr *attr = &imm.vertex_format.attrs[attr_id]; -#if TRUST_NO_ONE - assert(attr_id < imm.vertex_format.attr_len); - assert(attr->comp_type == GPU_COMP_I32); - assert(attr->comp_len == 2); - assert(imm.vertex_idx < imm.vertex_len); - assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ -#endif + GPUVertAttr *attr = &imm->vertex_format.attrs[attr_id]; + BLI_assert(attr_id < imm->vertex_format.attr_len); + BLI_assert(attr->comp_type == GPU_COMP_I32); + BLI_assert(attr->comp_len == 2); + BLI_assert(imm->vertex_idx < imm->vertex_len); + BLI_assert(imm->prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ setAttrValueBit(attr_id); - int *data = (int *)(imm.vertex_data + attr->offset); + int *data = (int *)(imm->vertex_data + attr->offset); data[0] = x; data[1] = y; @@ -579,17 +324,15 @@ void immAttr2i(uint attr_id, int x, int y) void immAttr2s(uint attr_id, short x, short y) { - GPUVertAttr *attr = &imm.vertex_format.attrs[attr_id]; -#if TRUST_NO_ONE - assert(attr_id < imm.vertex_format.attr_len); - assert(attr->comp_type == GPU_COMP_I16); - assert(attr->comp_len == 2); - assert(imm.vertex_idx < imm.vertex_len); - assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ -#endif + GPUVertAttr *attr = &imm->vertex_format.attrs[attr_id]; + BLI_assert(attr_id < imm->vertex_format.attr_len); + BLI_assert(attr->comp_type == GPU_COMP_I16); + BLI_assert(attr->comp_len == 2); + BLI_assert(imm->vertex_idx < 
imm->vertex_len); + BLI_assert(imm->prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ setAttrValueBit(attr_id); - short *data = (short *)(imm.vertex_data + attr->offset); + short *data = (short *)(imm->vertex_data + attr->offset); data[0] = x; data[1] = y; @@ -612,18 +355,16 @@ void immAttr4fv(uint attr_id, const float data[4]) void immAttr3ub(uint attr_id, uchar r, uchar g, uchar b) { - GPUVertAttr *attr = &imm.vertex_format.attrs[attr_id]; -#if TRUST_NO_ONE - assert(attr_id < imm.vertex_format.attr_len); - assert(attr->comp_type == GPU_COMP_U8); - assert(attr->comp_len == 3); - assert(imm.vertex_idx < imm.vertex_len); - assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ -#endif + GPUVertAttr *attr = &imm->vertex_format.attrs[attr_id]; + BLI_assert(attr_id < imm->vertex_format.attr_len); + BLI_assert(attr->comp_type == GPU_COMP_U8); + BLI_assert(attr->comp_len == 3); + BLI_assert(imm->vertex_idx < imm->vertex_len); + BLI_assert(imm->prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ setAttrValueBit(attr_id); - GLubyte *data = imm.vertex_data + attr->offset; - /* printf("%s %td %p\n", __FUNCTION__, data - imm.buffer_data, data); */ + uchar *data = imm->vertex_data + attr->offset; + /* printf("%s %td %p\n", __FUNCTION__, data - imm->buffer_data, data); */ data[0] = r; data[1] = g; @@ -632,18 +373,16 @@ void immAttr3ub(uint attr_id, uchar r, uchar g, uchar b) void immAttr4ub(uint attr_id, uchar r, uchar g, uchar b, uchar a) { - GPUVertAttr *attr = &imm.vertex_format.attrs[attr_id]; -#if TRUST_NO_ONE - assert(attr_id < imm.vertex_format.attr_len); - assert(attr->comp_type == GPU_COMP_U8); - assert(attr->comp_len == 4); - assert(imm.vertex_idx < imm.vertex_len); - assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ -#endif + GPUVertAttr *attr = &imm->vertex_format.attrs[attr_id]; + BLI_assert(attr_id < imm->vertex_format.attr_len); + 
BLI_assert(attr->comp_type == GPU_COMP_U8); + BLI_assert(attr->comp_len == 4); + BLI_assert(imm->vertex_idx < imm->vertex_len); + BLI_assert(imm->prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ setAttrValueBit(attr_id); - GLubyte *data = imm.vertex_data + attr->offset; - /* printf("%s %td %p\n", __FUNCTION__, data - imm.buffer_data, data); */ + uchar *data = imm->vertex_data + attr->offset; + /* printf("%s %td %p\n", __FUNCTION__, data - imm->buffer_data, data); */ data[0] = r; data[1] = g; @@ -663,45 +402,39 @@ void immAttr4ubv(uint attr_id, const uchar data[4]) void immAttrSkip(uint attr_id) { -#if TRUST_NO_ONE - assert(attr_id < imm.vertex_format.attr_len); - assert(imm.vertex_idx < imm.vertex_len); - assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ -#endif + BLI_assert(attr_id < imm->vertex_format.attr_len); + BLI_assert(imm->vertex_idx < imm->vertex_len); + BLI_assert(imm->prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ setAttrValueBit(attr_id); } static void immEndVertex(void) /* and move on to the next vertex */ { -#if TRUST_NO_ONE - assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ - assert(imm.vertex_idx < imm.vertex_len); -#endif + BLI_assert(imm->prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ + BLI_assert(imm->vertex_idx < imm->vertex_len); /* Have all attributes been assigned values? * If not, copy value from previous vertex. 
*/ - if (imm.unassigned_attr_bits) { -#if TRUST_NO_ONE - assert(imm.vertex_idx > 0); /* first vertex must have all attributes specified */ -#endif - for (uint a_idx = 0; a_idx < imm.vertex_format.attr_len; a_idx++) { - if ((imm.unassigned_attr_bits >> a_idx) & 1) { - const GPUVertAttr *a = &imm.vertex_format.attrs[a_idx]; + if (imm->unassigned_attr_bits) { + BLI_assert(imm->vertex_idx > 0); /* first vertex must have all attributes specified */ + for (uint a_idx = 0; a_idx < imm->vertex_format.attr_len; a_idx++) { + if ((imm->unassigned_attr_bits >> a_idx) & 1) { + const GPUVertAttr *a = &imm->vertex_format.attrs[a_idx]; #if 0 - printf("copying %s from vertex %u to %u\n", a->name, imm.vertex_idx - 1, imm.vertex_idx); + printf("copying %s from vertex %u to %u\n", a->name, imm->vertex_idx - 1, imm->vertex_idx); #endif - GLubyte *data = imm.vertex_data + a->offset; - memcpy(data, data - imm.vertex_format.stride, a->sz); + GLubyte *data = imm->vertex_data + a->offset; + memcpy(data, data - imm->vertex_format.stride, a->sz); /* TODO: consolidate copy of adjacent attributes */ } } } - imm.vertex_idx++; - imm.vertex_data += imm.vertex_format.stride; - imm.unassigned_attr_bits = imm.attr_binding.enabled_bits; + imm->vertex_idx++; + imm->vertex_data += imm->vertex_format.stride; + imm->unassigned_attr_bits = imm->enabled_attr_bits; } void immVertex2f(uint attr_id, float x, float y) @@ -754,123 +487,77 @@ void immVertex2iv(uint attr_id, const int data[2]) /* --- generic uniform functions --- */ -#if 0 -# if TRUST_NO_ONE -# define GET_UNIFORM \ - const GPUShaderInput *uniform = GPU_shaderinterface_uniform(imm.shader_interface, name); \ - assert(uniform); -# else -# define GET_UNIFORM \ - const GPUShaderInput *uniform = GPU_shaderinterface_uniform(imm.shader_interface, name); -# endif -#else -/* NOTE: It is possible to have uniform fully optimized out from the shader. - * In this case we can't assert failure or allow NULL-pointer dereference. 
- * TODO(sergey): How can we detect existing-but-optimized-out uniform but still - * catch typos in uniform names passed to immUniform*() functions? */ -# define GET_UNIFORM \ - const GPUShaderInput *uniform = GPU_shaderinterface_uniform(imm.shader_interface, name); \ - if (uniform == NULL) \ - return; -#endif - void immUniform1f(const char *name, float x) { - GET_UNIFORM - glUniform1f(uniform->location, x); + GPU_shader_uniform_1f(imm->shader, name, x); } void immUniform2f(const char *name, float x, float y) { - GET_UNIFORM - glUniform2f(uniform->location, x, y); + GPU_shader_uniform_2f(imm->shader, name, x, y); } void immUniform2fv(const char *name, const float data[2]) { - GET_UNIFORM - glUniform2fv(uniform->location, 1, data); + GPU_shader_uniform_2fv(imm->shader, name, data); } void immUniform3f(const char *name, float x, float y, float z) { - GET_UNIFORM - glUniform3f(uniform->location, x, y, z); + GPU_shader_uniform_3f(imm->shader, name, x, y, z); } void immUniform3fv(const char *name, const float data[3]) { - GET_UNIFORM - glUniform3fv(uniform->location, 1, data); -} - -/* can increase this limit or move to another file */ -#define MAX_UNIFORM_NAME_LEN 60 - -/* Note array index is not supported for name (i.e: "array[0]"). */ -void immUniformArray3fv(const char *name, const float *data, int count) -{ - GET_UNIFORM - glUniform3fv(uniform->location, count, data); + GPU_shader_uniform_3fv(imm->shader, name, data); } void immUniform4f(const char *name, float x, float y, float z, float w) { - GET_UNIFORM - glUniform4f(uniform->location, x, y, z, w); + GPU_shader_uniform_4f(imm->shader, name, x, y, z, w); } void immUniform4fv(const char *name, const float data[4]) { - GET_UNIFORM - glUniform4fv(uniform->location, 1, data); + GPU_shader_uniform_4fv(imm->shader, name, data); } /* Note array index is not supported for name (i.e: "array[0]"). 
*/ void immUniformArray4fv(const char *name, const float *data, int count) { - GET_UNIFORM - glUniform4fv(uniform->location, count, data); + GPU_shader_uniform_4fv_array(imm->shader, name, count, (float(*)[4])data); } void immUniformMatrix4fv(const char *name, const float data[4][4]) { - GET_UNIFORM - glUniformMatrix4fv(uniform->location, 1, GL_FALSE, (float *)data); + GPU_shader_uniform_mat4(imm->shader, name, data); } void immUniform1i(const char *name, int x) { - GET_UNIFORM - glUniform1i(uniform->location, x); -} - -void immUniform4iv(const char *name, const int data[4]) -{ - GET_UNIFORM - glUniform4iv(uniform->location, 1, data); + GPU_shader_uniform_1i(imm->shader, name, x); } void immBindTexture(const char *name, GPUTexture *tex) { - GET_UNIFORM - GPU_texture_bind(tex, uniform->binding); + int binding = GPU_shader_get_texture_binding(imm->shader, name); + GPU_texture_bind(tex, binding); } void immBindTextureSampler(const char *name, GPUTexture *tex, eGPUSamplerState state) { - GET_UNIFORM - GPU_texture_bind_ex(tex, state, uniform->binding, true); + int binding = GPU_shader_get_texture_binding(imm->shader, name); + GPU_texture_bind_ex(tex, state, binding, true); } /* --- convenience functions for setting "uniform vec4 color" --- */ void immUniformColor4f(float r, float g, float b, float a) { - int32_t uniform_loc = GPU_shaderinterface_uniform_builtin(imm.shader_interface, - GPU_UNIFORM_COLOR); + int32_t uniform_loc = GPU_shader_get_builtin_uniform(imm->shader, GPU_UNIFORM_COLOR); BLI_assert(uniform_loc != -1); - glUniform4f(uniform_loc, r, g, b, a); + float data[4] = {r, g, b, a}; + GPU_shader_uniform_vector(imm->shader, uniform_loc, 4, 1, data); } void immUniformColor4fv(const float rgba[4]) @@ -893,8 +580,6 @@ void immUniformColor3fvAlpha(const float rgb[3], float a) immUniformColor4f(rgb[0], rgb[1], rgb[2], a); } -/* TODO: v-- treat as sRGB? 
--v */ - void immUniformColor3ub(uchar r, uchar g, uchar b) { const float scale = 1.0f / 255.0f; diff --git a/source/blender/gpu/intern/gpu_immediate_private.hh b/source/blender/gpu/intern/gpu_immediate_private.hh new file mode 100644 index 00000000000..aa99fb9a438 --- /dev/null +++ b/source/blender/gpu/intern/gpu_immediate_private.hh @@ -0,0 +1,66 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + * + * Mimics old style opengl immediate mode drawing. + */ + +#pragma once + +#include "GPU_batch.h" +#include "GPU_primitive.h" +#include "GPU_shader.h" +#include "GPU_vertex_format.h" + +namespace blender::gpu { + +class Immediate { + public: + /** Pointer to the mapped buffer data for the currect vertex. */ + uchar *vertex_data = NULL; + /** Current vertex index. */ + uint vertex_idx = 0; + /** Length of the buffer in vertices. */ + uint vertex_len = 0; + /** Which attributes of current vertex have not been given values? */ + uint16_t unassigned_attr_bits = 0; + /** Attributes that needs to be set. One bit per attribute. */ + uint16_t enabled_attr_bits = 0; + + /** Current draw call specification. 
*/ + GPUPrimType prim_type = GPU_PRIM_NONE; + GPUVertFormat vertex_format; + GPUShader *shader = NULL; + /** Enforce strict vertex count (disabled when using immBeginAtMost). */ + bool strict_vertex_len = true; + + /** Batch in construction when using immBeginBatch. */ + GPUBatch *batch = NULL; + + public: + Immediate(){}; + virtual ~Immediate(){}; + + virtual uchar *begin(void) = 0; + virtual void end(void) = 0; +}; + +} // namespace blender::gpu
\ No newline at end of file diff --git a/source/blender/gpu/intern/gpu_init_exit.c b/source/blender/gpu/intern/gpu_init_exit.c index ba0da95eb9d..4cb43db9bce 100644 --- a/source/blender/gpu/intern/gpu_init_exit.c +++ b/source/blender/gpu/intern/gpu_init_exit.c @@ -53,11 +53,6 @@ void GPU_init(void) gpu_codegen_init(); gpu_material_library_init(); - gpu_framebuffer_module_init(); - - if (G.debug & G_DEBUG_GPU) { - gpu_debug_init(); - } gpu_batch_init(); @@ -82,11 +77,6 @@ void GPU_exit(void) gpu_batch_exit(); - if (G.debug & G_DEBUG_GPU) { - gpu_debug_exit(); - } - - gpu_framebuffer_module_exit(); gpu_material_library_exit(); gpu_codegen_exit(); diff --git a/source/blender/gpu/intern/gpu_material.c b/source/blender/gpu/intern/gpu_material.c index 8df1f94238a..1016e766140 100644 --- a/source/blender/gpu/intern/gpu_material.c +++ b/source/blender/gpu/intern/gpu_material.c @@ -47,7 +47,7 @@ #include "GPU_material.h" #include "GPU_shader.h" #include "GPU_texture.h" -#include "GPU_uniformbuffer.h" +#include "GPU_uniform_buffer.h" #include "DRW_engine.h" @@ -88,11 +88,11 @@ struct GPUMaterial { eGPUMatFlag flag; /* Used by 2.8 pipeline */ - GPUUniformBuffer *ubo; /* UBOs for shader uniforms. */ + GPUUniformBuf *ubo; /* UBOs for shader uniforms. */ /* Eevee SSS */ - GPUUniformBuffer *sss_profile; /* UBO containing SSS profile. */ - GPUTexture *sss_tex_profile; /* Texture containing SSS profile. */ + GPUUniformBuf *sss_profile; /* UBO containing SSS profile. */ + GPUTexture *sss_tex_profile; /* Texture containing SSS profile. 
*/ float sss_enabled; float sss_radii[3]; int sss_samples; @@ -174,13 +174,13 @@ static void gpu_material_free_single(GPUMaterial *material) GPU_pass_release(material->pass); } if (material->ubo != NULL) { - GPU_uniformbuffer_free(material->ubo); + GPU_uniformbuf_free(material->ubo); } if (material->sss_tex_profile != NULL) { GPU_texture_free(material->sss_tex_profile); } if (material->sss_profile != NULL) { - GPU_uniformbuffer_free(material->sss_profile); + GPU_uniformbuf_free(material->sss_profile); } if (material->coba_tex != NULL) { GPU_texture_free(material->coba_tex); @@ -220,7 +220,7 @@ Material *GPU_material_get_material(GPUMaterial *material) return material->ma; } -GPUUniformBuffer *GPU_material_uniform_buffer_get(GPUMaterial *material) +GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material) { return material->ubo; } @@ -232,7 +232,12 @@ GPUUniformBuffer *GPU_material_uniform_buffer_get(GPUMaterial *material) */ void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs) { - material->ubo = GPU_uniformbuffer_dynamic_create(inputs, NULL); +#ifndef NDEBUG + const char *name = material->name; +#else + const char *name = "Material"; +#endif + material->ubo = GPU_uniformbuf_create_from_list(inputs, name); } /* Eevee Subsurface scattering. 
*/ @@ -507,13 +512,13 @@ void GPU_material_sss_profile_create(GPUMaterial *material, /* Update / Create UBO */ if (material->sss_profile == NULL) { - material->sss_profile = GPU_uniformbuffer_create(sizeof(GPUSssKernelData), NULL, NULL); + material->sss_profile = GPU_uniformbuf_create(sizeof(GPUSssKernelData)); } } -struct GPUUniformBuffer *GPU_material_sss_profile_get(GPUMaterial *material, - int sample_len, - GPUTexture **tex_profile) +struct GPUUniformBuf *GPU_material_sss_profile_get(GPUMaterial *material, + int sample_len, + GPUTexture **tex_profile) { if (!material->sss_enabled) { return NULL; @@ -530,7 +535,7 @@ struct GPUUniformBuffer *GPU_material_sss_profile_get(GPUMaterial *material, compute_sss_kernel(&kd, material->sss_radii, sample_len, material->sss_falloff, sharpness); /* Update / Create UBO */ - GPU_uniformbuffer_update(material->sss_profile, &kd); + GPU_uniformbuf_update(material->sss_profile, &kd); /* Update / Create Tex */ float *translucence_profile; @@ -555,9 +560,9 @@ struct GPUUniformBuffer *GPU_material_sss_profile_get(GPUMaterial *material, return material->sss_profile; } -struct GPUUniformBuffer *GPU_material_create_sss_profile_ubo(void) +struct GPUUniformBuf *GPU_material_create_sss_profile_ubo(void) { - return GPU_uniformbuffer_create(sizeof(GPUSssKernelData), NULL, NULL); + return GPU_uniformbuf_create(sizeof(GPUSssKernelData)); } #undef SSS_EXPONENT @@ -735,7 +740,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene, gpu_node_graph_free(&mat->graph); } - /* Only free after GPU_pass_shader_get where GPUUniformBuffer + /* Only free after GPU_pass_shader_get where GPUUniformBuf * read data from the local tree. 
*/ ntreeFreeLocalTree(localtree); MEM_freeN(localtree); diff --git a/source/blender/gpu/intern/gpu_matrix.cc b/source/blender/gpu/intern/gpu_matrix.cc index 5d8d77bbf1c..cdb6d303588 100644 --- a/source/blender/gpu/intern/gpu_matrix.cc +++ b/source/blender/gpu/intern/gpu_matrix.cc @@ -21,8 +21,6 @@ * \ingroup gpu */ -#include "GPU_shader_interface.h" - #include "gpu_context_private.hh" #include "gpu_matrix_private.h" @@ -643,47 +641,44 @@ const float (*GPU_matrix_normal_inverse_get(float m[3][3]))[3] return m; } -void GPU_matrix_bind(const GPUShaderInterface *shaderface) +void GPU_matrix_bind(GPUShader *shader) { /* set uniform values to matrix stack values * call this before a draw call if desired matrices are dirty * call glUseProgram before this, as glUniform expects program to be bound */ + int32_t MV = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODELVIEW); + int32_t P = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_PROJECTION); + int32_t MVP = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MVP); - int32_t MV = GPU_shaderinterface_uniform_builtin(shaderface, GPU_UNIFORM_MODELVIEW); - int32_t P = GPU_shaderinterface_uniform_builtin(shaderface, GPU_UNIFORM_PROJECTION); - int32_t MVP = GPU_shaderinterface_uniform_builtin(shaderface, GPU_UNIFORM_MVP); - - int32_t N = GPU_shaderinterface_uniform_builtin(shaderface, GPU_UNIFORM_NORMAL); - int32_t MV_inv = GPU_shaderinterface_uniform_builtin(shaderface, GPU_UNIFORM_MODELVIEW_INV); - int32_t P_inv = GPU_shaderinterface_uniform_builtin(shaderface, GPU_UNIFORM_PROJECTION_INV); + int32_t N = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_NORMAL); + int32_t MV_inv = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODELVIEW_INV); + int32_t P_inv = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_PROJECTION_INV); - /* XXX(fclem) this works but this assumes shader is unused inside GPU_shader_uniform_vector. 
*/ - GPUShader *sh = NULL; if (MV != -1) { - GPU_shader_uniform_vector(sh, MV, 16, 1, (const float *)GPU_matrix_model_view_get(NULL)); + GPU_shader_uniform_vector(shader, MV, 16, 1, (const float *)GPU_matrix_model_view_get(NULL)); } if (P != -1) { - GPU_shader_uniform_vector(sh, P, 16, 1, (const float *)GPU_matrix_projection_get(NULL)); + GPU_shader_uniform_vector(shader, P, 16, 1, (const float *)GPU_matrix_projection_get(NULL)); } if (MVP != -1) { GPU_shader_uniform_vector( - sh, MVP, 16, 1, (const float *)GPU_matrix_model_view_projection_get(NULL)); + shader, MVP, 16, 1, (const float *)GPU_matrix_model_view_projection_get(NULL)); } if (N != -1) { - GPU_shader_uniform_vector(sh, N, 9, 1, (const float *)GPU_matrix_normal_get(NULL)); + GPU_shader_uniform_vector(shader, N, 9, 1, (const float *)GPU_matrix_normal_get(NULL)); } if (MV_inv != -1) { Mat4 m; GPU_matrix_model_view_get(m); invert_m4(m); - GPU_shader_uniform_vector(sh, MV_inv, 16, 1, (const float *)m); + GPU_shader_uniform_vector(shader, MV_inv, 16, 1, (const float *)m); } if (P_inv != -1) { Mat4 m; GPU_matrix_projection_get(m); invert_m4(m); - GPU_shader_uniform_vector(sh, P_inv, 16, 1, (const float *)m); + GPU_shader_uniform_vector(shader, P_inv, 16, 1, (const float *)m); } gpu_matrix_state_active_set_dirty(false); @@ -734,8 +729,8 @@ float GPU_polygon_offset_calc(const float (*winmat)[4], float viewdist, float di #else static float depth_fac = 0.0f; if (depth_fac == 0.0f) { - int depthbits; - glGetIntegerv(GL_DEPTH_BITS, &depthbits); + /* Hardcode for 24 bit precision. 
*/ + int depthbits = 24; depth_fac = 1.0f / (float)((1 << depthbits) - 1); } offs = (-1.0 / winmat[2][2]) * dist * depth_fac; diff --git a/source/blender/gpu/intern/gpu_node_graph.c b/source/blender/gpu/intern/gpu_node_graph.c index 81cf2d69f4d..1b8a5e20240 100644 --- a/source/blender/gpu/intern/gpu_node_graph.c +++ b/source/blender/gpu/intern/gpu_node_graph.c @@ -592,10 +592,10 @@ bool GPU_stack_link(GPUMaterial *material, return true; } -GPUNodeLink *GPU_uniformbuffer_link_out(GPUMaterial *mat, - bNode *node, - GPUNodeStack *stack, - const int index) +GPUNodeLink *GPU_uniformbuf_link_out(GPUMaterial *mat, + bNode *node, + GPUNodeStack *stack, + const int index) { return gpu_uniformbuffer_link(mat, node, stack, index, SOCK_OUT); } diff --git a/source/blender/gpu/intern/gpu_private.h b/source/blender/gpu/intern/gpu_private.h index ef96bedae4a..505ac3b0278 100644 --- a/source/blender/gpu/intern/gpu_private.h +++ b/source/blender/gpu/intern/gpu_private.h @@ -32,14 +32,6 @@ void gpu_platform_exit(void); void gpu_extensions_init(void); void gpu_extensions_exit(void); -/* gpu_debug.c */ -void gpu_debug_init(void); -void gpu_debug_exit(void); - -/* gpu_framebuffer.c */ -void gpu_framebuffer_module_init(void); -void gpu_framebuffer_module_exit(void); - /* gpu_pbvh.c */ void gpu_pbvh_init(void); void gpu_pbvh_exit(void); diff --git a/source/blender/gpu/intern/gpu_select_pick.c b/source/blender/gpu/intern/gpu_select_pick.c index 0f6f29fab40..c3ccb68a998 100644 --- a/source/blender/gpu/intern/gpu_select_pick.c +++ b/source/blender/gpu/intern/gpu_select_pick.c @@ -27,6 +27,7 @@ #include <stdlib.h> #include <string.h> +#include "GPU_framebuffer.h" #include "GPU_glew.h" #include "GPU_immediate.h" #include "GPU_select.h" @@ -282,6 +283,12 @@ typedef struct GPUPickState { uint *rect_id; } nearest; }; + + /* Previous state to restore after drawing. 
*/ + int viewport[4]; + int scissor[4]; + eGPUWriteMask write_mask; + eGPUDepthTest depth_test; } GPUPickState; static GPUPickState g_pick_state = {0}; @@ -304,17 +311,18 @@ void gpu_select_pick_begin(uint (*buffer)[4], uint bufsize, const rcti *input, c /* Restrict OpenGL operations for when we don't have cache */ if (ps->is_cached == false) { - gpuPushAttr(GPU_DEPTH_BUFFER_BIT | GPU_VIEWPORT_BIT); + ps->write_mask = GPU_write_mask_get(); + ps->depth_test = GPU_depth_test_get(); + GPU_scissor_get(ps->scissor); /* disable writing to the framebuffer */ GPU_color_mask(false, false, false, false); - glEnable(GL_DEPTH_TEST); - glDepthMask(GL_TRUE); + GPU_depth_mask(true); /* Always use #GL_LEQUAL even though GPU_SELECT_PICK_ALL always clears the buffer. This is * because individual objects themselves might have sections that overlap and we need these * to have the correct distance information. */ - glDepthFunc(GL_LEQUAL); + GPU_depth_test(GPU_DEPTH_LESS_EQUAL); float viewport[4]; GPU_viewport_size_get_f(viewport); @@ -331,7 +339,7 @@ void gpu_select_pick_begin(uint (*buffer)[4], uint bufsize, const rcti *input, c /* It's possible we don't want to clear depth buffer, * so existing elements are masked by current z-buffer. */ - glClear(GL_DEPTH_BUFFER_BIT); + GPU_clear_depth(1.0f); /* scratch buffer (read new values here) */ ps->gl.rect_depth_test = depth_buf_malloc(rect_len); @@ -510,8 +518,13 @@ bool gpu_select_pick_load_id(uint id, bool end) SWAP(DepthBufCache *, ps->gl.rect_depth, ps->gl.rect_depth_test); if (g_pick_state.mode == GPU_SELECT_PICK_ALL) { + /* (fclem) This is to be on the safe side. I don't know if this is required. 
*/ + bool prev_depth_mask = GPU_depth_mask_get(); /* we want new depths every time */ - glClear(GL_DEPTH_BUFFER_BIT); + GPU_depth_mask(true); + GPU_clear_depth(1.0f); + + GPU_depth_mask(prev_depth_mask); } } } @@ -535,8 +548,9 @@ uint gpu_select_pick_end(void) /* force finishing last pass */ gpu_select_pick_load_id(ps->gl.prev_id, true); } - gpuPopAttr(); - GPU_color_mask(true, true, true, true); + GPU_write_mask(ps->write_mask); + GPU_depth_test(ps->depth_test); + GPU_viewport(UNPACK4(ps->viewport)); } /* assign but never free directly since it may be in cache */ diff --git a/source/blender/gpu/intern/gpu_select_sample_query.c b/source/blender/gpu/intern/gpu_select_sample_query.c index f67c9c36a6b..45d52b22664 100644 --- a/source/blender/gpu/intern/gpu_select_sample_query.c +++ b/source/blender/gpu/intern/gpu_select_sample_query.c @@ -26,6 +26,7 @@ #include <stdlib.h> +#include "GPU_framebuffer.h" #include "GPU_glew.h" #include "GPU_select.h" #include "GPU_state.h" @@ -60,6 +61,12 @@ typedef struct GPUQueryState { char mode; uint index; int oldhits; + + /* Previous state to restore after drawing. 
*/ + int viewport[4]; + int scissor[4]; + eGPUWriteMask write_mask; + eGPUDepthTest depth_test; } GPUQueryState; static GPUQueryState g_query_state = {0}; @@ -67,8 +74,6 @@ static GPUQueryState g_query_state = {0}; void gpu_select_query_begin( uint (*buffer)[4], uint bufsize, const rcti *input, char mode, int oldhits) { - float viewport[4]; - g_query_state.query_issued = false; g_query_state.active_query = 0; g_query_state.num_of_queries = 0; @@ -86,36 +91,42 @@ void gpu_select_query_begin( "gpu selection ids"); glGenQueries(g_query_state.num_of_queries, g_query_state.queries); - gpuPushAttr(GPU_DEPTH_BUFFER_BIT | GPU_VIEWPORT_BIT | GPU_SCISSOR_BIT); - /* disable writing to the framebuffer */ - GPU_color_mask(false, false, false, false); + g_query_state.write_mask = GPU_write_mask_get(); + g_query_state.depth_test = GPU_depth_test_get(); + GPU_scissor_get(g_query_state.scissor); + GPU_viewport_size_get_i(g_query_state.viewport); + + /* Write to color buffer. Seems to fix issues with selecting alpha blended geom (see T7997). */ + GPU_color_mask(true, true, true, true); /* In order to save some fill rate we minimize the viewport using rect. * We need to get the region of the viewport so that our geometry doesn't * get rejected before the depth test. Should probably cull rect against * the viewport but this is a rare case I think */ - GPU_viewport_size_get_f(viewport); - GPU_viewport(viewport[0], viewport[1], BLI_rcti_size_x(input), BLI_rcti_size_y(input)); + + int viewport[4] = { + UNPACK2(g_query_state.viewport), BLI_rcti_size_x(input), BLI_rcti_size_y(input)}; + + GPU_viewport(UNPACK4(viewport)); + GPU_scissor(UNPACK4(viewport)); + GPU_scissor_test(false); /* occlusion queries operates on fragments that pass tests and since we are interested on all * objects in the view frustum independently of their order, we need to disable the depth test */ if (mode == GPU_SELECT_ALL) { /* glQueries on Windows+Intel drivers only works with depth testing turned on. 
* See T62947 for details */ - glEnable(GL_DEPTH_TEST); - glDepthFunc(GL_ALWAYS); - glDepthMask(GL_TRUE); + GPU_depth_test(GPU_DEPTH_ALWAYS); + GPU_depth_mask(true); } else if (mode == GPU_SELECT_NEAREST_FIRST_PASS) { - glClear(GL_DEPTH_BUFFER_BIT); - glEnable(GL_DEPTH_TEST); - glDepthMask(GL_TRUE); - glDepthFunc(GL_LEQUAL); + GPU_depth_test(GPU_DEPTH_LESS_EQUAL); + GPU_depth_mask(true); + GPU_clear_depth(1.0f); } else if (mode == GPU_SELECT_NEAREST_SECOND_PASS) { - glEnable(GL_DEPTH_TEST); - glDepthMask(GL_FALSE); - glDepthFunc(GL_EQUAL); + GPU_depth_test(GPU_DEPTH_EQUAL); + GPU_depth_mask(false); } } @@ -204,8 +215,10 @@ uint gpu_select_query_end(void) glDeleteQueries(g_query_state.num_of_queries, g_query_state.queries); MEM_freeN(g_query_state.queries); MEM_freeN(g_query_state.id); - gpuPopAttr(); - GPU_color_mask(true, true, true, true); + + GPU_write_mask(g_query_state.write_mask); + GPU_depth_test(g_query_state.depth_test); + GPU_viewport(UNPACK4(g_query_state.viewport)); return hits; } diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc index 03b7d5402f5..360feb9a8c8 100644 --- a/source/blender/gpu/intern/gpu_shader.cc +++ b/source/blender/gpu/intern/gpu_shader.cc @@ -29,6 +29,7 @@ #include "BLI_string.h" #include "BLI_string_utils.h" #include "BLI_utildefines.h" +#include "BLI_vector.hh" #include "BKE_appdir.h" #include "BKE_global.h" @@ -40,259 +41,229 @@ #include "GPU_platform.h" #include "GPU_shader.h" #include "GPU_texture.h" -#include "GPU_uniformbuffer.h" +#include "GPU_uniform_buffer.h" -#include "gpu_shader_private.h" +#include "gpu_backend.hh" +#include "gpu_context_private.hh" +#include "gpu_shader_private.hh" extern "C" char datatoc_gpu_shader_colorspace_lib_glsl[]; -/* Adjust these constants as needed. 
*/ -#define MAX_DEFINE_LENGTH 256 -#define MAX_EXT_DEFINE_LENGTH 512 +using namespace blender; +using namespace blender::gpu; -#ifndef NDEBUG -static uint g_shaderid = 0; -#endif +/** Opaque type hidding blender::gpu::Shader */ +struct GPUShader { + char _pad[1]; +}; /* -------------------------------------------------------------------- */ -/** \name Convenience functions +/** \name Debug functions * \{ */ -static void shader_print_errors(const char *task, const char *log, const char **code, int totcode) +void Shader::print_errors(Span<const char *> sources, char *log, const char *stage) { - int line = 1; - - fprintf(stderr, "GPUShader: %s error:\n", task); - - for (int i = 0; i < totcode; i++) { - const char *c, *pos, *end = code[i] + strlen(code[i]); - - if (G.debug & G_DEBUG) { - fprintf(stderr, "===== shader string %d ====\n", i + 1); - - c = code[i]; - while ((c < end) && (pos = strchr(c, '\n'))) { - fprintf(stderr, "%2d ", line); - fwrite(c, (pos + 1) - c, 1, stderr); - c = pos + 1; - line++; + const char line_prefix[] = " | "; + char *sources_combined = BLI_string_join_arrayN((const char **)sources.data(), sources.size()); + + fprintf(stderr, "GPUShader: Compilation Log : %s : %s\n", this->name, stage); + + char *log_line = log, *line_end; + char *error_line_number_end; + int error_line, error_char, last_error_line = -2, last_error_char = -1; + bool found_line_id = false; + while ((line_end = strchr(log_line, '\n'))) { + /* Skip empty lines. */ + if (line_end == log_line) { + log_line++; + continue; + } + /* 0 = error, 1 = warning. */ + int type = -1; + /* Skip ERROR: or WARNING:. */ + const char *prefix[] = {"ERROR", "WARNING"}; + for (int i = 0; i < ARRAY_SIZE(prefix); i++) { + if (STREQLEN(log_line, prefix[i], strlen(prefix[i]))) { + log_line += strlen(prefix[i]); + type = i; + break; } - - fprintf(stderr, "%s", c); } - } - - fprintf(stderr, "%s\n", log); + /* Skip whitespaces and separators. 
*/ + while (ELEM(log_line[0], ':', '(', ' ')) { + log_line++; + } + /* Parse error line & char numbers. */ + error_line = error_char = -1; + if (log_line[0] >= '0' && log_line[0] <= '9') { + error_line = (int)strtol(log_line, &error_line_number_end, 10); + /* Try to fetch the error caracter (not always available). */ + if (ELEM(error_line_number_end[0], '(', ':') && error_line_number_end[1] != ' ') { + error_char = (int)strtol(error_line_number_end + 1, &log_line, 10); + } + else { + log_line = error_line_number_end; + } + /* There can be a 3rd number (case of mesa driver). */ + if (ELEM(log_line[0], '(', ':') && log_line[1] >= '0' && log_line[1] <= '9') { + error_line = error_char; + error_char = (int)strtol(log_line + 1, &error_line_number_end, 10); + log_line = error_line_number_end; + } + } + /* Skip whitespaces and separators. */ + while (ELEM(log_line[0], ':', ')', ' ')) { + log_line++; + } + if (error_line == -1) { + found_line_id = false; + } + const char *src_line = sources_combined; + if ((error_line != -1) && (error_char != -1)) { + if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_UNIX, GPU_DRIVER_OFFICIAL)) { + /* source:line */ + int error_source = error_line; + if (error_source < sources.size()) { + src_line = sources[error_source]; + error_line = error_char; + error_char = -1; + } + } + else if (GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_OFFICIAL) || + GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_MAC, GPU_DRIVER_OFFICIAL)) { + /* 0:line */ + error_line = error_char; + error_char = -1; + } + else { + /* line:char */ + } + } + /* Separate from previous block. */ + if (last_error_line != error_line) { + fprintf(stderr, "\033[90m%s\033[39m\n", line_prefix); + } + else if (error_char != last_error_char) { + fprintf(stderr, "%s\n", line_prefix); + } + /* Print line from the source file that is producing the error. 
*/ + if ((error_line != -1) && (error_line != last_error_line || error_char != last_error_char)) { + const char *src_line_end = src_line; + found_line_id = false; + /* error_line is 1 based in this case. */ + int src_line_index = 1; + while ((src_line_end = strchr(src_line, '\n'))) { + if (src_line_index == error_line) { + found_line_id = true; + break; + } + /* Continue to next line. */ + src_line = src_line_end + 1; + src_line_index++; + } + /* Print error source. */ + if (found_line_id) { + if (error_line != last_error_line) { + fprintf(stderr, "%5d | ", src_line_index); + } + else { + fprintf(stderr, line_prefix); + } + fwrite(src_line, (src_line_end + 1) - src_line, 1, stderr); + /* Print char offset. */ + fprintf(stderr, line_prefix); + if (error_char != -1) { + for (int i = 0; i < error_char; i++) { + fprintf(stderr, " "); + } + fprintf(stderr, "^"); + } + fprintf(stderr, "\n"); + } + } + fprintf(stderr, line_prefix); + /* Skip to message. Avoid redundant info. */ + const char *keywords[] = {"error", "warning"}; + for (int i = 0; i < ARRAY_SIZE(prefix); i++) { + if (STREQLEN(log_line, keywords[i], strlen(keywords[i]))) { + log_line += strlen(keywords[i]); + type = i; + break; + } + } + /* Skip and separators. */ + while (ELEM(log_line[0], ':', ')')) { + log_line++; + } + if (type == 0) { + fprintf(stderr, "\033[31;1mError\033[0;2m: "); + } + else if (type == 1) { + fprintf(stderr, "\033[33;1mWarning\033[0;2m: "); + } + /* Print the error itself. */ + fprintf(stderr, "\033[2m"); + fwrite(log_line, (line_end + 1) - log_line, 1, stderr); + fprintf(stderr, "\033[0m"); + /* Continue to next line. 
*/ + log_line = line_end + 1; + last_error_line = error_line; + last_error_char = error_char; + } + fprintf(stderr, "\n"); + MEM_freeN(sources_combined); } -static const char *gpu_shader_version(void) +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Creation / Destruction + * \{ */ + +Shader::Shader(const char *sh_name) { - return "#version 330\n"; + BLI_strncpy(this->name, sh_name, sizeof(this->name)); } -static void gpu_shader_standard_extensions(char defines[MAX_EXT_DEFINE_LENGTH]) +Shader::~Shader() { - /* enable extensions for features that are not part of our base GLSL version - * don't use an extension for something already available! - */ - - if (GLEW_ARB_texture_gather) { - /* There is a bug on older Nvidia GPU where GL_ARB_texture_gather - * is reported to be supported but yield a compile error (see T55802). */ - if (!GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_ANY) || GLEW_VERSION_4_0) { - strcat(defines, "#extension GL_ARB_texture_gather: enable\n"); - - /* Some drivers don't agree on GLEW_ARB_texture_gather and the actual support in the - * shader so double check the preprocessor define (see T56544). 
*/ - if (!GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_ANY) && !GLEW_VERSION_4_0) { - strcat(defines, "#ifdef GL_ARB_texture_gather\n"); - strcat(defines, "# define GPU_ARB_texture_gather\n"); - strcat(defines, "#endif\n"); - } - else { - strcat(defines, "#define GPU_ARB_texture_gather\n"); - } - } - } - if (GLEW_ARB_texture_query_lod) { - /* a #version 400 feature, but we use #version 330 maximum so use extension */ - strcat(defines, "#extension GL_ARB_texture_query_lod: enable\n"); - } - if (GLEW_ARB_shader_draw_parameters) { - strcat(defines, "#extension GL_ARB_shader_draw_parameters : enable\n"); - strcat(defines, "#define GPU_ARB_shader_draw_parameters\n"); - } - if (GPU_arb_texture_cube_map_array_is_supported()) { - strcat(defines, "#extension GL_ARB_texture_cube_map_array : enable\n"); - strcat(defines, "#define GPU_ARB_texture_cube_map_array\n"); - } + delete interface; } -static void gpu_shader_standard_defines(char defines[MAX_DEFINE_LENGTH]) +static void standard_defines(Vector<const char *> &sources) { + BLI_assert(sources.size() == 0); + /* Version needs to be first. Exact values will be added by implementation. 
*/ + sources.append("version"); /* some useful defines to detect GPU type */ if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_ANY, GPU_DRIVER_ANY)) { - strcat(defines, "#define GPU_ATI\n"); - if (GPU_crappy_amd_driver()) { - strcat(defines, "#define GPU_DEPRECATED_AMD_DRIVER\n"); - } + sources.append("#define GPU_ATI\n"); } else if (GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_ANY)) { - strcat(defines, "#define GPU_NVIDIA\n"); + sources.append("#define GPU_NVIDIA\n"); } else if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_ANY, GPU_DRIVER_ANY)) { - strcat(defines, "#define GPU_INTEL\n"); + sources.append("#define GPU_INTEL\n"); } - /* some useful defines to detect OS type */ if (GPU_type_matches(GPU_DEVICE_ANY, GPU_OS_WIN, GPU_DRIVER_ANY)) { - strcat(defines, "#define OS_WIN\n"); + sources.append("#define OS_WIN\n"); } else if (GPU_type_matches(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY)) { - strcat(defines, "#define OS_MAC\n"); + sources.append("#define OS_MAC\n"); } else if (GPU_type_matches(GPU_DEVICE_ANY, GPU_OS_UNIX, GPU_DRIVER_ANY)) { - strcat(defines, "#define OS_UNIX\n"); - } - - float derivatives_factors[2]; - GPU_get_dfdy_factors(derivatives_factors); - if (derivatives_factors[0] == 1.0f) { - strcat(defines, "#define DFDX_SIGN 1.0\n"); - } - else { - strcat(defines, "#define DFDX_SIGN -1.0\n"); - } - - if (derivatives_factors[1] == 1.0f) { - strcat(defines, "#define DFDY_SIGN 1.0\n"); - } - else { - strcat(defines, "#define DFDY_SIGN -1.0\n"); - } -} - -#define DEBUG_SHADER_NONE "" -#define DEBUG_SHADER_VERTEX "vert" -#define DEBUG_SHADER_FRAGMENT "frag" -#define DEBUG_SHADER_GEOMETRY "geom" - -/** - * Dump GLSL shaders to disk - * - * This is used for profiling shader performance externally and debug if shader code is correct. - * If called with no code, it simply bumps the shader index, so different shaders for the same - * program share the same index. 
- */ -static void gpu_dump_shaders(const char **code, const int num_shaders, const char *extension) -{ - if ((G.debug & G_DEBUG_GPU_SHADERS) == 0) { - return; - } - - /* We use the same shader index for shaders in the same program. - * So we call this function once before calling for the individual shaders. */ - static int shader_index = 0; - if (code == NULL) { - shader_index++; - BLI_assert(STREQ(DEBUG_SHADER_NONE, extension)); - return; - } - - /* Determine the full path of the new shader. */ - char shader_path[FILE_MAX]; - - char file_name[512] = {'\0'}; - sprintf(file_name, "%04d.%s", shader_index, extension); - - BLI_join_dirfile(shader_path, sizeof(shader_path), BKE_tempdir_session(), file_name); - - /* Write shader to disk. */ - FILE *f = fopen(shader_path, "w"); - if (f == NULL) { - printf("Error writing to file: %s\n", shader_path); - } - for (int j = 0; j < num_shaders; j++) { - fprintf(f, "%s", code[j]); - } - fclose(f); - printf("Shader file written to disk: %s\n", shader_path); -} - -/** \} */ - -/* -------------------------------------------------------------------- */ -/** \name Creation / Destruction - * \{ */ - -GPUShader *GPU_shader_create(const char *vertexcode, - const char *fragcode, - const char *geocode, - const char *libcode, - const char *defines, - const char *shname) -{ - return GPU_shader_create_ex( - vertexcode, fragcode, geocode, libcode, defines, GPU_SHADER_TFB_NONE, NULL, 0, shname); -} - -GPUShader *GPU_shader_create_from_python(const char *vertexcode, - const char *fragcode, - const char *geocode, - const char *libcode, - const char *defines) -{ - char *libcodecat = NULL; - - if (libcode == NULL) { - libcode = datatoc_gpu_shader_colorspace_lib_glsl; - } - else { - libcode = libcodecat = BLI_strdupcat(libcode, datatoc_gpu_shader_colorspace_lib_glsl); + sources.append("#define OS_UNIX\n"); } - GPUShader *sh = GPU_shader_create_ex( - vertexcode, fragcode, geocode, libcode, defines, GPU_SHADER_TFB_NONE, NULL, 0, NULL); - - 
MEM_SAFE_FREE(libcodecat); - return sh; -} - -GPUShader *GPU_shader_load_from_binary(const char *binary, - const int binary_format, - const int binary_len, - const char *shname) -{ - BLI_assert(GL_ARB_get_program_binary); - int success; - int program = glCreateProgram(); - - glProgramBinary(program, binary_format, binary, binary_len); - glGetProgramiv(program, GL_LINK_STATUS, &success); - - if (success) { - glUseProgram(program); - - GPUShader *shader = (GPUShader *)MEM_callocN(sizeof(*shader), __func__); - shader->interface = GPU_shaderinterface_create(program); - shader->program = program; - -#ifndef NDEBUG - BLI_snprintf(shader->name, sizeof(shader->name), "%s_%u", shname, g_shaderid++); -#else - UNUSED_VARS(shname); -#endif - - return shader; + if (GPU_crappy_amd_driver()) { + sources.append("#define GPU_DEPRECATED_AMD_DRIVER\n"); } - - glDeleteProgram(program); - return NULL; } -GPUShader *GPU_shader_create_ex(const char *vertexcode, +GPUShader *GPU_shader_create_ex(const char *vertcode, const char *fragcode, - const char *geocode, + const char *geomcode, const char *libcode, const char *defines, const eGPUShaderTFBType tf_type, @@ -300,223 +271,113 @@ GPUShader *GPU_shader_create_ex(const char *vertexcode, const int tf_count, const char *shname) { - GLint status; - GLchar log[5000]; - GLsizei length = 0; - GPUShader *shader; - char standard_defines[MAX_DEFINE_LENGTH] = ""; - char standard_extensions[MAX_EXT_DEFINE_LENGTH] = ""; - - shader = (GPUShader *)MEM_callocN(sizeof(GPUShader), "GPUShader"); - gpu_dump_shaders(NULL, 0, DEBUG_SHADER_NONE); - -#ifndef NDEBUG - BLI_snprintf(shader->name, sizeof(shader->name), "%s_%u", shname, g_shaderid++); -#else - UNUSED_VARS(shname); -#endif - /* At least a vertex shader and a fragment shader are required. 
*/ - BLI_assert((fragcode != NULL) && (vertexcode != NULL)); - - if (vertexcode) { - shader->vertex = glCreateShader(GL_VERTEX_SHADER); - } - if (fragcode) { - shader->fragment = glCreateShader(GL_FRAGMENT_SHADER); - } - if (geocode) { - shader->geometry = glCreateShader(GL_GEOMETRY_SHADER); - } - - shader->program = glCreateProgram(); - - if (!shader->program || (vertexcode && !shader->vertex) || (fragcode && !shader->fragment) || - (geocode && !shader->geometry)) { - fprintf(stderr, "GPUShader, object creation failed.\n"); - GPU_shader_free(shader); - return NULL; - } + BLI_assert((fragcode != NULL) && (vertcode != NULL)); - gpu_shader_standard_defines(standard_defines); - gpu_shader_standard_extensions(standard_extensions); + Shader *shader = GPUBackend::get()->shader_alloc(shname); - if (vertexcode) { - const char *source[7]; - /* custom limit, may be too small, beware */ - int num_source = 0; - - source[num_source++] = gpu_shader_version(); - source[num_source++] = - "#define GPU_VERTEX_SHADER\n" - "#define IN_OUT out\n"; - source[num_source++] = standard_extensions; - source[num_source++] = standard_defines; - - if (geocode) { - source[num_source++] = "#define USE_GEOMETRY_SHADER\n"; + if (vertcode) { + Vector<const char *> sources; + standard_defines(sources); + sources.append("#define GPU_VERTEX_SHADER\n"); + sources.append("#define IN_OUT out\n"); + if (geomcode) { + sources.append("#define USE_GEOMETRY_SHADER\n"); } if (defines) { - source[num_source++] = defines; + sources.append(defines); } - source[num_source++] = vertexcode; - - gpu_dump_shaders(source, num_source, DEBUG_SHADER_VERTEX); - - glAttachShader(shader->program, shader->vertex); - glShaderSource(shader->vertex, num_source, source, NULL); + sources.append(vertcode); - glCompileShader(shader->vertex); - glGetShaderiv(shader->vertex, GL_COMPILE_STATUS, &status); - - if (!status) { - glGetShaderInfoLog(shader->vertex, sizeof(log), &length, log); - shader_print_errors("compile", log, source, 
num_source); - - GPU_shader_free(shader); - return NULL; - } + shader->vertex_shader_from_glsl(sources); } if (fragcode) { - const char *source[8]; - int num_source = 0; - - source[num_source++] = gpu_shader_version(); - source[num_source++] = - "#define GPU_FRAGMENT_SHADER\n" - "#define IN_OUT in\n"; - source[num_source++] = standard_extensions; - source[num_source++] = standard_defines; - - if (geocode) { - source[num_source++] = "#define USE_GEOMETRY_SHADER\n"; + Vector<const char *> sources; + standard_defines(sources); + sources.append("#define GPU_FRAGMENT_SHADER\n"); + sources.append("#define IN_OUT in\n"); + if (geomcode) { + sources.append("#define USE_GEOMETRY_SHADER\n"); } if (defines) { - source[num_source++] = defines; + sources.append(defines); } if (libcode) { - source[num_source++] = libcode; + sources.append(libcode); } - source[num_source++] = fragcode; - - gpu_dump_shaders(source, num_source, DEBUG_SHADER_FRAGMENT); + sources.append(fragcode); - glAttachShader(shader->program, shader->fragment); - glShaderSource(shader->fragment, num_source, source, NULL); - - glCompileShader(shader->fragment); - glGetShaderiv(shader->fragment, GL_COMPILE_STATUS, &status); - - if (!status) { - glGetShaderInfoLog(shader->fragment, sizeof(log), &length, log); - shader_print_errors("compile", log, source, num_source); - - GPU_shader_free(shader); - return NULL; - } + shader->fragment_shader_from_glsl(sources); } - if (geocode) { - const char *source[6]; - int num_source = 0; - - source[num_source++] = gpu_shader_version(); - source[num_source++] = "#define GPU_GEOMETRY_SHADER\n"; - source[num_source++] = standard_extensions; - source[num_source++] = standard_defines; - + if (geomcode) { + Vector<const char *> sources; + standard_defines(sources); + sources.append("#define GPU_GEOMETRY_SHADER\n"); if (defines) { - source[num_source++] = defines; + sources.append(defines); } - source[num_source++] = geocode; - - gpu_dump_shaders(source, num_source, 
DEBUG_SHADER_GEOMETRY); - - glAttachShader(shader->program, shader->geometry); - glShaderSource(shader->geometry, num_source, source, NULL); + sources.append(geomcode); - glCompileShader(shader->geometry); - glGetShaderiv(shader->geometry, GL_COMPILE_STATUS, &status); - - if (!status) { - glGetShaderInfoLog(shader->geometry, sizeof(log), &length, log); - shader_print_errors("compile", log, source, num_source); - - GPU_shader_free(shader); - return NULL; - } + shader->geometry_shader_from_glsl(sources); } - if (tf_names != NULL) { - glTransformFeedbackVaryings(shader->program, tf_count, tf_names, GL_INTERLEAVED_ATTRIBS); - /* Primitive type must be setup */ + if (tf_names != NULL && tf_count > 0) { BLI_assert(tf_type != GPU_SHADER_TFB_NONE); - shader->feedback_transform_type = tf_type; + shader->transform_feedback_names_set(Span<const char *>(tf_names, tf_count), tf_type); } - glLinkProgram(shader->program); - glGetProgramiv(shader->program, GL_LINK_STATUS, &status); - if (!status) { - glGetProgramInfoLog(shader->program, sizeof(log), &length, log); - /* print attached shaders in pipeline order */ - if (defines) { - shader_print_errors("linking", log, &defines, 1); - } - if (vertexcode) { - shader_print_errors("linking", log, &vertexcode, 1); - } - if (geocode) { - shader_print_errors("linking", log, &geocode, 1); - } - if (libcode) { - shader_print_errors("linking", log, &libcode, 1); - } - if (fragcode) { - shader_print_errors("linking", log, &fragcode, 1); - } - - GPU_shader_free(shader); + if (!shader->finalize()) { + delete shader; return NULL; - } + }; - glUseProgram(shader->program); - shader->interface = GPU_shaderinterface_create(shader->program); + return reinterpret_cast<GPUShader *>(shader); +} - return shader; +void GPU_shader_free(GPUShader *shader) +{ + delete reinterpret_cast<Shader *>(shader); } -#undef DEBUG_SHADER_GEOMETRY -#undef DEBUG_SHADER_FRAGMENT -#undef DEBUG_SHADER_VERTEX -#undef DEBUG_SHADER_NONE +/** \} */ -void GPU_shader_free(GPUShader 
*shader) +/* -------------------------------------------------------------------- */ +/** \name Creation utils + * \{ */ + +GPUShader *GPU_shader_create(const char *vertcode, + const char *fragcode, + const char *geomcode, + const char *libcode, + const char *defines, + const char *shname) { -#if 0 /* Would be nice to have, but for now the Deferred compilation \ - * does not have a GPUContext. */ - BLI_assert(GPU_context_active_get() != NULL); -#endif - BLI_assert(shader); + return GPU_shader_create_ex( + vertcode, fragcode, geomcode, libcode, defines, GPU_SHADER_TFB_NONE, NULL, 0, shname); +} - if (shader->vertex) { - glDeleteShader(shader->vertex); - } - if (shader->geometry) { - glDeleteShader(shader->geometry); - } - if (shader->fragment) { - glDeleteShader(shader->fragment); +GPUShader *GPU_shader_create_from_python(const char *vertcode, + const char *fragcode, + const char *geomcode, + const char *libcode, + const char *defines) +{ + char *libcodecat = NULL; + + if (libcode == NULL) { + libcode = datatoc_gpu_shader_colorspace_lib_glsl; } - if (shader->program) { - glDeleteProgram(shader->program); + else { + libcode = libcodecat = BLI_strdupcat(libcode, datatoc_gpu_shader_colorspace_lib_glsl); } - if (shader->interface) { - GPU_shaderinterface_discard(shader->interface); - } + GPUShader *sh = GPU_shader_create_ex( + vertcode, fragcode, geomcode, libcode, defines, GPU_SHADER_TFB_NONE, NULL, 0, "pyGPUShader"); - MEM_freeN(shader); + MEM_SAFE_FREE(libcodecat); + return sh; } static const char *string_join_array_maybe_alloc(const char **str_arr, bool *r_is_alloc) @@ -565,7 +426,7 @@ static const char *string_join_array_maybe_alloc(const char **str_arr, bool *r_i * \endcode */ struct GPUShader *GPU_shader_create_from_arrays_impl( - const struct GPU_ShaderCreateFromArray_Params *params) + const struct GPU_ShaderCreateFromArray_Params *params, const char *func, int line) { struct { const char *str; @@ -577,8 +438,11 @@ struct GPUShader 
*GPU_shader_create_from_arrays_impl( str_dst[i].str = string_join_array_maybe_alloc(str_src[i], &str_dst[i].is_alloc); } + char name[64]; + BLI_snprintf(name, sizeof(name), "%s_%d", func, line); + GPUShader *sh = GPU_shader_create( - str_dst[0].str, str_dst[1].str, str_dst[2].str, NULL, str_dst[3].str, __func__); + str_dst[0].str, str_dst[1].str, str_dst[2].str, NULL, str_dst[3].str, name); for (int i = 0; i < ARRAY_SIZE(str_dst); i++) { if (str_dst[i].is_alloc) { @@ -594,52 +458,51 @@ struct GPUShader *GPU_shader_create_from_arrays_impl( /** \name Binding * \{ */ -void GPU_shader_bind(GPUShader *shader) +void GPU_shader_bind(GPUShader *gpu_shader) { - BLI_assert(shader && shader->program); + Shader *shader = reinterpret_cast<Shader *>(gpu_shader); + + GPUContext *ctx = GPU_context_active_get(); + + if (ctx->shader != shader) { + ctx->shader = shader; + shader->bind(); + GPU_matrix_bind(gpu_shader); + GPU_shader_set_srgb_uniform(gpu_shader); + } - glUseProgram(shader->program); - GPU_matrix_bind(shader->interface); - GPU_shader_set_srgb_uniform(shader->interface); + if (GPU_matrix_dirty_get()) { + GPU_matrix_bind(gpu_shader); + } } void GPU_shader_unbind(void) { - glUseProgram(0); +#ifndef NDEBUG + GPUContext *ctx = GPU_context_active_get(); + if (ctx->shader) { + reinterpret_cast<Shader *>(ctx->shader)->unbind(); + } + ctx->shader = NULL; +#endif } /** \} */ /* -------------------------------------------------------------------- */ /** \name Transform feedback + * + * TODO(fclem) Should be replaced by compute shaders. 
* \{ */ -bool GPU_shader_transform_feedback_enable(GPUShader *shader, uint vbo_id) +bool GPU_shader_transform_feedback_enable(GPUShader *shader, GPUVertBuf *vertbuf) { - if (shader->feedback_transform_type == GPU_SHADER_TFB_NONE) { - return false; - } - - glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, vbo_id); - - switch (shader->feedback_transform_type) { - case GPU_SHADER_TFB_POINTS: - glBeginTransformFeedback(GL_POINTS); - return true; - case GPU_SHADER_TFB_LINES: - glBeginTransformFeedback(GL_LINES); - return true; - case GPU_SHADER_TFB_TRIANGLES: - glBeginTransformFeedback(GL_TRIANGLES); - return true; - default: - return false; - } + return reinterpret_cast<Shader *>(shader)->transform_feedback_enable(vertbuf); } -void GPU_shader_transform_feedback_disable(GPUShader *UNUSED(shader)) +void GPU_shader_transform_feedback_disable(GPUShader *shader) { - glEndTransformFeedback(); + reinterpret_cast<Shader *>(shader)->transform_feedback_disable(); } /** \} */ @@ -650,50 +513,49 @@ void GPU_shader_transform_feedback_disable(GPUShader *UNUSED(shader)) int GPU_shader_get_uniform(GPUShader *shader, const char *name) { - BLI_assert(shader && shader->program); - const GPUShaderInput *uniform = GPU_shaderinterface_uniform(shader->interface, name); + ShaderInterface *interface = reinterpret_cast<Shader *>(shader)->interface; + const ShaderInput *uniform = interface->uniform_get(name); return uniform ? 
uniform->location : -1; } int GPU_shader_get_builtin_uniform(GPUShader *shader, int builtin) { - BLI_assert(shader && shader->program); - return GPU_shaderinterface_uniform_builtin(shader->interface, - static_cast<GPUUniformBuiltin>(builtin)); + ShaderInterface *interface = reinterpret_cast<Shader *>(shader)->interface; + return interface->uniform_builtin((GPUUniformBuiltin)builtin); } int GPU_shader_get_builtin_block(GPUShader *shader, int builtin) { - BLI_assert(shader && shader->program); - return GPU_shaderinterface_block_builtin(shader->interface, - static_cast<GPUUniformBlockBuiltin>(builtin)); + ShaderInterface *interface = reinterpret_cast<Shader *>(shader)->interface; + return interface->ubo_builtin((GPUUniformBlockBuiltin)builtin); } +/* DEPRECATED. */ int GPU_shader_get_uniform_block(GPUShader *shader, const char *name) { - BLI_assert(shader && shader->program); - const GPUShaderInput *ubo = GPU_shaderinterface_ubo(shader->interface, name); + ShaderInterface *interface = reinterpret_cast<Shader *>(shader)->interface; + const ShaderInput *ubo = interface->ubo_get(name); return ubo ? ubo->location : -1; } int GPU_shader_get_uniform_block_binding(GPUShader *shader, const char *name) { - BLI_assert(shader && shader->program); - const GPUShaderInput *ubo = GPU_shaderinterface_ubo(shader->interface, name); + ShaderInterface *interface = reinterpret_cast<Shader *>(shader)->interface; + const ShaderInput *ubo = interface->ubo_get(name); return ubo ? ubo->binding : -1; } int GPU_shader_get_texture_binding(GPUShader *shader, const char *name) { - BLI_assert(shader && shader->program); - const GPUShaderInput *tex = GPU_shaderinterface_uniform(shader->interface, name); + ShaderInterface *interface = reinterpret_cast<Shader *>(shader)->interface; + const ShaderInput *tex = interface->uniform_get(name); return tex ? 
tex->binding : -1; } int GPU_shader_get_attribute(GPUShader *shader, const char *name) { - BLI_assert(shader && shader->program); - const GPUShaderInput *attr = GPU_shaderinterface_attr(shader->interface, name); + ShaderInterface *interface = reinterpret_cast<Shader *>(shader)->interface; + const ShaderInput *attr = interface->attr_get(name); return attr ? attr->location : -1; } @@ -704,108 +566,109 @@ int GPU_shader_get_attribute(GPUShader *shader, const char *name) * \{ */ /* Clement : Temp */ -int GPU_shader_get_program(GPUShader *shader) +int GPU_shader_get_program(GPUShader *UNUSED(shader)) { - return (int)shader->program; + /* TODO fixme */ + return (int)0; } -char *GPU_shader_get_binary(GPUShader *shader, uint *r_binary_format, int *r_binary_len) +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Uniforms setters + * \{ */ + +void GPU_shader_uniform_vector( + GPUShader *shader, int loc, int len, int arraysize, const float *value) { - BLI_assert(GLEW_ARB_get_program_binary); - char *r_binary; - int binary_len = 0; + reinterpret_cast<Shader *>(shader)->uniform_float(loc, len, arraysize, value); +} - glGetProgramiv(shader->program, GL_PROGRAM_BINARY_LENGTH, &binary_len); - r_binary = (char *)MEM_mallocN(binary_len, __func__); - glGetProgramBinary(shader->program, binary_len, NULL, r_binary_format, r_binary); +void GPU_shader_uniform_vector_int( + GPUShader *shader, int loc, int len, int arraysize, const int *value) +{ + reinterpret_cast<Shader *>(shader)->uniform_int(loc, len, arraysize, value); +} - if (r_binary_len) { - *r_binary_len = binary_len; - } +void GPU_shader_uniform_int(GPUShader *shader, int location, int value) +{ + GPU_shader_uniform_vector_int(shader, location, 1, 1, &value); +} - return r_binary; +void GPU_shader_uniform_float(GPUShader *shader, int location, float value) +{ + GPU_shader_uniform_vector(shader, location, 1, 1, &value); } -/** \} */ +void GPU_shader_uniform_1i(GPUShader *sh, const 
char *name, int value) +{ + const int loc = GPU_shader_get_uniform(sh, name); + GPU_shader_uniform_int(sh, loc, value); +} -/* -------------------------------------------------------------------- */ -/** \name Uniforms setters - * \{ */ +void GPU_shader_uniform_1b(GPUShader *sh, const char *name, bool value) +{ + GPU_shader_uniform_1i(sh, name, value ? 1 : 0); +} -void GPU_shader_uniform_float(GPUShader *UNUSED(shader), int location, float value) +void GPU_shader_uniform_2f(GPUShader *sh, const char *name, float x, float y) { - if (location == -1) { - return; - } + const float data[2] = {x, y}; + GPU_shader_uniform_2fv(sh, name, data); +} - glUniform1f(location, value); +void GPU_shader_uniform_3f(GPUShader *sh, const char *name, float x, float y, float z) +{ + const float data[3] = {x, y, z}; + GPU_shader_uniform_3fv(sh, name, data); } -void GPU_shader_uniform_vector( - GPUShader *UNUSED(shader), int location, int length, int arraysize, const float *value) +void GPU_shader_uniform_4f(GPUShader *sh, const char *name, float x, float y, float z, float w) { - if (location == -1 || value == NULL) { - return; - } + const float data[4] = {x, y, z, w}; + GPU_shader_uniform_4fv(sh, name, data); +} - switch (length) { - case 1: - glUniform1fv(location, arraysize, value); - break; - case 2: - glUniform2fv(location, arraysize, value); - break; - case 3: - glUniform3fv(location, arraysize, value); - break; - case 4: - glUniform4fv(location, arraysize, value); - break; - case 9: - glUniformMatrix3fv(location, arraysize, 0, value); - break; - case 16: - glUniformMatrix4fv(location, arraysize, 0, value); - break; - default: - BLI_assert(0); - break; - } +void GPU_shader_uniform_1f(GPUShader *sh, const char *name, float x) +{ + const int loc = GPU_shader_get_uniform(sh, name); + GPU_shader_uniform_float(sh, loc, x); } -void GPU_shader_uniform_int(GPUShader *UNUSED(shader), int location, int value) +void GPU_shader_uniform_2fv(GPUShader *sh, const char *name, const float data[2]) { 
- if (location == -1) { - return; - } + const int loc = GPU_shader_get_uniform(sh, name); + GPU_shader_uniform_vector(sh, loc, 2, 1, data); +} - glUniform1i(location, value); +void GPU_shader_uniform_3fv(GPUShader *sh, const char *name, const float data[3]) +{ + const int loc = GPU_shader_get_uniform(sh, name); + GPU_shader_uniform_vector(sh, loc, 3, 1, data); } -void GPU_shader_uniform_vector_int( - GPUShader *UNUSED(shader), int location, int length, int arraysize, const int *value) +void GPU_shader_uniform_4fv(GPUShader *sh, const char *name, const float data[4]) { - if (location == -1) { - return; - } + const int loc = GPU_shader_get_uniform(sh, name); + GPU_shader_uniform_vector(sh, loc, 4, 1, data); +} - switch (length) { - case 1: - glUniform1iv(location, arraysize, value); - break; - case 2: - glUniform2iv(location, arraysize, value); - break; - case 3: - glUniform3iv(location, arraysize, value); - break; - case 4: - glUniform4iv(location, arraysize, value); - break; - default: - BLI_assert(0); - break; - } +void GPU_shader_uniform_mat4(GPUShader *sh, const char *name, const float data[4][4]) +{ + const int loc = GPU_shader_get_uniform(sh, name); + GPU_shader_uniform_vector(sh, loc, 16, 1, (const float *)data); +} + +void GPU_shader_uniform_2fv_array(GPUShader *sh, const char *name, int len, const float (*val)[2]) +{ + const int loc = GPU_shader_get_uniform(sh, name); + GPU_shader_uniform_vector(sh, loc, 2, len, (const float *)val); +} + +void GPU_shader_uniform_4fv_array(GPUShader *sh, const char *name, int len, const float (*val)[4]) +{ + const int loc = GPU_shader_get_uniform(sh, name); + GPU_shader_uniform_vector(sh, loc, 4, len, (const float *)val); } /** \} */ @@ -823,11 +686,11 @@ void GPU_shader_uniform_vector_int( static int g_shader_builtin_srgb_transform = 0; -void GPU_shader_set_srgb_uniform(const GPUShaderInterface *interface) +void GPU_shader_set_srgb_uniform(GPUShader *shader) { - int32_t loc = GPU_shaderinterface_uniform_builtin(interface, 
GPU_UNIFORM_SRGB_TRANSFORM); + int32_t loc = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_SRGB_TRANSFORM); if (loc != -1) { - glUniform1i(loc, g_shader_builtin_srgb_transform); + GPU_shader_uniform_vector_int(shader, loc, 1, 1, &g_shader_builtin_srgb_transform); } } diff --git a/source/blender/gpu/intern/gpu_shader_builtin.c b/source/blender/gpu/intern/gpu_shader_builtin.c index 9c0692b76e2..ed95a236da5 100644 --- a/source/blender/gpu/intern/gpu_shader_builtin.c +++ b/source/blender/gpu/intern/gpu_shader_builtin.c @@ -40,9 +40,7 @@ #include "GPU_platform.h" #include "GPU_shader.h" #include "GPU_texture.h" -#include "GPU_uniformbuffer.h" - -#include "gpu_shader_private.h" +#include "GPU_uniform_buffer.h" /* Adjust these constants as needed. */ #define MAX_DEFINE_LENGTH 256 diff --git a/source/blender/gpu/intern/gpu_shader_interface.cc b/source/blender/gpu/intern/gpu_shader_interface.cc index 4511d4a199d..dc59dca9f78 100644 --- a/source/blender/gpu/intern/gpu_shader_interface.cc +++ b/source/blender/gpu/intern/gpu_shader_interface.cc @@ -23,157 +23,41 @@ * GPU shader interface (C --> GLSL) */ -#include "BKE_global.h" - -#include "BLI_bitmap.h" -#include "BLI_math_base.h" - #include "MEM_guardedalloc.h" -#include "GPU_shader_interface.h" - -#include "gpu_batch_private.h" -#include "gpu_context_private.hh" - -#include <stddef.h> -#include <stdlib.h> -#include <string.h> +#include "BLI_span.hh" +#include "BLI_vector.hh" -#define DEBUG_SHADER_INTERFACE 0 - -#if DEBUG_SHADER_INTERFACE -# include <stdio.h> -#endif - -static const char *BuiltinUniform_name(GPUUniformBuiltin u) -{ - switch (u) { - case GPU_UNIFORM_MODEL: - return "ModelMatrix"; - case GPU_UNIFORM_VIEW: - return "ViewMatrix"; - case GPU_UNIFORM_MODELVIEW: - return "ModelViewMatrix"; - case GPU_UNIFORM_PROJECTION: - return "ProjectionMatrix"; - case GPU_UNIFORM_VIEWPROJECTION: - return "ViewProjectionMatrix"; - case GPU_UNIFORM_MVP: - return "ModelViewProjectionMatrix"; - - case GPU_UNIFORM_MODEL_INV: - 
return "ModelMatrixInverse"; - case GPU_UNIFORM_VIEW_INV: - return "ViewMatrixInverse"; - case GPU_UNIFORM_MODELVIEW_INV: - return "ModelViewMatrixInverse"; - case GPU_UNIFORM_PROJECTION_INV: - return "ProjectionMatrixInverse"; - case GPU_UNIFORM_VIEWPROJECTION_INV: - return "ViewProjectionMatrixInverse"; - - case GPU_UNIFORM_NORMAL: - return "NormalMatrix"; - case GPU_UNIFORM_ORCO: - return "OrcoTexCoFactors"; - case GPU_UNIFORM_CLIPPLANES: - return "WorldClipPlanes"; - - case GPU_UNIFORM_COLOR: - return "color"; - case GPU_UNIFORM_BASE_INSTANCE: - return "baseInstance"; - case GPU_UNIFORM_RESOURCE_CHUNK: - return "resourceChunk"; - case GPU_UNIFORM_RESOURCE_ID: - return "resourceId"; - case GPU_UNIFORM_SRGB_TRANSFORM: - return "srgbTarget"; - - default: - return NULL; - } -} +#include "gpu_shader_interface.hh" -static const char *BuiltinUniformBlock_name(GPUUniformBlockBuiltin u) -{ - switch (u) { - case GPU_UNIFORM_BLOCK_VIEW: - return "viewBlock"; - case GPU_UNIFORM_BLOCK_MODEL: - return "modelBlock"; - case GPU_UNIFORM_BLOCK_INFO: - return "infoBlock"; - default: - return NULL; - } -} +namespace blender::gpu { -GPU_INLINE bool match(const char *a, const char *b) +ShaderInterface::ShaderInterface(void) { - return STREQ(a, b); + /* TODO(fclem) add unique ID for debugging. */ } -GPU_INLINE uint hash_string(const char *str) +ShaderInterface::~ShaderInterface(void) { - uint i = 0, c; - while ((c = *str++)) { - i = i * 37 + c; - } - return i; + /* Free memory used by name_buffer. 
*/ + MEM_freeN(name_buffer_); + MEM_freeN(inputs_); } -GPU_INLINE uint32_t set_input_name(GPUShaderInterface *shaderface, - GPUShaderInput *input, - char *name, - uint32_t name_len) +static void sort_input_list(MutableSpan<ShaderInput> dst) { - /* remove "[0]" from array name */ - if (name[name_len - 1] == ']') { - name[name_len - 3] = '\0'; - name_len -= 3; + if (dst.size() == 0) { + return; } - input->name_offset = (uint32_t)(name - shaderface->name_buffer); - input->name_hash = hash_string(name); - return name_len + 1; /* include NULL terminator */ -} - -GPU_INLINE const GPUShaderInput *input_lookup(const GPUShaderInterface *shaderface, - const GPUShaderInput *const inputs, - const uint inputs_len, - const char *name) -{ - const uint name_hash = hash_string(name); - /* Simple linear search for now. */ - for (int i = inputs_len - 1; i >= 0; i--) { - if (inputs[i].name_hash == name_hash) { - if ((i > 0) && UNLIKELY(inputs[i - 1].name_hash == name_hash)) { - /* Hash colision resolve. */ - for (; i >= 0 && inputs[i].name_hash == name_hash; i--) { - if (match(name, shaderface->name_buffer + inputs[i].name_offset)) { - return inputs + i; /* not found */ - } - } - return NULL; /* not found */ - } - - /* This is a bit dangerous since we could have a hash collision. - * where the asked uniform that does not exist has the same hash - * as a real uniform. */ - BLI_assert(match(name, shaderface->name_buffer + inputs[i].name_offset)); - return inputs + i; - } - } - return NULL; /* not found */ -} + Vector<ShaderInput> inputs_vec = Vector<ShaderInput>(dst.size()); + MutableSpan<ShaderInput> src = inputs_vec.as_mutable_span(); + src.copy_from(dst); -/* Note that this modify the src array. 
*/ -GPU_INLINE void sort_input_list(GPUShaderInput *dst, GPUShaderInput *src, const uint input_len) -{ - for (uint i = 0; i < input_len; i++) { - GPUShaderInput *input_src = &src[0]; - for (uint j = 1; j < input_len; j++) { + /* Simple sorting by going through the array and selecting the biggest element each time. */ + for (uint i = 0; i < dst.size(); i++) { + ShaderInput *input_src = &src[0]; + for (uint j = 1; j < src.size(); j++) { if (src[j].name_hash > input_src->name_hash) { input_src = &src[j]; } @@ -183,358 +67,60 @@ GPU_INLINE void sort_input_list(GPUShaderInput *dst, GPUShaderInput *src, const } } -static int block_binding(int32_t program, uint32_t block_index) +/* Sorts all inputs inside their respective array. + * This is to allow fast hash collision detection. + * See ShaderInterface::input_lookup for more details. */ +void ShaderInterface::sort_inputs(void) { - /* For now just assign a consecutive index. In the future, we should set it in - * the shader using layout(binding = i) and query its value. */ - glUniformBlockBinding(program, block_index, block_index); - return block_index; + sort_input_list(MutableSpan<ShaderInput>(inputs_, attr_len_)); + sort_input_list(MutableSpan<ShaderInput>(inputs_ + attr_len_, ubo_len_)); + sort_input_list(MutableSpan<ShaderInput>(inputs_ + attr_len_ + ubo_len_, uniform_len_)); } -static int sampler_binding(int32_t program, - uint32_t uniform_index, - int32_t uniform_location, - int *sampler_len) +void ShaderInterface::debug_print(void) { - /* Identify sampler uniforms and asign sampler units to them. 
*/ - GLint type; - glGetActiveUniformsiv(program, 1, &uniform_index, GL_UNIFORM_TYPE, &type); + Span<ShaderInput> attrs = Span<ShaderInput>(inputs_, attr_len_); + Span<ShaderInput> ubos = Span<ShaderInput>(inputs_ + attr_len_, ubo_len_); + Span<ShaderInput> uniforms = Span<ShaderInput>(inputs_ + attr_len_ + ubo_len_, uniform_len_); + char *name_buf = name_buffer_; + const char format[] = " | %.8x : %4d : %s\n"; - switch (type) { - case GL_SAMPLER_1D: - case GL_SAMPLER_2D: - case GL_SAMPLER_3D: - case GL_SAMPLER_CUBE: - case GL_SAMPLER_CUBE_MAP_ARRAY_ARB: /* OpenGL 4.0 */ - case GL_SAMPLER_1D_SHADOW: - case GL_SAMPLER_2D_SHADOW: - case GL_SAMPLER_1D_ARRAY: - case GL_SAMPLER_2D_ARRAY: - case GL_SAMPLER_1D_ARRAY_SHADOW: - case GL_SAMPLER_2D_ARRAY_SHADOW: - case GL_SAMPLER_2D_MULTISAMPLE: - case GL_SAMPLER_2D_MULTISAMPLE_ARRAY: - case GL_SAMPLER_CUBE_SHADOW: - case GL_SAMPLER_BUFFER: - case GL_INT_SAMPLER_1D: - case GL_INT_SAMPLER_2D: - case GL_INT_SAMPLER_3D: - case GL_INT_SAMPLER_CUBE: - case GL_INT_SAMPLER_1D_ARRAY: - case GL_INT_SAMPLER_2D_ARRAY: - case GL_INT_SAMPLER_2D_MULTISAMPLE: - case GL_INT_SAMPLER_2D_MULTISAMPLE_ARRAY: - case GL_INT_SAMPLER_BUFFER: - case GL_UNSIGNED_INT_SAMPLER_1D: - case GL_UNSIGNED_INT_SAMPLER_2D: - case GL_UNSIGNED_INT_SAMPLER_3D: - case GL_UNSIGNED_INT_SAMPLER_CUBE: - case GL_UNSIGNED_INT_SAMPLER_1D_ARRAY: - case GL_UNSIGNED_INT_SAMPLER_2D_ARRAY: - case GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE: - case GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY: - case GL_UNSIGNED_INT_SAMPLER_BUFFER: { - /* For now just assign a consecutive index. In the future, we should set it in - * the shader using layout(binding = i) and query its value. 
*/ - int binding = *sampler_len; - glUniform1i(uniform_location, binding); - (*sampler_len)++; - return binding; - } - default: - return -1; + printf(" \033[1mGPUShaderInterface : \033[0m\n"); + if (attrs.size() > 0) { + printf("\n Attributes :\n"); } -} - -GPUShaderInterface *GPU_shaderinterface_create(int32_t program) -{ -#ifndef NDEBUG - GLint curr_program; - glGetIntegerv(GL_CURRENT_PROGRAM, &curr_program); - BLI_assert(curr_program == program); -#endif - - GLint max_attr_name_len = 0, attr_len = 0; - glGetProgramiv(program, GL_ACTIVE_ATTRIBUTE_MAX_LENGTH, &max_attr_name_len); - glGetProgramiv(program, GL_ACTIVE_ATTRIBUTES, &attr_len); - - GLint max_ubo_name_len = 0, ubo_len = 0; - glGetProgramiv(program, GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH, &max_ubo_name_len); - glGetProgramiv(program, GL_ACTIVE_UNIFORM_BLOCKS, &ubo_len); - - GLint max_uniform_name_len = 0, active_uniform_len = 0, uniform_len = 0; - glGetProgramiv(program, GL_ACTIVE_UNIFORM_MAX_LENGTH, &max_uniform_name_len); - glGetProgramiv(program, GL_ACTIVE_UNIFORMS, &active_uniform_len); - uniform_len = active_uniform_len; - - /* Work around driver bug with Intel HD 4600 on Windows 7/8, where - * GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH does not work. */ - if (attr_len > 0 && max_attr_name_len == 0) { - max_attr_name_len = 256; - } - if (ubo_len > 0 && max_ubo_name_len == 0) { - max_ubo_name_len = 256; - } - if (uniform_len > 0 && max_uniform_name_len == 0) { - max_uniform_name_len = 256; + for (const ShaderInput &attr : attrs) { + printf(format, attr.name_hash, attr.location, name_buf + attr.name_offset); } - /* GL_ACTIVE_UNIFORMS lied to us! Remove the UBO uniforms from the total before - * allocating the uniform array. 
*/ - GLint max_ubo_uni_len = 0; - for (int i = 0; i < ubo_len; i++) { - GLint ubo_uni_len; - glGetActiveUniformBlockiv(program, i, GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS, &ubo_uni_len); - max_ubo_uni_len = max_ii(max_ubo_uni_len, ubo_uni_len); - uniform_len -= ubo_uni_len; + if (uniforms.size() > 0) { + printf("\n Uniforms :\n"); } - /* Bit set to true if uniform comes from a uniform block. */ - BLI_bitmap *uniforms_from_blocks = BLI_BITMAP_NEW(active_uniform_len, __func__); - /* Set uniforms from block for exclusion. */ - GLint *ubo_uni_ids = (GLint *)MEM_mallocN(sizeof(GLint) * max_ubo_uni_len, __func__); - for (int i = 0; i < ubo_len; i++) { - GLint ubo_uni_len; - glGetActiveUniformBlockiv(program, i, GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS, &ubo_uni_len); - glGetActiveUniformBlockiv(program, i, GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES, ubo_uni_ids); - for (int u = 0; u < ubo_uni_len; u++) { - BLI_BITMAP_ENABLE(uniforms_from_blocks, ubo_uni_ids[u]); + for (const ShaderInput &uni : uniforms) { + /* Bypass samplers. */ + if (uni.binding == -1) { + printf(format, uni.name_hash, uni.location, name_buf + uni.name_offset); } } - MEM_freeN(ubo_uni_ids); - - uint32_t name_buffer_offset = 0; - const uint32_t name_buffer_len = attr_len * max_attr_name_len + ubo_len * max_ubo_name_len + - uniform_len * max_uniform_name_len; - - int input_tot_len = attr_len + ubo_len + uniform_len; - size_t interface_size = sizeof(GPUShaderInterface) + sizeof(GPUShaderInput) * input_tot_len; - GPUShaderInterface *shaderface = (GPUShaderInterface *)MEM_callocN(interface_size, - "GPUShaderInterface"); - shaderface->attribute_len = attr_len; - shaderface->ubo_len = ubo_len; - shaderface->uniform_len = uniform_len; - shaderface->name_buffer = (char *)MEM_mallocN(name_buffer_len, "name_buffer"); - GPUShaderInput *inputs = shaderface->inputs; - - /* Temp buffer. 
*/ - int input_tmp_len = max_iii(attr_len, ubo_len, uniform_len); - GPUShaderInput *inputs_tmp = (GPUShaderInput *)MEM_mallocN( - sizeof(GPUShaderInput) * input_tmp_len, "name_buffer"); - - /* Attributes */ - shaderface->enabled_attr_mask = 0; - for (int i = 0, idx = 0; i < attr_len; i++) { - char *name = shaderface->name_buffer + name_buffer_offset; - GLsizei remaining_buffer = name_buffer_len - name_buffer_offset; - GLsizei name_len = 0; - GLenum type; - GLint size; - - glGetActiveAttrib(program, i, remaining_buffer, &name_len, &size, &type, name); - GLint location = glGetAttribLocation(program, name); - /* Ignore OpenGL names like `gl_BaseInstanceARB`, `gl_InstanceID` and `gl_VertexID`. */ - if (location == -1) { - shaderface->attribute_len--; - continue; - } - - GPUShaderInput *input = &inputs_tmp[idx++]; - input->location = input->binding = location; - - name_buffer_offset += set_input_name(shaderface, input, name, name_len); - shaderface->enabled_attr_mask |= (1 << input->location); + if (ubos.size() > 0) { + printf("\n Uniform Buffer Objects :\n"); } - sort_input_list(inputs, inputs_tmp, shaderface->attribute_len); - inputs += shaderface->attribute_len; - - /* Uniform Blocks */ - for (int i = 0, idx = 0; i < ubo_len; i++) { - char *name = shaderface->name_buffer + name_buffer_offset; - GLsizei remaining_buffer = name_buffer_len - name_buffer_offset; - GLsizei name_len = 0; - - glGetActiveUniformBlockName(program, i, remaining_buffer, &name_len, name); - - GPUShaderInput *input = &inputs_tmp[idx++]; - input->binding = input->location = block_binding(program, i); - - name_buffer_offset += set_input_name(shaderface, input, name, name_len); - shaderface->enabled_ubo_mask |= (1 << input->binding); + for (const ShaderInput &ubo : ubos) { + printf(format, ubo.name_hash, ubo.binding, name_buf + ubo.name_offset); } - sort_input_list(inputs, inputs_tmp, shaderface->ubo_len); - inputs += shaderface->ubo_len; - - /* Uniforms */ - for (int i = 0, idx = 0, sampler = 0; i 
< active_uniform_len; i++) { - if (BLI_BITMAP_TEST(uniforms_from_blocks, i)) { - continue; - } - char *name = shaderface->name_buffer + name_buffer_offset; - GLsizei remaining_buffer = name_buffer_len - name_buffer_offset; - GLsizei name_len = 0; - glGetActiveUniformName(program, i, remaining_buffer, &name_len, name); - - GPUShaderInput *input = &inputs_tmp[idx++]; - input->location = glGetUniformLocation(program, name); - input->binding = sampler_binding(program, i, input->location, &sampler); - - name_buffer_offset += set_input_name(shaderface, input, name, name_len); - shaderface->enabled_tex_mask |= (input->binding != -1) ? (1lu << input->binding) : 0lu; - } - sort_input_list(inputs, inputs_tmp, shaderface->uniform_len); - - /* Builtin Uniforms */ - for (int32_t u_int = 0; u_int < GPU_NUM_UNIFORMS; u_int++) { - GPUUniformBuiltin u = static_cast<GPUUniformBuiltin>(u_int); - shaderface->builtins[u] = glGetUniformLocation(program, BuiltinUniform_name(u)); - } - - /* Builtin Uniforms Blocks */ - for (int32_t u_int = 0; u_int < GPU_NUM_UNIFORM_BLOCKS; u_int++) { - GPUUniformBlockBuiltin u = static_cast<GPUUniformBlockBuiltin>(u_int); - const GPUShaderInput *block = GPU_shaderinterface_ubo(shaderface, BuiltinUniformBlock_name(u)); - shaderface->builtin_blocks[u] = (block != NULL) ? block->binding : -1; - } - - /* Batches ref buffer */ - shaderface->batches_len = GPU_SHADERINTERFACE_REF_ALLOC_COUNT; - shaderface->batches = (GPUBatch **)MEM_callocN(shaderface->batches_len * sizeof(GPUBatch *), - "GPUShaderInterface batches"); - - MEM_freeN(uniforms_from_blocks); - MEM_freeN(inputs_tmp); - - /* Resize name buffer to save some memory. 
*/ - if (name_buffer_offset < name_buffer_len) { - shaderface->name_buffer = (char *)MEM_reallocN(shaderface->name_buffer, name_buffer_offset); - } - -#if DEBUG_SHADER_INTERFACE - char *name_buf = shaderface->name_buffer; - printf("--- GPUShaderInterface %p, program %d ---\n", shaderface, program); - if (shaderface->attribute_len > 0) { - printf("Attributes {\n"); - for (int i = 0; i < shaderface->attribute_len; i++) { - GPUShaderInput *input = shaderface->inputs + i; - printf("\t(location = %d) %s;\n", input->location, name_buf + input->name_offset); - } - printf("};\n"); + if (enabled_tex_mask_ > 0) { + printf("\n Samplers :\n"); } - if (shaderface->ubo_len > 0) { - printf("Uniform Buffer Objects {\n"); - for (int i = 0; i < shaderface->ubo_len; i++) { - GPUShaderInput *input = shaderface->inputs + shaderface->attribute_len + i; - printf("\t(binding = %d) %s;\n", input->binding, name_buf + input->name_offset); - } - printf("};\n"); - } - if (shaderface->enabled_tex_mask > 0) { - printf("Samplers {\n"); - for (int i = 0; i < shaderface->uniform_len; i++) { - GPUShaderInput *input = shaderface->inputs + shaderface->attribute_len + - shaderface->ubo_len + i; - if (input->binding != -1) { - printf("\t(location = %d, binding = %d) %s;\n", - input->location, - input->binding, - name_buf + input->name_offset); - } - } - printf("};\n"); - } - if (shaderface->uniform_len > 0) { - printf("Uniforms {\n"); - for (int i = 0; i < shaderface->uniform_len; i++) { - GPUShaderInput *input = shaderface->inputs + shaderface->attribute_len + - shaderface->ubo_len + i; - if (input->binding == -1) { - printf("\t(location = %d) %s;\n", input->location, name_buf + input->name_offset); - } - } - printf("};\n"); - } - printf("--- GPUShaderInterface end ---\n\n"); -#endif - - return shaderface; -} - -void GPU_shaderinterface_discard(GPUShaderInterface *shaderface) -{ - /* Free memory used by name_buffer. 
*/ - MEM_freeN(shaderface->name_buffer); - /* Remove this interface from all linked Batches vao cache. */ - for (int i = 0; i < shaderface->batches_len; i++) { - if (shaderface->batches[i] != NULL) { - gpu_batch_remove_interface_ref(shaderface->batches[i], shaderface); + for (const ShaderInput &samp : uniforms) { + /* Bypass uniforms. */ + if (samp.binding != -1) { + printf(format, samp.name_hash, samp.binding, name_buf + samp.name_offset); } } - MEM_freeN(shaderface->batches); - /* Free memory used by shader interface by its self. */ - MEM_freeN(shaderface); -} -const GPUShaderInput *GPU_shaderinterface_attr(const GPUShaderInterface *shaderface, - const char *name) -{ - uint ofs = 0; - return input_lookup(shaderface, shaderface->inputs + ofs, shaderface->attribute_len, name); + printf("\n"); } -const GPUShaderInput *GPU_shaderinterface_ubo(const GPUShaderInterface *shaderface, - const char *name) -{ - uint ofs = shaderface->attribute_len; - return input_lookup(shaderface, shaderface->inputs + ofs, shaderface->ubo_len, name); -} - -const GPUShaderInput *GPU_shaderinterface_uniform(const GPUShaderInterface *shaderface, - const char *name) -{ - uint ofs = shaderface->attribute_len + shaderface->ubo_len; - return input_lookup(shaderface, shaderface->inputs + ofs, shaderface->uniform_len, name); -} - -int32_t GPU_shaderinterface_uniform_builtin(const GPUShaderInterface *shaderface, - GPUUniformBuiltin builtin) -{ - BLI_assert(builtin >= 0 && builtin < GPU_NUM_UNIFORMS); - return shaderface->builtins[builtin]; -} - -int32_t GPU_shaderinterface_block_builtin(const GPUShaderInterface *shaderface, - GPUUniformBlockBuiltin builtin) -{ - BLI_assert(builtin >= 0 && builtin < GPU_NUM_UNIFORM_BLOCKS); - return shaderface->builtin_blocks[builtin]; -} - -void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *shaderface, GPUBatch *batch) -{ - int i; /* find first unused slot */ - for (i = 0; i < shaderface->batches_len; i++) { - if (shaderface->batches[i] == NULL) { - break; - 
} - } - if (i == shaderface->batches_len) { - /* Not enough place, realloc the array. */ - i = shaderface->batches_len; - shaderface->batches_len += GPU_SHADERINTERFACE_REF_ALLOC_COUNT; - shaderface->batches = (GPUBatch **)MEM_recallocN(shaderface->batches, - sizeof(GPUBatch *) * shaderface->batches_len); - } - shaderface->batches[i] = batch; -} - -void GPU_shaderinterface_remove_batch_ref(GPUShaderInterface *shaderface, GPUBatch *batch) -{ - for (int i = 0; i < shaderface->batches_len; i++) { - if (shaderface->batches[i] == batch) { - shaderface->batches[i] = NULL; - break; /* cannot have duplicates */ - } - } -} +} // namespace blender::gpu diff --git a/source/blender/gpu/intern/gpu_shader_interface.hh b/source/blender/gpu/intern/gpu_shader_interface.hh new file mode 100644 index 00000000000..265fe90fc76 --- /dev/null +++ b/source/blender/gpu/intern/gpu_shader_interface.hh @@ -0,0 +1,255 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + * + * GPU shader interface (C --> GLSL) + * + * Structure detailing needed vertex inputs and resources for a specific shader. + * A shader interface can be shared between two similar shaders. 
+ */ + +#pragma once + +#include <cstring> /* required for STREQ later on. */ + +#include "BLI_hash.h" +#include "BLI_utildefines.h" + +#include "GPU_shader.h" + +namespace blender::gpu { + +typedef struct ShaderInput { + uint32_t name_offset; + uint32_t name_hash; + int32_t location; + /** Defined at interface creation or in shader. Only for Samplers, UBOs and Vertex Attribs. */ + int32_t binding; +} ShaderInput; + +/** + * Implementation of Shader interface. + * Base class which is then specialized for each implementation (GL, VK, ...). + **/ +class ShaderInterface { + /* TODO(fclem) should be protected. */ + public: + /** Flat array. In this order: Attributes, Ubos, Uniforms. */ + ShaderInput *inputs_ = NULL; + /** Buffer containing all inputs names separated by '\0'. */ + char *name_buffer_ = NULL; + /** Input counts inside input array. */ + uint attr_len_ = 0; + uint ubo_len_ = 0; + uint uniform_len_ = 0; + /** Enabled bindpoints that needs to be fed with data. */ + uint16_t enabled_attr_mask_ = 0; + uint16_t enabled_ubo_mask_ = 0; + uint64_t enabled_tex_mask_ = 0; + /** Location of builtin uniforms. Fast access, no lookup needed. 
*/ + int32_t builtins_[GPU_NUM_UNIFORMS]; + int32_t builtin_blocks_[GPU_NUM_UNIFORM_BLOCKS]; + + public: + ShaderInterface(); + virtual ~ShaderInterface(); + + void debug_print(void); + + inline const ShaderInput *attr_get(const char *name) const + { + return input_lookup(inputs_, attr_len_, name); + } + + inline const ShaderInput *ubo_get(const char *name) const + { + return input_lookup(inputs_ + attr_len_, ubo_len_, name); + } + inline const ShaderInput *ubo_get(const int binding) const + { + return input_lookup(inputs_ + attr_len_, ubo_len_, binding); + } + + inline const ShaderInput *uniform_get(const char *name) const + { + return input_lookup(inputs_ + attr_len_ + ubo_len_, uniform_len_, name); + } + + inline const char *input_name_get(const ShaderInput *input) const + { + return name_buffer_ + input->name_offset; + } + + /* Returns uniform location. */ + inline int32_t uniform_builtin(const GPUUniformBuiltin builtin) const + { + BLI_assert(builtin >= 0 && builtin < GPU_NUM_UNIFORMS); + return builtins_[builtin]; + } + + /* Returns binding position. */ + inline int32_t ubo_builtin(const GPUUniformBlockBuiltin builtin) const + { + BLI_assert(builtin >= 0 && builtin < GPU_NUM_UNIFORM_BLOCKS); + return builtin_blocks_[builtin]; + } + + protected: + static inline const char *builtin_uniform_name(GPUUniformBuiltin u); + static inline const char *builtin_uniform_block_name(GPUUniformBlockBuiltin u); + + inline uint32_t set_input_name(ShaderInput *input, char *name, uint32_t name_len) const; + + /* Finalize interface construction by sorting the ShaderInputs for faster lookups. 
*/ + void sort_inputs(void); + + private: + inline const ShaderInput *input_lookup(const ShaderInput *const inputs, + const uint inputs_len, + const char *name) const; + + inline const ShaderInput *input_lookup(const ShaderInput *const inputs, + const uint inputs_len, + const int binding) const; +}; + +inline const char *ShaderInterface::builtin_uniform_name(GPUUniformBuiltin u) +{ + switch (u) { + case GPU_UNIFORM_MODEL: + return "ModelMatrix"; + case GPU_UNIFORM_VIEW: + return "ViewMatrix"; + case GPU_UNIFORM_MODELVIEW: + return "ModelViewMatrix"; + case GPU_UNIFORM_PROJECTION: + return "ProjectionMatrix"; + case GPU_UNIFORM_VIEWPROJECTION: + return "ViewProjectionMatrix"; + case GPU_UNIFORM_MVP: + return "ModelViewProjectionMatrix"; + + case GPU_UNIFORM_MODEL_INV: + return "ModelMatrixInverse"; + case GPU_UNIFORM_VIEW_INV: + return "ViewMatrixInverse"; + case GPU_UNIFORM_MODELVIEW_INV: + return "ModelViewMatrixInverse"; + case GPU_UNIFORM_PROJECTION_INV: + return "ProjectionMatrixInverse"; + case GPU_UNIFORM_VIEWPROJECTION_INV: + return "ViewProjectionMatrixInverse"; + + case GPU_UNIFORM_NORMAL: + return "NormalMatrix"; + case GPU_UNIFORM_ORCO: + return "OrcoTexCoFactors"; + case GPU_UNIFORM_CLIPPLANES: + return "WorldClipPlanes"; + + case GPU_UNIFORM_COLOR: + return "color"; + case GPU_UNIFORM_BASE_INSTANCE: + return "baseInstance"; + case GPU_UNIFORM_RESOURCE_CHUNK: + return "resourceChunk"; + case GPU_UNIFORM_RESOURCE_ID: + return "resourceId"; + case GPU_UNIFORM_SRGB_TRANSFORM: + return "srgbTarget"; + + default: + return NULL; + } +} + +inline const char *ShaderInterface::builtin_uniform_block_name(GPUUniformBlockBuiltin u) +{ + switch (u) { + case GPU_UNIFORM_BLOCK_VIEW: + return "viewBlock"; + case GPU_UNIFORM_BLOCK_MODEL: + return "modelBlock"; + case GPU_UNIFORM_BLOCK_INFO: + return "infoBlock"; + default: + return NULL; + } +} + +/* Returns string length including '\0' terminator. 
 */ +inline uint32_t ShaderInterface::set_input_name(ShaderInput *input, + char *name, + uint32_t name_len) const +{ + /* remove "[0]" from array name */ + if (name[name_len - 1] == ']') { + name[name_len - 3] = '\0'; + name_len -= 3; + } + + input->name_offset = (uint32_t)(name - name_buffer_); + input->name_hash = BLI_hash_string(name); + return name_len + 1; /* include NULL terminator */ +} + +inline const ShaderInput *ShaderInterface::input_lookup(const ShaderInput *const inputs, + const uint inputs_len, + const char *name) const +{ + const uint name_hash = BLI_hash_string(name); + /* Simple linear search for now. */ + for (int i = inputs_len - 1; i >= 0; i--) { + if (inputs[i].name_hash == name_hash) { + if ((i > 0) && UNLIKELY(inputs[i - 1].name_hash == name_hash)) { + /* Hash collision resolve. */ + for (; i >= 0 && inputs[i].name_hash == name_hash; i--) { + if (STREQ(name, name_buffer_ + inputs[i].name_offset)) { + return inputs + i; /* found */ + } + } + return NULL; /* not found */ + } + + /* This is a bit dangerous since we could have a hash collision, + * where the asked uniform that does not exist has the same hash + * as a real uniform. */ + BLI_assert(STREQ(name, name_buffer_ + inputs[i].name_offset)); + return inputs + i; + } + } + return NULL; /* not found */ +} + +inline const ShaderInput *ShaderInterface::input_lookup(const ShaderInput *const inputs, + const uint inputs_len, + const int binding) const +{ + /* Simple linear search for now. 
*/ + for (int i = inputs_len - 1; i >= 0; i--) { + if (inputs[i].binding == binding) { + return inputs + i; + } + } + return NULL; /* not found */ +} + +} // namespace blender::gpu diff --git a/source/blender/gpu/intern/gpu_shader_private.h b/source/blender/gpu/intern/gpu_shader_private.h deleted file mode 100644 index 0f89fbda737..00000000000 --- a/source/blender/gpu/intern/gpu_shader_private.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -/** \file - * \ingroup gpu - */ - -#pragma once - -#include "GPU_shader_interface.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct GPUShader { - /** Handle for full program (links shader stages below). */ - GLuint program; - - /** Handle for vertex shader. */ - GLuint vertex; - /** Handle for geometry shader. */ - GLuint geometry; - /** Handle for fragment shader. */ - GLuint fragment; - - /** Cached uniform & attribute interface for shader. */ - GPUShaderInterface *interface; - - int feedback_transform_type; -#ifndef NDEBUG - char name[64]; -#endif -}; - -/* XXX do not use it. Special hack to use OCIO with batch API. 
*/ -GPUShader *immGetShader(void); - -#ifdef __cplusplus -} -#endif diff --git a/source/blender/gpu/intern/gpu_shader_private.hh b/source/blender/gpu/intern/gpu_shader_private.hh new file mode 100644 index 00000000000..9c9aa835b97 --- /dev/null +++ b/source/blender/gpu/intern/gpu_shader_private.hh @@ -0,0 +1,80 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include "BLI_span.hh" + +#include "GPU_shader.h" +#include "GPU_vertex_buffer.h" +#include "gpu_shader_interface.hh" + +namespace blender { +namespace gpu { + +/** + * Implementation of shader compilation and uniforms handling. + * Base class which is then specialized for each implementation (GL, VK, ...). + **/ +class Shader { + public: + /** Uniform & attribute locations for shader. */ + ShaderInterface *interface = nullptr; + + protected: + /** For debugging purpose. 
*/ + char name[64]; + + public: + Shader(const char *name); + virtual ~Shader(); + + virtual void vertex_shader_from_glsl(MutableSpan<const char *> sources) = 0; + virtual void geometry_shader_from_glsl(MutableSpan<const char *> sources) = 0; + virtual void fragment_shader_from_glsl(MutableSpan<const char *> sources) = 0; + virtual bool finalize(void) = 0; + + virtual void transform_feedback_names_set(Span<const char *> name_list, + const eGPUShaderTFBType geom_type) = 0; + virtual bool transform_feedback_enable(GPUVertBuf *) = 0; + virtual void transform_feedback_disable(void) = 0; + + virtual void bind(void) = 0; + virtual void unbind(void) = 0; + + virtual void uniform_float(int location, int comp_len, int array_size, const float *data) = 0; + virtual void uniform_int(int location, int comp_len, int array_size, const int *data) = 0; + + virtual void vertformat_from_shader(GPUVertFormat *) const = 0; + + inline const char *const name_get(void) const + { + return name; + }; + + protected: + void print_errors(Span<const char *> sources, char *log, const char *stage); +}; + +} // namespace gpu +} // namespace blender + +/* XXX do not use it. Special hack to use OCIO with batch API. 
*/ +GPUShader *immGetShader(void); diff --git a/source/blender/gpu/intern/gpu_state.cc b/source/blender/gpu/intern/gpu_state.cc index 794c7a3eb97..478fd639cdd 100644 --- a/source/blender/gpu/intern/gpu_state.cc +++ b/source/blender/gpu/intern/gpu_state.cc @@ -25,6 +25,7 @@ # define PIXELSIZE (1.0f) #endif +#include "BLI_math_vector.h" #include "BLI_utildefines.h" #include "BKE_global.h" @@ -33,239 +34,252 @@ #include "GPU_glew.h" #include "GPU_state.h" -static GLenum gpu_get_gl_blendfunction(eGPUBlendFunction blend) -{ - switch (blend) { - case GPU_ONE: - return GL_ONE; - case GPU_SRC_ALPHA: - return GL_SRC_ALPHA; - case GPU_ONE_MINUS_SRC_ALPHA: - return GL_ONE_MINUS_SRC_ALPHA; - case GPU_DST_COLOR: - return GL_DST_COLOR; - case GPU_ZERO: - return GL_ZERO; - default: - BLI_assert(!"Unhandled blend mode"); - return GL_ZERO; - } +#include "gpu_context_private.hh" + +#include "gpu_state_private.hh" + +using namespace blender::gpu; + +#define SET_STATE(_prefix, _state, _value) \ + do { \ + GPUStateManager *stack = GPU_context_active_get()->state_manager; \ + auto &state_object = stack->_prefix##state; \ + state_object._state = (_value); \ + } while (0) + +#define SET_IMMUTABLE_STATE(_state, _value) SET_STATE(, _state, _value) +#define SET_MUTABLE_STATE(_state, _value) SET_STATE(mutable_, _state, _value) + +/* -------------------------------------------------------------------- */ +/** \name Immutable state Setters + * \{ */ + +void GPU_blend(eGPUBlend blend) +{ + SET_IMMUTABLE_STATE(blend, blend); } -void GPU_blend(bool enable) +void GPU_face_culling(eGPUFaceCullTest culling) { - if (enable) { - glEnable(GL_BLEND); - } - else { - glDisable(GL_BLEND); - } + SET_IMMUTABLE_STATE(culling_test, culling); } -void GPU_blend_set_func(eGPUBlendFunction sfactor, eGPUBlendFunction dfactor) +void GPU_front_facing(bool invert) { - glBlendFunc(gpu_get_gl_blendfunction(sfactor), gpu_get_gl_blendfunction(dfactor)); + SET_IMMUTABLE_STATE(invert_facing, invert); } -void 
GPU_blend_set_func_separate(eGPUBlendFunction src_rgb, - eGPUBlendFunction dst_rgb, - eGPUBlendFunction src_alpha, - eGPUBlendFunction dst_alpha) +void GPU_provoking_vertex(eGPUProvokingVertex vert) { - glBlendFuncSeparate(gpu_get_gl_blendfunction(src_rgb), - gpu_get_gl_blendfunction(dst_rgb), - gpu_get_gl_blendfunction(src_alpha), - gpu_get_gl_blendfunction(dst_alpha)); + SET_IMMUTABLE_STATE(provoking_vert, vert); } -void GPU_face_culling(eGPUFaceCull culling) +void GPU_depth_test(eGPUDepthTest test) { - if (culling == GPU_CULL_NONE) { - glDisable(GL_CULL_FACE); - } - else { - glEnable(GL_CULL_FACE); - glCullFace((culling == GPU_CULL_FRONT) ? GL_FRONT : GL_BACK); - } + SET_IMMUTABLE_STATE(depth_test, test); } -void GPU_front_facing(bool invert) +void GPU_stencil_test(eGPUStencilTest test) { - glFrontFace((invert) ? GL_CW : GL_CCW); + SET_IMMUTABLE_STATE(stencil_test, test); } -void GPU_provoking_vertex(eGPUProvokingVertex vert) +void GPU_line_smooth(bool enable) { - glProvokingVertex((vert == GPU_VERTEX_FIRST) ? 
GL_FIRST_VERTEX_CONVENTION : - GL_LAST_VERTEX_CONVENTION); + SET_IMMUTABLE_STATE(line_smooth, enable); } -void GPU_depth_range(float near, float far) +void GPU_polygon_smooth(bool enable) { - /* glDepthRangef is only for OpenGL 4.1 or higher */ - glDepthRange(near, far); + SET_IMMUTABLE_STATE(polygon_smooth, enable); } -void GPU_depth_test(bool enable) +void GPU_logic_op_xor_set(bool enable) { - if (enable) { - glEnable(GL_DEPTH_TEST); - } - else { - glDisable(GL_DEPTH_TEST); - } + SET_IMMUTABLE_STATE(logic_op_xor, enable); } -bool GPU_depth_test_enabled() +void GPU_write_mask(eGPUWriteMask mask) { - return glIsEnabled(GL_DEPTH_TEST); + SET_IMMUTABLE_STATE(write_mask, mask); } -void GPU_line_smooth(bool enable) +void GPU_color_mask(bool r, bool g, bool b, bool a) { - if (enable && ((G.debug & G_DEBUG_GPU) == 0)) { - glEnable(GL_LINE_SMOOTH); - } - else { - glDisable(GL_LINE_SMOOTH); - } + GPUStateManager *stack = GPU_context_active_get()->state_manager; + auto &state = stack->state; + uint32_t write_mask = state.write_mask; + SET_FLAG_FROM_TEST(write_mask, r, (uint32_t)GPU_WRITE_RED); + SET_FLAG_FROM_TEST(write_mask, g, (uint32_t)GPU_WRITE_GREEN); + SET_FLAG_FROM_TEST(write_mask, b, (uint32_t)GPU_WRITE_BLUE); + SET_FLAG_FROM_TEST(write_mask, a, (uint32_t)GPU_WRITE_ALPHA); + state.write_mask = write_mask; } -void GPU_line_width(float width) +void GPU_depth_mask(bool depth) { - float max_size = GPU_max_line_width(); - float final_size = width * PIXELSIZE; - /* Fix opengl errors on certain platform / drivers. 
*/ - CLAMP(final_size, 1.0f, max_size); - glLineWidth(final_size); + GPUStateManager *stack = GPU_context_active_get()->state_manager; + auto &state = stack->state; + uint32_t write_mask = state.write_mask; + SET_FLAG_FROM_TEST(write_mask, depth, (uint32_t)GPU_WRITE_DEPTH); + state.write_mask = write_mask; } -void GPU_point_size(float size) +void GPU_shadow_offset(bool enable) { - glPointSize(size * PIXELSIZE); + SET_IMMUTABLE_STATE(shadow_bias, enable); } -void GPU_polygon_smooth(bool enable) +void GPU_clip_distances(int distances_enabled) { - if (enable && ((G.debug & G_DEBUG_GPU) == 0)) { - glEnable(GL_POLYGON_SMOOTH); - } - else { - glDisable(GL_POLYGON_SMOOTH); - } + SET_IMMUTABLE_STATE(clip_distances, distances_enabled); +} + +void GPU_state_set(eGPUWriteMask write_mask, + eGPUBlend blend, + eGPUFaceCullTest culling_test, + eGPUDepthTest depth_test, + eGPUStencilTest stencil_test, + eGPUStencilOp stencil_op, + eGPUProvokingVertex provoking_vert) +{ + GPUStateManager *stack = GPU_context_active_get()->state_manager; + auto &state = stack->state; + state.write_mask = (uint32_t)write_mask; + state.blend = (uint32_t)blend; + state.culling_test = (uint32_t)culling_test; + state.depth_test = (uint32_t)depth_test; + state.stencil_test = (uint32_t)stencil_test; + state.stencil_op = (uint32_t)stencil_op; + state.provoking_vert = (uint32_t)provoking_vert; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Mutable State Setters + * \{ */ + +void GPU_depth_range(float near, float far) +{ + GPUStateManager *stack = GPU_context_active_get()->state_manager; + auto &state = stack->mutable_state; + copy_v2_fl2(state.depth_range, near, far); +} + +void GPU_line_width(float width) +{ + SET_MUTABLE_STATE(line_width, width * PIXELSIZE); +} + +void GPU_point_size(float size) +{ + SET_MUTABLE_STATE(point_size, size * PIXELSIZE); } /* Programmable point size * - shaders set their own point size when enabled * - use glPointSize 
when disabled */ +/* TODO remove and use program point size everywhere */ void GPU_program_point_size(bool enable) { - if (enable) { - glEnable(GL_PROGRAM_POINT_SIZE); - } - else { - glDisable(GL_PROGRAM_POINT_SIZE); - } + GPUStateManager *stack = GPU_context_active_get()->state_manager; + auto &state = stack->mutable_state; + /* Set point size sign negative to disable. */ + state.point_size = fabsf(state.point_size) * (enable ? 1 : -1); } void GPU_scissor_test(bool enable) { - if (enable) { - glEnable(GL_SCISSOR_TEST); - } - else { - glDisable(GL_SCISSOR_TEST); - } + GPU_context_active_get()->active_fb->scissor_test_set(enable); } void GPU_scissor(int x, int y, int width, int height) { - glScissor(x, y, width, height); + int scissor_rect[4] = {x, y, width, height}; + GPU_context_active_get()->active_fb->scissor_set(scissor_rect); } void GPU_viewport(int x, int y, int width, int height) { - glViewport(x, y, width, height); + int viewport_rect[4] = {x, y, width, height}; + GPU_context_active_get()->active_fb->viewport_set(viewport_rect); } -void GPU_scissor_get_f(float coords[4]) +void GPU_stencil_reference_set(uint reference) { - glGetFloatv(GL_SCISSOR_BOX, coords); + SET_MUTABLE_STATE(stencil_reference, (uint8_t)reference); } -void GPU_scissor_get_i(int coords[4]) +void GPU_stencil_write_mask_set(uint write_mask) { - glGetIntegerv(GL_SCISSOR_BOX, coords); + SET_MUTABLE_STATE(stencil_write_mask, (uint8_t)write_mask); } -void GPU_viewport_size_get_f(float coords[4]) +void GPU_stencil_compare_mask_set(uint compare_mask) { - glGetFloatv(GL_VIEWPORT, coords); + SET_MUTABLE_STATE(stencil_compare_mask, (uint8_t)compare_mask); } -void GPU_viewport_size_get_i(int coords[4]) -{ - glGetIntegerv(GL_VIEWPORT, coords); -} +/** \} */ -void GPU_flush(void) +/* -------------------------------------------------------------------- */ +/** \name State Getters + * \{ */ + +eGPUBlend GPU_blend_get() { - glFlush(); + GPUState &state = GPU_context_active_get()->state_manager->state; + 
return (eGPUBlend)state.blend; } -void GPU_finish(void) +eGPUWriteMask GPU_write_mask_get() { - glFinish(); + GPUState &state = GPU_context_active_get()->state_manager->state; + return (eGPUWriteMask)state.write_mask; } -void GPU_unpack_row_length_set(uint len) +uint GPU_stencil_mask_get() { - glPixelStorei(GL_UNPACK_ROW_LENGTH, len); + GPUStateMutable &state = GPU_context_active_get()->state_manager->mutable_state; + return state.stencil_write_mask; } -void GPU_logic_op_xor_set(bool enable) +eGPUDepthTest GPU_depth_test_get() { - if (enable) { - glLogicOp(GL_XOR); - glEnable(GL_COLOR_LOGIC_OP); - } - else { - glDisable(GL_COLOR_LOGIC_OP); - } + GPUState &state = GPU_context_active_get()->state_manager->state; + return (eGPUDepthTest)state.depth_test; } -void GPU_color_mask(bool r, bool g, bool b, bool a) +eGPUStencilTest GPU_stencil_test_get() { - glColorMask(r, g, b, a); + GPUState &state = GPU_context_active_get()->state_manager->state; + return (eGPUStencilTest)state.stencil_test; } -void GPU_depth_mask(bool depth) +void GPU_scissor_get(int coords[4]) { - glDepthMask(depth); + GPU_context_active_get()->active_fb->scissor_get(coords); } -bool GPU_depth_mask_get(void) +void GPU_viewport_size_get_f(float coords[4]) { - GLint mask; - glGetIntegerv(GL_DEPTH_WRITEMASK, &mask); - return mask == GL_TRUE; + int viewport[4]; + GPU_context_active_get()->active_fb->viewport_get(viewport); + for (int i = 0; i < 4; i++) { + coords[i] = viewport[i]; + } } -void GPU_stencil_mask(uint stencil) +void GPU_viewport_size_get_i(int coords[4]) { - glStencilMask(stencil); + GPU_context_active_get()->active_fb->viewport_get(coords); } -void GPU_clip_distances(int distances_new) +bool GPU_depth_mask_get(void) { - static int distances_enabled = 0; - for (int i = 0; i < distances_new; i++) { - glEnable(GL_CLIP_DISTANCE0 + i); - } - for (int i = distances_new; i < distances_enabled; i++) { - glDisable(GL_CLIP_DISTANCE0 + i); - } - distances_enabled = distances_new; + GPUState &state = 
GPU_context_active_get()->state_manager->state; + return (state.write_mask & GPU_WRITE_DEPTH) != 0; } bool GPU_mipmap_enabled(void) @@ -274,163 +288,61 @@ bool GPU_mipmap_enabled(void) return true; } -/** \name GPU Push/Pop State - * \{ */ - -#define STATE_STACK_DEPTH 16 - -typedef struct { - eGPUAttrMask mask; - - /* GL_BLEND_BIT */ - uint is_blend : 1; - - /* GL_DEPTH_BUFFER_BIT */ - uint is_depth_test : 1; - int depth_func; - double depth_clear_value; - bool depth_write_mask; - - /* GL_SCISSOR_BIT */ - int scissor_box[4]; - uint is_scissor_test : 1; - - /* GL_VIEWPORT_BIT */ - int viewport[4]; - double near_far[2]; -} GPUAttrValues; - -typedef struct { - GPUAttrValues attr_stack[STATE_STACK_DEPTH]; - uint top; -} GPUAttrStack; - -static GPUAttrStack state = { - {}, - 0, -}; +/** \} */ -#define AttrStack state -#define Attr state.attr_stack[state.top] +/* -------------------------------------------------------------------- */ +/** \name Context Utils + * \{ */ -/** - * Replacement for glPush/PopAttributes - * - * We don't need to cover all the options of legacy OpenGL - * but simply the ones used by Blender. 
- */ -void gpuPushAttr(eGPUAttrMask mask) +void GPU_flush(void) { - Attr.mask = mask; - - if ((mask & GPU_DEPTH_BUFFER_BIT) != 0) { - Attr.is_depth_test = glIsEnabled(GL_DEPTH_TEST); - glGetIntegerv(GL_DEPTH_FUNC, &Attr.depth_func); - glGetDoublev(GL_DEPTH_CLEAR_VALUE, &Attr.depth_clear_value); - glGetBooleanv(GL_DEPTH_WRITEMASK, (GLboolean *)&Attr.depth_write_mask); - } - - if ((mask & GPU_SCISSOR_BIT) != 0) { - Attr.is_scissor_test = glIsEnabled(GL_SCISSOR_TEST); - glGetIntegerv(GL_SCISSOR_BOX, (GLint *)&Attr.scissor_box); - } - - if ((mask & GPU_VIEWPORT_BIT) != 0) { - glGetDoublev(GL_DEPTH_RANGE, (GLdouble *)&Attr.near_far); - glGetIntegerv(GL_VIEWPORT, (GLint *)&Attr.viewport); - } - - if ((mask & GPU_BLEND_BIT) != 0) { - Attr.is_blend = glIsEnabled(GL_BLEND); - } - - BLI_assert(AttrStack.top < STATE_STACK_DEPTH); - AttrStack.top++; + glFlush(); } -static void restore_mask(GLenum cap, const bool value) +void GPU_finish(void) { - if (value) { - glEnable(cap); - } - else { - glDisable(cap); - } + glFinish(); } -void gpuPopAttr(void) +void GPU_unpack_row_length_set(uint len) { - BLI_assert(AttrStack.top > 0); - AttrStack.top--; - - GLint mask = Attr.mask; - - if ((mask & GPU_DEPTH_BUFFER_BIT) != 0) { - restore_mask(GL_DEPTH_TEST, Attr.is_depth_test); - glDepthFunc(Attr.depth_func); - glClearDepth(Attr.depth_clear_value); - glDepthMask(Attr.depth_write_mask); - } - - if ((mask & GPU_VIEWPORT_BIT) != 0) { - glViewport(Attr.viewport[0], Attr.viewport[1], Attr.viewport[2], Attr.viewport[3]); - glDepthRange(Attr.near_far[0], Attr.near_far[1]); - } - - if ((mask & GPU_SCISSOR_BIT) != 0) { - restore_mask(GL_SCISSOR_TEST, Attr.is_scissor_test); - glScissor(Attr.scissor_box[0], Attr.scissor_box[1], Attr.scissor_box[2], Attr.scissor_box[3]); - } - - if ((mask & GPU_BLEND_BIT) != 0) { - restore_mask(GL_BLEND, Attr.is_blend); - } + glPixelStorei(GL_UNPACK_ROW_LENGTH, len); } -#undef Attr -#undef AttrStack +/** \} */ -/* Default OpenGL State +/* 
-------------------------------------------------------------------- */ +/** \name Default OpenGL State * * This is called on startup, for opengl offscreen render. * Generally we should always return to this state when * temporarily modifying the state for drawing, though that are (undocumented) - * exceptions that we should try to get rid of. */ - -void GPU_state_init(void) -{ - GPU_program_point_size(false); - - glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); - - glDisable(GL_BLEND); - glDisable(GL_DEPTH_TEST); - glDisable(GL_COLOR_LOGIC_OP); - glDisable(GL_STENCIL_TEST); - glDisable(GL_DITHER); - - glDepthFunc(GL_LEQUAL); - glDepthRange(0.0, 1.0); - - glFrontFace(GL_CCW); - glCullFace(GL_BACK); - glDisable(GL_CULL_FACE); - - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - - /* Is default but better be explicit. */ - glEnable(GL_MULTISAMPLE); - - /* This is a bit dangerous since addons could change this. */ - glEnable(GL_PRIMITIVE_RESTART); - glPrimitiveRestartIndex((GLuint)0xFFFFFFFF); + * exceptions that we should try to get rid of. + * \{ */ - /* TODO: Should become default. But needs at least GL 4.3 */ - if (GLEW_ARB_ES3_compatibility) { - /* Takes predecence over GL_PRIMITIVE_RESTART */ - glEnable(GL_PRIMITIVE_RESTART_FIXED_INDEX); - } +GPUStateManager::GPUStateManager(void) +{ + /* Set default state. 
*/ + state.write_mask = GPU_WRITE_COLOR; + state.blend = GPU_BLEND_NONE; + state.culling_test = GPU_CULL_NONE; + state.depth_test = GPU_DEPTH_NONE; + state.stencil_test = GPU_STENCIL_NONE; + state.stencil_op = GPU_STENCIL_OP_NONE; + state.provoking_vert = GPU_VERTEX_LAST; + state.logic_op_xor = false; + state.invert_facing = false; + state.shadow_bias = false; + state.polygon_smooth = false; + state.clip_distances = 0; + + mutable_state.depth_range[0] = 0.0f; + mutable_state.depth_range[1] = 1.0f; + mutable_state.point_size = 1.0f; + mutable_state.line_width = 1.0f; + mutable_state.stencil_write_mask = 0x00; + mutable_state.stencil_compare_mask = 0x00; + mutable_state.stencil_reference = 0x00; } /** \} */ diff --git a/source/blender/gpu/intern/gpu_state_private.hh b/source/blender/gpu/intern/gpu_state_private.hh new file mode 100644 index 00000000000..61234c4612c --- /dev/null +++ b/source/blender/gpu/intern/gpu_state_private.hh @@ -0,0 +1,166 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2020, Blender Foundation. + */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include "BLI_utildefines.h" + +#include "GPU_state.h" + +#include <cstring> + +namespace blender { +namespace gpu { + +/* Encapsulate all pipeline state that we need to track. 
+ * Try to keep small to reduce validation time. */ +union GPUState { + struct { + /** eGPUWriteMask */ + uint32_t write_mask : 13; + /** eGPUBlend */ + uint32_t blend : 4; + /** eGPUFaceCullTest */ + uint32_t culling_test : 2; + /** eGPUDepthTest */ + uint32_t depth_test : 3; + /** eGPUStencilTest */ + uint32_t stencil_test : 3; + /** eGPUStencilOp */ + uint32_t stencil_op : 3; + /** eGPUProvokingVertex */ + uint32_t provoking_vert : 1; + /** Enable bits. */ + uint32_t logic_op_xor : 1; + uint32_t invert_facing : 1; + uint32_t shadow_bias : 1; + /** Number of clip distances enabled. */ + /* TODO(fclem) This should be a shader property. */ + uint32_t clip_distances : 3; + /* TODO(fclem) remove, old opengl features. */ + uint32_t polygon_smooth : 1; + uint32_t line_smooth : 1; + }; + /* Here to allow fast bitwise ops. */ + uint64_t data; +}; + +BLI_STATIC_ASSERT(sizeof(GPUState) == sizeof(uint64_t), "GPUState is too big."); + +inline bool operator==(const GPUState &a, const GPUState &b) +{ + return a.data == b.data; +} + +inline bool operator!=(const GPUState &a, const GPUState &b) +{ + return !(a == b); +} + +inline GPUState operator^(const GPUState &a, const GPUState &b) +{ + GPUState r; + r.data = a.data ^ b.data; + return r; +} + +inline GPUState operator~(const GPUState &a) +{ + GPUState r; + r.data = ~a.data; + return r; +} + +/* Mutable state that does not require pipeline change. */ +union GPUStateMutable { + struct { + /* Viewport State */ + /** TODO remove */ + float depth_range[2]; + /** TODO remove, use explicit clear calls. */ + float clear_color[4]; + float clear_depth; + /** Negative if using program point size. */ + /* TODO(fclem) should be passed as uniform to all shaders. */ + float point_size; + /** Not supported on every platform. Prefer using wideline shader. */ + float line_width; + /** Mutable stencil states. 
 */ + uint8_t stencil_write_mask; + uint8_t stencil_compare_mask; + uint8_t stencil_reference; + uint8_t _pad0; + /* IMPORTANT: ensure x64 struct alignment. */ + }; + /* Here to allow fast bitwise ops. */ + uint64_t data[9]; +}; + +BLI_STATIC_ASSERT(sizeof(GPUStateMutable) == sizeof(GPUStateMutable::data), + "GPUStateMutable is too big."); + +inline bool operator==(const GPUStateMutable &a, const GPUStateMutable &b) +{ + return memcmp(&a, &b, sizeof(GPUStateMutable)) == 0; +} + +inline bool operator!=(const GPUStateMutable &a, const GPUStateMutable &b) +{ + return !(a == b); +} + +inline GPUStateMutable operator^(const GPUStateMutable &a, const GPUStateMutable &b) +{ + GPUStateMutable r; + for (int i = 0; i < ARRAY_SIZE(a.data); i++) { + r.data[i] = a.data[i] ^ b.data[i]; + } + return r; +} + +inline GPUStateMutable operator~(const GPUStateMutable &a) +{ + GPUStateMutable r; + for (int i = 0; i < ARRAY_SIZE(a.data); i++) { + r.data[i] = ~a.data[i]; + } + return r; +} + +/** + * State manager keeping track of the draw state and applying it before drawing. + * Base class which is then specialized for each implementation (GL, VK, ...). 
+ **/ +class GPUStateManager { + public: + GPUState state; + GPUStateMutable mutable_state; + + public: + GPUStateManager(); + virtual ~GPUStateManager(){}; + + virtual void apply_state(void) = 0; +}; + +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/intern/gpu_texture.cc b/source/blender/gpu/intern/gpu_texture.cc index a45bd222664..1b7e1d4fd6a 100644 --- a/source/blender/gpu/intern/gpu_texture.cc +++ b/source/blender/gpu/intern/gpu_texture.cc @@ -44,6 +44,7 @@ #include "GPU_texture.h" #include "gpu_context_private.hh" +#include "gpu_framebuffer_private.hh" #define WARN_NOT_BOUND(_tex) \ { \ @@ -109,6 +110,9 @@ struct GPUTexture { GPUContext *copy_fb_ctx; }; +using namespace blender; +using namespace blender::gpu; + static uint gpu_get_bytesize(eGPUTextureFormat data_type); static void gpu_texture_framebuffer_ensure(GPUTexture *tex); @@ -615,7 +619,7 @@ static bool gpu_texture_check_capacity( GPUTexture *tex, GLenum proxy, GLenum internalformat, GLenum data_format, GLenum data_type) { if (proxy == GL_PROXY_TEXTURE_CUBE_MAP_ARRAY_ARB && - GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_MAC, GPU_DRIVER_ANY)) { + GPU_type_matches(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY)) { /* Special fix for T79703. */ /* Depth has already been checked. */ return tex->w <= GPU_max_cube_map_size(); @@ -1158,9 +1162,9 @@ static GLenum convert_target_to_gl(int dimension, bool is_array) { switch (dimension) { case 1: - return is_array ? GL_TEXTURE_1D : GL_TEXTURE_1D_ARRAY; + return is_array ? GL_TEXTURE_1D_ARRAY : GL_TEXTURE_1D; case 2: - return is_array ? GL_TEXTURE_2D : GL_TEXTURE_2D_ARRAY; + return is_array ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D; case 3: return GL_TEXTURE_3D; default: @@ -1611,6 +1615,9 @@ void GPU_texture_clear(GPUTexture *tex, eGPUDataFormat gpu_data_format, const vo /* This means that this function can only be used in one context for each texture. 
*/ BLI_assert(tex->copy_fb_ctx == GPU_context_active_get()); + int viewport[4]; + GPU_viewport_size_get_i(viewport); + glBindFramebuffer(GL_FRAMEBUFFER, tex->copy_fb); glViewport(0, 0, tex->w, tex->h); @@ -1675,6 +1682,8 @@ void GPU_texture_clear(GPUTexture *tex, eGPUDataFormat gpu_data_format, const vo glClear(GL_COLOR_BUFFER_BIT); } + glViewport(UNPACK4(viewport)); + if (prev_fb) { GPU_framebuffer_bind(prev_fb); } @@ -2015,7 +2024,8 @@ void GPU_texture_free(GPUTexture *tex) if (tex->refcount == 0) { for (int i = 0; i < GPU_TEX_MAX_FBO_ATTACHED; i++) { if (tex->fb[i] != NULL) { - GPU_framebuffer_texture_detach_slot(tex->fb[i], tex, tex->fb_attachment[i]); + FrameBuffer *framebuffer = reinterpret_cast<FrameBuffer *>(tex->fb[i]); + framebuffer->attachment_set((GPUAttachmentType)tex->fb_attachment[i], GPU_ATTACHMENT_NONE); } } @@ -2127,17 +2137,26 @@ void GPU_texture_attach_framebuffer(GPUTexture *tex, GPUFrameBuffer *fb, int att } /* Return previous attachment point */ -int GPU_texture_detach_framebuffer(GPUTexture *tex, GPUFrameBuffer *fb) +void GPU_texture_detach_framebuffer(GPUTexture *tex, GPUFrameBuffer *fb) { for (int i = 0; i < GPU_TEX_MAX_FBO_ATTACHED; i++) { if (tex->fb[i] == fb) { tex->fb[i] = NULL; - return tex->fb_attachment[i]; + return; } } - BLI_assert(!"Error: Texture: Framebuffer is not attached"); - return 0; +} + +/* Return attachment type for the given framebuffer or -1 if not attached. 
*/ +int GPU_texture_framebuffer_attachement_get(GPUTexture *tex, GPUFrameBuffer *fb) +{ + for (int i = 0; i < GPU_TEX_MAX_FBO_ATTACHED; i++) { + if (tex->fb[i] == fb) { + return tex->fb_attachment[i]; + } + } + return -1; } void GPU_texture_get_mipmap_size(GPUTexture *tex, int lvl, int *size) @@ -2174,6 +2193,11 @@ void GPU_texture_get_mipmap_size(GPUTexture *tex, int lvl, int *size) void GPU_samplers_init(void) { + float max_anisotropy = 1.0f; + if (GLEW_EXT_texture_filter_anisotropic) { + glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &max_anisotropy); + } + glGenSamplers(GPU_SAMPLER_MAX, GG.samplers); for (int i = 0; i < GPU_SAMPLER_MAX; i++) { eGPUSamplerState state = static_cast<eGPUSamplerState>(i); @@ -2188,7 +2212,7 @@ void GPU_samplers_init(void) GLenum compare_mode = (state & GPU_SAMPLER_COMPARE) ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE; /* TODO(fclem) Anisotropic level should be a render engine parameter. */ float aniso_filter = ((state & GPU_SAMPLER_MIPMAP) && (state & GPU_SAMPLER_ANISO)) ? - U.anisotropic_filter : + max_ff(max_anisotropy, U.anisotropic_filter) : 1.0f; glSamplerParameteri(GG.samplers[i], GL_TEXTURE_WRAP_S, wrap_s); diff --git a/source/blender/gpu/intern/gpu_texture_private.hh b/source/blender/gpu/intern/gpu_texture_private.hh new file mode 100644 index 00000000000..6aa2a39046e --- /dev/null +++ b/source/blender/gpu/intern/gpu_texture_private.hh @@ -0,0 +1,53 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2020 Blender Foundation. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include "BLI_assert.h" + +namespace blender { +namespace gpu { + +class Texture { + public: + /** TODO(fclem): make it a non-static function. */ + static GPUAttachmentType attachment_type(GPUTexture *tex, int slot) + { + switch (GPU_texture_format(tex)) { + case GPU_DEPTH_COMPONENT32F: + case GPU_DEPTH_COMPONENT24: + case GPU_DEPTH_COMPONENT16: + BLI_assert(slot == 0); + return GPU_FB_DEPTH_ATTACHMENT; + case GPU_DEPTH24_STENCIL8: + case GPU_DEPTH32F_STENCIL8: + BLI_assert(slot == 0); + return GPU_FB_DEPTH_STENCIL_ATTACHMENT; + default: + return static_cast<GPUAttachmentType>(GPU_FB_COLOR_ATTACHMENT0 + slot); + } + } +}; + +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/intern/gpu_uniformbuffer.cc b/source/blender/gpu/intern/gpu_uniform_buffer.cc index e203ffd848f..94aa6bd76ab 100644 --- a/source/blender/gpu/intern/gpu_uniformbuffer.cc +++ b/source/blender/gpu/intern/gpu_uniform_buffer.cc @@ -13,7 +13,7 @@ * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * The Original Code is Copyright (C) 2005 Blender Foundation. + * The Original Code is Copyright (C) 2020 Blender Foundation. * All rights reserved. */ @@ -27,58 +27,46 @@ #include "BLI_blenlib.h" #include "BLI_math_base.h" -#include "gpu_context_private.hh" +#include "gpu_backend.hh" #include "gpu_node_graph.h" -#include "GPU_extensions.h" -#include "GPU_glew.h" #include "GPU_material.h" -#include "GPU_uniformbuffer.h" - -typedef struct GPUUniformBuffer { - /** Data size in bytes. */ - int size; - /** GL handle for UBO. 
*/ - GLuint bindcode; - /** Current binding point. */ - int bindpoint; - /** Continuous memory block to copy to GPU. Is own by the GPUUniformBuffer. */ - void *data; -} GPUUniformBuffer; - -GPUUniformBuffer *GPU_uniformbuffer_create(int size, const void *data, char err_out[256]) + +#include "GPU_extensions.h" + +#include "GPU_uniform_buffer.h" +#include "gpu_uniform_buffer_private.hh" + +/* -------------------------------------------------------------------- */ +/** \name Creation & Deletion + * \{ */ + +namespace blender::gpu { + +UniformBuf::UniformBuf(size_t size, const char *name) { /* Make sure that UBO is padded to size of vec4 */ BLI_assert((size % 16) == 0); + BLI_assert(size <= GPU_max_ubo_size()); - if (size > GPU_max_ubo_size()) { - if (err_out) { - BLI_strncpy(err_out, "GPUUniformBuffer: UBO too big", 256); - } - return NULL; - } - - GPUUniformBuffer *ubo = (GPUUniformBuffer *)MEM_mallocN(sizeof(GPUUniformBuffer), __func__); - ubo->size = size; - ubo->data = NULL; - ubo->bindcode = 0; - ubo->bindpoint = -1; - - /* Direct init. */ - if (data != NULL) { - GPU_uniformbuffer_update(ubo, data); - } + size_in_bytes_ = size; - return ubo; + BLI_strncpy(name_, name, sizeof(name_)); } -void GPU_uniformbuffer_free(GPUUniformBuffer *ubo) +UniformBuf::~UniformBuf() { - MEM_SAFE_FREE(ubo->data); - GPU_buf_free(ubo->bindcode); - MEM_freeN(ubo); + MEM_SAFE_FREE(data_); } +} // namespace blender::gpu + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Uniform buffer from GPUInput list + * \{ */ + /** * We need to pad some data types (vec3) on the C side * To match the GPU expected memory block alignment. @@ -111,10 +99,10 @@ static int inputs_cmp(const void *a, const void *b) * Make sure we respect the expected alignment of UBOs. * mat4, vec4, pad vec3 as vec4, then vec2, then floats. 
*/ -static void gpu_uniformbuffer_inputs_sort(ListBase *inputs) +static void buffer_from_list_inputs_sort(ListBase *inputs) { -/* Only support up to this type, if you want to extend it, make sure the - * padding logic is correct for the new types. */ +/* Only support up to this type, if you want to extend it, make sure static void + * inputs_sobuffer_size_compute *inputs) padding logic is correct for the new types. */ #define MAX_UBO_GPU_TYPE GPU_MAT4 /* Order them as mat4, vec4, vec3, vec2, float. */ @@ -173,23 +161,9 @@ static void gpu_uniformbuffer_inputs_sort(ListBase *inputs) #undef MAX_UBO_GPU_TYPE } -/** - * Create dynamic UBO from parameters - * Return NULL if failed to create or if \param inputs: is empty. - * - * \param inputs: ListBase of #BLI_genericNodeN(#GPUInput). - */ -GPUUniformBuffer *GPU_uniformbuffer_dynamic_create(ListBase *inputs, char err_out[256]) +static inline size_t buffer_size_from_list(ListBase *inputs) { - /* There is no point on creating an UBO if there is no arguments. */ - if (BLI_listbase_is_empty(inputs)) { - return NULL; - } - /* Make sure we comply to the ubo alignment requirements. */ - gpu_uniformbuffer_inputs_sort(inputs); - size_t buffer_size = 0; - LISTBASE_FOREACH (LinkData *, link, inputs) { const eGPUType gputype = get_padded_gpu_type(link); buffer_size += gputype * sizeof(float); @@ -197,8 +171,12 @@ GPUUniformBuffer *GPU_uniformbuffer_dynamic_create(ListBase *inputs, char err_ou /* Round up to size of vec4. (Opengl Requirement) */ size_t alignment = sizeof(float[4]); buffer_size = divide_ceil_u(buffer_size, alignment) * alignment; - void *data = MEM_mallocN(buffer_size, __func__); + return buffer_size; +} + +static inline void buffer_fill_from_list(void *data, ListBase *inputs) +{ /* Now that we know the total ubo size we can start populating it. 
*/ float *offset = (float *)data; LISTBASE_FOREACH (LinkData *, link, inputs) { @@ -206,71 +184,73 @@ GPUUniformBuffer *GPU_uniformbuffer_dynamic_create(ListBase *inputs, char err_ou memcpy(offset, input->vec, input->type * sizeof(float)); offset += get_padded_gpu_type(link); } - - /* Pass data as NULL for late init. */ - GPUUniformBuffer *ubo = GPU_uniformbuffer_create(buffer_size, NULL, err_out); - /* Data will be update just before binding. */ - ubo->data = data; - return ubo; } -static void gpu_uniformbuffer_init(GPUUniformBuffer *ubo) -{ - BLI_assert(ubo->bindcode == 0); - ubo->bindcode = GPU_buf_alloc(); +/** \} */ - if (ubo->bindcode == 0) { - fprintf(stderr, "GPUUniformBuffer: UBO create failed"); - BLI_assert(0); - return; - } +/* -------------------------------------------------------------------- */ +/** \name C-API + * \{ */ - glBindBuffer(GL_UNIFORM_BUFFER, ubo->bindcode); - glBufferData(GL_UNIFORM_BUFFER, ubo->size, NULL, GL_DYNAMIC_DRAW); -} +using namespace blender::gpu; -void GPU_uniformbuffer_update(GPUUniformBuffer *ubo, const void *data) +GPUUniformBuf *GPU_uniformbuf_create_ex(size_t size, const void *data, const char *name) { - if (ubo->bindcode == 0) { - gpu_uniformbuffer_init(ubo); + UniformBuf *ubo = GPUBackend::get()->uniformbuf_alloc(size, name); + /* Direct init. */ + if (data != NULL) { + ubo->update(data); } - - glBindBuffer(GL_UNIFORM_BUFFER, ubo->bindcode); - glBufferSubData(GL_UNIFORM_BUFFER, 0, ubo->size, data); - glBindBuffer(GL_UNIFORM_BUFFER, 0); + return reinterpret_cast<GPUUniformBuf *>(ubo); } -void GPU_uniformbuffer_bind(GPUUniformBuffer *ubo, int number) +/** + * Create UBO from inputs list. + * Return NULL if failed to create or if \param inputs: is empty. + * + * \param inputs: ListBase of #BLI_genericNodeN(#GPUInput). 
+ */ +GPUUniformBuf *GPU_uniformbuf_create_from_list(ListBase *inputs, const char *name) { - if (number >= GPU_max_ubo_binds()) { - fprintf(stderr, "Not enough UBO slots.\n"); - return; + /* There is no point on creating an UBO if there is no arguments. */ + if (BLI_listbase_is_empty(inputs)) { + return NULL; } - if (ubo->bindcode == 0) { - gpu_uniformbuffer_init(ubo); - } + buffer_from_list_inputs_sort(inputs); + size_t buffer_size = buffer_size_from_list(inputs); + void *data = MEM_mallocN(buffer_size, __func__); + buffer_fill_from_list(data, inputs); - if (ubo->data != NULL) { - GPU_uniformbuffer_update(ubo, ubo->data); - MEM_SAFE_FREE(ubo->data); - } + UniformBuf *ubo = GPUBackend::get()->uniformbuf_alloc(buffer_size, name); + /* Defer data upload. */ + ubo->attach_data(data); + return reinterpret_cast<GPUUniformBuf *>(ubo); +} - glBindBufferBase(GL_UNIFORM_BUFFER, number, ubo->bindcode); - ubo->bindpoint = number; +void GPU_uniformbuf_free(GPUUniformBuf *ubo) +{ + delete reinterpret_cast<UniformBuf *>(ubo); } -void GPU_uniformbuffer_unbind(GPUUniformBuffer *ubo) +void GPU_uniformbuf_update(GPUUniformBuf *ubo, const void *data) { -#ifndef NDEBUG - glBindBufferBase(GL_UNIFORM_BUFFER, ubo->bindpoint, 0); -#endif - ubo->bindpoint = 0; + reinterpret_cast<UniformBuf *>(ubo)->update(data); } -void GPU_uniformbuffer_unbind_all(void) +void GPU_uniformbuf_bind(GPUUniformBuf *ubo, int slot) { - for (int i = 0; i < GPU_max_ubo_binds(); i++) { - glBindBufferBase(GL_UNIFORM_BUFFER, i, 0); - } + reinterpret_cast<UniformBuf *>(ubo)->bind(slot); } + +void GPU_uniformbuf_unbind(GPUUniformBuf *ubo) +{ + reinterpret_cast<UniformBuf *>(ubo)->unbind(); +} + +void GPU_uniformbuf_unbind_all(void) +{ + /* FIXME */ +} + +/** \} */ diff --git a/source/blender/gpu/intern/gpu_uniform_buffer_private.hh b/source/blender/gpu/intern/gpu_uniform_buffer_private.hh new file mode 100644 index 00000000000..cf6447ccd37 --- /dev/null +++ b/source/blender/gpu/intern/gpu_uniform_buffer_private.hh @@ 
-0,0 +1,69 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2020, Blender Foundation. + */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include "BLI_sys_types.h" + +namespace blender { +namespace gpu { + +#ifdef DEBUG +# define DEBUG_NAME_LEN 64 +#else +# define DEBUG_NAME_LEN 8 +#endif + +/** + * Implementation of Uniform Buffers. + * Base class which is then specialized for each implementation (GL, VK, ...). + **/ +class UniformBuf { + protected: + /** Data size in bytes. */ + size_t size_in_bytes_; + /** Continuous memory block to copy to GPU. This data is owned by the UniformBuf. */ + void *data_ = NULL; + /** Debugging name */ + char name_[DEBUG_NAME_LEN]; + + public: + UniformBuf(size_t size, const char *name); + virtual ~UniformBuf(); + + virtual void update(const void *data) = 0; + virtual void bind(int slot) = 0; + virtual void unbind(void) = 0; + + /** Used to defer data upload at drawing time. + * This is useful if the thread has no context bound. + * This transfers ownership to this UniformBuf. 
*/ + void attach_data(void *data) + { + data_ = data; + } +}; + +#undef DEBUG_NAME_LEN + +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/intern/gpu_vertex_buffer.cc b/source/blender/gpu/intern/gpu_vertex_buffer.cc index 67ad8835b6a..debf9835c90 100644 --- a/source/blender/gpu/intern/gpu_vertex_buffer.cc +++ b/source/blender/gpu/intern/gpu_vertex_buffer.cc @@ -77,6 +77,7 @@ void GPU_vertbuf_init(GPUVertBuf *verts, GPUUsageType usage) memset(verts, 0, sizeof(GPUVertBuf)); verts->usage = usage; verts->dirty = true; + verts->handle_refcount = 1; } void GPU_vertbuf_init_with_format_ex(GPUVertBuf *verts, @@ -137,7 +138,23 @@ void GPU_vertbuf_clear(GPUVertBuf *verts) void GPU_vertbuf_discard(GPUVertBuf *verts) { GPU_vertbuf_clear(verts); - MEM_freeN(verts); + GPU_vertbuf_handle_ref_remove(verts); +} + +void GPU_vertbuf_handle_ref_add(GPUVertBuf *verts) +{ + verts->handle_refcount++; +} + +void GPU_vertbuf_handle_ref_remove(GPUVertBuf *verts) +{ + BLI_assert(verts->handle_refcount > 0); + verts->handle_refcount--; + if (verts->handle_refcount == 0) { + /* Should already have been cleared. 
*/ + BLI_assert(verts->vbo_id == 0 && verts->data == NULL); + MEM_freeN(verts); + } } uint GPU_vertbuf_size_get(const GPUVertBuf *verts) diff --git a/source/blender/gpu/intern/gpu_vertex_format.cc b/source/blender/gpu/intern/gpu_vertex_format.cc index 2e8017660d0..ed317b3a0df 100644 --- a/source/blender/gpu/intern/gpu_vertex_format.cc +++ b/source/blender/gpu/intern/gpu_vertex_format.cc @@ -24,7 +24,9 @@ */ #include "GPU_vertex_format.h" +#include "gpu_shader_private.hh" #include "gpu_vertex_format_private.h" + #include <stddef.h> #include <string.h> @@ -32,15 +34,14 @@ #include "BLI_string.h" #include "BLI_utildefines.h" -#include "GPU_shader.h" -#include "gpu_shader_private.h" - #define PACK_DEBUG 0 #if PACK_DEBUG # include <stdio.h> #endif +using namespace blender::gpu; + void GPU_vertformat_clear(GPUVertFormat *format) { #if TRUST_NO_ONE @@ -404,112 +405,8 @@ void VertexFormat_pack(GPUVertFormat *format) format->packed = true; } -static uint calc_component_size(const GLenum gl_type) +void GPU_vertformat_from_shader(GPUVertFormat *format, const struct GPUShader *gpushader) { - switch (gl_type) { - case GL_FLOAT_VEC2: - case GL_INT_VEC2: - case GL_UNSIGNED_INT_VEC2: - return 2; - case GL_FLOAT_VEC3: - case GL_INT_VEC3: - case GL_UNSIGNED_INT_VEC3: - return 3; - case GL_FLOAT_VEC4: - case GL_FLOAT_MAT2: - case GL_INT_VEC4: - case GL_UNSIGNED_INT_VEC4: - return 4; - case GL_FLOAT_MAT3: - return 9; - case GL_FLOAT_MAT4: - return 16; - case GL_FLOAT_MAT2x3: - case GL_FLOAT_MAT3x2: - return 6; - case GL_FLOAT_MAT2x4: - case GL_FLOAT_MAT4x2: - return 8; - case GL_FLOAT_MAT3x4: - case GL_FLOAT_MAT4x3: - return 12; - default: - return 1; - } -} - -static void get_fetch_mode_and_comp_type(int gl_type, - GPUVertCompType *r_comp_type, - GPUVertFetchMode *r_fetch_mode) -{ - switch (gl_type) { - case GL_FLOAT: - case GL_FLOAT_VEC2: - case GL_FLOAT_VEC3: - case GL_FLOAT_VEC4: - case GL_FLOAT_MAT2: - case GL_FLOAT_MAT3: - case GL_FLOAT_MAT4: - case GL_FLOAT_MAT2x3: - case 
GL_FLOAT_MAT2x4: - case GL_FLOAT_MAT3x2: - case GL_FLOAT_MAT3x4: - case GL_FLOAT_MAT4x2: - case GL_FLOAT_MAT4x3: - *r_comp_type = GPU_COMP_F32; - *r_fetch_mode = GPU_FETCH_FLOAT; - break; - case GL_INT: - case GL_INT_VEC2: - case GL_INT_VEC3: - case GL_INT_VEC4: - *r_comp_type = GPU_COMP_I32; - *r_fetch_mode = GPU_FETCH_INT; - break; - case GL_UNSIGNED_INT: - case GL_UNSIGNED_INT_VEC2: - case GL_UNSIGNED_INT_VEC3: - case GL_UNSIGNED_INT_VEC4: - *r_comp_type = GPU_COMP_U32; - *r_fetch_mode = GPU_FETCH_INT; - break; - default: - BLI_assert(0); - } -} - -void GPU_vertformat_from_shader(GPUVertFormat *format, const GPUShader *shader) -{ - GPU_vertformat_clear(format); - GPUVertAttr *attr = &format->attrs[0]; - - GLint attr_len; - glGetProgramiv(shader->program, GL_ACTIVE_ATTRIBUTES, &attr_len); - - for (int i = 0; i < attr_len; i++) { - char name[256]; - GLenum gl_type; - GLint size; - glGetActiveAttrib(shader->program, i, sizeof(name), NULL, &size, &gl_type, name); - - /* Ignore OpenGL names like `gl_BaseInstanceARB`, `gl_InstanceID` and `gl_VertexID`. 
*/ - if (glGetAttribLocation(shader->program, name) == -1) { - continue; - } - - format->name_len++; /* multiname support */ - format->attr_len++; - - GPUVertCompType comp_type; - GPUVertFetchMode fetch_mode; - get_fetch_mode_and_comp_type(gl_type, &comp_type, &fetch_mode); - - attr->names[attr->name_len++] = copy_attr_name(format, name); - attr->offset = 0; /* offsets & stride are calculated later (during pack) */ - attr->comp_len = calc_component_size(gl_type) * size; - attr->sz = attr->comp_len * 4; - attr->fetch_mode = fetch_mode; - attr->comp_type = comp_type; - attr += 1; - } + const Shader *shader = reinterpret_cast<const Shader *>(gpushader); + shader->vertformat_from_shader(format); } diff --git a/source/blender/gpu/intern/gpu_viewport.c b/source/blender/gpu/intern/gpu_viewport.c index ba938349761..fd1265dc2a6 100644 --- a/source/blender/gpu/intern/gpu_viewport.c +++ b/source/blender/gpu/intern/gpu_viewport.c @@ -41,7 +41,7 @@ #include "GPU_immediate.h" #include "GPU_matrix.h" #include "GPU_texture.h" -#include "GPU_uniformbuffer.h" +#include "GPU_uniform_buffer.h" #include "GPU_viewport.h" #include "DRW_engine.h" @@ -1020,8 +1020,8 @@ void GPU_viewport_free(GPUViewport *viewport) } for (int i = 0; i < viewport->vmempool.ubo_len; i++) { - GPU_uniformbuffer_free(viewport->vmempool.matrices_ubo[i]); - GPU_uniformbuffer_free(viewport->vmempool.obinfos_ubo[i]); + GPU_uniformbuf_free(viewport->vmempool.matrices_ubo[i]); + GPU_uniformbuf_free(viewport->vmempool.obinfos_ubo[i]); } MEM_SAFE_FREE(viewport->vmempool.matrices_ubo); MEM_SAFE_FREE(viewport->vmempool.obinfos_ubo); diff --git a/source/blender/gpu/opengl/gl_backend.hh b/source/blender/gpu/opengl/gl_backend.hh index f7c01b2f184..332350e47b5 100644 --- a/source/blender/gpu/opengl/gl_backend.hh +++ b/source/blender/gpu/opengl/gl_backend.hh @@ -27,7 +27,12 @@ #include "BLI_vector.hh" +#include "gl_batch.hh" #include "gl_context.hh" +#include "gl_drawlist.hh" +#include "gl_framebuffer.hh" +#include 
"gl_shader.hh" +#include "gl_uniform_buffer.hh" namespace blender { namespace gpu { @@ -37,11 +42,41 @@ class GLBackend : public GPUBackend { GLSharedOrphanLists shared_orphan_list_; public: + static GLBackend *get(void) + { + return static_cast<GLBackend *>(GPUBackend::get()); + } + GPUContext *context_alloc(void *ghost_window) { return new GLContext(ghost_window, shared_orphan_list_); }; + Batch *batch_alloc(void) + { + return new GLBatch(); + }; + + DrawList *drawlist_alloc(int list_length) + { + return new GLDrawList(list_length); + }; + + FrameBuffer *framebuffer_alloc(const char *name) + { + return new GLFrameBuffer(name); + }; + + Shader *shader_alloc(const char *name) + { + return new GLShader(name); + }; + + UniformBuf *uniformbuf_alloc(int size, const char *name) + { + return new GLUniformBuf(size, name); + }; + /* TODO remove */ void buf_free(GLuint buf_id); void tex_free(GLuint tex_id); diff --git a/source/blender/gpu/opengl/gl_batch.cc b/source/blender/gpu/opengl/gl_batch.cc new file mode 100644 index 00000000000..bb7d5654efd --- /dev/null +++ b/source/blender/gpu/opengl/gl_batch.cc @@ -0,0 +1,381 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. 
+ */ + +/** \file + * \ingroup gpu + * + * GL implementation of GPUBatch. + * The only specificity of GL here is that it caches a list of + * Vertex Array Objects based on the bound shader interface. + */ + +#include "BLI_assert.h" + +#include "glew-mx.h" + +#include "GPU_extensions.h" + +#include "gpu_batch_private.hh" +#include "gpu_shader_private.hh" + +#include "gl_batch.hh" +#include "gl_context.hh" +#include "gl_debug.hh" +#include "gl_primitive.hh" +#include "gl_vertex_array.hh" + +using namespace blender::gpu; + +/* -------------------------------------------------------------------- */ +/** \name Vao cache + * + * Each GLBatch has a small cache of VAO objects that are used to avoid VAO reconfiguration. + * TODO(fclem) Could be revisited to avoid so much cross references. + * \{ */ + +GLVaoCache::GLVaoCache(void) +{ + init(); +} + +GLVaoCache::~GLVaoCache() +{ + this->clear(); +} + +void GLVaoCache::init(void) +{ + context_ = NULL; + interface_ = NULL; + is_dynamic_vao_count = false; + for (int i = 0; i < GPU_VAO_STATIC_LEN; i++) { + static_vaos.interfaces[i] = NULL; + static_vaos.vao_ids[i] = 0; + } + vao_base_instance_ = 0; + base_instance_ = 0; + vao_id_ = 0; +} + +/* Create a new VAO object and store it in the cache. */ +void GLVaoCache::insert(const GLShaderInterface *interface, GLuint vao) +{ + /* Now insert the cache. */ + if (!is_dynamic_vao_count) { + int i; /* find first unused slot */ + for (i = 0; i < GPU_VAO_STATIC_LEN; i++) { + if (static_vaos.vao_ids[i] == 0) { + break; + } + } + + if (i < GPU_VAO_STATIC_LEN) { + static_vaos.interfaces[i] = interface; + static_vaos.vao_ids[i] = vao; + } + else { + /* Erase previous entries, they will be added back if drawn again. */ + for (int i = 0; i < GPU_VAO_STATIC_LEN; i++) { + if (static_vaos.interfaces[i] != NULL) { + const_cast<GLShaderInterface *>(static_vaos.interfaces[i])->ref_remove(this); + context_->vao_free(static_vaos.vao_ids[i]); + } + } + /* Not enough place switch to dynamic. 
*/ + is_dynamic_vao_count = true; + /* Init dynamic arrays and let the branch below set the values. */ + dynamic_vaos.count = GPU_BATCH_VAO_DYN_ALLOC_COUNT; + dynamic_vaos.interfaces = (const GLShaderInterface **)MEM_callocN( + dynamic_vaos.count * sizeof(GLShaderInterface *), "dyn vaos interfaces"); + dynamic_vaos.vao_ids = (GLuint *)MEM_callocN(dynamic_vaos.count * sizeof(GLuint), + "dyn vaos ids"); + } + } + + if (is_dynamic_vao_count) { + int i; /* find first unused slot */ + for (i = 0; i < dynamic_vaos.count; i++) { + if (dynamic_vaos.vao_ids[i] == 0) { + break; + } + } + + if (i == dynamic_vaos.count) { + /* Not enough place, realloc the array. */ + i = dynamic_vaos.count; + dynamic_vaos.count += GPU_BATCH_VAO_DYN_ALLOC_COUNT; + dynamic_vaos.interfaces = (const GLShaderInterface **)MEM_recallocN( + (void *)dynamic_vaos.interfaces, sizeof(GLShaderInterface *) * dynamic_vaos.count); + dynamic_vaos.vao_ids = (GLuint *)MEM_recallocN(dynamic_vaos.vao_ids, + sizeof(GLuint) * dynamic_vaos.count); + } + dynamic_vaos.interfaces[i] = interface; + dynamic_vaos.vao_ids[i] = vao; + } + + const_cast<GLShaderInterface *>(interface)->ref_add(this); +} + +void GLVaoCache::remove(const GLShaderInterface *interface) +{ + const int count = (is_dynamic_vao_count) ? dynamic_vaos.count : GPU_VAO_STATIC_LEN; + GLuint *vaos = (is_dynamic_vao_count) ? dynamic_vaos.vao_ids : static_vaos.vao_ids; + const GLShaderInterface **interfaces = (is_dynamic_vao_count) ? dynamic_vaos.interfaces : + static_vaos.interfaces; + for (int i = 0; i < count; i++) { + if (interfaces[i] == interface) { + context_->vao_free(vaos[i]); + vaos[i] = 0; + interfaces[i] = NULL; + break; /* cannot have duplicates */ + } + } +} + +void GLVaoCache::clear(void) +{ + GLContext *ctx = static_cast<GLContext *>(GPU_context_active_get()); + const int count = (is_dynamic_vao_count) ? dynamic_vaos.count : GPU_VAO_STATIC_LEN; + GLuint *vaos = (is_dynamic_vao_count) ? 
dynamic_vaos.vao_ids : static_vaos.vao_ids; + const GLShaderInterface **interfaces = (is_dynamic_vao_count) ? dynamic_vaos.interfaces : + static_vaos.interfaces; + /* Early out, nothing to free. */ + if (context_ == NULL) { + return; + } + + if (context_ == ctx) { + glDeleteVertexArrays(count, vaos); + glDeleteVertexArrays(1, &vao_base_instance_); + } + else { + /* TODO(fclem) Slow way. Could avoid multiple mutex lock here */ + for (int i = 0; i < count; i++) { + context_->vao_free(vaos[i]); + } + context_->vao_free(vao_base_instance_); + } + + for (int i = 0; i < count; i++) { + if (interfaces[i] != NULL) { + const_cast<GLShaderInterface *>(interfaces[i])->ref_remove(this); + } + } + + if (is_dynamic_vao_count) { + MEM_freeN((void *)dynamic_vaos.interfaces); + MEM_freeN(dynamic_vaos.vao_ids); + } + + if (context_) { + context_->vao_cache_unregister(this); + } + /* Reinit. */ + this->init(); +} + +/* Return 0 on cache miss (invalid VAO) */ +GLuint GLVaoCache::lookup(const GLShaderInterface *interface) +{ + const int count = (is_dynamic_vao_count) ? dynamic_vaos.count : GPU_VAO_STATIC_LEN; + const GLShaderInterface **interfaces = (is_dynamic_vao_count) ? dynamic_vaos.interfaces : + static_vaos.interfaces; + for (int i = 0; i < count; i++) { + if (interfaces[i] == interface) { + return (is_dynamic_vao_count) ? dynamic_vaos.vao_ids[i] : static_vaos.vao_ids[i]; + } + } + return 0; +} + +/* The GLVaoCache object is only valid for one GLContext. + * Reset the cache if trying to draw in another context; */ +void GLVaoCache::context_check(void) +{ + GLContext *ctx = static_cast<GLContext *>(GPU_context_active_get()); + BLI_assert(ctx); + + if (context_ != ctx) { + if (context_ != NULL) { + /* IMPORTANT: Trying to draw a batch in multiple different context will trash the VAO cache. + * This has major performance impact and should be avoided in most cases. 
*/ + context_->vao_cache_unregister(this); + } + this->clear(); + context_ = ctx; + context_->vao_cache_register(this); + } +} + +GLuint GLVaoCache::base_instance_vao_get(GPUBatch *batch, int i_first) +{ + this->context_check(); + /* Make sure the interface is up to date. */ + Shader *shader = GPU_context_active_get()->shader; + GLShaderInterface *interface = static_cast<GLShaderInterface *>(shader->interface); + if (interface_ != interface) { + vao_get(batch); + /* Trigger update. */ + base_instance_ = 0; + } + /** + * There seems to be a nasty bug when drawing using the same VAO reconfiguring (T71147). + * We just use a throwaway VAO for that. Note that this is likely to degrade performance. + **/ +#ifdef __APPLE__ + glDeleteVertexArrays(1, &vao_base_instance_); + vao_base_instance_ = 0; + base_instance_ = 0; +#endif + + if (vao_base_instance_ == 0) { + glGenVertexArrays(1, &vao_base_instance_); + } + + if (base_instance_ != i_first) { + base_instance_ = i_first; + GLVertArray::update_bindings(vao_base_instance_, batch, interface_, i_first); + } + return vao_base_instance_; +} + +GLuint GLVaoCache::vao_get(GPUBatch *batch) +{ + this->context_check(); + + Shader *shader = GPU_context_active_get()->shader; + GLShaderInterface *interface = static_cast<GLShaderInterface *>(shader->interface); + if (interface_ != interface) { + interface_ = interface; + vao_id_ = this->lookup(interface_); + + if (vao_id_ == 0) { + /* Cache miss, create a new VAO. 
*/ + glGenVertexArrays(1, &vao_id_); + this->insert(interface_, vao_id_); + GLVertArray::update_bindings(vao_id_, batch, interface_, 0); + } + } + + return vao_id_; +} +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Creation & Deletion + * \{ */ + +GLBatch::GLBatch(void) +{ +} + +GLBatch::~GLBatch() +{ +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Drawing + * \{ */ + +#if GPU_TRACK_INDEX_RANGE +# define BASE_INDEX(el) ((el)->base_index) +# define INDEX_TYPE(el) ((el)->gl_index_type) +#else +# define BASE_INDEX(el) 0 +# define INDEX_TYPE(el) GL_UNSIGNED_INT +#endif + +void GLBatch::bind(int i_first) +{ + GPU_context_active_get()->state_manager->apply_state(); + + if (flag & GPU_BATCH_DIRTY) { + flag &= ~GPU_BATCH_DIRTY; + vao_cache_.clear(); + } + +#if GPU_TRACK_INDEX_RANGE + /* Can be removed if GL 4.3 is required. */ + if (!GLEW_ARB_ES3_compatibility && (elem != NULL)) { + glPrimitiveRestartIndex((elem->index_type == GPU_INDEX_U16) ? 0xFFFFu : 0xFFFFFFFFu); + } +#endif + + /* Can be removed if GL 4.2 is required. 
*/ + if (!GPU_arb_base_instance_is_supported() && (i_first > 0)) { + glBindVertexArray(vao_cache_.base_instance_vao_get(this, i_first)); + } + else { + glBindVertexArray(vao_cache_.vao_get(this)); + } +} + +void GLBatch::draw(int v_first, int v_count, int i_first, int i_count) +{ + GL_CHECK_RESOURCES("Batch"); + GL_CHECK_ERROR("Batch Pre drawing"); + + this->bind(i_first); + + BLI_assert(v_count > 0 && i_count > 0); + + GLenum gl_type = to_gl(prim_type); + + if (elem) { + const GPUIndexBuf *el = elem; + GLenum index_type = INDEX_TYPE(el); + GLint base_index = BASE_INDEX(el); + void *v_first_ofs = (GLuint *)0 + v_first + el->index_start; + +#if GPU_TRACK_INDEX_RANGE + if (el->index_type == GPU_INDEX_U16) { + v_first_ofs = (GLushort *)0 + v_first + el->index_start; + } +#endif + + if (GPU_arb_base_instance_is_supported()) { + glDrawElementsInstancedBaseVertexBaseInstance( + gl_type, v_count, index_type, v_first_ofs, i_count, base_index, i_first); + } + else { + glDrawElementsInstancedBaseVertex( + gl_type, v_count, index_type, v_first_ofs, i_count, base_index); + } + GL_CHECK_ERROR("Batch Post-drawing Indexed"); + } + else { +#ifdef __APPLE__ + glDisable(GL_PRIMITIVE_RESTART); +#endif + if (GPU_arb_base_instance_is_supported()) { + glDrawArraysInstancedBaseInstance(gl_type, v_first, v_count, i_count, i_first); + } + else { + glDrawArraysInstanced(gl_type, v_first, v_count, i_count); + } +#ifdef __APPLE__ + glEnable(GL_PRIMITIVE_RESTART); +#endif + GL_CHECK_ERROR("Batch Post-drawing Non-indexed"); + } +} + +/** \} */ diff --git a/source/blender/gpu/opengl/gl_batch.hh b/source/blender/gpu/opengl/gl_batch.hh new file mode 100644 index 00000000000..9399148c68d --- /dev/null +++ b/source/blender/gpu/opengl/gl_batch.hh @@ -0,0 +1,106 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) 
any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2020, Blender Foundation. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + * + * GPU geometry batch + * Contains VAOs + VBOs + Shader representing a drawable entity. + */ + +#pragma once + +#include "MEM_guardedalloc.h" + +#include "gpu_batch_private.hh" + +#include "glew-mx.h" + +namespace blender { +namespace gpu { + +class GLContext; +class GLShaderInterface; + +#define GPU_VAO_STATIC_LEN 3 + +/* Vao management: remembers all geometry state (vertex attribute bindings & element buffer) + * for each shader interface. Start with a static number of vaos and fallback to dynamic count + * if necessary. Once a batch goes dynamic it does not go back. */ +class GLVaoCache { + private: + /** Context for which the vao_cache_ was generated. */ + GLContext *context_ = NULL; + /** Last interface this batch was drawn with. */ + GLShaderInterface *interface_ = NULL; + /** Cached vao for the last interface. */ + GLuint vao_id_ = 0; + /** Used whend arb_base_instance is not supported. 
*/ + GLuint vao_base_instance_ = 0; + int base_instance_ = 0; + + bool is_dynamic_vao_count = false; + union { + /** Static handle count */ + struct { + const GLShaderInterface *interfaces[GPU_VAO_STATIC_LEN]; + GLuint vao_ids[GPU_VAO_STATIC_LEN]; + } static_vaos; + /** Dynamic handle count */ + struct { + uint count; + const GLShaderInterface **interfaces; + GLuint *vao_ids; + } dynamic_vaos; + }; + + public: + GLVaoCache(); + ~GLVaoCache(); + + GLuint vao_get(GPUBatch *batch); + GLuint base_instance_vao_get(GPUBatch *batch, int i_first); + + GLuint lookup(const GLShaderInterface *interface); + void insert(const GLShaderInterface *interface, GLuint vao_id); + void remove(const GLShaderInterface *interface); + void clear(void); + + private: + void init(void); + void context_check(void); +}; + +class GLBatch : public Batch { + public: + /** All vaos corresponding to all the GPUShaderInterface this batch was drawn with. */ + GLVaoCache vao_cache_; + + public: + GLBatch(); + ~GLBatch(); + + void draw(int v_first, int v_count, int i_first, int i_count) override; + void bind(int i_first); + + MEM_CXX_CLASS_ALLOC_FUNCS("GLBatch"); +}; + +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/opengl/gl_context.cc b/source/blender/gpu/opengl/gl_context.cc index 37c84abaa7f..1495e665aa8 100644 --- a/source/blender/gpu/opengl/gl_context.cc +++ b/source/blender/gpu/opengl/gl_context.cc @@ -22,14 +22,22 @@ */ #include "BLI_assert.h" +#include "BLI_system.h" #include "BLI_utildefines.h" +#include "BKE_global.h" + #include "GPU_framebuffer.h" #include "GHOST_C-api.h" #include "gpu_context_private.hh" +#include "gl_debug.hh" +#include "gl_immediate.hh" +#include "gl_state.hh" +#include "gl_uniform_buffer.hh" + #include "gl_backend.hh" /* TODO remove */ #include "gl_context.hh" @@ -43,17 +51,50 @@ using namespace blender::gpu; GLContext::GLContext(void *ghost_window, GLSharedOrphanLists &shared_orphan_list) : shared_orphan_list_(shared_orphan_list) { - 
default_framebuffer_ = ghost_window ? - GHOST_GetDefaultOpenGLFramebuffer((GHOST_WindowHandle)ghost_window) : - 0; - - glGenVertexArrays(1, &default_vao_); + if (G.debug & G_DEBUG_GPU) { + debug::init_gl_callbacks(); + } float data[4] = {0.0f, 0.0f, 0.0f, 1.0f}; glGenBuffers(1, &default_attr_vbo_); glBindBuffer(GL_ARRAY_BUFFER, default_attr_vbo_); glBufferData(GL_ARRAY_BUFFER, sizeof(data), data, GL_STATIC_DRAW); glBindBuffer(GL_ARRAY_BUFFER, 0); + + state_manager = new GLStateManager(); + imm = new GLImmediate(); + ghost_window_ = ghost_window; + + if (ghost_window) { + GLuint default_fbo = GHOST_GetDefaultOpenGLFramebuffer((GHOST_WindowHandle)ghost_window); + GHOST_RectangleHandle bounds = GHOST_GetClientBounds((GHOST_WindowHandle)ghost_window); + int w = GHOST_GetWidthRectangle(bounds); + int h = GHOST_GetHeightRectangle(bounds); + GHOST_DisposeRectangle(bounds); + + if (default_fbo != 0) { + front_left = new GLFrameBuffer("front_left", this, GL_COLOR_ATTACHMENT0, default_fbo, w, h); + back_left = new GLFrameBuffer("back_left", this, GL_COLOR_ATTACHMENT0, default_fbo, w, h); + } + else { + front_left = new GLFrameBuffer("front_left", this, GL_FRONT_LEFT, 0, w, h); + back_left = new GLFrameBuffer("back_left", this, GL_BACK_LEFT, 0, w, h); + } + /* TODO(fclem) enable is supported. */ + const bool supports_stereo_quad_buffer = false; + if (supports_stereo_quad_buffer) { + front_right = new GLFrameBuffer("front_right", this, GL_FRONT_RIGHT, 0, w, h); + back_right = new GLFrameBuffer("back_right", this, GL_BACK_RIGHT, 0, w, h); + } + } + else { + /* For offscreen contexts. Default framebuffer is NULL. */ + back_left = new GLFrameBuffer("back_left", this, GL_NONE, 0, 0, 0); + } + + active_fb = back_left; + static_cast<GLStateManager *>(state_manager)->active_fb = static_cast<GLFrameBuffer *>( + back_left); } GLContext::~GLContext() @@ -63,10 +104,9 @@ GLContext::~GLContext() /* For now don't allow GPUFrameBuffers to be reuse in another context. 
*/ BLI_assert(framebuffers_.is_empty()); /* Delete vaos so the batch can be reused in another context. */ - for (GPUBatch *batch : batches_) { - GPU_batch_vao_cache_clear(batch); + for (GLVaoCache *cache : vao_caches_) { + cache->clear(); } - glDeleteVertexArrays(1, &default_vao_); glDeleteBuffers(1, &default_attr_vbo_); } @@ -86,6 +126,31 @@ void GLContext::activate(void) /* Clear accumulated orphans. */ orphans_clear(); + + if (ghost_window_) { + /* Get the correct framebuffer size for the internal framebuffers. */ + GHOST_RectangleHandle bounds = GHOST_GetClientBounds((GHOST_WindowHandle)ghost_window_); + int w = GHOST_GetWidthRectangle(bounds); + int h = GHOST_GetHeightRectangle(bounds); + GHOST_DisposeRectangle(bounds); + + if (front_left) { + front_left->size_set(w, h); + } + if (back_left) { + back_left->size_set(w, h); + } + if (front_right) { + front_right->size_set(w, h); + } + if (back_right) { + back_right->size_set(w, h); + } + } + + /* Not really following the state but we should consider + * no ubo bound when activating a context. */ + bound_ubo_slots = 0; } void GLContext::deactivate(void) @@ -193,47 +258,22 @@ void GLBackend::tex_free(GLuint tex_id) /** \name Linked object deletion * * These objects contain data that are stored per context. We - * need to do some cleanup if they are used accross context or if context + * need to do some cleanup if they are used across context or if context * is discarded. * \{ */ -void GLContext::batch_register(struct GPUBatch *batch) -{ - lists_mutex_.lock(); - batches_.add(batch); - lists_mutex_.unlock(); -} - -void GLContext::batch_unregister(struct GPUBatch *batch) -{ - /* vao_cache_clear() can acquire lists_mutex_ so avoid deadlock. 
*/ - // reinterpret_cast<GLBatch *>(batch)->vao_cache_clear(); - - lists_mutex_.lock(); - batches_.remove(batch); - lists_mutex_.unlock(); -} - -void GLContext::framebuffer_register(struct GPUFrameBuffer *fb) +void GLContext::vao_cache_register(GLVaoCache *cache) { -#ifdef DEBUG lists_mutex_.lock(); - framebuffers_.add(fb); + vao_caches_.add(cache); lists_mutex_.unlock(); -#else - UNUSED_VARS(fb); -#endif } -void GLContext::framebuffer_unregister(struct GPUFrameBuffer *fb) +void GLContext::vao_cache_unregister(GLVaoCache *cache) { -#ifdef DEBUG lists_mutex_.lock(); - framebuffers_.remove(fb); + vao_caches_.remove(cache); lists_mutex_.unlock(); -#else - UNUSED_VARS(fb); -#endif } /** \} */ diff --git a/source/blender/gpu/opengl/gl_context.hh b/source/blender/gpu/opengl/gl_context.hh index 3b55965b9d1..bc7e2060804 100644 --- a/source/blender/gpu/opengl/gl_context.hh +++ b/source/blender/gpu/opengl/gl_context.hh @@ -25,19 +25,20 @@ #include "gpu_context_private.hh" +#include "GPU_framebuffer.h" + #include "BLI_set.hh" #include "BLI_vector.hh" #include "glew-mx.h" -#include <iostream> #include <mutex> -#include <unordered_set> -#include <vector> namespace blender { namespace gpu { +class GLVaoCache; + class GLSharedOrphanLists { public: /** Mutex for the bellow structures. */ @@ -51,19 +52,19 @@ class GLSharedOrphanLists { }; class GLContext : public GPUContext { + public: + /** Used for debugging purpose. Bitflags of all bound slots. */ + uint16_t bound_ubo_slots; + /* TODO(fclem) these needs to become private. */ public: - /** Default VAO for procedural draw calls. */ - GLuint default_vao_; - /** Default framebuffer object for some GL implementation. */ - GLuint default_framebuffer_; /** VBO for missing vertex attrib binding. Avoid undefined behavior on some implementation. */ GLuint default_attr_vbo_; /** * GPUBatch & GPUFramebuffer have references to the context they are from, in the case the * context is destroyed, we need to remove any reference to it. 
*/ - Set<GPUBatch *> batches_; + Set<GLVaoCache *> vao_caches_; Set<GPUFrameBuffer *> framebuffers_; /** Mutex for the bellow structures. */ std::mutex lists_mutex_; @@ -77,6 +78,8 @@ class GLContext : public GPUContext { GLContext(void *ghost_window, GLSharedOrphanLists &shared_orphan_list); ~GLContext(); + static void check_error(const char *info); + void activate(void) override; void deactivate(void) override; @@ -87,10 +90,8 @@ class GLContext : public GPUContext { void vao_free(GLuint vao_id); void fbo_free(GLuint fbo_id); - void batch_register(struct GPUBatch *batch); - void batch_unregister(struct GPUBatch *batch); - void framebuffer_register(struct GPUFrameBuffer *fb); - void framebuffer_unregister(struct GPUFrameBuffer *fb); + void vao_cache_register(GLVaoCache *cache); + void vao_cache_unregister(GLVaoCache *cache); }; } // namespace gpu diff --git a/source/blender/gpu/opengl/gl_debug.cc b/source/blender/gpu/opengl/gl_debug.cc new file mode 100644 index 00000000000..c1a3780bb51 --- /dev/null +++ b/source/blender/gpu/opengl/gl_debug.cc @@ -0,0 +1,207 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2005 Blender Foundation. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + * + * Debug features of OpenGL. 
+ */ + +#include "BLI_compiler_attrs.h" +#include "BLI_string.h" +#include "BLI_system.h" +#include "BLI_utildefines.h" + +#include "glew-mx.h" + +#include "gl_context.hh" +#include "gl_uniform_buffer.hh" + +#include "gl_debug.hh" + +#include <stdio.h> + +namespace blender::gpu::debug { + +/* -------------------------------------------------------------------- */ +/** \name Debug Callbacks + * + * Hooks up debug callbacks to a debug OpenGL context using extensions or 4.3 core debug + * capabiliities. + * \{ */ + +/* Debug callbacks need the same calling convention as OpenGL functions. */ +#if defined(_WIN32) +# define APIENTRY __stdcall +#else +# define APIENTRY +#endif + +#define VERBOSE 1 + +static void APIENTRY debug_callback(GLenum UNUSED(source), + GLenum type, + GLuint UNUSED(id), + GLenum severity, + GLsizei UNUSED(length), + const GLchar *message, + const GLvoid *UNUSED(userParm)) +{ + const char format[] = "GPUDebug: %s%s\033[0m\n"; + + if (ELEM(severity, GL_DEBUG_SEVERITY_LOW, GL_DEBUG_SEVERITY_NOTIFICATION)) { + if (VERBOSE) { + fprintf(stderr, format, "\033[2m", message); + } + } + else { + switch (type) { + case GL_DEBUG_TYPE_ERROR: + case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR: + case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR: + fprintf(stderr, format, "\033[31;1mError\033[39m: ", message); + break; + case GL_DEBUG_TYPE_PORTABILITY: + case GL_DEBUG_TYPE_PERFORMANCE: + case GL_DEBUG_TYPE_OTHER: + case GL_DEBUG_TYPE_MARKER: /* KHR has this, ARB does not */ + default: + fprintf(stderr, format, "\033[33;1mWarning\033[39m: ", message); + break; + } + + if (VERBOSE && severity == GL_DEBUG_SEVERITY_HIGH) { + /* Focus on error message. 
*/ + fprintf(stderr, "\033[2m"); + BLI_system_backtrace(stderr); + fprintf(stderr, "\033[0m\n"); + fflush(stderr); + } + } +} + +#undef APIENTRY + +void init_gl_callbacks(void) +{ +#ifdef __APPLE__ + fprintf(stderr, "GPUDebug: OpenGL debug callback is not available on Apple\n"); + return; +#endif /* not Apple */ + + char msg[256] = ""; + const char format[] = "Successfully hooked OpenGL debug callback using %s"; + + if (GLEW_VERSION_4_3 || GLEW_KHR_debug) { + SNPRINTF(msg, format, GLEW_VERSION_4_3 ? "OpenGL 4.3" : "KHR_debug extension"); + glEnable(GL_DEBUG_OUTPUT); + glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); + glDebugMessageCallback((GLDEBUGPROC)debug_callback, NULL); + glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, GL_TRUE); + glDebugMessageInsert(GL_DEBUG_SOURCE_APPLICATION, + GL_DEBUG_TYPE_MARKER, + 0, + GL_DEBUG_SEVERITY_NOTIFICATION, + -1, + msg); + } + else if (GLEW_ARB_debug_output) { + SNPRINTF(msg, format, "ARB_debug_output"); + glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); + glDebugMessageCallbackARB((GLDEBUGPROCARB)debug_callback, NULL); + glDebugMessageControlARB(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, GL_TRUE); + glDebugMessageInsertARB(GL_DEBUG_SOURCE_APPLICATION_ARB, + GL_DEBUG_TYPE_OTHER_ARB, + 0, + GL_DEBUG_SEVERITY_LOW_ARB, + -1, + msg); + } + else { + fprintf(stderr, "GPUDebug: Failed to hook OpenGL debug callback\n"); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Error Checking + * + * This is only useful for implementation that does not support the KHR_debug extension OR when the + * implementations do not report any errors even when clearly doing shady things. 
+ * \{ */ + +void check_gl_error(const char *info) +{ + GLenum error = glGetError(); + +#define ERROR_CASE(err) \ + case err: { \ + char msg[256]; \ + SNPRINTF(msg, "%s : %s", #err, info); \ + debug_callback(0, GL_DEBUG_TYPE_ERROR, 0, GL_DEBUG_SEVERITY_HIGH, 0, msg, NULL); \ + break; \ + } + + switch (error) { + ERROR_CASE(GL_INVALID_ENUM) + ERROR_CASE(GL_INVALID_VALUE) + ERROR_CASE(GL_INVALID_OPERATION) + ERROR_CASE(GL_INVALID_FRAMEBUFFER_OPERATION) + ERROR_CASE(GL_OUT_OF_MEMORY) + ERROR_CASE(GL_STACK_UNDERFLOW) + ERROR_CASE(GL_STACK_OVERFLOW) + case GL_NO_ERROR: + break; + default: + char msg[256]; + SNPRINTF(msg, "Unknown GL error: %x : %s", error, info); + debug_callback(0, GL_DEBUG_TYPE_ERROR, 0, GL_DEBUG_SEVERITY_HIGH, 0, msg, NULL); + break; + } +} + +void check_gl_resources(const char *info) +{ + GLContext *ctx = static_cast<GLContext *>(GPU_context_active_get()); + ShaderInterface *interface = ctx->shader->interface; + /* NOTE: This only check binding. To be valid, the bound ubo needs to + * be big enough to feed the data range the shader awaits. */ + uint16_t ubo_needed = interface->enabled_ubo_mask_; + ubo_needed &= ~ctx->bound_ubo_slots; + + if (ubo_needed == 0) { + return; + } + + for (int i = 0; ubo_needed != 0; i++, ubo_needed >>= 1) { + if ((ubo_needed & 1) != 0) { + const ShaderInput *ubo_input = interface->ubo_get(i); + const char *ubo_name = interface->input_name_get(ubo_input); + const char *sh_name = ctx->shader->name_get(); + char msg[256]; + SNPRINTF(msg, "Missing UBO bind at slot %d : %s > %s : %s", i, sh_name, ubo_name, info); + debug_callback(0, GL_DEBUG_TYPE_ERROR, 0, GL_DEBUG_SEVERITY_HIGH, 0, msg, NULL); + } + } +} + +/** \} */ + +} // namespace blender::gpu::debug
\ No newline at end of file diff --git a/source/blender/gpu/intern/gpu_primitive_private.h b/source/blender/gpu/opengl/gl_debug.hh index e91eec18786..dd98505ebc1 100644 --- a/source/blender/gpu/intern/gpu_primitive_private.h +++ b/source/blender/gpu/opengl/gl_debug.hh @@ -12,26 +12,35 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * The Original Code is Copyright (C) 2016 by Mike Erwin. - * All rights reserved. */ /** \file * \ingroup gpu - * - * GPU geometric primitives */ #pragma once -#ifdef __cplusplus -extern "C" { -#endif +namespace blender { +namespace gpu { +namespace debug { -/* TODO(fclem) move to OGL backend */ -GLenum convert_prim_type_to_gl(GPUPrimType); +/* Enabled on MacOS by default since there is no support for debug callbacks. */ +#if defined(DEBUG) && defined(__APPLE__) +# define GL_CHECK_ERROR(info) debug::check_gl_error(info) +#else +# define GL_CHECK_ERROR(info) +#endif -#ifdef __cplusplus -} +#ifdef DEBUG +# define GL_CHECK_RESOURCES(info) debug::check_gl_resources(info) +#else +# define GL_CHECK_RESOURCES(info) #endif + +void check_gl_error(const char *info); +void check_gl_resources(const char *info); +void init_gl_callbacks(void); + +} // namespace debug +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/opengl/gl_drawlist.cc b/source/blender/gpu/opengl/gl_drawlist.cc new file mode 100644 index 00000000000..35fecc859b8 --- /dev/null +++ b/source/blender/gpu/opengl/gl_drawlist.cc @@ -0,0 +1,247 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + * + * Implementation of Multi Draw Indirect using OpenGL. + * Fallback if the needed extensions are not supported. + */ + +#include "BLI_assert.h" + +#include "GPU_batch.h" +#include "GPU_extensions.h" + +#include "glew-mx.h" + +#include "gpu_context_private.hh" +#include "gpu_drawlist_private.hh" + +#include "gl_backend.hh" +#include "gl_drawlist.hh" +#include "gl_primitive.hh" + +#include <limits.h> + +#define USE_MULTI_DRAW_INDIRECT 1 + +/* TODO remove. 
*/ +#if GPU_TRACK_INDEX_RANGE +# define BASE_INDEX(el) ((el)->base_index) +# define INDEX_TYPE(el) ((el)->gl_index_type) +#else +# define BASE_INDEX(el) 0 +# define INDEX_TYPE(el) GL_UNSIGNED_INT +#endif + +using namespace blender::gpu; + +typedef struct GLDrawCommand { + GLuint v_count; + GLuint i_count; + GLuint v_first; + GLuint i_first; +} GLDrawCommand; + +typedef struct GLDrawCommandIndexed { + GLuint v_count; + GLuint i_count; + GLuint v_first; + GLuint base_index; + GLuint i_first; +} GLDrawCommandIndexed; + +#define MDI_ENABLED (buffer_size_ != 0) +#define MDI_DISABLED (buffer_size_ == 0) +#define MDI_INDEXED (base_index_ != UINT_MAX) + +GLDrawList::GLDrawList(int length) +{ + BLI_assert(length > 0); + batch_ = NULL; + buffer_id_ = 0; + command_len_ = 0; + command_offset_ = 0; + data_offset_ = 0; + data_size_ = 0; + data_ = NULL; + + if (USE_MULTI_DRAW_INDIRECT && GLEW_ARB_multi_draw_indirect && + GPU_arb_base_instance_is_supported()) { + /* Alloc the biggest possible command list, which is indexed. */ + buffer_size_ = sizeof(GLDrawCommandIndexed) * length; + } + else { + /* Indicates MDI is not supported. */ + buffer_size_ = 0; + } +} + +GLDrawList::~GLDrawList() +{ + /* TODO This ... */ + static_cast<GLBackend *>(GPUBackend::get())->buf_free(buffer_id_); + /* ... should be this. */ + // context_->buf_free(buffer_id_) +} + +void GLDrawList::init(void) +{ + BLI_assert(GPU_context_active_get()); + BLI_assert(MDI_ENABLED); + BLI_assert(data_ == NULL); + batch_ = NULL; + command_len_ = 0; + + if (buffer_id_ == 0) { + /* Allocate on first use. */ + glGenBuffers(1, &buffer_id_); + context_ = static_cast<GLContext *>(GPU_context_active_get()); + } + + glBindBuffer(GL_DRAW_INDIRECT_BUFFER, buffer_id_); + /* If buffer is full, orphan buffer data and start fresh. */ + // if (command_offset_ >= data_size_) { + glBufferData(GL_DRAW_INDIRECT_BUFFER, buffer_size_, NULL, GL_DYNAMIC_DRAW); + data_offset_ = 0; + // } + /* Map the remaining range. 
*/ + GLbitfield flag = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_FLUSH_EXPLICIT_BIT; + data_size_ = buffer_size_ - data_offset_; + data_ = (GLbyte *)glMapBufferRange(GL_DRAW_INDIRECT_BUFFER, data_offset_, data_size_, flag); + command_offset_ = 0; +} + +void GLDrawList::append(GPUBatch *batch, int i_first, int i_count) +{ + /* Fallback when MultiDrawIndirect is not supported/enabled. */ + if (MDI_DISABLED) { + GPU_batch_draw_advanced(batch, 0, 0, i_first, i_count); + return; + } + + if (data_ == NULL) { + this->init(); + } + + if (batch != batch_) { + // BLI_assert(batch->flag | GPU_BATCH_INIT); + this->submit(); + batch_ = batch; + /* Cached for faster access. */ + base_index_ = batch->elem ? BASE_INDEX(batch->elem) : UINT_MAX; + v_first_ = batch->elem ? batch->elem->index_start : 0; + v_count_ = batch->elem ? batch->elem->index_len : batch->verts[0]->vertex_len; + } + + if (v_count_ == 0) { + /* Nothing to draw. */ + return; + } + + if (MDI_INDEXED) { + GLDrawCommandIndexed *cmd = reinterpret_cast<GLDrawCommandIndexed *>(data_ + command_offset_); + cmd->v_first = v_first_; + cmd->v_count = v_count_; + cmd->i_count = i_count; + cmd->base_index = base_index_; + cmd->i_first = i_first; + command_offset_ += sizeof(GLDrawCommandIndexed); + } + else { + GLDrawCommand *cmd = reinterpret_cast<GLDrawCommand *>(data_ + command_offset_); + cmd->v_first = v_first_; + cmd->v_count = v_count_; + cmd->i_count = i_count; + cmd->i_first = i_first; + command_offset_ += sizeof(GLDrawCommand); + } + + command_len_++; + + if (command_offset_ >= data_size_) { + this->submit(); + } +} + +void GLDrawList::submit(void) +{ + if (command_len_ == 0) { + return; + } + /* Something's wrong if we get here without MDI support. */ + BLI_assert(MDI_ENABLED); + BLI_assert(data_); + BLI_assert(GPU_context_active_get()->shader != NULL); + + GLBatch *batch = static_cast<GLBatch *>(batch_); + + /* Only do multi-draw indirect if doing more than 2 drawcall. 
This avoids the overhead of + * buffer mapping if scene is not very instance friendly. BUT we also need to take into + * account the + * case where only a few instances are needed to finish filling a call buffer. */ + const bool is_finishing_a_buffer = (command_offset_ >= data_size_); + if (command_len_ > 2 || is_finishing_a_buffer) { + GLenum prim = to_gl(batch_->prim_type); + void *offset = (void *)data_offset_; + + glBindBuffer(GL_DRAW_INDIRECT_BUFFER, buffer_id_); + glFlushMappedBufferRange(GL_DRAW_INDIRECT_BUFFER, 0, command_offset_); + glUnmapBuffer(GL_DRAW_INDIRECT_BUFFER); + data_ = NULL; /* Unmapped */ + data_offset_ += command_offset_; + + batch->bind(0); + + if (MDI_INDEXED) { + glMultiDrawElementsIndirect(prim, INDEX_TYPE(batch_->elem), offset, command_len_, 0); + } + else { + glMultiDrawArraysIndirect(prim, offset, command_len_, 0); + } + } + else { + /* Fallback do simple drawcalls, and don't unmap the buffer. */ + if (MDI_INDEXED) { + GLDrawCommandIndexed *cmd = (GLDrawCommandIndexed *)data_; + for (int i = 0; i < command_len_; i++, cmd++) { + /* Index start was already added. Avoid counting it twice. */ + cmd->v_first -= batch->elem->index_start; + batch->draw(cmd->v_first, cmd->v_count, cmd->i_first, cmd->i_count); + } + /* Reuse the same data. */ + command_offset_ -= command_len_ * sizeof(GLDrawCommandIndexed); + } + else { + GLDrawCommand *cmd = (GLDrawCommand *)data_; + for (int i = 0; i < command_len_; i++, cmd++) { + batch->draw(cmd->v_first, cmd->v_count, cmd->i_first, cmd->i_count); + } + /* Reuse the same data. */ + command_offset_ -= command_len_ * sizeof(GLDrawCommand); + } + } + /* Do not submit this buffer again. */ + command_len_ = 0; + /* Avoid keeping reference to the batch. 
*/ + batch_ = NULL; +} + +/** \} */ diff --git a/source/blender/gpu/opengl/gl_drawlist.hh b/source/blender/gpu/opengl/gl_drawlist.hh new file mode 100644 index 00000000000..b690b8f8a98 --- /dev/null +++ b/source/blender/gpu/opengl/gl_drawlist.hh @@ -0,0 +1,86 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2020 Blender Foundation. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + * + * Implementation of Multi Draw Indirect using OpenGL. + * Fallback if the needed extensions are not supported. + */ + +#pragma once + +#include "MEM_guardedalloc.h" + +#include "BLI_sys_types.h" + +#include "GPU_batch.h" +#include "GPU_glew.h" + +#include "gpu_drawlist_private.hh" + +#include "gl_context.hh" + +namespace blender { +namespace gpu { + +/** + * Implementation of Multi Draw Indirect using OpenGL. + **/ +class GLDrawList : public DrawList { + public: + GLDrawList(int length); + ~GLDrawList(); + + void append(GPUBatch *batch, int i_first, int i_count) override; + void submit(void) override; + + private: + void init(void); + + /** Batch for which we are recording commands for. */ + GPUBatch *batch_; + /** Mapped memory bounds. */ + GLbyte *data_; + /** Length of the mapped buffer (in byte). 
*/ + GLsizeiptr data_size_; + /** Current offset inside the mapped buffer (in byte). */ + GLintptr command_offset_; + /** Current number of command recorded inside the mapped buffer. */ + uint command_len_; + /** Is UINT_MAX if not drawing indexed geom. Also Avoid dereferencing batch. */ + GLuint base_index_; + /** Also Avoid dereferencing batch. */ + GLuint v_first_, v_count_; + + /** GL Indirect Buffer id. 0 means MultiDrawIndirect is not supported/enabled. */ + GLuint buffer_id_; + /** Length of whole the buffer (in byte). */ + GLsizeiptr buffer_size_; + /** Offset of data_ inside the whole buffer (in byte). */ + GLintptr data_offset_; + + /** To free the buffer_id_. */ + GLContext *context_; + + MEM_CXX_CLASS_ALLOC_FUNCS("GLDrawList"); +}; + +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/opengl/gl_framebuffer.cc b/source/blender/gpu/opengl/gl_framebuffer.cc new file mode 100644 index 00000000000..8d48c9f8de3 --- /dev/null +++ b/source/blender/gpu/opengl/gl_framebuffer.cc @@ -0,0 +1,460 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2020 Blender Foundation. + * All rights reserved. 
+ */ + +/** \file + * \ingroup gpu + */ + +#include "BKE_global.h" + +#include "GPU_extensions.h" + +#include "gl_backend.hh" +#include "gl_framebuffer.hh" +#include "gl_state.hh" +#include "gl_texture.hh" + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Creation & Deletion + * \{ */ + +GLFrameBuffer::GLFrameBuffer(const char *name) : FrameBuffer(name) +{ + /* Just-In-Time init. See GLFrameBuffer::init(). */ + immutable_ = false; + fbo_id_ = 0; +} + +GLFrameBuffer::GLFrameBuffer( + const char *name, GLContext *ctx, GLenum target, GLuint fbo, int w, int h) + : FrameBuffer(name) +{ + context_ = ctx; + state_manager_ = static_cast<GLStateManager *>(ctx->state_manager); + immutable_ = true; + fbo_id_ = fbo; + gl_attachments_[0] = target; + /* Never update an internal framebuffer. */ + dirty_attachments_ = false; + width_ = w; + height_ = h; + srgb_ = false; + + viewport_[0] = scissor_[0] = 0; + viewport_[1] = scissor_[1] = 0; + viewport_[2] = scissor_[2] = w; + viewport_[3] = scissor_[3] = h; + +#ifndef __APPLE__ + if (fbo_id_ && (G.debug & G_DEBUG_GPU) && (GLEW_VERSION_4_3 || GLEW_KHR_debug)) { + char sh_name[32]; + SNPRINTF(sh_name, "FrameBuffer-%s", name); + glObjectLabel(GL_FRAMEBUFFER, fbo_id_, -1, sh_name); + } +#endif +} + +GLFrameBuffer::~GLFrameBuffer() +{ + if (context_ == NULL) { + return; + } + + if (context_ == GPU_context_active_get()) { + /* Context might be partially freed. This happens when destroying the window framebuffers. */ + glDeleteFramebuffers(1, &fbo_id_); + } + else { + context_->fbo_free(fbo_id_); + } + /* Restore default framebuffer if this framebuffer was bound. */ + if (context_->active_fb == this && context_->back_left != this) { + /* If this assert triggers it means the framebuffer is being freed while in use by another + * context which, by the way, is TOTALLY UNSAFE!!! 
*/ + BLI_assert(context_ == GPU_context_active_get()); + GPU_framebuffer_restore(); + } +} + +void GLFrameBuffer::init(void) +{ + context_ = static_cast<GLContext *>(GPU_context_active_get()); + state_manager_ = static_cast<GLStateManager *>(context_->state_manager); + glGenFramebuffers(1, &fbo_id_); + +#ifndef __APPLE__ + if ((G.debug & G_DEBUG_GPU) && (GLEW_VERSION_4_3 || GLEW_KHR_debug)) { + char sh_name[64]; + SNPRINTF(sh_name, "FrameBuffer-%s", name_); + /* Binding before setting the label is needed on some drivers. */ + glBindFramebuffer(GL_FRAMEBUFFER, fbo_id_); + glObjectLabel(GL_FRAMEBUFFER, fbo_id_, -1, sh_name); + } +#endif +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Config + * \{ */ + +/* This is a rather slow operation. Don't check in normal cases. */ +bool GLFrameBuffer::check(char err_out[256]) +{ + this->bind(true); + + GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); + +#define FORMAT_STATUS(X) \ + case X: { \ + err = #X; \ + break; \ + } + + const char *err; + switch (status) { + FORMAT_STATUS(GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT); + FORMAT_STATUS(GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT); + FORMAT_STATUS(GL_FRAMEBUFFER_UNSUPPORTED); + FORMAT_STATUS(GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER); + FORMAT_STATUS(GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER); + FORMAT_STATUS(GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE); + FORMAT_STATUS(GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS); + FORMAT_STATUS(GL_FRAMEBUFFER_UNDEFINED); + case GL_FRAMEBUFFER_COMPLETE: + return true; + default: + err = "unknown"; + break; + } + +#undef FORMAT_STATUS + + const char *format = "GPUFrameBuffer: framebuffer status %s\n"; + + if (err_out) { + BLI_snprintf(err_out, 256, format, err); + } + else { + fprintf(stderr, format, err); + } + + return false; +} + +void GLFrameBuffer::update_attachments(void) +{ + /* Default framebuffers cannot have attachements. 
*/ + BLI_assert(immutable_ == false); + + /* First color texture OR the depth texture if no color is attached. + * Used to determine framebuffer colorspace and dimensions. */ + GPUAttachmentType first_attachment = GPU_FB_MAX_ATTACHEMENT; + /* NOTE: Inverse iteration to get the first color texture. */ + for (GPUAttachmentType type = GPU_FB_MAX_ATTACHEMENT - 1; type >= 0; --type) { + GPUAttachment &attach = attachments_[type]; + GLenum gl_attachment = to_gl(type); + + if (type >= GPU_FB_COLOR_ATTACHMENT0) { + gl_attachments_[type - GPU_FB_COLOR_ATTACHMENT0] = (attach.tex) ? gl_attachment : GL_NONE; + first_attachment = (attach.tex) ? type : first_attachment; + } + else if (first_attachment == GPU_FB_MAX_ATTACHEMENT) { + /* Only use depth texture to get infos if there is no color attachment. */ + first_attachment = (attach.tex) ? type : first_attachment; + } + + if (attach.tex == NULL) { + glFramebufferTexture(GL_FRAMEBUFFER, gl_attachment, 0, 0); + continue; + } + GLuint gl_tex = GPU_texture_opengl_bindcode(attach.tex); + if (attach.layer > -1 && GPU_texture_cube(attach.tex) && !GPU_texture_array(attach.tex)) { + /* Could be avoided if ARB_direct_state_access is required. In this case + * glFramebufferTextureLayer would bind the correct face. */ + GLenum gl_target = GL_TEXTURE_CUBE_MAP_POSITIVE_X + attach.layer; + glFramebufferTexture2D(GL_FRAMEBUFFER, gl_attachment, gl_target, gl_tex, attach.mip); + } + else if (attach.layer > -1) { + glFramebufferTextureLayer(GL_FRAMEBUFFER, gl_attachment, gl_tex, attach.mip, attach.layer); + } + else { + /* The whole texture level is attached. The framebuffer is potentially layered. */ + glFramebufferTexture(GL_FRAMEBUFFER, gl_attachment, gl_tex, attach.mip); + } + /* We found one depth buffer type. 
Stop here, otherwise we would + * override it by setting GPU_FB_DEPTH_ATTACHMENT */ + if (type == GPU_FB_DEPTH_STENCIL_ATTACHMENT) { + break; + } + } + + if (GPU_unused_fb_slot_workaround()) { + /* Fill normally un-occupied slots to avoid rendering artifacts on some hardware. */ + GLuint gl_tex = 0; + /* NOTE: Inverse iteration to get the first color texture. */ + for (int i = ARRAY_SIZE(gl_attachments_) - 1; i >= 0; --i) { + GPUAttachmentType type = GPU_FB_COLOR_ATTACHMENT0 + i; + GPUAttachment &attach = attachments_[type]; + if (attach.tex != NULL) { + gl_tex = GPU_texture_opengl_bindcode(attach.tex); + } + else if (gl_tex != 0) { + GLenum gl_attachment = to_gl(type); + gl_attachments_[i] = gl_attachment; + glFramebufferTexture(GL_FRAMEBUFFER, gl_attachment, gl_tex, 0); + } + } + } + + if (first_attachment != GPU_FB_MAX_ATTACHEMENT) { + GPUAttachment &attach = attachments_[first_attachment]; + int size[3]; + GPU_texture_get_mipmap_size(attach.tex, attach.mip, size); + this->size_set(size[0], size[1]); + srgb_ = (GPU_texture_format(attach.tex) == GPU_SRGB8_A8); + } + + dirty_attachments_ = false; + + glDrawBuffers(ARRAY_SIZE(gl_attachments_), gl_attachments_); + + if (G.debug & G_DEBUG_GPU) { + BLI_assert(this->check(NULL)); + } +} + +void GLFrameBuffer::apply_state(void) +{ + if (dirty_state_ == false) { + return; + } + + glViewport(UNPACK4(viewport_)); + glScissor(UNPACK4(scissor_)); + + if (scissor_test_) { + glEnable(GL_SCISSOR_TEST); + } + else { + glDisable(GL_SCISSOR_TEST); + } + + dirty_state_ = false; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Binding + * \{ */ + +void GLFrameBuffer::bind(bool enabled_srgb) +{ + if (!immutable_ && fbo_id_ == 0) { + this->init(); + } + + if (context_ != GPU_context_active_get()) { + BLI_assert(!"Trying to use the same framebuffer in multiple context"); + return; + } + + if (context_->active_fb != this) { + glBindFramebuffer(GL_FRAMEBUFFER, fbo_id_); + /* 
Internal framebuffers have only one color output and needs to be set everytime. */ + if (immutable_ && fbo_id_ == 0) { + glDrawBuffer(gl_attachments_[0]); + } + } + + if (dirty_attachments_) { + this->update_attachments(); + this->viewport_reset(); + this->scissor_reset(); + } + + if (context_->active_fb != this) { + context_->active_fb = this; + state_manager_->active_fb = this; + dirty_state_ = true; + + if (enabled_srgb) { + glEnable(GL_FRAMEBUFFER_SRGB); + } + else { + glDisable(GL_FRAMEBUFFER_SRGB); + } + + GPU_shader_set_framebuffer_srgb_target(enabled_srgb && srgb_); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Operations. + * \{ */ + +void GLFrameBuffer::clear(eGPUFrameBufferBits buffers, + const float clear_col[4], + float clear_depth, + uint clear_stencil) +{ + BLI_assert(GPU_context_active_get() == context_); + BLI_assert(context_->active_fb == this); + + /* Save and restore the state. */ + eGPUWriteMask write_mask = GPU_write_mask_get(); + uint stencil_mask = GPU_stencil_mask_get(); + eGPUStencilTest stencil_test = GPU_stencil_test_get(); + + if (buffers & GPU_COLOR_BIT) { + GPU_color_mask(true, true, true, true); + glClearColor(clear_col[0], clear_col[1], clear_col[2], clear_col[3]); + } + if (buffers & GPU_DEPTH_BIT) { + GPU_depth_mask(true); + glClearDepth(clear_depth); + } + if (buffers & GPU_STENCIL_BIT) { + GPU_stencil_write_mask_set(0xFFu); + GPU_stencil_test(GPU_STENCIL_ALWAYS); + glClearStencil(clear_stencil); + } + + context_->state_manager->apply_state(); + + GLbitfield mask = to_gl(buffers); + glClear(mask); + + if (buffers & (GPU_COLOR_BIT | GPU_DEPTH_BIT)) { + GPU_write_mask(write_mask); + } + if (buffers & GPU_STENCIL_BIT) { + GPU_stencil_write_mask_set(stencil_mask); + GPU_stencil_test(stencil_test); + } +} + +void GLFrameBuffer::clear_multi(const float (*clear_cols)[4]) +{ + BLI_assert(GPU_context_active_get() == context_); + BLI_assert(context_->active_fb == this); + + /* 
Save and restore the state. */ + eGPUWriteMask write_mask = GPU_write_mask_get(); + GPU_color_mask(true, true, true, true); + + context_->state_manager->apply_state(); + + /* WATCH: This can easilly access clear_cols out of bounds it clear_cols is not big enough for + * all attachments. + * TODO(fclem) fix this insecurity? */ + int type = GPU_FB_COLOR_ATTACHMENT0; + for (int i = 0; type < GPU_FB_MAX_ATTACHEMENT; i++, type++) { + if (attachments_[type].tex != NULL) { + glClearBufferfv(GL_COLOR, i, clear_cols[i]); + } + } + + GPU_write_mask(write_mask); +} + +void GLFrameBuffer::read(eGPUFrameBufferBits plane, + eGPUDataFormat data_format, + const int area[4], + int channel_len, + int slot, + void *r_data) +{ + GLenum format, type, mode; + mode = gl_attachments_[slot]; + type = to_gl(data_format); + + switch (plane) { + case GPU_DEPTH_BIT: + format = GL_DEPTH_COMPONENT; + break; + case GPU_COLOR_BIT: + format = channel_len_to_gl(channel_len); + /* TODO: needed for selection buffers to work properly, this should be handled better. */ + if (format == GL_RED && type == GL_UNSIGNED_INT) { + format = GL_RED_INTEGER; + } + break; + case GPU_STENCIL_BIT: + fprintf(stderr, "GPUFramebuffer: Error: Trying to read stencil bit. Unsupported."); + return; + default: + fprintf(stderr, "GPUFramebuffer: Error: Trying to read more than one framebuffer plane."); + return; + } + + glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo_id_); + glReadBuffer(mode); + glReadPixels(UNPACK4(area), format, type, r_data); +} + +/* Copy src at the give offset inside dst. */ +void GLFrameBuffer::blit_to( + eGPUFrameBufferBits planes, int src_slot, FrameBuffer *dst_, int dst_slot, int x, int y) +{ + GLFrameBuffer *src = this; + GLFrameBuffer *dst = static_cast<GLFrameBuffer *>(dst_); + + /* Framebuffers must be up to date. This simplify this function. 
*/ + if (src->dirty_attachments_) { + src->bind(true); + } + if (dst->dirty_attachments_) { + dst->bind(true); + } + + glBindFramebuffer(GL_READ_FRAMEBUFFER, src->fbo_id_); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst->fbo_id_); + + if (planes & GPU_COLOR_BIT) { + BLI_assert(src->immutable_ == false || src_slot == 0); + BLI_assert(dst->immutable_ == false || dst_slot == 0); + BLI_assert(src->gl_attachments_[src_slot] != GL_NONE); + BLI_assert(dst->gl_attachments_[dst_slot] != GL_NONE); + glReadBuffer(src->gl_attachments_[src_slot]); + glDrawBuffer(dst->gl_attachments_[dst_slot]); + } + + context_->state_manager->apply_state(); + + int w = src->width_; + int h = src->height_; + GLbitfield mask = to_gl(planes); + glBlitFramebuffer(0, 0, w, h, x, y, x + w, y + h, mask, GL_NEAREST); + + if (!dst->immutable_) { + /* Restore the draw buffers. */ + glDrawBuffers(ARRAY_SIZE(dst->gl_attachments_), dst->gl_attachments_); + } +} + +/** \} */ + +} // namespace blender::gpu
\ No newline at end of file diff --git a/source/blender/gpu/opengl/gl_framebuffer.hh b/source/blender/gpu/opengl/gl_framebuffer.hh new file mode 100644 index 00000000000..8d386116159 --- /dev/null +++ b/source/blender/gpu/opengl/gl_framebuffer.hh @@ -0,0 +1,148 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2020 Blender Foundation. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + * + * Encapsulation of Framebuffer states (attached textures, viewport, scissors). + */ + +#pragma once + +#include "MEM_guardedalloc.h" + +#include "glew-mx.h" + +#include "gpu_framebuffer_private.hh" + +namespace blender::gpu { + +class GLStateManager; + +/** + * Implementation of FrameBuffer object using OpenGL. + **/ +class GLFrameBuffer : public FrameBuffer { + private: + /** OpenGL handle. */ + GLuint fbo_id_ = 0; + /** Context the handle is from. Framebuffers are not shared accros contexts. */ + GLContext *context_ = NULL; + /** State Manager of the same contexts. */ + GLStateManager *state_manager_ = NULL; + /** Copy of the GL state. Contains ONLY color attachments enums for slot binding. */ + GLenum gl_attachments_[GPU_FB_MAX_COLOR_ATTACHMENT]; + /** Internal framebuffers are immutable. 
*/ + bool immutable_; + /** True is the framebuffer has it's first color target using the GPU_SRGB8_A8 format. */ + bool srgb_; + + public: + /** + * Create a conventional framebuffer to attach texture to. + **/ + GLFrameBuffer(const char *name); + + /** + * Special Framebuffer encapsulating internal window framebuffer. + * (i.e.: GL_FRONT_LEFT, GL_BACK_RIGHT, ...) + * @param ctx context the handle is from. + * @param target the internal GL name (i.e: GL_BACK_LEFT). + * @param fbo the (optional) already created object for some implementation. Default is 0. + * @param w buffer width. + * @param h buffer height. + **/ + GLFrameBuffer(const char *name, GLContext *ctx, GLenum target, GLuint fbo, int w, int h); + + ~GLFrameBuffer(); + + void bind(bool enabled_srgb) override; + + bool check(char err_out[256]) override; + + void clear(eGPUFrameBufferBits buffers, + const float clear_col[4], + float clear_depth, + uint clear_stencil) override; + void clear_multi(const float (*clear_cols)[4]) override; + + void read(eGPUFrameBufferBits planes, + eGPUDataFormat format, + const int area[4], + int channel_len, + int slot, + void *r_data) override; + + void blit_to(eGPUFrameBufferBits planes, + int src_slot, + FrameBuffer *dst, + int dst_slot, + int dst_offset_x, + int dst_offset_y) override; + + void apply_state(void); + + private: + void init(void); + void update_attachments(void); + void update_drawbuffers(void); + + MEM_CXX_CLASS_ALLOC_FUNCS("GLFrameBuffer"); +}; + +/* -------------------------------------------------------------------- */ +/** \name Enums Conversion + * \{ */ + +static inline GLenum to_gl(const GPUAttachmentType type) +{ +#define ATTACHMENT(X) \ + case GPU_FB_##X: { \ + return GL_##X; \ + } \ + ((void)0) + + switch (type) { + ATTACHMENT(DEPTH_ATTACHMENT); + ATTACHMENT(DEPTH_STENCIL_ATTACHMENT); + ATTACHMENT(COLOR_ATTACHMENT0); + ATTACHMENT(COLOR_ATTACHMENT1); + ATTACHMENT(COLOR_ATTACHMENT2); + ATTACHMENT(COLOR_ATTACHMENT3); + ATTACHMENT(COLOR_ATTACHMENT4); 
+ ATTACHMENT(COLOR_ATTACHMENT5); + default: + BLI_assert(0); + return GL_COLOR_ATTACHMENT0; + } +#undef ATTACHMENT +} + +static inline GLbitfield to_gl(const eGPUFrameBufferBits bits) +{ + GLbitfield mask = 0; + mask |= (bits & GPU_DEPTH_BIT) ? GL_DEPTH_BUFFER_BIT : 0; + mask |= (bits & GPU_STENCIL_BIT) ? GL_STENCIL_BUFFER_BIT : 0; + mask |= (bits & GPU_COLOR_BIT) ? GL_COLOR_BUFFER_BIT : 0; + return mask; +} + +/** \} */ + +} // namespace blender::gpu diff --git a/source/blender/gpu/opengl/gl_immediate.cc b/source/blender/gpu/opengl/gl_immediate.cc new file mode 100644 index 00000000000..7f12f41a598 --- /dev/null +++ b/source/blender/gpu/opengl/gl_immediate.cc @@ -0,0 +1,194 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + * + * Mimics old style opengl immediate mode drawing. 
+ */ + +#include "BKE_global.h" + +#include "gpu_context_private.hh" +#include "gpu_shader_private.hh" +#include "gpu_vertex_format_private.h" + +#include "gl_context.hh" +#include "gl_debug.hh" +#include "gl_primitive.hh" +#include "gl_vertex_array.hh" + +#include "gl_immediate.hh" + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Creation & Deletion + * \{ */ + +GLImmediate::GLImmediate() +{ + glGenVertexArrays(1, &vao_id_); + glBindVertexArray(vao_id_); /* Necessary for glObjectLabel. */ + + buffer.buffer_size = DEFAULT_INTERNAL_BUFFER_SIZE; + glGenBuffers(1, &buffer.vbo_id); + glBindBuffer(GL_ARRAY_BUFFER, buffer.vbo_id); + glBufferData(GL_ARRAY_BUFFER, buffer.buffer_size, NULL, GL_DYNAMIC_DRAW); + + buffer_strict.buffer_size = DEFAULT_INTERNAL_BUFFER_SIZE; + glGenBuffers(1, &buffer_strict.vbo_id); + glBindBuffer(GL_ARRAY_BUFFER, buffer_strict.vbo_id); + glBufferData(GL_ARRAY_BUFFER, buffer_strict.buffer_size, NULL, GL_DYNAMIC_DRAW); + + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindVertexArray(0); + +#ifndef __APPLE__ + if ((G.debug & G_DEBUG_GPU) && (GLEW_VERSION_4_3 || GLEW_KHR_debug)) { + glObjectLabel(GL_VERTEX_ARRAY, vao_id_, -1, "VAO-Immediate"); + glObjectLabel(GL_BUFFER, buffer.vbo_id, -1, "VBO-ImmediateBuffer"); + glObjectLabel(GL_BUFFER, buffer_strict.vbo_id, -1, "VBO-ImmediateBufferStrict"); + } +#endif +} + +GLImmediate::~GLImmediate() +{ + glDeleteVertexArrays(1, &vao_id_); + + glDeleteBuffers(1, &buffer.vbo_id); + glDeleteBuffers(1, &buffer_strict.vbo_id); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Buffer management + * \{ */ + +uchar *GLImmediate::begin() +{ + /* How many bytes do we need for this draw call? */ + const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_len); + /* Does the current buffer have enough room? 
*/ + const size_t available_bytes = buffer_size() - buffer_offset(); + + GL_CHECK_RESOURCES("Immediate"); + GL_CHECK_ERROR("Immediate Pre-Begin"); + + glBindBuffer(GL_ARRAY_BUFFER, vbo_id()); + + bool recreate_buffer = false; + if (bytes_needed > buffer_size()) { + /* expand the internal buffer */ + buffer_size() = bytes_needed; + recreate_buffer = true; + } + else if (bytes_needed < DEFAULT_INTERNAL_BUFFER_SIZE && + buffer_size() > DEFAULT_INTERNAL_BUFFER_SIZE) { + /* shrink the internal buffer */ + buffer_size() = DEFAULT_INTERNAL_BUFFER_SIZE; + recreate_buffer = true; + } + + /* ensure vertex data is aligned */ + /* Might waste a little space, but it's safe. */ + const uint pre_padding = padding(buffer_offset(), vertex_format.stride); + + if (!recreate_buffer && ((bytes_needed + pre_padding) <= available_bytes)) { + buffer_offset() += pre_padding; + } + else { + /* orphan this buffer & start with a fresh one */ + glBufferData(GL_ARRAY_BUFFER, buffer_size(), NULL, GL_DYNAMIC_DRAW); + buffer_offset() = 0; + } + +#ifndef NDEBUG + { + GLint bufsize; + glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, &bufsize); + BLI_assert(buffer_offset() + bytes_needed <= bufsize); + } +#endif + + GLbitfield access = GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT; + if (!strict_vertex_len) { + access |= GL_MAP_FLUSH_EXPLICIT_BIT; + } + void *data = glMapBufferRange(GL_ARRAY_BUFFER, buffer_offset(), bytes_needed, access); + BLI_assert(data != NULL); + GL_CHECK_ERROR("Immediate Post-Begin"); + + bytes_mapped_ = bytes_needed; + return (uchar *)data; +} + +void GLImmediate::end(void) +{ + BLI_assert(prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ + + uint buffer_bytes_used = bytes_mapped_; + if (!strict_vertex_len) { + if (vertex_idx != vertex_len) { + vertex_len = vertex_idx; + buffer_bytes_used = vertex_buffer_size(&vertex_format, vertex_len); + /* unused buffer bytes are available to the next immBegin */ + } + /* tell OpenGL what range was modified 
so it doesn't copy the whole mapped range */ + glFlushMappedBufferRange(GL_ARRAY_BUFFER, 0, buffer_bytes_used); + } + glUnmapBuffer(GL_ARRAY_BUFFER); + + GL_CHECK_ERROR("Immediate Post-Unmap"); + + if (vertex_len > 0) { + GPU_context_active_get()->state_manager->apply_state(); + + /* We convert the offset in vertex offset from the buffer's start. + * This works because we added some padding to align the first vertex vertex. */ + uint v_first = buffer_offset() / vertex_format.stride; + GLVertArray::update_bindings( + vao_id_, v_first, &vertex_format, reinterpret_cast<Shader *>(shader)->interface); + + /* Update matrices. */ + GPU_shader_bind(shader); + +#ifdef __APPLE__ + glDisable(GL_PRIMITIVE_RESTART); +#endif + glDrawArrays(to_gl(prim_type), 0, vertex_len); +#ifdef __APPLE__ + glEnable(GL_PRIMITIVE_RESTART); +#endif + /* These lines are causing crash on startup on some old GPU + drivers. + * They are not required so just comment them. (T55722) */ + // glBindBuffer(GL_ARRAY_BUFFER, 0); + // glBindVertexArray(0); + + GL_CHECK_ERROR("Immediate Post-drawing"); + } + + buffer_offset() += buffer_bytes_used; +} + +/** \} */ + +} // namespace blender::gpu
\ No newline at end of file diff --git a/source/blender/gpu/opengl/gl_immediate.hh b/source/blender/gpu/opengl/gl_immediate.hh new file mode 100644 index 00000000000..2b9b90d692b --- /dev/null +++ b/source/blender/gpu/opengl/gl_immediate.hh @@ -0,0 +1,81 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + * + * Mimics old style opengl immediate mode drawing. + */ + +#pragma once + +#include "MEM_guardedalloc.h" + +#include "glew-mx.h" + +#include "gpu_immediate_private.hh" + +namespace blender::gpu { + +/* size of internal buffer */ +#define DEFAULT_INTERNAL_BUFFER_SIZE (4 * 1024 * 1024) + +class GLImmediate : public Immediate { + private: + /* Use two buffers for strict and unstrict vertex count to + * avoid some huge driver slowdown (see T70922). + * Use accessor functions to get / modify. */ + struct { + /** Opengl Handle for this buffer. */ + GLuint vbo_id = 0; + /** Offset of the mapped data in data. */ + size_t buffer_offset = 0; + /** Size of the whole buffer in bytes. */ + size_t buffer_size = 0; + } buffer, buffer_strict; + /** Size in bytes of the mapped region. */ + size_t bytes_mapped_ = 0; + /** Vertex array for this immediate mode instance. 
*/ + GLuint vao_id_ = 0; + + public: + GLImmediate(); + ~GLImmediate(); + + uchar *begin(void) override; + void end(void) override; + + private: + GLuint &vbo_id(void) + { + return strict_vertex_len ? buffer_strict.vbo_id : buffer.vbo_id; + }; + + size_t &buffer_offset(void) + { + return strict_vertex_len ? buffer_strict.buffer_offset : buffer.buffer_offset; + }; + + size_t &buffer_size(void) + { + return strict_vertex_len ? buffer_strict.buffer_size : buffer.buffer_size; + }; +}; + +} // namespace blender::gpu
\ No newline at end of file diff --git a/source/blender/gpu/opengl/gl_primitive.hh b/source/blender/gpu/opengl/gl_primitive.hh new file mode 100644 index 00000000000..7cd0654bc2c --- /dev/null +++ b/source/blender/gpu/opengl/gl_primitive.hh @@ -0,0 +1,65 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2020 Blender Foundation. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + * + * Encapsulation of Framebuffer states (attached textures, viewport, scissors). 
+ */ + +#pragma once + +#include "BLI_assert.h" + +#include "GPU_primitive.h" + +#include "glew-mx.h" + +namespace blender::gpu { + +static inline GLenum to_gl(GPUPrimType prim_type) +{ + BLI_assert(prim_type != GPU_PRIM_NONE); + switch (prim_type) { + default: + case GPU_PRIM_POINTS: + return GL_POINTS; + case GPU_PRIM_LINES: + return GL_LINES; + case GPU_PRIM_LINE_STRIP: + return GL_LINE_STRIP; + case GPU_PRIM_LINE_LOOP: + return GL_LINE_LOOP; + case GPU_PRIM_TRIS: + return GL_TRIANGLES; + case GPU_PRIM_TRI_STRIP: + return GL_TRIANGLE_STRIP; + case GPU_PRIM_TRI_FAN: + return GL_TRIANGLE_FAN; + + case GPU_PRIM_LINES_ADJ: + return GL_LINES_ADJACENCY; + case GPU_PRIM_LINE_STRIP_ADJ: + return GL_LINE_STRIP_ADJACENCY; + case GPU_PRIM_TRIS_ADJ: + return GL_TRIANGLES_ADJACENCY; + }; +} + +} // namespace blender::gpu diff --git a/source/blender/gpu/opengl/gl_shader.cc b/source/blender/gpu/opengl/gl_shader.cc new file mode 100644 index 00000000000..17058a6a5a7 --- /dev/null +++ b/source/blender/gpu/opengl/gl_shader.cc @@ -0,0 +1,456 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2020 Blender Foundation. + * All rights reserved. 
+ */ + +/** \file + * \ingroup gpu + */ + +#include "BKE_global.h" + +#include "BLI_string.h" + +#include "GPU_extensions.h" +#include "GPU_platform.h" + +#include "gl_shader.hh" +#include "gl_shader_interface.hh" + +using namespace blender; +using namespace blender::gpu; + +/* -------------------------------------------------------------------- */ +/** \name Creation / Destruction + * \{ */ + +GLShader::GLShader(const char *name) : Shader(name) +{ +#if 0 /* Would be nice to have, but for now the Deferred compilation \ + * does not have a GPUContext. */ + BLI_assert(GPU_context_active_get() != NULL); +#endif + shader_program_ = glCreateProgram(); + +#ifndef __APPLE__ + if ((G.debug & G_DEBUG_GPU) && (GLEW_VERSION_4_3 || GLEW_KHR_debug)) { + char sh_name[64]; + SNPRINTF(sh_name, "ShaderProgram-%s", name); + glObjectLabel(GL_PROGRAM, shader_program_, -1, sh_name); + } +#endif +} + +GLShader::~GLShader(void) +{ +#if 0 /* Would be nice to have, but for now the Deferred compilation \ + * does not have a GPUContext. */ + BLI_assert(GPU_context_active_get() != NULL); +#endif + /* Invalid handles are silently ignored. */ + glDeleteShader(vert_shader_); + glDeleteShader(geom_shader_); + glDeleteShader(frag_shader_); + glDeleteProgram(shader_program_); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Shader stage creation + * \{ */ + +char *GLShader::glsl_patch_get(void) +{ + /** Used for shader patching. Init once. */ + static char patch[512] = "\0"; + if (patch[0] != '\0') { + return patch; + } + + size_t slen = 0; + /* Version need to go first. */ + STR_CONCAT(patch, slen, "#version 330\n"); + + /* Enable extensions for features that are not part of our base GLSL version + * don't use an extension for something already available! */ + if (GLEW_ARB_texture_gather) { + /* There is a bug on older Nvidia GPU where GL_ARB_texture_gather + * is reported to be supported but yield a compile error (see T55802). 
*/ + if (!GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_ANY) || GLEW_VERSION_4_0) { + STR_CONCAT(patch, slen, "#extension GL_ARB_texture_gather: enable\n"); + + /* Some drivers don't agree on GLEW_ARB_texture_gather and the actual support in the + * shader so double check the preprocessor define (see T56544). */ + if (!GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_ANY) && !GLEW_VERSION_4_0) { + STR_CONCAT(patch, slen, "#ifdef GL_ARB_texture_gather\n"); + STR_CONCAT(patch, slen, "# define GPU_ARB_texture_gather\n"); + STR_CONCAT(patch, slen, "#endif\n"); + } + else { + STR_CONCAT(patch, slen, "#define GPU_ARB_texture_gather\n"); + } + } + } + if (GLEW_ARB_shader_draw_parameters) { + STR_CONCAT(patch, slen, "#extension GL_ARB_shader_draw_parameters : enable\n"); + STR_CONCAT(patch, slen, "#define GPU_ARB_shader_draw_parameters\n"); + } + if (GPU_arb_texture_cube_map_array_is_supported()) { + STR_CONCAT(patch, slen, "#extension GL_ARB_texture_cube_map_array : enable\n"); + STR_CONCAT(patch, slen, "#define GPU_ARB_texture_cube_map_array\n"); + } + + /* Derivative sign can change depending on implementation. */ + float derivatives[2]; + GPU_get_dfdy_factors(derivatives); + STR_CONCATF(patch, slen, "#define DFDX_SIGN %1.1f\n", derivatives[0]); + STR_CONCATF(patch, slen, "#define DFDY_SIGN %1.1f\n", derivatives[1]); + + BLI_assert(slen < sizeof(patch)); + return patch; +} + +/* Create, compile and attach the shader stage to the shader program. */ +GLuint GLShader::create_shader_stage(GLenum gl_stage, MutableSpan<const char *> sources) +{ + GLuint shader = glCreateShader(gl_stage); + if (shader == 0) { + fprintf(stderr, "GLShader: Error: Could not create shader object."); + return 0; + } + + /* Patch the shader code using the first source slot. 
*/ + sources[0] = glsl_patch_get(); + + glShaderSource(shader, sources.size(), sources.data(), NULL); + glCompileShader(shader); + + GLint status; + glGetShaderiv(shader, GL_COMPILE_STATUS, &status); + if (!status || (G.debug & G_DEBUG_GPU)) { + char log[5000] = ""; + glGetShaderInfoLog(shader, sizeof(log), NULL, log); + if (log[0] != '\0') { + switch (gl_stage) { + case GL_VERTEX_SHADER: + this->print_errors(sources, log, "VertShader"); + break; + case GL_GEOMETRY_SHADER: + this->print_errors(sources, log, "GeomShader"); + break; + case GL_FRAGMENT_SHADER: + this->print_errors(sources, log, "FragShader"); + break; + } + } + } + if (!status) { + glDeleteShader(shader); + compilation_failed_ = true; + return 0; + } + +#ifndef __APPLE__ + if ((G.debug & G_DEBUG_GPU) && (GLEW_VERSION_4_3 || GLEW_KHR_debug)) { + char sh_name[64]; + switch (gl_stage) { + case GL_VERTEX_SHADER: + BLI_snprintf(sh_name, sizeof(sh_name), "VertShader-%s", name); + break; + case GL_GEOMETRY_SHADER: + BLI_snprintf(sh_name, sizeof(sh_name), "GeomShader-%s", name); + break; + case GL_FRAGMENT_SHADER: + BLI_snprintf(sh_name, sizeof(sh_name), "FragShader-%s", name); + break; + } + glObjectLabel(GL_SHADER, shader, -1, sh_name); + } +#endif + + glAttachShader(shader_program_, shader); + return shader; +} + +void GLShader::vertex_shader_from_glsl(MutableSpan<const char *> sources) +{ + vert_shader_ = this->create_shader_stage(GL_VERTEX_SHADER, sources); +} + +void GLShader::geometry_shader_from_glsl(MutableSpan<const char *> sources) +{ + geom_shader_ = this->create_shader_stage(GL_GEOMETRY_SHADER, sources); +} + +void GLShader::fragment_shader_from_glsl(MutableSpan<const char *> sources) +{ + frag_shader_ = this->create_shader_stage(GL_FRAGMENT_SHADER, sources); +} + +bool GLShader::finalize(void) +{ + if (compilation_failed_) { + return false; + } + + glLinkProgram(shader_program_); + + GLint status; + glGetProgramiv(shader_program_, GL_LINK_STATUS, &status); + if (!status) { + char log[5000]; + 
glGetProgramInfoLog(shader_program_, sizeof(log), NULL, log); + fprintf(stderr, "\nLinking Error:\n\n%s", log); + return false; + } + + interface = new GLShaderInterface(shader_program_); + + return true; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Binding + * \{ */ + +void GLShader::bind(void) +{ + BLI_assert(shader_program_ != 0); + glUseProgram(shader_program_); +} + +void GLShader::unbind(void) +{ +#ifndef NDEBUG + glUseProgram(0); +#endif +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Transform feedback + * + * TODO(fclem) Should be replaced by compute shaders. + * \{ */ + +/* Should be called before linking. */ +void GLShader::transform_feedback_names_set(Span<const char *> name_list, + const eGPUShaderTFBType geom_type) +{ + glTransformFeedbackVaryings( + shader_program_, name_list.size(), name_list.data(), GL_INTERLEAVED_ATTRIBS); + transform_feedback_type_ = geom_type; +} + +bool GLShader::transform_feedback_enable(GPUVertBuf *buf) +{ + if (transform_feedback_type_ == GPU_SHADER_TFB_NONE) { + return false; + } + + BLI_assert(buf->vbo_id != 0); + + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, buf->vbo_id); + + switch (transform_feedback_type_) { + case GPU_SHADER_TFB_POINTS: + glBeginTransformFeedback(GL_POINTS); + break; + case GPU_SHADER_TFB_LINES: + glBeginTransformFeedback(GL_LINES); + break; + case GPU_SHADER_TFB_TRIANGLES: + glBeginTransformFeedback(GL_TRIANGLES); + break; + default: + return false; + } + return true; +} + +void GLShader::transform_feedback_disable(void) +{ + glEndTransformFeedback(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Uniforms setters + * \{ */ + +void GLShader::uniform_float(int location, int comp_len, int array_size, const float *data) +{ + switch (comp_len) { + case 1: + glUniform1fv(location, array_size, data); + break; + case 2: + 
glUniform2fv(location, array_size, data); + break; + case 3: + glUniform3fv(location, array_size, data); + break; + case 4: + glUniform4fv(location, array_size, data); + break; + case 9: + glUniformMatrix3fv(location, array_size, 0, data); + break; + case 16: + glUniformMatrix4fv(location, array_size, 0, data); + break; + default: + BLI_assert(0); + break; + } +} + +void GLShader::uniform_int(int location, int comp_len, int array_size, const int *data) +{ + switch (comp_len) { + case 1: + glUniform1iv(location, array_size, data); + break; + case 2: + glUniform2iv(location, array_size, data); + break; + case 3: + glUniform3iv(location, array_size, data); + break; + case 4: + glUniform4iv(location, array_size, data); + break; + default: + BLI_assert(0); + break; + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name GPUVertFormat from Shader + * \{ */ + +static uint calc_component_size(const GLenum gl_type) +{ + switch (gl_type) { + case GL_FLOAT_VEC2: + case GL_INT_VEC2: + case GL_UNSIGNED_INT_VEC2: + return 2; + case GL_FLOAT_VEC3: + case GL_INT_VEC3: + case GL_UNSIGNED_INT_VEC3: + return 3; + case GL_FLOAT_VEC4: + case GL_FLOAT_MAT2: + case GL_INT_VEC4: + case GL_UNSIGNED_INT_VEC4: + return 4; + case GL_FLOAT_MAT3: + return 9; + case GL_FLOAT_MAT4: + return 16; + case GL_FLOAT_MAT2x3: + case GL_FLOAT_MAT3x2: + return 6; + case GL_FLOAT_MAT2x4: + case GL_FLOAT_MAT4x2: + return 8; + case GL_FLOAT_MAT3x4: + case GL_FLOAT_MAT4x3: + return 12; + default: + return 1; + } +} + +static void get_fetch_mode_and_comp_type(int gl_type, + GPUVertCompType *r_comp_type, + GPUVertFetchMode *r_fetch_mode) +{ + switch (gl_type) { + case GL_FLOAT: + case GL_FLOAT_VEC2: + case GL_FLOAT_VEC3: + case GL_FLOAT_VEC4: + case GL_FLOAT_MAT2: + case GL_FLOAT_MAT3: + case GL_FLOAT_MAT4: + case GL_FLOAT_MAT2x3: + case GL_FLOAT_MAT2x4: + case GL_FLOAT_MAT3x2: + case GL_FLOAT_MAT3x4: + case GL_FLOAT_MAT4x2: + case GL_FLOAT_MAT4x3: + 
*r_comp_type = GPU_COMP_F32; + *r_fetch_mode = GPU_FETCH_FLOAT; + break; + case GL_INT: + case GL_INT_VEC2: + case GL_INT_VEC3: + case GL_INT_VEC4: + *r_comp_type = GPU_COMP_I32; + *r_fetch_mode = GPU_FETCH_INT; + break; + case GL_UNSIGNED_INT: + case GL_UNSIGNED_INT_VEC2: + case GL_UNSIGNED_INT_VEC3: + case GL_UNSIGNED_INT_VEC4: + *r_comp_type = GPU_COMP_U32; + *r_fetch_mode = GPU_FETCH_INT; + break; + default: + BLI_assert(0); + } +} + +void GLShader::vertformat_from_shader(GPUVertFormat *format) const +{ + GPU_vertformat_clear(format); + + GLint attr_len; + glGetProgramiv(shader_program_, GL_ACTIVE_ATTRIBUTES, &attr_len); + + for (int i = 0; i < attr_len; i++) { + char name[256]; + GLenum gl_type; + GLint size; + glGetActiveAttrib(shader_program_, i, sizeof(name), NULL, &size, &gl_type, name); + + /* Ignore OpenGL names like `gl_BaseInstanceARB`, `gl_InstanceID` and `gl_VertexID`. */ + if (glGetAttribLocation(shader_program_, name) == -1) { + continue; + } + + GPUVertCompType comp_type; + GPUVertFetchMode fetch_mode; + get_fetch_mode_and_comp_type(gl_type, &comp_type, &fetch_mode); + + int comp_len = calc_component_size(gl_type) * size; + + GPU_vertformat_attr_add(format, name, comp_type, comp_len, fetch_mode); + } +} + +/** \} */ diff --git a/source/blender/gpu/opengl/gl_shader.hh b/source/blender/gpu/opengl/gl_shader.hh new file mode 100644 index 00000000000..a686014f4c5 --- /dev/null +++ b/source/blender/gpu/opengl/gl_shader.hh @@ -0,0 +1,83 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * The Original Code is Copyright (C) 2020 Blender Foundation.
 * All rights reserved.
 */

/** \file
 * \ingroup gpu
 */

#pragma once

#include "MEM_guardedalloc.h"

#include "glew-mx.h"

#include "gpu_shader_private.hh"

namespace blender {
namespace gpu {

/**
 * Implementation of shader compilation and uniforms handling using OpenGL.
 **/
class GLShader : public Shader {
 private:
  /** Handle for full program (links shader stages below). */
  GLuint shader_program_ = 0;
  /** Individual shader stages. */
  GLuint vert_shader_ = 0;
  GLuint geom_shader_ = 0;
  GLuint frag_shader_ = 0;
  /** True if any shader failed to compile. */
  bool compilation_failed_ = false;

  eGPUShaderTFBType transform_feedback_type_ = GPU_SHADER_TFB_NONE;

 public:
  GLShader(const char *name);
  ~GLShader();

  void vertex_shader_from_glsl(MutableSpan<const char *> sources) override;
  void geometry_shader_from_glsl(MutableSpan<const char *> sources) override;
  void fragment_shader_from_glsl(MutableSpan<const char *> sources) override;
  /* Return true on success. */
  bool finalize(void) override;

  void transform_feedback_names_set(Span<const char *> name_list,
                                    const eGPUShaderTFBType geom_type) override;
  bool transform_feedback_enable(GPUVertBuf *buf) override;
  void transform_feedback_disable(void) override;

  void bind(void) override;
  void unbind(void) override;

  void uniform_float(int location, int comp_len, int array_size, const float *data) override;
  void uniform_int(int location, int comp_len, int array_size, const int *data) override;

  void vertformat_from_shader(GPUVertFormat *format) const override;

 private:
  char *glsl_patch_get(void);

  /* Compile a single shader stage from the given sources. */
  GLuint create_shader_stage(GLenum gl_stage, MutableSpan<const char *> sources);

  MEM_CXX_CLASS_ALLOC_FUNCS("GLShader");
};

}  // namespace gpu
}  // namespace blender
/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * The Original Code is Copyright (C) 2016 by Mike Erwin.
 * All rights reserved.
 */

/** \file
 * \ingroup gpu
 *
 * GPU shader interface (C --> GLSL)
 */

#include "BLI_bitmap.h"

#include "gl_batch.hh"

#include "gl_shader_interface.hh"

namespace blender::gpu {

/* -------------------------------------------------------------------- */
/** \name Binding assignment
 *
 * To mimic vulkan, we assign binding at shader creation to avoid shader recompilation.
 * In the future, we should set it in the shader using layout(binding = i) and query its value.
 * \{ */

static inline int block_binding(int32_t program, uint32_t block_index)
{
  /* For now just assign a consecutive index. In the future, we should set it in
   * the shader using layout(binding = i) and query its value. */
  glUniformBlockBinding(program, block_index, block_index);
  return block_index;
}

static inline int sampler_binding(int32_t program,
                                  uint32_t uniform_index,
                                  int32_t uniform_location,
                                  int *sampler_len)
{
  /* Identify sampler uniforms and assign sampler units to them.
   * Returns the assigned unit, or -1 if the uniform is not a sampler. */
  GLint type;
  glGetActiveUniformsiv(program, 1, &uniform_index, GL_UNIFORM_TYPE, &type);

  switch (type) {
    case GL_SAMPLER_1D:
    case GL_SAMPLER_2D:
    case GL_SAMPLER_3D:
    case GL_SAMPLER_CUBE:
    case GL_SAMPLER_CUBE_MAP_ARRAY_ARB: /* OpenGL 4.0 */
    case GL_SAMPLER_1D_SHADOW:
    case GL_SAMPLER_2D_SHADOW:
    case GL_SAMPLER_1D_ARRAY:
    case GL_SAMPLER_2D_ARRAY:
    case GL_SAMPLER_1D_ARRAY_SHADOW:
    case GL_SAMPLER_2D_ARRAY_SHADOW:
    case GL_SAMPLER_2D_MULTISAMPLE:
    case GL_SAMPLER_2D_MULTISAMPLE_ARRAY:
    case GL_SAMPLER_CUBE_SHADOW:
    case GL_SAMPLER_BUFFER:
    case GL_INT_SAMPLER_1D:
    case GL_INT_SAMPLER_2D:
    case GL_INT_SAMPLER_3D:
    case GL_INT_SAMPLER_CUBE:
    case GL_INT_SAMPLER_1D_ARRAY:
    case GL_INT_SAMPLER_2D_ARRAY:
    case GL_INT_SAMPLER_2D_MULTISAMPLE:
    case GL_INT_SAMPLER_2D_MULTISAMPLE_ARRAY:
    case GL_INT_SAMPLER_BUFFER:
    case GL_UNSIGNED_INT_SAMPLER_1D:
    case GL_UNSIGNED_INT_SAMPLER_2D:
    case GL_UNSIGNED_INT_SAMPLER_3D:
    case GL_UNSIGNED_INT_SAMPLER_CUBE:
    case GL_UNSIGNED_INT_SAMPLER_1D_ARRAY:
    case GL_UNSIGNED_INT_SAMPLER_2D_ARRAY:
    case GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE:
    case GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY:
    case GL_UNSIGNED_INT_SAMPLER_BUFFER: {
      /* For now just assign a consecutive index. In the future, we should set it in
       * the shader using layout(binding = i) and query its value. */
      int binding = *sampler_len;
      glUniform1i(uniform_location, binding);
      (*sampler_len)++;
      return binding;
    }
    default:
      return -1;
  }
}
/** \} */

/* -------------------------------------------------------------------- */
/** \name Creation / Destruction
 * \{ */

/* Introspect the linked program: gathers attributes, uniform blocks and
 * uniforms into inputs_, assigns bindings, and caches builtin locations. */
GLShaderInterface::GLShaderInterface(GLuint program)
{
  /* Necessary to make glUniform work. */
  glUseProgram(program);

  GLint max_attr_name_len = 0, attr_len = 0;
  glGetProgramiv(program, GL_ACTIVE_ATTRIBUTE_MAX_LENGTH, &max_attr_name_len);
  glGetProgramiv(program, GL_ACTIVE_ATTRIBUTES, &attr_len);

  GLint max_ubo_name_len = 0, ubo_len = 0;
  glGetProgramiv(program, GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH, &max_ubo_name_len);
  glGetProgramiv(program, GL_ACTIVE_UNIFORM_BLOCKS, &ubo_len);

  GLint max_uniform_name_len = 0, active_uniform_len = 0, uniform_len = 0;
  glGetProgramiv(program, GL_ACTIVE_UNIFORM_MAX_LENGTH, &max_uniform_name_len);
  glGetProgramiv(program, GL_ACTIVE_UNIFORMS, &active_uniform_len);
  uniform_len = active_uniform_len;

  BLI_assert(ubo_len <= 16 && "enabled_ubo_mask_ is uint16_t");

  /* Work around driver bug with Intel HD 4600 on Windows 7/8, where
   * GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH does not work. */
  if (attr_len > 0 && max_attr_name_len == 0) {
    max_attr_name_len = 256;
  }
  if (ubo_len > 0 && max_ubo_name_len == 0) {
    max_ubo_name_len = 256;
  }
  if (uniform_len > 0 && max_uniform_name_len == 0) {
    max_uniform_name_len = 256;
  }

  /* GL_ACTIVE_UNIFORMS lied to us! Remove the UBO uniforms from the total before
   * allocating the uniform array. */
  GLint max_ubo_uni_len = 0;
  for (int i = 0; i < ubo_len; i++) {
    GLint ubo_uni_len;
    glGetActiveUniformBlockiv(program, i, GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS, &ubo_uni_len);
    max_ubo_uni_len = max_ii(max_ubo_uni_len, ubo_uni_len);
    uniform_len -= ubo_uni_len;
  }
  /* Bit set to true if uniform comes from a uniform block. */
  BLI_bitmap *uniforms_from_blocks = BLI_BITMAP_NEW(active_uniform_len, __func__);
  /* Set uniforms from block for exclusion. */
  GLint *ubo_uni_ids = (GLint *)MEM_mallocN(sizeof(GLint) * max_ubo_uni_len, __func__);
  for (int i = 0; i < ubo_len; i++) {
    GLint ubo_uni_len;
    glGetActiveUniformBlockiv(program, i, GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS, &ubo_uni_len);
    glGetActiveUniformBlockiv(program, i, GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES, ubo_uni_ids);
    for (int u = 0; u < ubo_uni_len; u++) {
      BLI_BITMAP_ENABLE(uniforms_from_blocks, ubo_uni_ids[u]);
    }
  }
  MEM_freeN(ubo_uni_ids);

  int input_tot_len = attr_len + ubo_len + uniform_len;
  inputs_ = (ShaderInput *)MEM_callocN(sizeof(ShaderInput) * input_tot_len, __func__);

  /* One contiguous buffer holds every input name; each ShaderInput
   * references an offset into it (see set_input_name). */
  const uint32_t name_buffer_len = attr_len * max_attr_name_len + ubo_len * max_ubo_name_len +
                                   uniform_len * max_uniform_name_len;
  name_buffer_ = (char *)MEM_mallocN(name_buffer_len, "name_buffer");
  uint32_t name_buffer_offset = 0;

  /* Attributes */
  enabled_attr_mask_ = 0;
  for (int i = 0; i < attr_len; i++) {
    char *name = name_buffer_ + name_buffer_offset;
    GLsizei remaining_buffer = name_buffer_len - name_buffer_offset;
    GLsizei name_len = 0;
    GLenum type;
    GLint size;

    glGetActiveAttrib(program, i, remaining_buffer, &name_len, &size, &type, name);
    GLint location = glGetAttribLocation(program, name);
    /* Ignore OpenGL names like `gl_BaseInstanceARB`, `gl_InstanceID` and `gl_VertexID`. */
    if (location == -1) {
      continue;
    }

    ShaderInput *input = &inputs_[attr_len_++];
    input->location = input->binding = location;

    name_buffer_offset += set_input_name(input, name, name_len);
    enabled_attr_mask_ |= (1 << input->location);
  }

  /* Uniform Blocks */
  for (int i = 0; i < ubo_len; i++) {
    char *name = name_buffer_ + name_buffer_offset;
    GLsizei remaining_buffer = name_buffer_len - name_buffer_offset;
    GLsizei name_len = 0;

    glGetActiveUniformBlockName(program, i, remaining_buffer, &name_len, name);

    ShaderInput *input = &inputs_[attr_len_ + ubo_len_++];
    input->binding = input->location = block_binding(program, i);

    name_buffer_offset += this->set_input_name(input, name, name_len);
    enabled_ubo_mask_ |= (1 << input->binding);
  }

  /* Uniforms */
  for (int i = 0, sampler = 0; i < active_uniform_len; i++) {
    if (BLI_BITMAP_TEST(uniforms_from_blocks, i)) {
      continue;
    }
    char *name = name_buffer_ + name_buffer_offset;
    GLsizei remaining_buffer = name_buffer_len - name_buffer_offset;
    GLsizei name_len = 0;

    glGetActiveUniformName(program, i, remaining_buffer, &name_len, name);

    ShaderInput *input = &inputs_[attr_len_ + ubo_len_ + uniform_len_++];
    input->location = glGetUniformLocation(program, name);
    input->binding = sampler_binding(program, i, input->location, &sampler);

    name_buffer_offset += this->set_input_name(input, name, name_len);
    enabled_tex_mask_ |= (input->binding != -1) ? (1lu << input->binding) : 0lu;
  }

  /* Builtin Uniforms */
  for (int32_t u_int = 0; u_int < GPU_NUM_UNIFORMS; u_int++) {
    GPUUniformBuiltin u = static_cast<GPUUniformBuiltin>(u_int);
    builtins_[u] = glGetUniformLocation(program, builtin_uniform_name(u));
  }

  /* Builtin Uniforms Blocks */
  for (int32_t u_int = 0; u_int < GPU_NUM_UNIFORM_BLOCKS; u_int++) {
    GPUUniformBlockBuiltin u = static_cast<GPUUniformBlockBuiltin>(u_int);
    const ShaderInput *block = this->ubo_get(builtin_uniform_block_name(u));
    builtin_blocks_[u] = (block != NULL) ? block->binding : -1;
  }

  MEM_freeN(uniforms_from_blocks);

  /* Resize name buffer to save some memory. */
  if (name_buffer_offset < name_buffer_len) {
    name_buffer_ = (char *)MEM_reallocN(name_buffer_, name_buffer_offset);
  }

  // this->debug_print();

  this->sort_inputs();
}

GLShaderInterface::~GLShaderInterface()
{
  /* Detach from every VAO cache that still references this interface. */
  for (auto *ref : refs_) {
    if (ref != NULL) {
      ref->remove(this);
    }
  }
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Batch Reference
 * \{ */

/* Register a VAO cache; reuses a NULL slot left by ref_remove when possible. */
void GLShaderInterface::ref_add(GLVaoCache *ref)
{
  for (int i = 0; i < refs_.size(); i++) {
    if (refs_[i] == NULL) {
      refs_[i] = ref;
      return;
    }
  }
  refs_.append(ref);
}

void GLShaderInterface::ref_remove(GLVaoCache *ref)
{
  for (int i = 0; i < refs_.size(); i++) {
    if (refs_[i] == ref) {
      refs_[i] = NULL;
      break; /* cannot have duplicates */
    }
  }
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Validation
 * TODO
 * \{ */

/** \} */

}  // namespace blender::gpu
/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * The Original Code is Copyright (C) 2020 Blender Foundation.
 * All rights reserved.
 */

/** \file
 * \ingroup gpu
 *
 * GPU shader interface (C --> GLSL)
 *
 * Structure detailing needed vertex inputs and resources for a specific shader.
 * A shader interface can be shared between two similar shaders.
 */

#pragma once

#include "MEM_guardedalloc.h"

#include "BLI_vector.hh"

#include "glew-mx.h"

#include "gpu_shader_interface.hh"

namespace blender::gpu {

class GLVaoCache;

/**
 * Implementation of Shader interface using OpenGL.
 **/
class GLShaderInterface : public ShaderInterface {
 private:
  /** Reference to VaoCaches using this interface. */
  Vector<GLVaoCache *> refs_;

 public:
  GLShaderInterface(GLuint program);
  ~GLShaderInterface();

  /* Track / untrack a VAO cache so it can be invalidated on destruction. */
  void ref_add(GLVaoCache *ref);
  void ref_remove(GLVaoCache *ref);

  MEM_CXX_CLASS_ALLOC_FUNCS("GLShaderInterface");
};

}  // namespace blender::gpu
/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * Copyright 2020, Blender Foundation.
 */

/** \file
 * \ingroup gpu
 */

#include "BLI_math_base.h"

#include "GPU_extensions.h"

#include "glew-mx.h"

#include "gl_context.hh"
#include "gl_framebuffer.hh"
#include "gl_state.hh"

using namespace blender::gpu;

/* -------------------------------------------------------------------- */
/** \name GLStateManager
 * \{ */

GLStateManager::GLStateManager(void) : GPUStateManager()
{
  /* Set other states that never change.
 */
  glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
  glEnable(GL_MULTISAMPLE);
  glEnable(GL_PRIMITIVE_RESTART);

  glDisable(GL_DITHER);

  glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
  glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);

  glPrimitiveRestartIndex((GLuint)0xFFFFFFFF);
  /* TODO: Should become default. But needs at least GL 4.3 */
  if (GLEW_ARB_ES3_compatibility) {
    /* Takes precedence over GL_PRIMITIVE_RESTART */
    glEnable(GL_PRIMITIVE_RESTART_FIXED_INDEX);
  }

  /* Limits. */
  glGetFloatv(GL_ALIASED_LINE_WIDTH_RANGE, line_width_range_);

  /* Force update using default state: invert every bit of the cached state
   * so the first set_state()/set_mutable_state() sees everything as changed. */
  current_ = ~state;
  current_mutable_ = ~mutable_state;
  set_state(state);
  set_mutable_state(mutable_state);
}

void GLStateManager::apply_state(void)
{
  this->set_state(this->state);
  this->set_mutable_state(this->mutable_state);
  active_fb->apply_state();
};

/* Diff the requested state against the cached one and only touch the
 * GL toggles whose bits actually changed. */
void GLStateManager::set_state(const GPUState &state)
{
  GPUState changed = state ^ current_;

  if (changed.blend != 0) {
    set_blend((eGPUBlend)state.blend);
  }
  if (changed.write_mask != 0) {
    set_write_mask((eGPUWriteMask)state.write_mask);
  }
  if (changed.depth_test != 0) {
    set_depth_test((eGPUDepthTest)state.depth_test);
  }
  if (changed.stencil_test != 0 || changed.stencil_op != 0) {
    set_stencil_test((eGPUStencilTest)state.stencil_test, (eGPUStencilOp)state.stencil_op);
    set_stencil_mask((eGPUStencilTest)state.stencil_test, mutable_state);
  }
  if (changed.clip_distances != 0) {
    set_clip_distances(state.clip_distances, current_.clip_distances);
  }
  if (changed.culling_test != 0) {
    set_backface_culling((eGPUFaceCullTest)state.culling_test);
  }
  if (changed.logic_op_xor != 0) {
    set_logic_op(state.logic_op_xor);
  }
  if (changed.invert_facing != 0) {
    set_facing(state.invert_facing);
  }
  if (changed.provoking_vert != 0) {
    set_provoking_vert((eGPUProvokingVertex)state.provoking_vert);
  }
  if (changed.shadow_bias != 0) {
    set_shadow_bias(state.shadow_bias);
  }

  /* TODO remove */
  if (changed.polygon_smooth) {
    if (state.polygon_smooth) {
      glEnable(GL_POLYGON_SMOOTH);
    }
    else {
      glDisable(GL_POLYGON_SMOOTH);
    }
  }
  if (changed.line_smooth) {
    if (state.line_smooth) {
      glEnable(GL_LINE_SMOOTH);
    }
    else {
      glDisable(GL_LINE_SMOOTH);
    }
  }

  current_ = state;
}

void GLStateManager::set_mutable_state(const GPUStateMutable &state)
{
  GPUStateMutable changed = state ^ current_mutable_;

  /* TODO remove, should be uniform. */
  if (changed.point_size != 0) {
    if (state.point_size > 0.0f) {
      glEnable(GL_PROGRAM_POINT_SIZE);
      glPointSize(state.point_size);
    }
    else {
      glDisable(GL_PROGRAM_POINT_SIZE);
    }
  }

  if (changed.line_width != 0) {
    /* TODO remove, should use wide line shader. */
    glLineWidth(clamp_f(state.line_width, line_width_range_[0], line_width_range_[1]));
  }

  if (changed.depth_range[0] != 0 || changed.depth_range[1] != 0) {
    /* TODO remove, should modify the projection matrix instead. */
    glDepthRange(UNPACK2(state.depth_range));
  }

  if (changed.stencil_compare_mask != 0 || changed.stencil_reference != 0 ||
      changed.stencil_write_mask != 0) {
    set_stencil_mask((eGPUStencilTest)current_.stencil_test, state);
  }

  current_mutable_ = state;
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name State set functions
 * \{ */

void GLStateManager::set_write_mask(const eGPUWriteMask value)
{
  glDepthMask((value & GPU_WRITE_DEPTH) != 0);
  glColorMask((value & GPU_WRITE_RED) != 0,
              (value & GPU_WRITE_GREEN) != 0,
              (value & GPU_WRITE_BLUE) != 0,
              (value & GPU_WRITE_ALPHA) != 0);

  if (value == GPU_WRITE_NONE) {
    glEnable(GL_RASTERIZER_DISCARD);
  }
  else {
    glDisable(GL_RASTERIZER_DISCARD);
  }
}

void GLStateManager::set_depth_test(const eGPUDepthTest value)
{
  GLenum func;
  switch (value) {
    case GPU_DEPTH_LESS:
      func = GL_LESS;
      break;
    case GPU_DEPTH_LESS_EQUAL:
      func = GL_LEQUAL;
      break;
    case GPU_DEPTH_EQUAL:
      func = GL_EQUAL;
      break;
    case GPU_DEPTH_GREATER:
      func = GL_GREATER;
      break;
    case GPU_DEPTH_GREATER_EQUAL:
      func = GL_GEQUAL;
      break;
    case GPU_DEPTH_ALWAYS:
    default:
      func = GL_ALWAYS;
      break;
  }

  if (value != GPU_DEPTH_NONE) {
    glEnable(GL_DEPTH_TEST);
    glDepthFunc(func);
  }
  else {
    glDisable(GL_DEPTH_TEST);
  }
}

void GLStateManager::set_stencil_test(const eGPUStencilTest test, const eGPUStencilOp operation)
{
  switch (operation) {
    case GPU_STENCIL_OP_REPLACE:
      glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
      break;
    case GPU_STENCIL_OP_COUNT_DEPTH_PASS:
      glStencilOpSeparate(GL_BACK, GL_KEEP, GL_KEEP, GL_INCR_WRAP);
      glStencilOpSeparate(GL_FRONT, GL_KEEP, GL_KEEP, GL_DECR_WRAP);
      break;
    case GPU_STENCIL_OP_COUNT_DEPTH_FAIL:
      glStencilOpSeparate(GL_BACK, GL_KEEP, GL_DECR_WRAP, GL_KEEP);
      glStencilOpSeparate(GL_FRONT, GL_KEEP, GL_INCR_WRAP, GL_KEEP);
      break;
    case GPU_STENCIL_OP_NONE:
    default:
      glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);
  }

  if (test != GPU_STENCIL_NONE) {
    glEnable(GL_STENCIL_TEST);
  }
  else {
    glDisable(GL_STENCIL_TEST);
  }
}

void GLStateManager::set_stencil_mask(const eGPUStencilTest test, const GPUStateMutable state)
{
  GLenum func;
  switch (test) {
    case GPU_STENCIL_NEQUAL:
      func = GL_NOTEQUAL;
      break;
    case GPU_STENCIL_EQUAL:
      func = GL_EQUAL;
      break;
    case GPU_STENCIL_ALWAYS:
      func = GL_ALWAYS;
      break;
    case GPU_STENCIL_NONE:
    default:
      glStencilMask(0x00);
      glStencilFunc(GL_ALWAYS, 0x00, 0x00);
      return;
  }

  glStencilMask(state.stencil_write_mask);
  glStencilFunc(func, state.stencil_reference, state.stencil_compare_mask);
}

void GLStateManager::set_clip_distances(const int new_dist_len, const int old_dist_len)
{
  for (int i = 0; i < new_dist_len; i++) {
    glEnable(GL_CLIP_DISTANCE0 + i);
  }
  for (int i = new_dist_len; i < old_dist_len; i++) {
    glDisable(GL_CLIP_DISTANCE0 + i);
  }
}

void GLStateManager::set_logic_op(const bool enable)
{
  if (enable) {
    glEnable(GL_COLOR_LOGIC_OP);
    glLogicOp(GL_XOR);
  }
  else {
    glDisable(GL_COLOR_LOGIC_OP);
  }
}

void GLStateManager::set_facing(const bool invert)
{
  glFrontFace((invert) ? GL_CW : GL_CCW);
}

void GLStateManager::set_backface_culling(const eGPUFaceCullTest test)
{
  if (test != GPU_CULL_NONE) {
    glEnable(GL_CULL_FACE);
    glCullFace((test == GPU_CULL_FRONT) ? GL_FRONT : GL_BACK);
  }
  else {
    glDisable(GL_CULL_FACE);
  }
}

void GLStateManager::set_provoking_vert(const eGPUProvokingVertex vert)
{
  GLenum value = (vert == GPU_VERTEX_FIRST) ? GL_FIRST_VERTEX_CONVENTION :
                                              GL_LAST_VERTEX_CONVENTION;
  glProvokingVertex(value);
}

void GLStateManager::set_shadow_bias(const bool enable)
{
  if (enable) {
    glEnable(GL_POLYGON_OFFSET_FILL);
    glEnable(GL_POLYGON_OFFSET_LINE);
    /* 2.0 Seems to be the lowest possible slope bias that works in every case. */
    glPolygonOffset(2.0f, 1.0f);
  }
  else {
    glDisable(GL_POLYGON_OFFSET_FILL);
    glDisable(GL_POLYGON_OFFSET_LINE);
  }
}

void GLStateManager::set_blend(const eGPUBlend value)
{
  /**
   * Factors to the equation.
   * SRC is fragment shader output.
   * DST is framebuffer color.
   * final.rgb = SRC.rgb * src_rgb + DST.rgb * dst_rgb;
   * final.a = SRC.a * src_alpha + DST.a * dst_alpha;
   **/
  GLenum src_rgb, src_alpha, dst_rgb, dst_alpha;
  switch (value) {
    default:
    case GPU_BLEND_ALPHA: {
      src_rgb = GL_SRC_ALPHA;
      dst_rgb = GL_ONE_MINUS_SRC_ALPHA;
      src_alpha = GL_ONE;
      dst_alpha = GL_ONE_MINUS_SRC_ALPHA;
      break;
    }
    case GPU_BLEND_ALPHA_PREMULT: {
      src_rgb = GL_ONE;
      dst_rgb = GL_ONE_MINUS_SRC_ALPHA;
      src_alpha = GL_ONE;
      dst_alpha = GL_ONE_MINUS_SRC_ALPHA;
      break;
    }
    case GPU_BLEND_ADDITIVE: {
      /* Do not let alpha accumulate but premult the source RGB by it. */
      src_rgb = GL_SRC_ALPHA;
      dst_rgb = GL_ONE;
      src_alpha = GL_ZERO;
      dst_alpha = GL_ONE;
      break;
    }
    case GPU_BLEND_SUBTRACT:
    case GPU_BLEND_ADDITIVE_PREMULT: {
      /* Let alpha accumulate. */
      src_rgb = GL_ONE;
      dst_rgb = GL_ONE;
      src_alpha = GL_ONE;
      dst_alpha = GL_ONE;
      break;
    }
    case GPU_BLEND_MULTIPLY: {
      src_rgb = GL_DST_COLOR;
      dst_rgb = GL_ZERO;
      src_alpha = GL_DST_ALPHA;
      dst_alpha = GL_ZERO;
      break;
    }
    case GPU_BLEND_INVERT: {
      src_rgb = GL_ONE_MINUS_DST_COLOR;
      dst_rgb = GL_ZERO;
      src_alpha = GL_ZERO;
      dst_alpha = GL_ONE;
      break;
    }
    case GPU_BLEND_OIT: {
      src_rgb = GL_ONE;
      dst_rgb = GL_ONE;
      src_alpha = GL_ZERO;
      dst_alpha = GL_ONE_MINUS_SRC_ALPHA;
      break;
    }
    case GPU_BLEND_BACKGROUND: {
      src_rgb = GL_ONE_MINUS_DST_ALPHA;
      dst_rgb = GL_SRC_ALPHA;
      src_alpha = GL_ZERO;
      dst_alpha = GL_SRC_ALPHA;
      break;
    }
    case GPU_BLEND_CUSTOM: {
      src_rgb = GL_ONE;
      dst_rgb = GL_SRC1_COLOR;
      src_alpha = GL_ONE;
      dst_alpha = GL_SRC1_ALPHA;
      break;
    }
  }

  /* Always set the blend function. This avoids a rendering error when blending is disabled but
   * GPU_BLEND_CUSTOM was used just before and the framebuffer is using more than 1 color target. */
  glBlendFuncSeparate(src_rgb, dst_rgb, src_alpha, dst_alpha);
  if (value != GPU_BLEND_NONE) {
    glEnable(GL_BLEND);
  }
  else {
    glDisable(GL_BLEND);
  }
}

/** \} */
/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright 2020, Blender Foundation. + */ + +/** \file + * \ingroup gpu + */ + +#include "MEM_guardedalloc.h" + +#include "BLI_utildefines.h" + +#include "gpu_state_private.hh" + +#include "glew-mx.h" + +namespace blender { +namespace gpu { + +class GLFrameBuffer; + +/** + * State manager keeping track of the draw state and applying it before drawing. + * Opengl Implementation. + **/ +class GLStateManager : public GPUStateManager { + public: + /** Anothter reference to tje active framebuffer. */ + GLFrameBuffer *active_fb; + + private: + /** Current state of the GL implementation. Avoids resetting the whole state for every change. */ + GPUState current_; + GPUStateMutable current_mutable_; + /** Limits. 
*/ + float line_width_range_[2]; + + public: + GLStateManager(); + + void apply_state(void) override; + + private: + static void set_write_mask(const eGPUWriteMask value); + static void set_depth_test(const eGPUDepthTest value); + static void set_stencil_test(const eGPUStencilTest test, const eGPUStencilOp operation); + static void set_stencil_mask(const eGPUStencilTest test, const GPUStateMutable state); + static void set_clip_distances(const int new_dist_len, const int old_dist_len); + static void set_logic_op(const bool enable); + static void set_facing(const bool invert); + static void set_backface_culling(const eGPUFaceCullTest test); + static void set_provoking_vert(const eGPUProvokingVertex vert); + static void set_shadow_bias(const bool enable); + static void set_blend(const eGPUBlend value); + + void set_state(const GPUState &state); + void set_mutable_state(const GPUStateMutable &state); + + MEM_CXX_CLASS_ALLOC_FUNCS("GLStateManager") +}; + +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/opengl/gl_texture.hh b/source/blender/gpu/opengl/gl_texture.hh new file mode 100644 index 00000000000..c1194941038 --- /dev/null +++ b/source/blender/gpu/opengl/gl_texture.hh @@ -0,0 +1,81 @@ + +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + * The Original Code is Copyright (C) 2020 Blender Foundation. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + * + * GPU Framebuffer + * - this is a wrapper for an OpenGL framebuffer object (FBO). in practice + * multiple FBO's may be created. + * - actual FBO creation & config is deferred until GPU_framebuffer_bind or + * GPU_framebuffer_check_valid to allow creation & config while another + * opengl context is bound (since FBOs are not shared between ogl contexts). + */ + +#pragma once + +#include "BLI_assert.h" + +#include "glew-mx.h" + +namespace blender { +namespace gpu { + +static GLenum to_gl(eGPUDataFormat format) +{ + switch (format) { + case GPU_DATA_FLOAT: + return GL_FLOAT; + case GPU_DATA_INT: + return GL_INT; + case GPU_DATA_UNSIGNED_INT: + return GL_UNSIGNED_INT; + case GPU_DATA_UNSIGNED_BYTE: + return GL_UNSIGNED_BYTE; + case GPU_DATA_UNSIGNED_INT_24_8: + return GL_UNSIGNED_INT_24_8; + case GPU_DATA_10_11_11_REV: + return GL_UNSIGNED_INT_10F_11F_11F_REV; + default: + BLI_assert(!"Unhandled data format"); + return GL_FLOAT; + } +} + +/* Assume Unorm / Float target. Used with glReadPixels. */ +static GLenum channel_len_to_gl(int channel_len) +{ + switch (channel_len) { + case 1: + return GL_RED; + case 2: + return GL_RG; + case 3: + return GL_RGB; + case 4: + return GL_RGBA; + default: + BLI_assert(!"Wrong number of texture channels"); + return GL_RED; + } +} + +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/opengl/gl_uniform_buffer.cc b/source/blender/gpu/opengl/gl_uniform_buffer.cc new file mode 100644 index 00000000000..0e0c64e5c60 --- /dev/null +++ b/source/blender/gpu/opengl/gl_uniform_buffer.cc @@ -0,0 +1,133 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2020 Blender Foundation. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + */ + +#include "BKE_global.h" + +#include "BLI_string.h" + +#include "GPU_extensions.h" + +#include "gpu_backend.hh" +#include "gpu_context_private.hh" + +#include "gl_backend.hh" +#include "gl_uniform_buffer.hh" + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Creation & Deletion + * \{ */ + +GLUniformBuf::GLUniformBuf(size_t size, const char *name) : UniformBuf(size, name) +{ + /* Do not create ubo GL buffer here to allow allocation from any thread. 
*/ +} + +GLUniformBuf::~GLUniformBuf() +{ + GLBackend::get()->buf_free(ubo_id_); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Data upload / update + * \{ */ + +void GLUniformBuf::init(void) +{ + BLI_assert(GPU_context_active_get()); + + glGenBuffers(1, &ubo_id_); + glBindBuffer(GL_UNIFORM_BUFFER, ubo_id_); + glBufferData(GL_UNIFORM_BUFFER, size_in_bytes_, NULL, GL_DYNAMIC_DRAW); + +#ifndef __APPLE__ + if ((G.debug & G_DEBUG_GPU) && (GLEW_VERSION_4_3 || GLEW_KHR_debug)) { + char sh_name[64]; + SNPRINTF(sh_name, "UBO-%s", name_); + glObjectLabel(GL_BUFFER, ubo_id_, -1, sh_name); + } +#endif +} + +void GLUniformBuf::update(const void *data) +{ + if (ubo_id_ == 0) { + this->init(); + } + glBindBuffer(GL_UNIFORM_BUFFER, ubo_id_); + glBufferSubData(GL_UNIFORM_BUFFER, 0, size_in_bytes_, data); + glBindBuffer(GL_UNIFORM_BUFFER, 0); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Usage + * \{ */ + +void GLUniformBuf::bind(int slot) +{ + if (slot >= GPU_max_ubo_binds()) { + fprintf(stderr, + "Error: Trying to bind \"%s\" ubo to slot %d which is above the reported limit of %d.", + name_, + slot, + GPU_max_ubo_binds()); + return; + } + + if (ubo_id_ == 0) { + this->init(); + } + + if (data_ != NULL) { + this->update(data_); + MEM_SAFE_FREE(data_); + } + + slot_ = slot; + glBindBufferBase(GL_UNIFORM_BUFFER, slot_, ubo_id_); + +#ifdef DEBUG + BLI_assert(slot < 16); + static_cast<GLContext *>(GPU_context_active_get())->bound_ubo_slots |= 1 << slot; +#endif +} + +void GLUniformBuf::unbind(void) +{ +#ifdef DEBUG + /* NOTE: This only unbinds the last bound slot. */ + glBindBufferBase(GL_UNIFORM_BUFFER, slot_, 0); + /* Hope that the context did not change. */ + static_cast<GLContext *>(GPU_context_active_get())->bound_ubo_slots &= ~(1 << slot_); +#endif + slot_ = 0; +} + +/** \} */ + +} // namespace blender::gpu
\ No newline at end of file diff --git a/source/blender/gpu/opengl/gl_uniform_buffer.hh b/source/blender/gpu/opengl/gl_uniform_buffer.hh new file mode 100644 index 00000000000..8cd2ab91be9 --- /dev/null +++ b/source/blender/gpu/opengl/gl_uniform_buffer.hh @@ -0,0 +1,60 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2020 Blender Foundation. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include "MEM_guardedalloc.h" + +#include "gpu_uniform_buffer_private.hh" + +#include "glew-mx.h" + +namespace blender { +namespace gpu { + +/** + * Implementation of Uniform Buffers using OpenGL. + **/ +class GLUniformBuf : public UniformBuf { + private: + /** Slot to which this UBO is currently bound. -1 if not bound. */ + int slot_ = -1; + /** OpenGL Object handle. 
*/ + GLuint ubo_id_ = 0; + + public: + GLUniformBuf(size_t size, const char *name); + ~GLUniformBuf(); + + void update(const void *data) override; + void bind(int slot) override; + void unbind(void) override; + + private: + void init(void); + + MEM_CXX_CLASS_ALLOC_FUNCS("GLUniformBuf"); +}; + +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/opengl/gl_vertex_array.cc b/source/blender/gpu/opengl/gl_vertex_array.cc new file mode 100644 index 00000000000..64d44c39587 --- /dev/null +++ b/source/blender/gpu/opengl/gl_vertex_array.cc @@ -0,0 +1,171 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. + */ + +/** \file + * \ingroup gpu + */ + +#include "GPU_glew.h" + +#include "GPU_vertex_buffer.h" + +#include "gpu_shader_interface.hh" +#include "gpu_vertex_format_private.h" + +#include "gl_batch.hh" +#include "gl_context.hh" + +#include "gl_vertex_array.hh" + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Vertex Array Bindings + * \{ */ + +/* Returns enabled vertex pointers as a bitflag (one bit per attrib). 
*/ +static uint16_t vbo_bind(const ShaderInterface *interface, + const GPUVertFormat *format, + uint v_first, + uint v_len, + const bool use_instancing) +{ + uint16_t enabled_attrib = 0; + const uint attr_len = format->attr_len; + uint stride = format->stride; + uint offset = 0; + GLuint divisor = (use_instancing) ? 1 : 0; + + for (uint a_idx = 0; a_idx < attr_len; a_idx++) { + const GPUVertAttr *a = &format->attrs[a_idx]; + + if (format->deinterleaved) { + offset += ((a_idx == 0) ? 0 : format->attrs[a_idx - 1].sz) * v_len; + stride = a->sz; + } + else { + offset = a->offset; + } + + const GLvoid *pointer = (const GLubyte *)0 + offset + v_first * stride; + const GLenum type = convert_comp_type_to_gl(static_cast<GPUVertCompType>(a->comp_type)); + + for (uint n_idx = 0; n_idx < a->name_len; n_idx++) { + const char *name = GPU_vertformat_attr_name_get(format, a, n_idx); + const ShaderInput *input = interface->attr_get(name); + + if (input == NULL) { + continue; + } + + enabled_attrib |= (1 << input->location); + + if (a->comp_len == 16 || a->comp_len == 12 || a->comp_len == 8) { + BLI_assert(a->fetch_mode == GPU_FETCH_FLOAT); + BLI_assert(a->comp_type == GPU_COMP_F32); + for (int i = 0; i < a->comp_len / 4; i++) { + glEnableVertexAttribArray(input->location + i); + glVertexAttribDivisor(input->location + i, divisor); + glVertexAttribPointer( + input->location + i, 4, type, GL_FALSE, stride, (const GLubyte *)pointer + i * 16); + } + } + else { + glEnableVertexAttribArray(input->location); + glVertexAttribDivisor(input->location, divisor); + + switch (a->fetch_mode) { + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + glVertexAttribPointer(input->location, a->comp_len, type, GL_FALSE, stride, pointer); + break; + case GPU_FETCH_INT_TO_FLOAT_UNIT: + glVertexAttribPointer(input->location, a->comp_len, type, GL_TRUE, stride, pointer); + break; + case GPU_FETCH_INT: + glVertexAttribIPointer(input->location, a->comp_len, type, stride, pointer); + break; + } + } + } + } 
+ return enabled_attrib; +} + +/* Update the Attrib Binding of the currently bound VAO. */ +void GLVertArray::update_bindings(const GLuint vao, + const GPUBatch *batch, + const ShaderInterface *interface, + const int base_instance) +{ + uint16_t attr_mask = interface->enabled_attr_mask_; + + glBindVertexArray(vao); + + /* Reverse order so first VBO'S have more prevalence (in term of attribute override). */ + for (int v = GPU_BATCH_VBO_MAX_LEN - 1; v > -1; v--) { + GPUVertBuf *vbo = batch->verts[v]; + if (vbo) { + GPU_vertbuf_use(vbo); + attr_mask &= ~vbo_bind(interface, &vbo->format, 0, vbo->vertex_len, false); + } + } + + for (int v = GPU_BATCH_INST_VBO_MAX_LEN - 1; v > -1; v--) { + GPUVertBuf *vbo = batch->inst[v]; + if (vbo) { + GPU_vertbuf_use(vbo); + attr_mask &= ~vbo_bind(interface, &vbo->format, base_instance, vbo->vertex_len, true); + } + } + + if (attr_mask != 0 && GLEW_ARB_vertex_attrib_binding) { + for (uint16_t mask = 1, a = 0; a < 16; a++, mask <<= 1) { + if (attr_mask & mask) { + GLContext *ctx = static_cast<GLContext *>(GPU_context_active_get()); + /* This replaces glVertexAttrib4f(a, 0.0f, 0.0f, 0.0f, 1.0f); with a more modern style. + * Fix issues for some drivers (see T75069). */ + glBindVertexBuffer(a, ctx->default_attr_vbo_, (intptr_t)0, (intptr_t)0); + glEnableVertexAttribArray(a); + glVertexAttribFormat(a, 4, GL_FLOAT, GL_FALSE, 0); + glVertexAttribBinding(a, a); + } + } + } + + if (batch->elem) { + /* Binds the index buffer. This state is also saved in the VAO. */ + GPU_indexbuf_use(batch->elem); + } +} + +/* Another version of update_bindings for Immediate mode. */ +void GLVertArray::update_bindings(const GLuint vao, + const uint v_first, + const GPUVertFormat *format, + const ShaderInterface *interface) +{ + glBindVertexArray(vao); + + vbo_bind(interface, format, v_first, 0, false); +} + +/** \} */ + +} // namespace blender::gpu
\ No newline at end of file diff --git a/source/blender/gpu/opengl/gl_vertex_array.hh b/source/blender/gpu/opengl/gl_vertex_array.hh new file mode 100644 index 00000000000..7037986e31e --- /dev/null +++ b/source/blender/gpu/opengl/gl_vertex_array.hh @@ -0,0 +1,49 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2020 Blender Foundation. + * All rights reserved. 
+ */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include "glew-mx.h" + +#include "GPU_batch.h" +#include "gl_shader_interface.hh" + +namespace blender { +namespace gpu { + +namespace GLVertArray { + +void update_bindings(const GLuint vao, + const GPUBatch *batch, + const ShaderInterface *interface, + const int base_instance); + +void update_bindings(const GLuint vao, + const uint v_first, + const GPUVertFormat *format, + const ShaderInterface *interface); + +} // namespace GLVertArray + +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/shaders/gpu_shader_2D_image_multi_rect_vert.glsl b/source/blender/gpu/shaders/gpu_shader_2D_image_multi_rect_vert.glsl index d25cd586e65..640ceb97e5b 100644 --- a/source/blender/gpu/shaders/gpu_shader_2D_image_multi_rect_vert.glsl +++ b/source/blender/gpu/shaders/gpu_shader_2D_image_multi_rect_vert.glsl @@ -13,34 +13,19 @@ flat out vec4 finalColor; void main() { - /* Rendering 2 triangle per icon. */ - int i = gl_VertexID / 6; - int v = gl_VertexID % 6; + vec4 pos = calls_data[gl_InstanceID * 3]; + vec4 tex = calls_data[gl_InstanceID * 3 + 1]; + finalColor = calls_data[gl_InstanceID * 3 + 2]; - vec4 pos = calls_data[i * 3]; - vec4 tex = calls_data[i * 3 + 1]; - finalColor = calls_data[i * 3 + 2]; - - /* TODO Remove this */ - if (v == 2) { - v = 4; - } - else if (v == 3) { - v = 0; - } - else if (v == 5) { - v = 2; + if (gl_VertexID == 0) { + pos.xy = pos.xz; + tex.xy = tex.xz; } - - if (v == 0) { + else if (gl_VertexID == 1) { pos.xy = pos.xw; tex.xy = tex.xw; } - else if (v == 1) { - pos.xy = pos.xz; - tex.xy = tex.xz; - } - else if (v == 2) { + else if (gl_VertexID == 2) { pos.xy = pos.yw; tex.xy = tex.yw; } diff --git a/source/blender/gpu/shaders/gpu_shader_2D_image_rect_vert.glsl b/source/blender/gpu/shaders/gpu_shader_2D_image_rect_vert.glsl index fcd877a37eb..ab9c30505c2 100644 --- a/source/blender/gpu/shaders/gpu_shader_2D_image_rect_vert.glsl +++ 
b/source/blender/gpu/shaders/gpu_shader_2D_image_rect_vert.glsl @@ -14,13 +14,13 @@ void main() vec2 uv; vec2 co; if (gl_VertexID == 0) { - co = rect_geom.xw; - uv = rect_icon.xw; - } - else if (gl_VertexID == 1) { co = rect_geom.xy; uv = rect_icon.xy; } + else if (gl_VertexID == 1) { + co = rect_geom.xw; + uv = rect_icon.xw; + } else if (gl_VertexID == 2) { co = rect_geom.zw; uv = rect_icon.zw; diff --git a/source/blender/gpu/shaders/gpu_shader_2D_widget_base_vert.glsl b/source/blender/gpu/shaders/gpu_shader_2D_widget_base_vert.glsl index d15f48c8f8a..fb512a1f00e 100644 --- a/source/blender/gpu/shaders/gpu_shader_2D_widget_base_vert.glsl +++ b/source/blender/gpu/shaders/gpu_shader_2D_widget_base_vert.glsl @@ -57,9 +57,14 @@ in float dummy; vec2 do_widget(void) { + /* Offset to avoid loosing pixels (mimics conservative rasterization). */ + const vec2 ofs = vec2(0.5, -0.5); lineWidth = abs(rect.x - recti.x); vec2 emboss_ofs = vec2(0.0, -lineWidth); - vec2 v_pos[4] = vec2[4](rect.xz + emboss_ofs, rect.xw, rect.yz + emboss_ofs, rect.yw); + vec2 v_pos[4] = vec2[4](rect.xz + emboss_ofs + ofs.yy, + rect.xw + ofs.yx, + rect.yz + emboss_ofs + ofs.xy, + rect.yw + ofs.xx); vec2 pos = v_pos[gl_VertexID]; uvInterp = pos - rect.xz; diff --git a/source/blender/gpu/shaders/material/gpu_shader_material_output_world.glsl b/source/blender/gpu/shaders/material/gpu_shader_material_output_world.glsl index 27ca96501ae..5eb853a4c1a 100644 --- a/source/blender/gpu/shaders/material/gpu_shader_material_output_world.glsl +++ b/source/blender/gpu/shaders/material/gpu_shader_material_output_world.glsl @@ -6,7 +6,8 @@ void node_output_world(Closure surface, Closure volume, out Closure result) float alpha = renderPassEnvironment ? 1.0 : backgroundAlpha; result = CLOSURE_DEFAULT; result.radiance = surface.radiance * alpha; - result.transmittance = vec3(1.0 - alpha); + result.transmittance = vec3(0.0); + result.holdout = (1.0 - alpha); #else result = volume; #endif /* VOLUMETRICS */ |