Diffstat (limited to 'source/blender/gpu/intern')
35 files changed, 4454 insertions, 727 deletions
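Reader's note: the hunks below rename the former Gwn_* batch/vertex-buffer API to the GPU_* prefix (GPUBatch, GPUVertBuf, GPUVertFormat, GPUIndexBuf, ...) and add a per-shader-interface VAO cache to GPUBatch. As a reading aid, here is a minimal sketch of how the renamed API fits together, assembled only from calls that appear in this diff; the builtin shader id GPU_SHADER_3D_UNIFORM_COLOR, the "pos" attribute and the "color" uniform name are illustrative assumptions, not part of this changeset.

#include "GPU_batch.h"
#include "GPU_shader.h"

/* Illustrative sketch (not from this diff): build and draw one triangle
 * with the renamed GPU_* batch API. */
static void draw_triangle_example(void)
{
	static GPUVertFormat format = {0};
	static struct { uint pos; } attr_id;
	if (format.attr_len == 0) {
		attr_id.pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
	}

	GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format);
	GPU_vertbuf_data_alloc(vbo, 3);

	const float verts[3][3] = {{0.0f, 0.0f, 0.0f}, {1.0f, 0.0f, 0.0f}, {0.0f, 1.0f, 0.0f}};
	for (uint i = 0; i < 3; i++) {
		GPU_vertbuf_attr_set(vbo, attr_id.pos, i, verts[i]);
	}

	/* The batch takes ownership of the VBO; no index buffer here. */
	GPUBatch *batch = GPU_batch_create_ex(GPU_PRIM_TRIS, vbo, NULL, GPU_BATCH_OWNS_VBO);

	/* Binds the program and looks up (or creates) a VAO for this shader interface. */
	GPU_batch_program_set_builtin(batch, GPU_SHADER_3D_UNIFORM_COLOR); /* assumed builtin id */
	GPU_batch_uniform_4f(batch, "color", 1.0f, 0.5f, 0.0f, 1.0f); /* assumed uniform name */

	GPU_batch_draw(batch);

	GPU_batch_discard(batch); /* frees the owned VBO and the cached VAOs */
}
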
diff --git a/source/blender/gpu/intern/gpu_attr_binding.c b/source/blender/gpu/intern/gpu_attr_binding.c new file mode 100644 index 00000000000..398b97c7f9d --- /dev/null +++ b/source/blender/gpu/intern/gpu_attr_binding.c @@ -0,0 +1,85 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. + * + * Contributor(s): Blender Foundation + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/gpu/intern/gpu_attr_binding.c + * \ingroup gpu + * + * GPU vertex attribute binding + */ + +#include "GPU_attr_binding.h" +#include "gpu_attr_binding_private.h" +#include <stddef.h> +#include <stdlib.h> + +#if GPU_VERT_ATTR_MAX_LEN != 16 +# error "attrib binding code assumes GPU_VERT_ATTR_MAX_LEN = 16" +#endif + +void AttribBinding_clear(GPUAttrBinding *binding) +{ + binding->loc_bits = 0; + binding->enabled_bits = 0; +} + +uint read_attrib_location(const GPUAttrBinding *binding, uint a_idx) +{ +#if TRUST_NO_ONE + assert(a_idx < GPU_VERT_ATTR_MAX_LEN); + assert(binding->enabled_bits & (1 << a_idx)); +#endif + return (binding->loc_bits >> (4 * a_idx)) & 0xF; +} + +static void write_attrib_location(GPUAttrBinding *binding, uint a_idx, uint location) +{ +#if TRUST_NO_ONE + assert(a_idx < GPU_VERT_ATTR_MAX_LEN); + assert(location < GPU_VERT_ATTR_MAX_LEN); +#endif + const uint shift = 4 * a_idx; + const uint64_t mask = ((uint64_t)0xF) << shift; + /* overwrite this attrib's previous location */ + binding->loc_bits = (binding->loc_bits & ~mask) | (location << shift); + /* mark this attrib as enabled */ + binding->enabled_bits |= 1 << a_idx; +} + +void get_attrib_locations(const GPUVertFormat *format, GPUAttrBinding *binding, const GPUShaderInterface *shaderface) +{ + AttribBinding_clear(binding); + + for (uint a_idx = 0; a_idx < format->attr_len; ++a_idx) { + const GPUVertAttr *a = format->attribs + a_idx; + for (uint n_idx = 0; n_idx < a->name_len; ++n_idx) { + const GPUShaderInput *input = GPU_shaderinterface_attr(shaderface, a->name[n_idx]); +#if TRUST_NO_ONE + assert(input != NULL); + /* TODO: make this a recoverable runtime error? 
indicates mismatch between vertex format and program */ +#endif + write_attrib_location(binding, a_idx, input->location); + } + } +} diff --git a/source/blender/gpu/intern/gpu_attr_binding_private.h b/source/blender/gpu/intern/gpu_attr_binding_private.h new file mode 100644 index 00000000000..cb338b10aa4 --- /dev/null +++ b/source/blender/gpu/intern/gpu_attr_binding_private.h @@ -0,0 +1,45 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. + * + * Contributor(s): Blender Foundation + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/gpu/intern/gpu_attr_binding_private.h + * \ingroup gpu + * + * GPU vertex attribute binding + */ + +#ifndef __GPU_ATTR_BINDING_PRIVATE_H__ +#define __GPU_ATTR_BINDING_PRIVATE_H__ + +#include "GPU_vertex_format.h" +#include "GPU_shader_interface.h" + +void AttribBinding_clear(GPUAttrBinding *binding); + +void get_attrib_locations( + const GPUVertFormat *format, GPUAttrBinding *binding, const GPUShaderInterface *shaderface); +unsigned read_attrib_location( + const GPUAttrBinding *binding, unsigned a_idx); + +#endif /* __GPU_ATTR_BINDING_PRIVATE_H__ */ diff --git a/source/blender/gpu/intern/gpu_basic_shader.c b/source/blender/gpu/intern/gpu_basic_shader.c index b720bed2d0c..1b7b1ecf85a 100644 --- a/source/blender/gpu/intern/gpu_basic_shader.c +++ b/source/blender/gpu/intern/gpu_basic_shader.c @@ -82,7 +82,8 @@ const GLubyte stipple_halftone[128] = { 0xAA, 0xAA, 0xAA, 0xAA, 0x55, 0x55, 0x55, 0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0x55, 0x55, 0x55, 0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0x55, 0x55, 0x55, 0x55, - 0xAA, 0xAA, 0xAA, 0xAA, 0x55, 0x55, 0x55, 0x55}; + 0xAA, 0xAA, 0xAA, 0xAA, 0x55, 0x55, 0x55, 0x55, +}; const GLubyte stipple_quarttone[128] = { 136, 136, 136, 136, 0, 0, 0, 0, 34, 34, 34, 34, 0, 0, 0, 0, @@ -92,7 +93,8 @@ const GLubyte stipple_quarttone[128] = { 136, 136, 136, 136, 0, 0, 0, 0, 34, 34, 34, 34, 0, 0, 0, 0, 136, 136, 136, 136, 0, 0, 0, 0, 34, 34, 34, 34, 0, 0, 0, 0, 136, 136, 136, 136, 0, 0, 0, 0, 34, 34, 34, 34, 0, 0, 0, 0, - 136, 136, 136, 136, 0, 0, 0, 0, 34, 34, 34, 34, 0, 0, 0, 0}; + 136, 136, 136, 136, 0, 0, 0, 0, 34, 34, 34, 34, 0, 0, 0, 0, +}; const GLubyte stipple_diag_stripes_pos[128] = { 0x00, 0xff, 0x00, 0xff, 0x01, 0xfe, 0x01, 0xfe, @@ -110,7 +112,8 @@ const GLubyte stipple_diag_stripes_pos[128] = { 0xff, 0x00, 0xff, 0x00, 0xfe, 0x01, 0xfe, 0x01, 0xfc, 0x03, 0xfc, 0x03, 0xf8, 0x07, 0xf8, 0x07, 0xf0, 0x0f, 0xf0, 0x0f, 0xe0, 0x1f, 0xe0, 0x1f, - 0xc0, 0x3f, 0xc0, 0x3f, 0x80, 0x7f, 0x80, 0x7f}; + 0xc0, 0x3f, 0xc0, 0x3f, 0x80, 0x7f, 0x80, 0x7f, +}; const GLubyte stipple_diag_stripes_neg[128] = { 0xff, 0x00, 0xff, 0x00, 0xfe, 0x01, 0xfe, 0x01, @@ -128,7 +131,8 @@ const GLubyte stipple_diag_stripes_neg[128] = { 0x00, 0xff, 0x00, 0xff, 
0x01, 0xfe, 0x01, 0xfe, 0x03, 0xfc, 0x03, 0xfc, 0x07, 0xf8, 0x07, 0xf8, 0x0f, 0xf0, 0x0f, 0xf0, 0x1f, 0xe0, 0x1f, 0xe0, - 0x3f, 0xc0, 0x3f, 0xc0, 0x7f, 0x80, 0x7f, 0x80}; + 0x3f, 0xc0, 0x3f, 0xc0, 0x7f, 0x80, 0x7f, 0x80, +}; const GLubyte stipple_checker_8px[128] = { 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, @@ -138,7 +142,8 @@ const GLubyte stipple_checker_8px[128] = { 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, - 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255}; + 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, +}; const GLubyte stipple_hexagon[128] = { 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, @@ -156,7 +161,8 @@ const GLubyte stipple_hexagon[128] = { 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, - 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22}; + 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, +}; /* ********************************************* */ /* Init / exit */ @@ -237,7 +243,8 @@ static GPUShader *gpu_basic_shader(int options) datatoc_gpu_shader_basic_frag_glsl, geom_glsl, NULL, - defines); + defines, + __func__); if (shader) { /* set texture map to first texture unit */ diff --git a/source/blender/gpu/intern/gpu_batch.c b/source/blender/gpu/intern/gpu_batch.c index 5bfd20e3c8b..87ea112148c 100644 --- a/source/blender/gpu/intern/gpu_batch.c +++ b/source/blender/gpu/intern/gpu_batch.c @@ -15,40 +15,646 @@ * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * The Original Code is Copyright (C) 2016 Blender Foundation. + * The Original Code is Copyright (C) 2016 by Mike Erwin. * All rights reserved. * - * The Original Code is: all of this file. - * - * Contributor(s): Mike Erwin + * Contributor(s): Blender Foundation * * ***** END GPL LICENSE BLOCK ***** */ /** \file blender/gpu/intern/gpu_batch.c * \ingroup gpu + * + * GPU geometry batch + * Contains VAOs + VBOs + Shader representing a drawable entity. 
*/ #include "MEM_guardedalloc.h" -#include "BLI_utildefines.h" -#include "BLI_rect.h" -#include "BLI_math.h" -#include "BLI_polyfill_2d.h" -#include "BLI_sort_utils.h" - -#include "GPU_batch.h" /* own include */ +#include "GPU_batch.h" #include "GPU_batch_presets.h" +#include "GPU_matrix.h" +#include "GPU_shader.h" + +#include "gpu_batch_private.h" +#include "gpu_context_private.h" +#include "gpu_primitive_private.h" #include "gpu_shader_private.h" +#include <stdlib.h> +#include <string.h> + +static void batch_update_program_bindings(GPUBatch *batch, uint v_first); + +void GPU_batch_vao_cache_clear(GPUBatch *batch) +{ + if (batch->context == NULL) { + return; + } + if (batch->is_dynamic_vao_count) { + for (int i = 0; i < batch->dynamic_vaos.count; ++i) { + if (batch->dynamic_vaos.vao_ids[i]) { + GPU_vao_free(batch->dynamic_vaos.vao_ids[i], batch->context); + } + if (batch->dynamic_vaos.interfaces[i]) { + GPU_shaderinterface_remove_batch_ref((GPUShaderInterface *)batch->dynamic_vaos.interfaces[i], batch); + } + } + MEM_freeN(batch->dynamic_vaos.interfaces); + MEM_freeN(batch->dynamic_vaos.vao_ids); + } + else { + for (int i = 0; i < GPU_BATCH_VAO_STATIC_LEN; ++i) { + if (batch->static_vaos.vao_ids[i]) { + GPU_vao_free(batch->static_vaos.vao_ids[i], batch->context); + } + if (batch->static_vaos.interfaces[i]) { + GPU_shaderinterface_remove_batch_ref((GPUShaderInterface *)batch->static_vaos.interfaces[i], batch); + } + } + } + batch->is_dynamic_vao_count = false; + for (int i = 0; i < GPU_BATCH_VAO_STATIC_LEN; ++i) { + batch->static_vaos.vao_ids[i] = 0; + batch->static_vaos.interfaces[i] = NULL; + } + gpu_context_remove_batch(batch->context, batch); + batch->context = NULL; +} + +GPUBatch *GPU_batch_create_ex( + GPUPrimType prim_type, GPUVertBuf *verts, GPUIndexBuf *elem, + uint owns_flag) +{ + GPUBatch *batch = MEM_callocN(sizeof(GPUBatch), "GPUBatch"); + GPU_batch_init_ex(batch, prim_type, verts, elem, owns_flag); + return batch; +} + +void GPU_batch_init_ex( + GPUBatch *batch, GPUPrimType prim_type, GPUVertBuf *verts, GPUIndexBuf *elem, + uint owns_flag) +{ +#if TRUST_NO_ONE + assert(verts != NULL); +#endif + + batch->verts[0] = verts; + for (int v = 1; v < GPU_BATCH_VBO_MAX_LEN; ++v) { + batch->verts[v] = NULL; + } + batch->inst = NULL; + batch->elem = elem; + batch->gl_prim_type = convert_prim_type_to_gl(prim_type); + batch->phase = GPU_BATCH_READY_TO_DRAW; + batch->is_dynamic_vao_count = false; + batch->owns_flag = owns_flag; + batch->free_callback = NULL; +} + +/* This will share the VBOs with the new batch. 
*/ +GPUBatch *GPU_batch_duplicate(GPUBatch *batch_src) +{ + GPUBatch *batch = GPU_batch_create_ex(GPU_PRIM_POINTS, batch_src->verts[0], batch_src->elem, 0); + + batch->gl_prim_type = batch_src->gl_prim_type; + for (int v = 1; v < GPU_BATCH_VBO_MAX_LEN; ++v) { + batch->verts[v] = batch_src->verts[v]; + } + return batch; +} + +void GPU_batch_discard(GPUBatch *batch) +{ + if (batch->owns_flag & GPU_BATCH_OWNS_INDEX) { + GPU_indexbuf_discard(batch->elem); + } + if (batch->owns_flag & GPU_BATCH_OWNS_INSTANCES) { + GPU_vertbuf_discard(batch->inst); + } + if ((batch->owns_flag & ~GPU_BATCH_OWNS_INDEX) != 0) { + for (int v = 0; v < GPU_BATCH_VBO_MAX_LEN; ++v) { + if (batch->verts[v] == NULL) { + break; + } + if (batch->owns_flag & (1 << v)) { + GPU_vertbuf_discard(batch->verts[v]); + } + } + } + GPU_batch_vao_cache_clear(batch); + + if (batch->free_callback) { + batch->free_callback(batch, batch->callback_data); + } + MEM_freeN(batch); +} + +void GPU_batch_callback_free_set(GPUBatch *batch, void (*callback)(GPUBatch *, void *), void *user_data) +{ + batch->free_callback = callback; + batch->callback_data = user_data; +} + +void GPU_batch_instbuf_set(GPUBatch *batch, GPUVertBuf *inst, bool own_vbo) +{ +#if TRUST_NO_ONE + assert(inst != NULL); +#endif + /* redo the bindings */ + GPU_batch_vao_cache_clear(batch); + + if (batch->inst != NULL && (batch->owns_flag & GPU_BATCH_OWNS_INSTANCES)) { + GPU_vertbuf_discard(batch->inst); + } + batch->inst = inst; + + if (own_vbo) { + batch->owns_flag |= GPU_BATCH_OWNS_INSTANCES; + } + else { + batch->owns_flag &= ~GPU_BATCH_OWNS_INSTANCES; + } +} + +/* Returns the index of verts in the batch. */ +int GPU_batch_vertbuf_add_ex( + GPUBatch *batch, GPUVertBuf *verts, + bool own_vbo) +{ + /* redo the bindings */ + GPU_batch_vao_cache_clear(batch); + + for (uint v = 0; v < GPU_BATCH_VBO_MAX_LEN; ++v) { + if (batch->verts[v] == NULL) { +#if TRUST_NO_ONE + /* for now all VertexBuffers must have same vertex_len */ + assert(verts->vertex_len == batch->verts[0]->vertex_len); +#endif + batch->verts[v] = verts; + /* TODO: mark dirty so we can keep attrib bindings up-to-date */ + if (own_vbo) + batch->owns_flag |= (1 << v); + return v; + } + } + + /* we only make it this far if there is no room for another GPUVertBuf */ +#if TRUST_NO_ONE + assert(false); +#endif + return -1; +} + +static GLuint batch_vao_get(GPUBatch *batch) +{ + /* Search through cache */ + if (batch->is_dynamic_vao_count) { + for (int i = 0; i < batch->dynamic_vaos.count; ++i) + if (batch->dynamic_vaos.interfaces[i] == batch->interface) + return batch->dynamic_vaos.vao_ids[i]; + } + else { + for (int i = 0; i < GPU_BATCH_VAO_STATIC_LEN; ++i) + if (batch->static_vaos.interfaces[i] == batch->interface) + return batch->static_vaos.vao_ids[i]; + } + + /* Set context of this batch. + * It will be bound to it until GPU_batch_vao_cache_clear is called. + * Until then it can only be drawn with this context. */ + if (batch->context == NULL) { + batch->context = GPU_context_active_get(); + gpu_context_add_batch(batch->context, batch); + } +#if TRUST_NO_ONE + else { + /* Make sure you are not trying to draw this batch in another context. */ + assert(batch->context == GPU_context_active_get()); + } +#endif + + /* Cache miss, time to add a new entry! 
*/ + GLuint new_vao = 0; + if (!batch->is_dynamic_vao_count) { + int i; /* find first unused slot */ + for (i = 0; i < GPU_BATCH_VAO_STATIC_LEN; ++i) + if (batch->static_vaos.vao_ids[i] == 0) + break; + + if (i < GPU_BATCH_VAO_STATIC_LEN) { + batch->static_vaos.interfaces[i] = batch->interface; + batch->static_vaos.vao_ids[i] = new_vao = GPU_vao_alloc(); + } + else { + /* Not enough place switch to dynamic. */ + batch->is_dynamic_vao_count = true; + /* Erase previous entries, they will be added back if drawn again. */ + for (int j = 0; j < GPU_BATCH_VAO_STATIC_LEN; ++j) { + GPU_shaderinterface_remove_batch_ref((GPUShaderInterface *)batch->static_vaos.interfaces[j], batch); + GPU_vao_free(batch->static_vaos.vao_ids[j], batch->context); + } + /* Init dynamic arrays and let the branch below set the values. */ + batch->dynamic_vaos.count = GPU_BATCH_VAO_DYN_ALLOC_COUNT; + batch->dynamic_vaos.interfaces = MEM_callocN(batch->dynamic_vaos.count * sizeof(GPUShaderInterface *), "dyn vaos interfaces"); + batch->dynamic_vaos.vao_ids = MEM_callocN(batch->dynamic_vaos.count * sizeof(GLuint), "dyn vaos ids"); + } + } + + if (batch->is_dynamic_vao_count) { + int i; /* find first unused slot */ + for (i = 0; i < batch->dynamic_vaos.count; ++i) + if (batch->dynamic_vaos.vao_ids[i] == 0) + break; + + if (i == batch->dynamic_vaos.count) { + /* Not enough place, realloc the array. */ + i = batch->dynamic_vaos.count; + batch->dynamic_vaos.count += GPU_BATCH_VAO_DYN_ALLOC_COUNT; + batch->dynamic_vaos.interfaces = MEM_recallocN(batch->dynamic_vaos.interfaces, sizeof(GPUShaderInterface *) * batch->dynamic_vaos.count); + batch->dynamic_vaos.vao_ids = MEM_recallocN(batch->dynamic_vaos.vao_ids, sizeof(GLuint) * batch->dynamic_vaos.count); + } + batch->dynamic_vaos.interfaces[i] = batch->interface; + batch->dynamic_vaos.vao_ids[i] = new_vao = GPU_vao_alloc(); + } + + GPU_shaderinterface_add_batch_ref((GPUShaderInterface *)batch->interface, batch); + +#if TRUST_NO_ONE + assert(new_vao != 0); +#endif + + /* We just got a fresh VAO we need to initialize it. */ + glBindVertexArray(new_vao); + batch_update_program_bindings(batch, 0); + glBindVertexArray(0); + + return new_vao; +} + +void GPU_batch_program_set_no_use(GPUBatch *batch, uint32_t program, const GPUShaderInterface *shaderface) +{ +#if TRUST_NO_ONE + assert(glIsProgram(shaderface->program)); + assert(batch->program_in_use == 0); +#endif + batch->interface = shaderface; + batch->program = program; + batch->vao_id = batch_vao_get(batch); +} + +void GPU_batch_program_set(GPUBatch *batch, uint32_t program, const GPUShaderInterface *shaderface) +{ + GPU_batch_program_set_no_use(batch, program, shaderface); + GPU_batch_program_use_begin(batch); /* hack! 
to make Batch_Uniform* simpler */ +} + +void gpu_batch_remove_interface_ref(GPUBatch *batch, const GPUShaderInterface *interface) +{ + if (batch->is_dynamic_vao_count) { + for (int i = 0; i < batch->dynamic_vaos.count; ++i) { + if (batch->dynamic_vaos.interfaces[i] == interface) { + GPU_vao_free(batch->dynamic_vaos.vao_ids[i], batch->context); + batch->dynamic_vaos.vao_ids[i] = 0; + batch->dynamic_vaos.interfaces[i] = NULL; + break; /* cannot have duplicates */ + } + } + } + else { + int i; + for (i = 0; i < GPU_BATCH_VAO_STATIC_LEN; ++i) { + if (batch->static_vaos.interfaces[i] == interface) { + GPU_vao_free(batch->static_vaos.vao_ids[i], batch->context); + batch->static_vaos.vao_ids[i] = 0; + batch->static_vaos.interfaces[i] = NULL; + break; /* cannot have duplicates */ + } + } + } +} + +static void create_bindings( + GPUVertBuf *verts, const GPUShaderInterface *interface, + uint v_first, const bool use_instancing) +{ + const GPUVertFormat *format = &verts->format; + + const uint attr_len = format->attr_len; + const uint stride = format->stride; + + GPU_vertbuf_use(verts); + + for (uint a_idx = 0; a_idx < attr_len; ++a_idx) { + const GPUVertAttr *a = format->attribs + a_idx; + const GLvoid *pointer = (const GLubyte *)0 + a->offset + v_first * stride; + + for (uint n_idx = 0; n_idx < a->name_len; ++n_idx) { + const GPUShaderInput *input = GPU_shaderinterface_attr(interface, a->name[n_idx]); + + if (input == NULL) continue; + + if (a->comp_len == 16 || a->comp_len == 12 || a->comp_len == 8) { +#if TRUST_NO_ONE + assert(a->fetch_mode == GPU_FETCH_FLOAT); + assert(a->gl_comp_type == GL_FLOAT); +#endif + for (int i = 0; i < a->comp_len / 4; ++i) { + glEnableVertexAttribArray(input->location + i); + glVertexAttribDivisor(input->location + i, (use_instancing) ? 1 : 0); + glVertexAttribPointer(input->location + i, 4, a->gl_comp_type, GL_FALSE, stride, + (const GLubyte *)pointer + i * 16); + } + } + else { + glEnableVertexAttribArray(input->location); + glVertexAttribDivisor(input->location, (use_instancing) ? 1 : 0); + + switch (a->fetch_mode) { + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + glVertexAttribPointer(input->location, a->comp_len, a->gl_comp_type, GL_FALSE, stride, pointer); + break; + case GPU_FETCH_INT_TO_FLOAT_UNIT: + glVertexAttribPointer(input->location, a->comp_len, a->gl_comp_type, GL_TRUE, stride, pointer); + break; + case GPU_FETCH_INT: + glVertexAttribIPointer(input->location, a->comp_len, a->gl_comp_type, stride, pointer); + break; + } + } + } + } +} + +static void batch_update_program_bindings(GPUBatch *batch, uint v_first) +{ + for (int v = 0; v < GPU_BATCH_VBO_MAX_LEN && batch->verts[v] != NULL; ++v) { + create_bindings(batch->verts[v], batch->interface, (batch->inst) ? 0 : v_first, false); + } + if (batch->inst) { + create_bindings(batch->inst, batch->interface, v_first, true); + } + if (batch->elem) { + GPU_indexbuf_use(batch->elem); + } +} + +void GPU_batch_program_use_begin(GPUBatch *batch) +{ + /* NOTE: use_program & done_using_program are fragile, depend on staying in sync with + * the GL context's active program. use_program doesn't mark other programs as "not used". 
*/ + /* TODO: make not fragile (somehow) */ + + if (!batch->program_in_use) { + glUseProgram(batch->program); + batch->program_in_use = true; + } +} + +void GPU_batch_program_use_end(GPUBatch *batch) +{ + if (batch->program_in_use) { +#if PROGRAM_NO_OPTI + glUseProgram(0); +#endif + batch->program_in_use = false; + } +} + +#if TRUST_NO_ONE +# define GET_UNIFORM const GPUShaderInput *uniform = GPU_shaderinterface_uniform(batch->interface, name); assert(uniform); +#else +# define GET_UNIFORM const GPUShaderInput *uniform = GPU_shaderinterface_uniform(batch->interface, name); +#endif + +void GPU_batch_uniform_1ui(GPUBatch *batch, const char *name, int value) +{ + GET_UNIFORM + glUniform1ui(uniform->location, value); +} + +void GPU_batch_uniform_1i(GPUBatch *batch, const char *name, int value) +{ + GET_UNIFORM + glUniform1i(uniform->location, value); +} + +void GPU_batch_uniform_1b(GPUBatch *batch, const char *name, bool value) +{ + GET_UNIFORM + glUniform1i(uniform->location, value ? GL_TRUE : GL_FALSE); +} + +void GPU_batch_uniform_2f(GPUBatch *batch, const char *name, float x, float y) +{ + GET_UNIFORM + glUniform2f(uniform->location, x, y); +} + +void GPU_batch_uniform_3f(GPUBatch *batch, const char *name, float x, float y, float z) +{ + GET_UNIFORM + glUniform3f(uniform->location, x, y, z); +} + +void GPU_batch_uniform_4f(GPUBatch *batch, const char *name, float x, float y, float z, float w) +{ + GET_UNIFORM + glUniform4f(uniform->location, x, y, z, w); +} + +void GPU_batch_uniform_1f(GPUBatch *batch, const char *name, float x) +{ + GET_UNIFORM + glUniform1f(uniform->location, x); +} + +void GPU_batch_uniform_2fv(GPUBatch *batch, const char *name, const float data[2]) +{ + GET_UNIFORM + glUniform2fv(uniform->location, 1, data); +} + +void GPU_batch_uniform_3fv(GPUBatch *batch, const char *name, const float data[3]) +{ + GET_UNIFORM + glUniform3fv(uniform->location, 1, data); +} + +void GPU_batch_uniform_4fv(GPUBatch *batch, const char *name, const float data[4]) +{ + GET_UNIFORM + glUniform4fv(uniform->location, 1, data); +} + +void GPU_batch_uniform_2fv_array(GPUBatch *batch, const char *name, const int len, const float *data) +{ + GET_UNIFORM + glUniform2fv(uniform->location, len, data); +} + +void GPU_batch_uniform_4fv_array(GPUBatch *batch, const char *name, const int len, const float *data) +{ + GET_UNIFORM + glUniform4fv(uniform->location, len, data); +} + +void GPU_batch_uniform_mat4(GPUBatch *batch, const char *name, const float data[4][4]) +{ + GET_UNIFORM + glUniformMatrix4fv(uniform->location, 1, GL_FALSE, (const float *)data); +} + +static void primitive_restart_enable(const GPUIndexBuf *el) +{ + // TODO(fclem) Replace by GL_PRIMITIVE_RESTART_FIXED_INDEX when we have ogl 4.3 + glEnable(GL_PRIMITIVE_RESTART); + GLuint restart_index = (GLuint)0xFFFFFFFF; + +#if GPU_TRACK_INDEX_RANGE + if (el->index_type == GPU_INDEX_U8) + restart_index = (GLuint)0xFF; + else if (el->index_type == GPU_INDEX_U16) + restart_index = (GLuint)0xFFFF; +#endif + + glPrimitiveRestartIndex(restart_index); +} + +static void primitive_restart_disable(void) +{ + glDisable(GL_PRIMITIVE_RESTART); +} + +void GPU_batch_draw(GPUBatch *batch) +{ +#if TRUST_NO_ONE + assert(batch->phase == GPU_BATCH_READY_TO_DRAW); + assert(batch->verts[0]->vbo_id != 0); +#endif + GPU_batch_program_use_begin(batch); + GPU_matrix_bind(batch->interface); // external call. 
+ + GPU_batch_draw_range_ex(batch, 0, 0, false); + + GPU_batch_program_use_end(batch); +} + +void GPU_batch_draw_range_ex(GPUBatch *batch, int v_first, int v_count, bool force_instance) +{ +#if TRUST_NO_ONE + assert(!(force_instance && (batch->inst == NULL)) || v_count > 0); // we cannot infer length if force_instance +#endif + const bool do_instance = (force_instance || batch->inst); + + // If using offset drawing, use the default VAO and redo bindings. + if (v_first != 0 && (do_instance || batch->elem)) { + glBindVertexArray(GPU_vao_default()); + batch_update_program_bindings(batch, v_first); + } + else { + glBindVertexArray(batch->vao_id); + } + + if (do_instance) { + /* Infer length if vertex count is not given */ + if (v_count == 0) { + v_count = batch->inst->vertex_len; + } + + if (batch->elem) { + const GPUIndexBuf *el = batch->elem; + + if (el->use_prim_restart) { + primitive_restart_enable(el); + } +#if GPU_TRACK_INDEX_RANGE + glDrawElementsInstancedBaseVertex(batch->gl_prim_type, + el->index_len, + el->gl_index_type, + 0, + v_count, + el->base_index); +#else + glDrawElementsInstanced(batch->gl_prim_type, el->index_len, GL_UNSIGNED_INT, 0, v_count); +#endif + if (el->use_prim_restart) { + primitive_restart_disable(); + } + } + else { + glDrawArraysInstanced(batch->gl_prim_type, 0, batch->verts[0]->vertex_len, v_count); + } + } + else { + /* Infer length if vertex count is not given */ + if (v_count == 0) { + v_count = (batch->elem) ? batch->elem->index_len : batch->verts[0]->vertex_len; + } + + if (batch->elem) { + const GPUIndexBuf *el = batch->elem; + + if (el->use_prim_restart) { + primitive_restart_enable(el); + } + +#if GPU_TRACK_INDEX_RANGE + if (el->base_index) { + glDrawRangeElementsBaseVertex( + batch->gl_prim_type, + el->min_index, + el->max_index, + v_count, + el->gl_index_type, + 0, + el->base_index); + } + else { + glDrawRangeElements(batch->gl_prim_type, el->min_index, el->max_index, v_count, el->gl_index_type, 0); + } +#else + glDrawElements(batch->gl_prim_type, v_count, GL_UNSIGNED_INT, 0); +#endif + if (el->use_prim_restart) { + primitive_restart_disable(); + } + } + else { + glDrawArrays(batch->gl_prim_type, v_first, v_count); + } + } + + /* Performance hog if you are drawing with the same vao multiple time. + * Only activate for debugging. */ + // glBindVertexArray(0); +} + +/* just draw some vertices and let shader place them where we want. */ +void GPU_draw_primitive(GPUPrimType prim_type, int v_count) +{ + /* we cannot draw without vao ... annoying ... */ + glBindVertexArray(GPU_vao_default()); + + GLenum type = convert_prim_type_to_gl(prim_type); + glDrawArrays(type, 0, v_count); + + /* Performance hog if you are drawing with the same vao multiple time. 
+ * Only activate for debugging.*/ + // glBindVertexArray(0); +} + + /* -------------------------------------------------------------------- */ /** \name Utilities * \{ */ -void GWN_batch_program_set_builtin(Gwn_Batch *batch, GPUBuiltinShader shader_id) +void GPU_batch_program_set_builtin(GPUBatch *batch, GPUBuiltinShader shader_id) { GPUShader *shader = GPU_shader_get_builtin_shader(shader_id); - GWN_batch_program_set(batch, shader->program, shader->interface); + GPU_batch_program_set(batch, shader->program, shader->interface); } /** \} */ diff --git a/source/blender/gpu/intern/gpu_batch_presets.c b/source/blender/gpu/intern/gpu_batch_presets.c index fb696fd09a5..83287c57441 100644 --- a/source/blender/gpu/intern/gpu_batch_presets.c +++ b/source/blender/gpu/intern/gpu_batch_presets.c @@ -45,14 +45,14 @@ /* Struct to store 3D Batches and their format */ static struct { struct { - Gwn_Batch *sphere_high; - Gwn_Batch *sphere_med; - Gwn_Batch *sphere_low; - Gwn_Batch *sphere_wire_low; - Gwn_Batch *sphere_wire_med; + GPUBatch *sphere_high; + GPUBatch *sphere_med; + GPUBatch *sphere_low; + GPUBatch *sphere_wire_low; + GPUBatch *sphere_wire_med; } batch; - Gwn_VertFormat format; + GPUVertFormat format; struct { uint pos, nor; @@ -66,28 +66,28 @@ static ListBase presets_list = {NULL, NULL}; /** \name 3D Primitives * \{ */ -static Gwn_VertFormat *preset_3d_format(void) +static GPUVertFormat *preset_3d_format(void) { if (g_presets_3d.format.attr_len == 0) { - Gwn_VertFormat *format = &g_presets_3d.format; - g_presets_3d.attr_id.pos = GWN_vertformat_attr_add(format, "pos", GWN_COMP_F32, 3, GWN_FETCH_FLOAT); - g_presets_3d.attr_id.nor = GWN_vertformat_attr_add(format, "nor", GWN_COMP_F32, 3, GWN_FETCH_FLOAT); + GPUVertFormat *format = &g_presets_3d.format; + g_presets_3d.attr_id.pos = GPU_vertformat_attr_add(format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); + g_presets_3d.attr_id.nor = GPU_vertformat_attr_add(format, "nor", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); } return &g_presets_3d.format; } static void batch_sphere_lat_lon_vert( - Gwn_VertBufRaw *pos_step, Gwn_VertBufRaw *nor_step, + GPUVertBufRaw *pos_step, GPUVertBufRaw *nor_step, float lat, float lon) { float pos[3]; pos[0] = sinf(lat) * cosf(lon); pos[1] = cosf(lat); pos[2] = sinf(lat) * sinf(lon); - copy_v3_v3(GWN_vertbuf_raw_step(pos_step), pos); - copy_v3_v3(GWN_vertbuf_raw_step(nor_step), pos); + copy_v3_v3(GPU_vertbuf_raw_step(pos_step), pos); + copy_v3_v3(GPU_vertbuf_raw_step(nor_step), pos); } -Gwn_Batch *GPU_batch_preset_sphere(int lod) +GPUBatch *GPU_batch_preset_sphere(int lod) { BLI_assert(lod >= 0 && lod <= 2); BLI_assert(BLI_thread_is_main()); @@ -103,7 +103,7 @@ Gwn_Batch *GPU_batch_preset_sphere(int lod) } } -Gwn_Batch *GPU_batch_preset_sphere_wire(int lod) +GPUBatch *GPU_batch_preset_sphere_wire(int lod) { BLI_assert(lod >= 0 && lod <= 1); BLI_assert(BLI_thread_is_main()); @@ -123,19 +123,19 @@ Gwn_Batch *GPU_batch_preset_sphere_wire(int lod) * \{ */ /* Replacement for gluSphere */ -Gwn_Batch *gpu_batch_sphere(int lat_res, int lon_res) +GPUBatch *gpu_batch_sphere(int lat_res, int lon_res) { const float lon_inc = 2 * M_PI / lon_res; const float lat_inc = M_PI / lat_res; float lon, lat; - Gwn_VertBuf *vbo = GWN_vertbuf_create_with_format(preset_3d_format()); + GPUVertBuf *vbo = GPU_vertbuf_create_with_format(preset_3d_format()); const uint vbo_len = (lat_res - 1) * lon_res * 6; - GWN_vertbuf_data_alloc(vbo, vbo_len); + GPU_vertbuf_data_alloc(vbo, vbo_len); - Gwn_VertBufRaw pos_step, nor_step; - GWN_vertbuf_attr_get_raw_data(vbo, 
g_presets_3d.attr_id.pos, &pos_step); - GWN_vertbuf_attr_get_raw_data(vbo, g_presets_3d.attr_id.nor, &nor_step); + GPUVertBufRaw pos_step, nor_step; + GPU_vertbuf_attr_get_raw_data(vbo, g_presets_3d.attr_id.pos, &pos_step); + GPU_vertbuf_attr_get_raw_data(vbo, g_presets_3d.attr_id.nor, &nor_step); lon = 0.0f; for (int i = 0; i < lon_res; i++, lon += lon_inc) { @@ -155,25 +155,25 @@ Gwn_Batch *gpu_batch_sphere(int lat_res, int lon_res) } } - BLI_assert(vbo_len == GWN_vertbuf_raw_used(&pos_step)); - BLI_assert(vbo_len == GWN_vertbuf_raw_used(&nor_step)); + BLI_assert(vbo_len == GPU_vertbuf_raw_used(&pos_step)); + BLI_assert(vbo_len == GPU_vertbuf_raw_used(&nor_step)); - return GWN_batch_create_ex(GWN_PRIM_TRIS, vbo, NULL, GWN_BATCH_OWNS_VBO); + return GPU_batch_create_ex(GPU_PRIM_TRIS, vbo, NULL, GPU_BATCH_OWNS_VBO); } -static Gwn_Batch *batch_sphere_wire(int lat_res, int lon_res) +static GPUBatch *batch_sphere_wire(int lat_res, int lon_res) { const float lon_inc = 2 * M_PI / lon_res; const float lat_inc = M_PI / lat_res; float lon, lat; - Gwn_VertBuf *vbo = GWN_vertbuf_create_with_format(preset_3d_format()); + GPUVertBuf *vbo = GPU_vertbuf_create_with_format(preset_3d_format()); const uint vbo_len = (lat_res * lon_res * 2) + ((lat_res - 1) * lon_res * 2); - GWN_vertbuf_data_alloc(vbo, vbo_len); + GPU_vertbuf_data_alloc(vbo, vbo_len); - Gwn_VertBufRaw pos_step, nor_step; - GWN_vertbuf_attr_get_raw_data(vbo, g_presets_3d.attr_id.pos, &pos_step); - GWN_vertbuf_attr_get_raw_data(vbo, g_presets_3d.attr_id.nor, &nor_step); + GPUVertBufRaw pos_step, nor_step; + GPU_vertbuf_attr_get_raw_data(vbo, g_presets_3d.attr_id.pos, &pos_step); + GPU_vertbuf_attr_get_raw_data(vbo, g_presets_3d.attr_id.nor, &nor_step); lon = 0.0f; for (int i = 0; i < lon_res; i++, lon += lon_inc) { @@ -189,10 +189,10 @@ static Gwn_Batch *batch_sphere_wire(int lat_res, int lon_res) } } - BLI_assert(vbo_len == GWN_vertbuf_raw_used(&pos_step)); - BLI_assert(vbo_len == GWN_vertbuf_raw_used(&nor_step)); + BLI_assert(vbo_len == GPU_vertbuf_raw_used(&pos_step)); + BLI_assert(vbo_len == GPU_vertbuf_raw_used(&nor_step)); - return GWN_batch_create_ex(GWN_PRIM_LINES, vbo, NULL, GWN_BATCH_OWNS_VBO); + return GPU_batch_create_ex(GPU_PRIM_LINES, vbo, NULL, GPU_BATCH_OWNS_VBO); } /** \} */ @@ -216,7 +216,7 @@ void gpu_batch_presets_init(void) gpu_batch_presets_register(g_presets_3d.batch.sphere_wire_med); } -void gpu_batch_presets_register(Gwn_Batch *preset_batch) +void gpu_batch_presets_register(GPUBatch *preset_batch) { BLI_addtail(&presets_list, BLI_genericNodeN(preset_batch)); } @@ -227,8 +227,8 @@ void gpu_batch_presets_reset(void) * This way they will draw correctly for each window. 
*/ LinkData *link = presets_list.first; for (link = presets_list.first; link; link = link->next) { - Gwn_Batch *preset = link->data; - gwn_batch_vao_cache_clear(preset); + GPUBatch *preset = link->data; + GPU_batch_vao_cache_clear(preset); } } @@ -236,8 +236,8 @@ void gpu_batch_presets_exit(void) { LinkData *link; while ((link = BLI_pophead(&presets_list))) { - Gwn_Batch *preset = link->data; - GWN_batch_discard(preset); + GPUBatch *preset = link->data; + GPU_batch_discard(preset); MEM_freeN(link); } } diff --git a/source/blender/gpu/intern/gpu_batch_private.h b/source/blender/gpu/intern/gpu_batch_private.h new file mode 100644 index 00000000000..3a05e243065 --- /dev/null +++ b/source/blender/gpu/intern/gpu_batch_private.h @@ -0,0 +1,50 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. + * + * Contributor(s): Blender Foundation + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/gpu/intern/gpu_batch_private.h + * \ingroup gpu + * + * GPU geometry batch + * Contains VAOs + VBOs + Shader representing a drawable entity. + */ + +#ifndef __GPU_BATCH_PRIVATE_H__ +#define __GPU_BATCH_PRIVATE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "GPU_batch.h" +#include "GPU_context.h" +#include "GPU_shader_interface.h" + +void gpu_batch_remove_interface_ref(GPUBatch *batch, const GPUShaderInterface *interface); + +#ifdef __cplusplus +} +#endif + +#endif /* __GPU_BATCH_PRIVATE_H__ */ diff --git a/source/blender/gpu/intern/gpu_batch_utils.c b/source/blender/gpu/intern/gpu_batch_utils.c index d6d82ac18b6..0a7f1ca901d 100644 --- a/source/blender/gpu/intern/gpu_batch_utils.c +++ b/source/blender/gpu/intern/gpu_batch_utils.c @@ -47,7 +47,7 @@ * \param polys_flat_len: Length of the array (must be an even number). * \param rect: Optional region to map the byte 0..255 coords to. When not set use -1..1. */ -Gwn_Batch *GPU_batch_tris_from_poly_2d_encoded( +GPUBatch *GPU_batch_tris_from_poly_2d_encoded( const uchar *polys_flat, uint polys_flat_len, const rctf *rect) { const uchar (*polys)[2] = (const void *)polys_flat; @@ -103,41 +103,41 @@ Gwn_Batch *GPU_batch_tris_from_poly_2d_encoded( } /* We have vertices and tris, make a batch from this. 
*/ - static Gwn_VertFormat format = {0}; + static GPUVertFormat format = {0}; static struct { uint pos; } attr_id; if (format.attr_len == 0) { - attr_id.pos = GWN_vertformat_attr_add(&format, "pos", GWN_COMP_F32, 2, GWN_FETCH_FLOAT); + attr_id.pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 2, GPU_FETCH_FLOAT); } const uint verts_len = (verts_step - verts); const uint tris_len = (tris_step - tris); - Gwn_VertBuf *vbo = GWN_vertbuf_create_with_format(&format); - GWN_vertbuf_data_alloc(vbo, verts_len); + GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format); + GPU_vertbuf_data_alloc(vbo, verts_len); - Gwn_VertBufRaw pos_step; - GWN_vertbuf_attr_get_raw_data(vbo, attr_id.pos, &pos_step); + GPUVertBufRaw pos_step; + GPU_vertbuf_attr_get_raw_data(vbo, attr_id.pos, &pos_step); for (uint i = 0; i < verts_len; i++) { - copy_v2_v2(GWN_vertbuf_raw_step(&pos_step), verts[i]); + copy_v2_v2(GPU_vertbuf_raw_step(&pos_step), verts[i]); } - Gwn_IndexBufBuilder elb; - GWN_indexbuf_init(&elb, GWN_PRIM_TRIS, tris_len, verts_len); + GPUIndexBufBuilder elb; + GPU_indexbuf_init(&elb, GPU_PRIM_TRIS, tris_len, verts_len); for (uint i = 0; i < tris_len; i++) { - GWN_indexbuf_add_tri_verts(&elb, UNPACK3(tris[i])); + GPU_indexbuf_add_tri_verts(&elb, UNPACK3(tris[i])); } - Gwn_IndexBuf *indexbuf = GWN_indexbuf_build(&elb); + GPUIndexBuf *indexbuf = GPU_indexbuf_build(&elb); MEM_freeN(tris); MEM_freeN(verts); - return GWN_batch_create_ex( - GWN_PRIM_TRIS, vbo, + return GPU_batch_create_ex( + GPU_PRIM_TRIS, vbo, indexbuf, - GWN_BATCH_OWNS_VBO | GWN_BATCH_OWNS_INDEX); + GPU_BATCH_OWNS_VBO | GPU_BATCH_OWNS_INDEX); } -Gwn_Batch *GPU_batch_wire_from_poly_2d_encoded( +GPUBatch *GPU_batch_wire_from_poly_2d_encoded( const uchar *polys_flat, uint polys_flat_len, const rctf *rect) { const uchar (*polys)[2] = (const void *)polys_flat; @@ -206,18 +206,18 @@ Gwn_Batch *GPU_batch_wire_from_poly_2d_encoded( } /* We have vertices and tris, make a batch from this. 
*/ - static Gwn_VertFormat format = {0}; + static GPUVertFormat format = {0}; static struct { uint pos; } attr_id; if (format.attr_len == 0) { - attr_id.pos = GWN_vertformat_attr_add(&format, "pos", GWN_COMP_F32, 2, GWN_FETCH_FLOAT); + attr_id.pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 2, GPU_FETCH_FLOAT); } - Gwn_VertBuf *vbo = GWN_vertbuf_create_with_format(&format); + GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format); const uint vbo_len_capacity = lines_len * 2; - GWN_vertbuf_data_alloc(vbo, vbo_len_capacity); + GPU_vertbuf_data_alloc(vbo, vbo_len_capacity); - Gwn_VertBufRaw pos_step; - GWN_vertbuf_attr_get_raw_data(vbo, attr_id.pos, &pos_step); + GPUVertBufRaw pos_step; + GPU_vertbuf_attr_get_raw_data(vbo, attr_id.pos, &pos_step); for (uint i = 0; i < lines_len; i++) { union { @@ -226,18 +226,18 @@ Gwn_Batch *GPU_batch_wire_from_poly_2d_encoded( } data; data.as_u32 = lines[i]; for (uint k = 0; k < 2; k++) { - float *pos_v2 = GWN_vertbuf_raw_step(&pos_step); + float *pos_v2 = GPU_vertbuf_raw_step(&pos_step); for (uint j = 0; j < 2; j++) { pos_v2[j] = min_uchar[j] + ((float)data.as_u8_pair[k][j] * range_uchar[j]); } } } - BLI_assert(vbo_len_capacity == GWN_vertbuf_raw_used(&pos_step)); + BLI_assert(vbo_len_capacity == GPU_vertbuf_raw_used(&pos_step)); MEM_freeN(lines); - return GWN_batch_create_ex( - GWN_PRIM_LINES, vbo, + return GPU_batch_create_ex( + GPU_PRIM_LINES, vbo, NULL, - GWN_BATCH_OWNS_VBO); + GPU_BATCH_OWNS_VBO); } /** \} */ diff --git a/source/blender/gpu/intern/gpu_buffers.c b/source/blender/gpu/intern/gpu_buffers.c index 1a4750652cc..16590785af6 100644 --- a/source/blender/gpu/intern/gpu_buffers.c +++ b/source/blender/gpu/intern/gpu_buffers.c @@ -62,7 +62,7 @@ static ThreadMutex buffer_mutex = BLI_MUTEX_INITIALIZER; /* multires global buffer, can be used for many grids having the same grid size */ typedef struct GridCommonGPUBuffer { - Gwn_IndexBuf *mres_buffer; + GPUIndexBuf *mres_buffer; int mres_prev_gridsize; unsigned mres_prev_totquad; } GridCommonGPUBuffer; @@ -71,11 +71,11 @@ typedef struct GridCommonGPUBuffer { * drawing and doesn't interact at all with the buffer code above */ struct GPU_PBVH_Buffers { - Gwn_IndexBuf *index_buf, *index_buf_fast; - Gwn_VertBuf *vert_buf; + GPUIndexBuf *index_buf, *index_buf_fast; + GPUVertBuf *vert_buf; - Gwn_Batch *triangles; - Gwn_Batch *triangles_fast; + GPUBatch *triangles; + GPUBatch *triangles_fast; /* mesh pointers in case buffer allocation fails */ const MPoly *mpoly; @@ -132,23 +132,23 @@ static bool gpu_pbvh_vert_buf_data_set(GPU_PBVH_Buffers *buffers, unsigned int v /* Initialize vertex buffer */ /* match 'VertexBufferFormat' */ - static Gwn_VertFormat format = {0}; + static GPUVertFormat format = {0}; if (format.attr_len == 0) { - g_vbo_id.pos = GWN_vertformat_attr_add(&format, "pos", GWN_COMP_F32, 3, GWN_FETCH_FLOAT); - g_vbo_id.nor = GWN_vertformat_attr_add(&format, "nor", GWN_COMP_I16, 3, GWN_FETCH_INT_TO_FLOAT_UNIT); - g_vbo_id.col = GWN_vertformat_attr_add(&format, "color", GWN_COMP_U8, 3, GWN_FETCH_INT_TO_FLOAT_UNIT); + g_vbo_id.pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); + g_vbo_id.nor = GPU_vertformat_attr_add(&format, "nor", GPU_COMP_I16, 3, GPU_FETCH_INT_TO_FLOAT_UNIT); + g_vbo_id.col = GPU_vertformat_attr_add(&format, "color", GPU_COMP_U8, 3, GPU_FETCH_INT_TO_FLOAT_UNIT); } #if 0 - buffers->vert_buf = GWN_vertbuf_create_with_format_ex(&format, GWN_USAGE_DYNAMIC); - GWN_vertbuf_data_alloc(buffers->vert_buf, vert_len); + buffers->vert_buf = 
GPU_vertbuf_create_with_format_ex(&format, GPU_USAGE_DYNAMIC); + GPU_vertbuf_data_alloc(buffers->vert_buf, vert_len); } else if (vert_len != buffers->vert_buf->vertex_len) { - GWN_vertbuf_data_resize(buffers->vert_buf, vert_len); + GPU_vertbuf_data_resize(buffers->vert_buf, vert_len); } #else - buffers->vert_buf = GWN_vertbuf_create_with_format_ex(&format, GWN_USAGE_STATIC); + buffers->vert_buf = GPU_vertbuf_create_with_format_ex(&format, GPU_USAGE_STATIC); } - GWN_vertbuf_data_alloc(buffers->vert_buf, vert_len); + GPU_vertbuf_data_alloc(buffers->vert_buf, vert_len); #endif return buffers->vert_buf->data != NULL; } @@ -157,19 +157,19 @@ static void gpu_pbvh_batch_init(GPU_PBVH_Buffers *buffers) { /* force flushing to the GPU */ if (buffers->vert_buf->data) { - GWN_vertbuf_use(buffers->vert_buf); + GPU_vertbuf_use(buffers->vert_buf); } if (buffers->triangles == NULL) { - buffers->triangles = GWN_batch_create( - GWN_PRIM_TRIS, buffers->vert_buf, + buffers->triangles = GPU_batch_create( + GPU_PRIM_TRIS, buffers->vert_buf, /* can be NULL */ buffers->index_buf); } if ((buffers->triangles_fast == NULL) && buffers->index_buf_fast) { - buffers->triangles_fast = GWN_batch_create( - GWN_PRIM_TRIS, buffers->vert_buf, + buffers->triangles_fast = GPU_batch_create( + GPU_PRIM_TRIS, buffers->vert_buf, /* can be NULL */ buffers->index_buf_fast); } @@ -245,8 +245,8 @@ void GPU_pbvh_mesh_buffers_update( if (buffers->smooth) { for (uint i = 0; i < totvert; ++i) { const MVert *v = &mvert[vert_indices[i]]; - GWN_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.pos, i, v->co); - GWN_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.nor, i, v->no); + GPU_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.pos, i, v->co); + GPU_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.nor, i, v->no); } for (uint i = 0; i < buffers->face_indices_len; i++) { @@ -257,10 +257,10 @@ void GPU_pbvh_mesh_buffers_update( int v_index = buffers->mloop[lt->tri[j]].v; uchar color_ub[3]; gpu_color_from_mask_copy(vmask[v_index], diffuse_color, color_ub); - GWN_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.col, vidx, color_ub); + GPU_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.col, vidx, color_ub); } else { - GWN_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.col, vidx, diffuse_color_ub); + GPU_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.col, vidx, diffuse_color_ub); } } } @@ -303,9 +303,9 @@ void GPU_pbvh_mesh_buffers_update( for (uint j = 0; j < 3; j++) { const MVert *v = &mvert[vtri[j]]; - GWN_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.pos, vbo_index, v->co); - GWN_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.nor, vbo_index, no); - GWN_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.col, vbo_index, color_ub); + GPU_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.pos, vbo_index, v->co); + GPU_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.nor, vbo_index, no); + GPU_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.col, vbo_index, color_ub); vbo_index++; } @@ -367,8 +367,8 @@ GPU_PBVH_Buffers *GPU_pbvh_mesh_buffers_build( if (buffers->smooth) { /* Fill the triangle buffer */ buffers->index_buf = NULL; - Gwn_IndexBufBuilder elb; - GWN_indexbuf_init(&elb, GWN_PRIM_TRIS, tottri, INT_MAX); + GPUIndexBufBuilder elb; + GPU_indexbuf_init(&elb, GPU_PRIM_TRIS, tottri, INT_MAX); for (i = 0; i < face_indices_len; ++i) { const MLoopTri *lt = &looptri[face_indices[i]]; @@ -377,13 +377,13 @@ GPU_PBVH_Buffers *GPU_pbvh_mesh_buffers_build( if (paint_is_face_hidden(lt, mvert, mloop)) continue; - GWN_indexbuf_add_tri_verts(&elb, UNPACK3(face_vert_indices[i])); + 
GPU_indexbuf_add_tri_verts(&elb, UNPACK3(face_vert_indices[i])); } - buffers->index_buf = GWN_indexbuf_build(&elb); + buffers->index_buf = GPU_indexbuf_build(&elb); } else { if (!buffers->is_index_buf_global) { - GWN_INDEXBUF_DISCARD_SAFE(buffers->index_buf); + GPU_INDEXBUF_DISCARD_SAFE(buffers->index_buf); } buffers->index_buf = NULL; buffers->is_index_buf_global = false; @@ -438,12 +438,12 @@ void GPU_pbvh_grid_buffers_update( for (y = 0; y < key->grid_size; y++) { for (x = 0; x < key->grid_size; x++) { CCGElem *elem = CCG_grid_elem(key, grid, x, y); - GWN_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.pos, vbo_index, CCG_elem_co(key, elem)); + GPU_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.pos, vbo_index, CCG_elem_co(key, elem)); if (buffers->smooth) { short no_short[3]; normal_float_to_short_v3(no_short, CCG_elem_no(key, elem)); - GWN_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.nor, vbo_index, no_short); + GPU_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.nor, vbo_index, no_short); if (has_mask) { uchar color_ub[3]; @@ -454,7 +454,7 @@ void GPU_pbvh_grid_buffers_update( else { unit_float_to_uchar_clamp_v3(color_ub, diffuse_color); } - GWN_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.col, vbo_index, color_ub); + GPU_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.col, vbo_index, color_ub); } } vbo_index += 1; @@ -481,7 +481,7 @@ void GPU_pbvh_grid_buffers_update( vbo_index = vbo_index_offset + ((j + 1) * key->grid_size + k); short no_short[3]; normal_float_to_short_v3(no_short, fno); - GWN_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.nor, vbo_index, no_short); + GPU_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.nor, vbo_index, no_short); if (has_mask) { uchar color_ub[3]; @@ -497,7 +497,7 @@ void GPU_pbvh_grid_buffers_update( else { unit_float_to_uchar_clamp_v3(color_ub, diffuse_color); } - GWN_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.col, vbo_index, color_ub); + GPU_vertbuf_attr_set(buffers->vert_buf, g_vbo_id.col, vbo_index, color_ub); } } } @@ -522,47 +522,47 @@ void GPU_pbvh_grid_buffers_update( /* Build the element array buffer of grid indices using either * unsigned shorts or unsigned ints. 
*/ #define FILL_QUAD_BUFFER(max_vert_, tot_quad_, buffer_) \ - { \ - int offset = 0; \ - int i, j, k; \ + { \ + int offset = 0; \ + int i, j, k; \ \ - Gwn_IndexBufBuilder elb; \ - GWN_indexbuf_init( \ - &elb, GWN_PRIM_TRIS, tot_quad_ * 2, max_vert_); \ + GPUIndexBufBuilder elb; \ + GPU_indexbuf_init( \ + &elb, GPU_PRIM_TRIS, tot_quad_ * 2, max_vert_); \ \ - /* Fill the buffer */ \ - for (i = 0; i < totgrid; ++i) { \ - BLI_bitmap *gh = NULL; \ - if (grid_hidden) \ - gh = grid_hidden[(grid_indices)[i]]; \ + /* Fill the buffer */ \ + for (i = 0; i < totgrid; ++i) { \ + BLI_bitmap *gh = NULL; \ + if (grid_hidden) \ + gh = grid_hidden[(grid_indices)[i]]; \ \ - for (j = 0; j < gridsize - 1; ++j) { \ - for (k = 0; k < gridsize - 1; ++k) { \ - /* Skip hidden grid face */ \ - if (gh && paint_is_grid_face_hidden( \ - gh, gridsize, k, j)) \ - { \ - continue; \ - } \ - GWN_indexbuf_add_generic_vert(&elb, offset + j * gridsize + k + 1); \ - GWN_indexbuf_add_generic_vert(&elb, offset + j * gridsize + k); \ - GWN_indexbuf_add_generic_vert(&elb, offset + (j + 1) * gridsize + k); \ - \ - GWN_indexbuf_add_generic_vert(&elb, offset + (j + 1) * gridsize + k + 1); \ - GWN_indexbuf_add_generic_vert(&elb, offset + j * gridsize + k + 1); \ - GWN_indexbuf_add_generic_vert(&elb, offset + (j + 1) * gridsize + k); \ - } \ - } \ + for (j = 0; j < gridsize - 1; ++j) { \ + for (k = 0; k < gridsize - 1; ++k) { \ + /* Skip hidden grid face */ \ + if (gh && paint_is_grid_face_hidden( \ + gh, gridsize, k, j)) \ + { \ + continue; \ + } \ + GPU_indexbuf_add_generic_vert(&elb, offset + j * gridsize + k + 1); \ + GPU_indexbuf_add_generic_vert(&elb, offset + j * gridsize + k); \ + GPU_indexbuf_add_generic_vert(&elb, offset + (j + 1) * gridsize + k); \ \ - offset += gridsize * gridsize; \ - } \ - buffer_ = GWN_indexbuf_build(&elb); \ - } (void)0 + GPU_indexbuf_add_generic_vert(&elb, offset + (j + 1) * gridsize + k + 1); \ + GPU_indexbuf_add_generic_vert(&elb, offset + j * gridsize + k + 1); \ + GPU_indexbuf_add_generic_vert(&elb, offset + (j + 1) * gridsize + k); \ + } \ + } \ + \ + offset += gridsize * gridsize; \ + } \ + buffer_ = GPU_indexbuf_build(&elb); \ + } (void)0 /* end FILL_QUAD_BUFFER */ -static Gwn_IndexBuf *gpu_get_grid_buffer( +static GPUIndexBuf *gpu_get_grid_buffer( int gridsize, unsigned *totquad, GridCommonGPUBuffer **grid_common_gpu_buffer, - /* remove this arg when gawain gets base-vertex support! */ + /* remove this arg when GPU gets base-vertex support! 
*/ int totgrid) { /* used in the FILL_QUAD_BUFFER macro */ @@ -586,7 +586,7 @@ static Gwn_IndexBuf *gpu_get_grid_buffer( } /* we can't reuse old, delete the existing buffer */ else if (gridbuff->mres_buffer) { - GWN_indexbuf_discard(gridbuff->mres_buffer); + GPU_indexbuf_discard(gridbuff->mres_buffer); gridbuff->mres_buffer = NULL; } @@ -603,17 +603,17 @@ static Gwn_IndexBuf *gpu_get_grid_buffer( #define FILL_FAST_BUFFER() \ { \ - Gwn_IndexBufBuilder elb; \ - GWN_indexbuf_init(&elb, GWN_PRIM_TRIS, 6 * totgrid, INT_MAX); \ + GPUIndexBufBuilder elb; \ + GPU_indexbuf_init(&elb, GPU_PRIM_TRIS, 6 * totgrid, INT_MAX); \ for (int i = 0; i < totgrid; i++) { \ - GWN_indexbuf_add_generic_vert(&elb, i * gridsize * gridsize + gridsize - 1); \ - GWN_indexbuf_add_generic_vert(&elb, i * gridsize * gridsize); \ - GWN_indexbuf_add_generic_vert(&elb, (i + 1) * gridsize * gridsize - gridsize); \ - GWN_indexbuf_add_generic_vert(&elb, (i + 1) * gridsize * gridsize - 1); \ - GWN_indexbuf_add_generic_vert(&elb, i * gridsize * gridsize + gridsize - 1); \ - GWN_indexbuf_add_generic_vert(&elb, (i + 1) * gridsize * gridsize - gridsize); \ + GPU_indexbuf_add_generic_vert(&elb, i * gridsize * gridsize + gridsize - 1); \ + GPU_indexbuf_add_generic_vert(&elb, i * gridsize * gridsize); \ + GPU_indexbuf_add_generic_vert(&elb, (i + 1) * gridsize * gridsize - gridsize); \ + GPU_indexbuf_add_generic_vert(&elb, (i + 1) * gridsize * gridsize - 1); \ + GPU_indexbuf_add_generic_vert(&elb, i * gridsize * gridsize + gridsize - 1); \ + GPU_indexbuf_add_generic_vert(&elb, (i + 1) * gridsize * gridsize - gridsize); \ } \ - buffers->index_buf_fast = GWN_indexbuf_build(&elb); \ + buffers->index_buf_fast = GPU_indexbuf_build(&elb); \ } (void)0 GPU_PBVH_Buffers *GPU_pbvh_grid_buffers_build( @@ -684,7 +684,7 @@ GPU_PBVH_Buffers *GPU_pbvh_grid_buffers_build( */ static void gpu_bmesh_vert_to_buffer_copy__gwn( BMVert *v, - Gwn_VertBuf *vert_buf, + GPUVertBuf *vert_buf, int *v_index, const float fno[3], const float *fmask, @@ -695,12 +695,12 @@ static void gpu_bmesh_vert_to_buffer_copy__gwn( if (!BM_elem_flag_test(v, BM_ELEM_HIDDEN)) { /* Set coord, normal, and mask */ - GWN_vertbuf_attr_set(vert_buf, g_vbo_id.pos, *v_index, v->co); + GPU_vertbuf_attr_set(vert_buf, g_vbo_id.pos, *v_index, v->co); { short no_short[3]; normal_float_to_short_v3(no_short, fno ? fno : v->no); - GWN_vertbuf_attr_set(vert_buf, g_vbo_id.nor, *v_index, no_short); + GPU_vertbuf_attr_set(vert_buf, g_vbo_id.nor, *v_index, no_short); } { @@ -718,7 +718,7 @@ static void gpu_bmesh_vert_to_buffer_copy__gwn( effective_mask, diffuse_color, color_ub); - GWN_vertbuf_attr_set(vert_buf, g_vbo_id.col, *v_index, color_ub); + GPU_vertbuf_attr_set(vert_buf, g_vbo_id.col, *v_index, color_ub); } /* Assign index for use in the triangle index buffer */ @@ -792,7 +792,7 @@ void GPU_pbvh_bmesh_buffers_update( if (buffers->smooth) { /* Smooth needs to recreate index buffer, so we have to invalidate the batch. 
*/ - GWN_BATCH_DISCARD_SAFE(buffers->triangles); + GPU_BATCH_DISCARD_SAFE(buffers->triangles); /* Count visible vertices */ totvert = gpu_bmesh_vert_visible_count(bm_unique_verts, bm_other_verts); } @@ -893,8 +893,8 @@ void GPU_pbvh_bmesh_buffers_update( if (buffers->smooth) { /* Fill the triangle buffer */ buffers->index_buf = NULL; - Gwn_IndexBufBuilder elb; - GWN_indexbuf_init(&elb, GWN_PRIM_TRIS, tottri, maxvert); + GPUIndexBufBuilder elb; + GPU_indexbuf_init(&elb, GPU_PRIM_TRIS, tottri, maxvert); /* Initialize triangle index buffer */ buffers->is_index_buf_global = false; @@ -911,24 +911,24 @@ void GPU_pbvh_bmesh_buffers_update( BMVert *v[3]; BM_face_as_array_vert_tri(f, v); - GWN_indexbuf_add_tri_verts( - &elb, BM_elem_index_get(v[0]), BM_elem_index_get(v[1]), BM_elem_index_get(v[2])); + GPU_indexbuf_add_tri_verts( + &elb, BM_elem_index_get(v[0]), BM_elem_index_get(v[1]), BM_elem_index_get(v[2])); } } buffers->tot_tri = tottri; if (buffers->index_buf == NULL) { - buffers->index_buf = GWN_indexbuf_build(&elb); + buffers->index_buf = GPU_indexbuf_build(&elb); } else { - GWN_indexbuf_build_in_place(&elb, buffers->index_buf); + GPU_indexbuf_build_in_place(&elb, buffers->index_buf); } } } else if (buffers->index_buf) { if (!buffers->is_index_buf_global) { - GWN_INDEXBUF_DISCARD_SAFE(buffers->index_buf); + GPU_INDEXBUF_DISCARD_SAFE(buffers->index_buf); } buffers->index_buf = NULL; buffers->is_index_buf_global = false; @@ -950,7 +950,7 @@ GPU_PBVH_Buffers *GPU_pbvh_bmesh_buffers_build(bool smooth_shading) return buffers; } -Gwn_Batch *GPU_pbvh_buffers_batch_get(GPU_PBVH_Buffers *buffers, bool fast) +GPUBatch *GPU_pbvh_buffers_batch_get(GPU_PBVH_Buffers *buffers, bool fast) { return (fast && buffers->triangles_fast) ? buffers->triangles_fast : buffers->triangles; @@ -1003,13 +1003,13 @@ bool GPU_pbvh_buffers_mask_changed(GPU_PBVH_Buffers *buffers, bool show_mask) void GPU_pbvh_buffers_free(GPU_PBVH_Buffers *buffers) { if (buffers) { - GWN_BATCH_DISCARD_SAFE(buffers->triangles); - GWN_BATCH_DISCARD_SAFE(buffers->triangles_fast); + GPU_BATCH_DISCARD_SAFE(buffers->triangles); + GPU_BATCH_DISCARD_SAFE(buffers->triangles_fast); if (!buffers->is_index_buf_global) { - GWN_INDEXBUF_DISCARD_SAFE(buffers->index_buf); + GPU_INDEXBUF_DISCARD_SAFE(buffers->index_buf); } - GWN_INDEXBUF_DISCARD_SAFE(buffers->index_buf_fast); - GWN_VERTBUF_DISCARD_SAFE(buffers->vert_buf); + GPU_INDEXBUF_DISCARD_SAFE(buffers->index_buf_fast); + GPU_VERTBUF_DISCARD_SAFE(buffers->vert_buf); #ifdef USE_BASE_ELEM if (buffers->baseelemarray) @@ -1029,7 +1029,7 @@ void GPU_pbvh_multires_buffers_free(GridCommonGPUBuffer **grid_common_gpu_buffer if (gridbuff) { if (gridbuff->mres_buffer) { BLI_mutex_lock(&buffer_mutex); - GWN_INDEXBUF_DISCARD_SAFE(gridbuff->mres_buffer); + GPU_INDEXBUF_DISCARD_SAFE(gridbuff->mres_buffer); BLI_mutex_unlock(&buffer_mutex); } MEM_freeN(gridbuff); @@ -1049,7 +1049,7 @@ void GPU_pbvh_BB_draw(float min[3], float max[3], bool leaf, unsigned int pos) * could keep a static batch & index buffer, change the VBO contents per draw */ - immBegin(GWN_PRIM_LINES, 24); + immBegin(GPU_PRIM_LINES, 24); /* top */ immVertex3f(pos, min[0], min[1], max[2]); diff --git a/source/blender/gpu/intern/gpu_codegen.c b/source/blender/gpu/intern/gpu_codegen.c index a450b551d4a..289befe674e 100644 --- a/source/blender/gpu/intern/gpu_codegen.c +++ b/source/blender/gpu/intern/gpu_codegen.c @@ -257,6 +257,9 @@ static void gpu_parse_functions_string(GHash *hash, char *code) if (!type && gpu_str_prefix(code, "sampler2DShadow")) { type = 
GPU_SHADOW2D; } + if (!type && gpu_str_prefix(code, "sampler1DArray")) { + type = GPU_TEX1D_ARRAY; + } if (!type && gpu_str_prefix(code, "sampler2D")) { type = GPU_TEX2D; } @@ -615,10 +618,12 @@ static int codegen_process_uniforms_functions(GPUMaterial *material, DynStr *ds, if ((input->source == GPU_SOURCE_TEX) || (input->source == GPU_SOURCE_TEX_PIXEL)) { /* create exactly one sampler for each texture */ if (codegen_input_has_texture(input) && input->bindtex) { - BLI_dynstr_appendf(ds, "uniform %s samp%d;\n", - (input->textype == GPU_TEX2D) ? "sampler2D" : - (input->textype == GPU_TEXCUBE) ? "samplerCube" : "sampler2DShadow", - input->texid); + BLI_dynstr_appendf( + ds, "uniform %s samp%d;\n", + (input->textype == GPU_TEX1D_ARRAY) ? "sampler1DArray" : + (input->textype == GPU_TEX2D) ? "sampler2D" : + (input->textype == GPU_TEXCUBE) ? "samplerCube" : "sampler2DShadow", + input->texid); } } else if (input->source == GPU_SOURCE_BUILTIN) { @@ -635,13 +640,15 @@ static int codegen_process_uniforms_functions(GPUMaterial *material, DynStr *ds, } } else if (gpu_str_prefix(name, "unf")) { - BLI_dynstr_appendf(ds, "uniform %s %s;\n", - GPU_DATATYPE_STR[input->type], name); + BLI_dynstr_appendf( + ds, "uniform %s %s;\n", + GPU_DATATYPE_STR[input->type], name); } else { - BLI_dynstr_appendf(ds, "%s %s %s;\n", - GLEW_VERSION_3_0 ? "in" : "varying", - GPU_DATATYPE_STR[input->type], name); + BLI_dynstr_appendf( + ds, "%s %s %s;\n", + GLEW_VERSION_3_0 ? "in" : "varying", + GPU_DATATYPE_STR[input->type], name); } } } @@ -658,12 +665,14 @@ static int codegen_process_uniforms_functions(GPUMaterial *material, DynStr *ds, } else if (input->dynamicvec) { /* only create uniforms for dynamic vectors */ - BLI_dynstr_appendf(ds, "uniform %s unf%d;\n", - GPU_DATATYPE_STR[input->type], input->id); + BLI_dynstr_appendf( + ds, "uniform %s unf%d;\n", + GPU_DATATYPE_STR[input->type], input->id); } else { - BLI_dynstr_appendf(ds, "const %s cons%d = ", - GPU_DATATYPE_STR[input->type], input->id); + BLI_dynstr_appendf( + ds, "const %s cons%d = ", + GPU_DATATYPE_STR[input->type], input->id); codegen_print_datatype(ds, input->type, input->vec); BLI_dynstr_append(ds, ";\n"); } @@ -675,9 +684,10 @@ static int codegen_process_uniforms_functions(GPUMaterial *material, DynStr *ds, BLI_dynstr_appendf(ds, "#ifndef USE_OPENSUBDIV\n"); } #endif - BLI_dynstr_appendf(ds, "%s %s var%d;\n", - GLEW_VERSION_3_0 ? "in" : "varying", - GPU_DATATYPE_STR[input->type], input->attribid); + BLI_dynstr_appendf( + ds, "%s %s var%d;\n", + GLEW_VERSION_3_0 ? 
"in" : "varying", + GPU_DATATYPE_STR[input->type], input->attribid); #ifdef WITH_OPENSUBDIV if (skip_opensubdiv) { BLI_dynstr_appendf(ds, "#endif\n"); @@ -696,8 +706,9 @@ static int codegen_process_uniforms_functions(GPUMaterial *material, DynStr *ds, for (LinkData *link = ubo_inputs.first; link; link = link->next) { input = link->data; - BLI_dynstr_appendf(ds, "\t%s unf%d;\n", - GPU_DATATYPE_STR[input->type], input->id); + BLI_dynstr_appendf( + ds, "\t%s unf%d;\n", + GPU_DATATYPE_STR[input->type], input->id); } BLI_dynstr_append(ds, "};\n"); BLI_freelistN(&ubo_inputs); @@ -719,9 +730,11 @@ static void codegen_declare_tmps(DynStr *ds, ListBase *nodes) for (input = node->inputs.first; input; input = input->next) { if (input->source == GPU_SOURCE_TEX_PIXEL) { if (codegen_input_has_texture(input) && input->definetex) { - BLI_dynstr_appendf(ds, "\tvec4 tex%d = texture2D(", input->texid); - BLI_dynstr_appendf(ds, "samp%d, gl_TexCoord[%d].st);\n", - input->texid, input->texid); + BLI_dynstr_appendf( + ds, "\tvec4 tex%d = texture2D(", input->texid); + BLI_dynstr_appendf( + ds, "samp%d, gl_TexCoord[%d].st);\n", + input->texid, input->texid); } } } @@ -729,11 +742,13 @@ static void codegen_declare_tmps(DynStr *ds, ListBase *nodes) /* declare temporary variables for node output storage */ for (output = node->outputs.first; output; output = output->next) { if (output->type == GPU_CLOSURE) { - BLI_dynstr_appendf(ds, "\tClosure tmp%d;\n", output->id); + BLI_dynstr_appendf( + ds, "\tClosure tmp%d;\n", output->id); } else { - BLI_dynstr_appendf(ds, "\t%s tmp%d;\n", - GPU_DATATYPE_STR[output->type], output->id); + BLI_dynstr_appendf( + ds, "\t%s tmp%d;\n", + GPU_DATATYPE_STR[output->type], output->id); } } } @@ -757,8 +772,9 @@ static void codegen_call_functions(DynStr *ds, ListBase *nodes, GPUOutput *final BLI_dynstr_appendf(ds, ", gl_TexCoord[%d].st", input->texid); } else if (input->source == GPU_SOURCE_TEX_PIXEL) { - codegen_convert_datatype(ds, input->link->output->type, input->type, - "tmp", input->link->output->id); + codegen_convert_datatype( + ds, input->link->output->type, input->type, + "tmp", input->link->output->id); } else if (input->source == GPU_SOURCE_BUILTIN) { if (input->builtin == GPU_INVERSE_VIEW_MATRIX) @@ -862,10 +878,12 @@ static char *code_generate_fragment(GPUMaterial *material, ListBase *nodes, GPUO for (input = node->inputs.first; input; input = input->next) { if (input->source == GPU_SOURCE_ATTRIB && input->attribfirst) { if (input->attribtype == CD_TANGENT) { - BLI_dynstr_appendf(ds, "#ifdef USE_OPENSUBDIV\n"); - BLI_dynstr_appendf(ds, "\t%s var%d;\n", - GPU_DATATYPE_STR[input->type], - input->attribid); + BLI_dynstr_appendf( + ds, "#ifdef USE_OPENSUBDIV\n"); + BLI_dynstr_appendf( + ds, "\t%s var%d;\n", + GPU_DATATYPE_STR[input->type], + input->attribid); if (has_tangent == false) { BLI_dynstr_appendf(ds, "\tvec3 Q1 = dFdx(inpt.v.position.xyz);\n"); BLI_dynstr_appendf(ds, "\tvec3 Q2 = dFdy(inpt.v.position.xyz);\n"); @@ -917,7 +935,7 @@ static const char *attrib_prefix_get(CustomDataType type) case CD_TANGENT: return "t"; case CD_MCOL: return "c"; case CD_AUTO_FROM_NAME: return "a"; - default: BLI_assert(false && "Gwn_VertAttr Prefix type not found : This should not happen!"); return ""; + default: BLI_assert(false && "GPUVertAttr Prefix type not found : This should not happen!"); return ""; } } @@ -929,12 +947,13 @@ static char *code_generate_vertex(ListBase *nodes, const char *vert_code, bool u char *code; /* Hairs uv and col attribs are passed by bufferTextures. 
*/ - BLI_dynstr_append(ds, - "#ifdef HAIR_SHADER\n" - "#define DEFINE_ATTRIB(type, attr) uniform samplerBuffer attr\n" - "#else\n" - "#define DEFINE_ATTRIB(type, attr) in type attr\n" - "#endif\n" + BLI_dynstr_append( + ds, + "#ifdef HAIR_SHADER\n" + "#define DEFINE_ATTRIB(type, attr) uniform samplerBuffer attr\n" + "#else\n" + "#define DEFINE_ATTRIB(type, attr) in type attr\n" + "#endif\n" ); for (node = nodes->first; node; node = node->next) { @@ -952,10 +971,12 @@ static char *code_generate_vertex(ListBase *nodes, const char *vert_code, bool u } else { unsigned int hash = BLI_ghashutil_strhash_p(input->attribname); - BLI_dynstr_appendf(ds, "DEFINE_ATTRIB(%s, %s%u);\n", - GPU_DATATYPE_STR[input->type], attrib_prefix_get(input->attribtype), hash); - BLI_dynstr_appendf(ds, "#define att%d %s%u\n", - input->attribid, attrib_prefix_get(input->attribtype), hash); + BLI_dynstr_appendf( + ds, "DEFINE_ATTRIB(%s, %s%u);\n", + GPU_DATATYPE_STR[input->type], attrib_prefix_get(input->attribtype), hash); + BLI_dynstr_appendf( + ds, "#define att%d %s%u\n", + input->attribid, attrib_prefix_get(input->attribtype), hash); /* Auto attrib can be vertex color byte buffer. * We need to know and convert them to linear space in VS. */ if (!use_geom && input->attribtype == CD_AUTO_FROM_NAME) { @@ -963,33 +984,36 @@ static char *code_generate_vertex(ListBase *nodes, const char *vert_code, bool u BLI_dynstr_appendf(ds, "#define att%d_is_srgb ba%u\n", input->attribid, hash); } } - BLI_dynstr_appendf(ds, "out %s var%d%s;\n", - GPU_DATATYPE_STR[input->type], input->attribid, use_geom ? "g" : ""); + BLI_dynstr_appendf( + ds, "out %s var%d%s;\n", + GPU_DATATYPE_STR[input->type], input->attribid, use_geom ? "g" : ""); } } } BLI_dynstr_append(ds, "\n"); - BLI_dynstr_append(ds, - "#define ATTRIB\n" - "uniform mat3 NormalMatrix;\n" - "uniform mat4 ModelMatrixInverse;\n" - "vec3 srgb_to_linear_attrib(vec3 c) {\n" - "\tc = max(c, vec3(0.0));\n" - "\tvec3 c1 = c * (1.0 / 12.92);\n" - "\tvec3 c2 = pow((c + 0.055) * (1.0 / 1.055), vec3(2.4));\n" - "\treturn mix(c1, c2, step(vec3(0.04045), c));\n" - "}\n\n" + BLI_dynstr_append( + ds, + "#define ATTRIB\n" + "uniform mat3 NormalMatrix;\n" + "uniform mat4 ModelMatrixInverse;\n" + "vec3 srgb_to_linear_attrib(vec3 c) {\n" + "\tc = max(c, vec3(0.0));\n" + "\tvec3 c1 = c * (1.0 / 12.92);\n" + "\tvec3 c2 = pow((c + 0.055) * (1.0 / 1.055), vec3(2.4));\n" + "\treturn mix(c1, c2, step(vec3(0.04045), c));\n" + "}\n\n" ); /* Prototype because defined later. */ - BLI_dynstr_append(ds, - "vec2 hair_get_customdata_vec2(const samplerBuffer);\n" - "vec3 hair_get_customdata_vec3(const samplerBuffer);\n" - "vec4 hair_get_customdata_vec4(const samplerBuffer);\n" - "vec3 hair_get_strand_pos(void);\n" - "\n" + BLI_dynstr_append( + ds, + "vec2 hair_get_customdata_vec2(const samplerBuffer);\n" + "vec3 hair_get_customdata_vec3(const samplerBuffer);\n" + "vec4 hair_get_customdata_vec4(const samplerBuffer);\n" + "vec3 hair_get_strand_pos(void);\n" + "\n" ); BLI_dynstr_append(ds, "void pass_attrib(in vec3 position) {\n"); @@ -1001,16 +1025,19 @@ static char *code_generate_vertex(ListBase *nodes, const char *vert_code, bool u if (input->source == GPU_SOURCE_ATTRIB && input->attribfirst) { if (input->attribtype == CD_TANGENT) { /* Not supported by hairs */ - BLI_dynstr_appendf(ds, "\tvar%d%s = vec4(0.0);\n", - input->attribid, use_geom ? "g" : ""); + BLI_dynstr_appendf( + ds, "\tvar%d%s = vec4(0.0);\n", + input->attribid, use_geom ? 
"g" : ""); } else if (input->attribtype == CD_ORCO) { - BLI_dynstr_appendf(ds, "\tvar%d%s = OrcoTexCoFactors[0] + (ModelMatrixInverse * vec4(hair_get_strand_pos(), 1.0)).xyz * OrcoTexCoFactors[1];\n", - input->attribid, use_geom ? "g" : ""); + BLI_dynstr_appendf( + ds, "\tvar%d%s = OrcoTexCoFactors[0] + (ModelMatrixInverse * vec4(hair_get_strand_pos(), 1.0)).xyz * OrcoTexCoFactors[1];\n", + input->attribid, use_geom ? "g" : ""); } else { - BLI_dynstr_appendf(ds, "\tvar%d%s = hair_get_customdata_%s(att%d);\n", - input->attribid, use_geom ? "g" : "", GPU_DATATYPE_STR[input->type], input->attribid); + BLI_dynstr_appendf( + ds, "\tvar%d%s = hair_get_customdata_%s(att%d);\n", + input->attribid, use_geom ? "g" : "", GPU_DATATYPE_STR[input->type], input->attribid); } } } @@ -1030,21 +1057,25 @@ static char *code_generate_vertex(ListBase *nodes, const char *vert_code, bool u input->attribid, use_geom ? "g" : "", input->attribid); } else if (input->attribtype == CD_ORCO) { - BLI_dynstr_appendf(ds, "\tvar%d%s = OrcoTexCoFactors[0] + position * OrcoTexCoFactors[1];\n", - input->attribid, use_geom ? "g" : ""); + BLI_dynstr_appendf( + ds, "\tvar%d%s = OrcoTexCoFactors[0] + position * OrcoTexCoFactors[1];\n", + input->attribid, use_geom ? "g" : ""); } else if (input->attribtype == CD_MCOL) { - BLI_dynstr_appendf(ds, "\tvar%d%s = srgb_to_linear_attrib(att%d);\n", - input->attribid, use_geom ? "g" : "", input->attribid); + BLI_dynstr_appendf( + ds, "\tvar%d%s = srgb_to_linear_attrib(att%d);\n", + input->attribid, use_geom ? "g" : "", input->attribid); } else if (input->attribtype == CD_AUTO_FROM_NAME) { - BLI_dynstr_appendf(ds, "\tvar%d%s = (att%d_is_srgb) ? srgb_to_linear_attrib(att%d) : att%d;\n", - input->attribid, use_geom ? "g" : "", - input->attribid, input->attribid, input->attribid); + BLI_dynstr_appendf( + ds, "\tvar%d%s = (att%d_is_srgb) ? srgb_to_linear_attrib(att%d) : att%d;\n", + input->attribid, use_geom ? "g" : "", + input->attribid, input->attribid, input->attribid); } else { - BLI_dynstr_appendf(ds, "\tvar%d%s = att%d;\n", - input->attribid, use_geom ? "g" : "", input->attribid); + BLI_dynstr_appendf( + ds, "\tvar%d%s = att%d;\n", + input->attribid, use_geom ? "g" : "", input->attribid); } } } @@ -1083,12 +1114,14 @@ static char *code_generate_geometry(ListBase *nodes, const char *geom_code) for (node = nodes->first; node; node = node->next) { for (input = node->inputs.first; input; input = input->next) { if (input->source == GPU_SOURCE_ATTRIB && input->attribfirst) { - BLI_dynstr_appendf(ds, "in %s var%dg[];\n", - GPU_DATATYPE_STR[input->type], - input->attribid); - BLI_dynstr_appendf(ds, "out %s var%d;\n", - GPU_DATATYPE_STR[input->type], - input->attribid); + BLI_dynstr_appendf( + ds, "in %s var%dg[];\n", + GPU_DATATYPE_STR[input->type], + input->attribid); + BLI_dynstr_appendf( + ds, "out %s var%d;\n", + GPU_DATATYPE_STR[input->type], + input->attribid); } } } @@ -1301,15 +1334,9 @@ static void gpu_node_input_link(GPUNode *node, GPUNodeLink *link, const GPUType /* small texture created on the fly, like for colorbands */ input->type = GPU_VEC4; input->source = GPU_SOURCE_TEX; - input->textype = type; - -#if 0 - input->tex = GPU_texture_create_2D(link->texturesize, link->texturesize, link->ptr2, NULL); -#endif - input->tex = GPU_texture_create_2D(link->texturesize, 1, GPU_RGBA8, link->ptr1, NULL); - input->textarget = GL_TEXTURE_2D; - - MEM_freeN(link->ptr1); + input->textype = GPU_TEX1D_ARRAY; + input->tex = link->ptr1; /* HACK ptr1 is actually a (GPUTexture **). 
*/ + input->textarget = GL_TEXTURE_1D_ARRAY; MEM_freeN(link); } else if (link->image) { @@ -1382,8 +1409,8 @@ static const char *gpu_uniform_set_function_from_type(eNodeSocketDatatype type) case SOCK_RGBA: return "set_rgba"; default: - BLI_assert(!"No gpu function for non-supported eNodeSocketDatatype"); - return NULL; + BLI_assert(!"No gpu function for non-supported eNodeSocketDatatype"); + return NULL; } } @@ -1553,8 +1580,9 @@ void GPU_nodes_get_vertex_attributes(ListBase *nodes, GPUVertexAttribs *attribs) attribs->layer[a].type = input->attribtype; attribs->layer[a].attribid = input->attribid; - BLI_strncpy(attribs->layer[a].name, input->attribname, - sizeof(attribs->layer[a].name)); + BLI_strncpy( + attribs->layer[a].name, input->attribname, + sizeof(attribs->layer[a].name)); } else { input->attribid = attribs->layer[a].attribid; @@ -1657,13 +1685,14 @@ GPUNodeLink *GPU_image_preview(PreviewImage *prv) } -GPUNodeLink *GPU_texture(int size, float *pixels) +GPUNodeLink *GPU_texture_ramp(GPUMaterial *mat, int size, float *pixels, float *row) { GPUNodeLink *link = GPU_node_link_create(); link->texture = true; - link->texturesize = size; - link->ptr1 = pixels; + link->ptr1 = gpu_material_ramp_texture_row_set(mat, size, pixels, row); + + MEM_freeN(pixels); return link; } @@ -1864,9 +1893,8 @@ void GPU_nodes_prune(ListBase *nodes, GPUNodeLink *outlink) static bool gpu_pass_is_valid(GPUPass *pass) { - /* Shader is not null if compilation is successful, - * refcount is positive if compilation as not yet been done. */ - return (pass->shader != NULL || pass->refcount > 0); + /* Shader is not null if compilation is successful. */ + return (pass->compiled == false || pass->shader != NULL); } GPUPass *GPU_generate_pass_new( @@ -1963,14 +1991,16 @@ GPUPass *GPU_generate_pass_new( return pass; } -void GPU_pass_compile(GPUPass *pass) +void GPU_pass_compile(GPUPass *pass, const char *shname) { if (!pass->compiled) { - pass->shader = GPU_shader_create(pass->vertexcode, - pass->fragmentcode, - pass->geometrycode, - NULL, - pass->defines); + pass->shader = GPU_shader_create( + pass->vertexcode, + pass->fragmentcode, + pass->geometrycode, + NULL, + pass->defines, + shname); pass->compiled = true; } } diff --git a/source/blender/gpu/intern/gpu_codegen.h b/source/blender/gpu/intern/gpu_codegen.h index 278843fc948..77e6e5cf4ef 100644 --- a/source/blender/gpu/intern/gpu_codegen.h +++ b/source/blender/gpu/intern/gpu_codegen.h @@ -184,7 +184,7 @@ void GPU_nodes_extract_dynamic_inputs(struct GPUShader *shader, ListBase *inputs void GPU_nodes_get_vertex_attributes(ListBase *nodes, struct GPUVertexAttribs *attribs); void GPU_nodes_prune(ListBase *nodes, struct GPUNodeLink *outlink); -void GPU_pass_compile(GPUPass *pass); +void GPU_pass_compile(GPUPass *pass, const char *shname); void GPU_pass_release(GPUPass *pass); void GPU_pass_free_nodes(ListBase *nodes); @@ -197,6 +197,7 @@ void gpu_codegen_exit(void); const char *GPU_builtin_name(GPUBuiltin builtin); void gpu_material_add_node(struct GPUMaterial *material, struct GPUNode *node); +struct GPUTexture **gpu_material_ramp_texture_row_set(GPUMaterial *mat, int size, float *pixels, float *row); int GPU_link_changed(struct GPUNodeLink *link); #endif diff --git a/source/blender/gpu/intern/gpu_context.cpp b/source/blender/gpu/intern/gpu_context.cpp new file mode 100644 index 00000000000..ce3eb64fa37 --- /dev/null +++ b/source/blender/gpu/intern/gpu_context.cpp @@ -0,0 +1,329 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can 
redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. + * + * Contributor(s): Blender Foundation, Clément Foucault + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/gpu/intern/gpu_context.cpp + * \ingroup gpu + * + * Manage GL vertex array IDs in a thread-safe way + * Use these instead of glGenBuffers & its friends + * - alloc must be called from a thread that is bound + * to the context that will be used for drawing with + * this vao. + * - free can be called from any thread + */ + +#include "BLI_assert.h" +#include "BLI_utildefines.h" + +#include "GPU_context.h" +#include "GPU_framebuffer.h" + +#include "gpu_batch_private.h" +#include "gpu_context_private.h" + +#include <vector> +#include <string.h> +#include <pthread.h> +#include <mutex> +#include <unordered_set> + +#if TRUST_NO_ONE +#if 0 +extern "C" { +extern int BLI_thread_is_main(void); /* Blender-specific function */ +} + +static bool thread_is_main() { + /* "main" here means the GL context's thread */ + return BLI_thread_is_main(); +} +#endif +#endif + +static std::vector<GLuint> orphaned_buffer_ids; +static std::vector<GLuint> orphaned_texture_ids; + +static std::mutex orphans_mutex; + +struct GPUContext { + GLuint default_vao; + GPUFrameBuffer *current_fbo; + std::unordered_set<GPUBatch *> batches; /* Batches that have VAOs from this context */ +#ifdef DEBUG + std::unordered_set<GPUFrameBuffer *> framebuffers; /* Framebuffers that have FBO from this context */ +#endif + std::vector<GLuint> orphaned_vertarray_ids; + std::vector<GLuint> orphaned_framebuffer_ids; + std::mutex orphans_mutex; /* todo: try spinlock instead */ +#if TRUST_NO_ONE + pthread_t thread; /* Thread on which this context is active. */ + bool thread_is_used; + + GPUContext() { + thread_is_used = false; + current_fbo = 0; + } +#endif +}; + +#if defined(_MSC_VER) && (_MSC_VER == 1800) +#define thread_local __declspec(thread) +thread_local GPUContext *active_ctx = NULL; +#else +static thread_local GPUContext *active_ctx = NULL; +#endif + +static void orphans_add(GPUContext *ctx, std::vector<GLuint> *orphan_list, GLuint id) +{ + std::mutex *mutex = (ctx) ? &ctx->orphans_mutex : &orphans_mutex; + + mutex->lock(); + orphan_list->emplace_back(id); + mutex->unlock(); +} + +static void orphans_clear(GPUContext *ctx) +{ + BLI_assert(ctx); /* need at least an active context */ + BLI_assert(pthread_equal(pthread_self(), ctx->thread)); /* context has been activated by another thread! 
*/ + + ctx->orphans_mutex.lock(); + if (!ctx->orphaned_vertarray_ids.empty()) { + uint orphan_len = (uint)ctx->orphaned_vertarray_ids.size(); + glDeleteVertexArrays(orphan_len, ctx->orphaned_vertarray_ids.data()); + ctx->orphaned_vertarray_ids.clear(); + } + if (!ctx->orphaned_framebuffer_ids.empty()) { + uint orphan_len = (uint)ctx->orphaned_framebuffer_ids.size(); + glDeleteFramebuffers(orphan_len, ctx->orphaned_framebuffer_ids.data()); + ctx->orphaned_framebuffer_ids.clear(); + } + + ctx->orphans_mutex.unlock(); + + orphans_mutex.lock(); + if (!orphaned_buffer_ids.empty()) { + uint orphan_len = (uint)orphaned_buffer_ids.size(); + glDeleteBuffers(orphan_len, orphaned_buffer_ids.data()); + orphaned_buffer_ids.clear(); + } + if (!orphaned_texture_ids.empty()) { + uint orphan_len = (uint)orphaned_texture_ids.size(); + glDeleteTextures(orphan_len, orphaned_texture_ids.data()); + orphaned_texture_ids.clear(); + } + orphans_mutex.unlock(); +} + +GPUContext *GPU_context_create(void) +{ + /* BLI_assert(thread_is_main()); */ + GPUContext *ctx = new GPUContext; + glGenVertexArrays(1, &ctx->default_vao); + GPU_context_active_set(ctx); + return ctx; +} + +/* to be called after GPU_context_active_set(ctx_to_destroy) */ +void GPU_context_discard(GPUContext *ctx) +{ + /* Make sure no other thread has locked it. */ + BLI_assert(ctx == active_ctx); + BLI_assert(pthread_equal(pthread_self(), ctx->thread)); + BLI_assert(ctx->orphaned_vertarray_ids.empty()); +#ifdef DEBUG + /* For now don't allow GPUFrameBuffers to be reuse in another ctx. */ + BLI_assert(ctx->framebuffers.empty()); +#endif + /* delete remaining vaos */ + while (!ctx->batches.empty()) { + /* this removes the array entry */ + GPU_batch_vao_cache_clear(*ctx->batches.begin()); + } + glDeleteVertexArrays(1, &ctx->default_vao); + delete ctx; + active_ctx = NULL; +} + +/* ctx can be NULL */ +void GPU_context_active_set(GPUContext *ctx) +{ +#if TRUST_NO_ONE + if (active_ctx) { + active_ctx->thread_is_used = false; + } + /* Make sure no other context is already bound to this thread. */ + if (ctx) { + /* Make sure no other thread has locked it. */ + assert(ctx->thread_is_used == false); + ctx->thread = pthread_self(); + ctx->thread_is_used = true; + } +#endif + if (ctx) { + orphans_clear(ctx); + } + active_ctx = ctx; +} + +GPUContext *GPU_context_active_get(void) +{ + return active_ctx; +} + +GLuint GPU_vao_default(void) +{ + BLI_assert(active_ctx); /* need at least an active context */ + BLI_assert(pthread_equal(pthread_self(), active_ctx->thread)); /* context has been activated by another thread! 
*/ + return active_ctx->default_vao; +} + +GLuint GPU_vao_alloc(void) +{ + GLuint new_vao_id = 0; + orphans_clear(active_ctx); + glGenVertexArrays(1, &new_vao_id); + return new_vao_id; +} + +GLuint GPU_fbo_alloc(void) +{ + GLuint new_fbo_id = 0; + orphans_clear(active_ctx); + glGenFramebuffers(1, &new_fbo_id); + return new_fbo_id; +} + +GLuint GPU_buf_alloc(void) +{ + GLuint new_buffer_id = 0; + orphans_clear(active_ctx); + glGenBuffers(1, &new_buffer_id); + return new_buffer_id; +} + +GLuint GPU_tex_alloc(void) +{ + GLuint new_texture_id = 0; + orphans_clear(active_ctx); + glGenTextures(1, &new_texture_id); + return new_texture_id; +} + +void GPU_vao_free(GLuint vao_id, GPUContext *ctx) +{ + BLI_assert(ctx); + if (ctx == active_ctx) { + glDeleteVertexArrays(1, &vao_id); + } + else { + orphans_add(ctx, &ctx->orphaned_vertarray_ids, vao_id); + } +} + +void GPU_fbo_free(GLuint fbo_id, GPUContext *ctx) +{ + BLI_assert(ctx); + if (ctx == active_ctx) { + glDeleteFramebuffers(1, &fbo_id); + } + else { + orphans_add(ctx, &ctx->orphaned_framebuffer_ids, fbo_id); + } +} + +void GPU_buf_free(GLuint buf_id) +{ + if (active_ctx) { + glDeleteBuffers(1, &buf_id); + } + else { + orphans_add(NULL, &orphaned_buffer_ids, buf_id); + } +} + +void GPU_tex_free(GLuint tex_id) +{ + if (active_ctx) { + glDeleteTextures(1, &tex_id); + } + else { + orphans_add(NULL, &orphaned_texture_ids, tex_id); + } +} + +/* GPUBatch & GPUFrameBuffer contains respectively VAO & FBO indices + * which are not shared across contexts. So we need to keep track of + * ownership. */ + +void gpu_context_add_batch(GPUContext *ctx, GPUBatch *batch) +{ + BLI_assert(ctx); + ctx->orphans_mutex.lock(); + ctx->batches.emplace(batch); + ctx->orphans_mutex.unlock(); +} + +void gpu_context_remove_batch(GPUContext *ctx, GPUBatch *batch) +{ + BLI_assert(ctx); + ctx->orphans_mutex.lock(); + ctx->batches.erase(batch); + ctx->orphans_mutex.unlock(); +} + +void gpu_context_add_framebuffer(GPUContext *ctx, GPUFrameBuffer *fb) +{ +#ifdef DEBUG + BLI_assert(ctx); + ctx->orphans_mutex.lock(); + ctx->framebuffers.emplace(fb); + ctx->orphans_mutex.unlock(); +#else + UNUSED_VARS(ctx, fb); +#endif +} + +void gpu_context_remove_framebuffer(GPUContext *ctx, GPUFrameBuffer *fb) +{ +#ifdef DEBUG + BLI_assert(ctx); + ctx->orphans_mutex.lock(); + ctx->framebuffers.erase(fb); + ctx->orphans_mutex.unlock(); +#else + UNUSED_VARS(ctx, fb); +#endif +} + +void gpu_context_active_framebuffer_set(GPUContext *ctx, GPUFrameBuffer *fb) +{ + ctx->current_fbo = fb; +} + +GPUFrameBuffer *gpu_context_active_framebuffer_get(GPUContext *ctx) +{ + return ctx->current_fbo; +} diff --git a/source/blender/gpu/intern/gpu_context_private.h b/source/blender/gpu/intern/gpu_context_private.h new file mode 100644 index 00000000000..762d9ff10c0 --- /dev/null +++ b/source/blender/gpu/intern/gpu_context_private.h @@ -0,0 +1,71 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. + * + * Contributor(s): Blender Foundation + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/gpu/intern/gpu_context_private.h + * \ingroup gpu + * + * This interface allows the GPU to manage GL objects for multiple contexts and threads. + */ + +#ifndef __GPU_CONTEXT_PRIVATE_H__ +#define __GPU_CONTEXT_PRIVATE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "GPU_context.h" + +struct GPUFrameBuffer; + +GLuint GPU_vao_default(void); + +/* These require a gl ctx bound. */ +GLuint GPU_buf_alloc(void); +GLuint GPU_tex_alloc(void); +GLuint GPU_vao_alloc(void); +GLuint GPU_fbo_alloc(void); + +/* These can be called from any thread even without a gl ctx. */ +void GPU_buf_free(GLuint buf_id); +void GPU_tex_free(GLuint tex_id); +/* These two need the ctx the id was created with. */ +void GPU_vao_free(GLuint vao_id, GPUContext *ctx); +void GPU_fbo_free(GLuint fbo_id, GPUContext *ctx); + +void gpu_context_add_batch(GPUContext *ctx, GPUBatch *batch); +void gpu_context_remove_batch(GPUContext *ctx, GPUBatch *batch); + +void gpu_context_add_framebuffer(GPUContext *ctx, struct GPUFrameBuffer *fb); +void gpu_context_remove_framebuffer(GPUContext *ctx, struct GPUFrameBuffer *fb); + +void gpu_context_active_framebuffer_set(GPUContext *ctx, struct GPUFrameBuffer *fb); +struct GPUFrameBuffer *gpu_context_active_framebuffer_get(GPUContext *ctx); + +#ifdef __cplusplus +} +#endif + +#endif /* __GPU_CONTEXT_PRIVATE_H__ */ diff --git a/source/blender/gpu/intern/gpu_draw.c b/source/blender/gpu/intern/gpu_draw.c index 7383868843d..965caba0955 100644 --- a/source/blender/gpu/intern/gpu_draw.c +++ b/source/blender/gpu/intern/gpu_draw.c @@ -107,7 +107,7 @@ static bool is_over_resolution_limit(GLenum textarget, int w, int h) int size = (textarget == GL_TEXTURE_2D) ? GPU_max_texture_size() : GPU_max_cube_map_size(); int reslimit = (U.glreslimit != 0) ?
- min_ii(U.glreslimit, size) : size; + min_ii(U.glreslimit, size) : size; return (w > reslimit || h > reslimit); } @@ -239,42 +239,48 @@ typedef struct VerifyThreadData { float *srgb_frect; } VerifyThreadData; -static void gpu_verify_high_bit_srgb_buffer_slice(float *srgb_frect, - ImBuf *ibuf, - const int start_line, - const int height) +static void gpu_verify_high_bit_srgb_buffer_slice( + float *srgb_frect, + ImBuf *ibuf, + const int start_line, + const int height) { size_t offset = ibuf->channels * start_line * ibuf->x; float *current_srgb_frect = srgb_frect + offset; float *current_rect_float = ibuf->rect_float + offset; - IMB_buffer_float_from_float(current_srgb_frect, - current_rect_float, - ibuf->channels, - IB_PROFILE_SRGB, - IB_PROFILE_LINEAR_RGB, true, - ibuf->x, height, - ibuf->x, ibuf->x); + IMB_buffer_float_from_float( + current_srgb_frect, + current_rect_float, + ibuf->channels, + IB_PROFILE_SRGB, + IB_PROFILE_LINEAR_RGB, true, + ibuf->x, height, + ibuf->x, ibuf->x); IMB_buffer_float_unpremultiply(current_srgb_frect, ibuf->x, height); } -static void verify_thread_do(void *data_v, - int start_scanline, - int num_scanlines) +static void verify_thread_do( + void *data_v, + int start_scanline, + int num_scanlines) { VerifyThreadData *data = (VerifyThreadData *)data_v; - gpu_verify_high_bit_srgb_buffer_slice(data->srgb_frect, - data->ibuf, - start_scanline, - num_scanlines); + gpu_verify_high_bit_srgb_buffer_slice( + data->srgb_frect, + data->ibuf, + start_scanline, + num_scanlines); } -static void gpu_verify_high_bit_srgb_buffer(float *srgb_frect, - ImBuf *ibuf) +static void gpu_verify_high_bit_srgb_buffer( + float *srgb_frect, + ImBuf *ibuf) { if (ibuf->y < 64) { - gpu_verify_high_bit_srgb_buffer_slice(srgb_frect, - ibuf, - 0, ibuf->y); + gpu_verify_high_bit_srgb_buffer_slice( + srgb_frect, + ibuf, + 0, ibuf->y); } else { VerifyThreadData data; @@ -284,11 +290,12 @@ static void gpu_verify_high_bit_srgb_buffer(float *srgb_frect, } } -GPUTexture *GPU_texture_from_blender(Image *ima, - ImageUser *iuser, - int textarget, - bool is_data, - double UNUSED(time)) +GPUTexture *GPU_texture_from_blender( + Image *ima, + ImageUser *iuser, + int textarget, + bool is_data, + double UNUSED(time)) { if (ima == NULL) { return NULL; @@ -363,11 +370,14 @@ GPUTexture *GPU_texture_from_blender(Image *ima, const bool mipmap = GPU_get_mipmap(); #ifdef WITH_DDS - if (ibuf->ftype == IMB_FTYPE_DDS) + if (ibuf->ftype == IMB_FTYPE_DDS) { GPU_create_gl_tex_compressed(&bindcode, rect, rectw, recth, textarget, mipmap, ima, ibuf); + } else #endif + { GPU_create_gl_tex(&bindcode, rect, frect, rectw, recth, textarget, mipmap, use_high_bit_depth, ima); + } /* mark as non-color data texture */ if (bindcode) { @@ -556,8 +566,9 @@ void GPU_create_gl_tex( if (mip_cube_map) { for (int j = 0; j < 6; j++) { - glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + j, i, - informat, mipw, miph, 0, GL_RGBA, type, mip_cube_map[j]); + glTexImage2D( + GL_TEXTURE_CUBE_MAP_POSITIVE_X + j, i, + informat, mipw, miph, 0, GL_RGBA, type, mip_cube_map[j]); } } gpu_del_cube_map(mip_cube_map); @@ -639,8 +650,9 @@ bool GPU_upload_dxt_texture(ImBuf *ibuf) size = ((width + 3) / 4) * ((height + 3) / 4) * blocksize; - glCompressedTexImage2D(GL_TEXTURE_2D, i, format, width, height, - 0, size, ibuf->dds_data.data + offset); + glCompressedTexImage2D( + GL_TEXTURE_2D, i, format, width, height, + 0, size, ibuf->dds_data.data + offset); offset += size; width >>= 1; @@ -755,8 +767,9 @@ static bool gpu_check_scaled_image(ImBuf *ibuf, Image *ima, float 
*frect, int x, ImBuf *ibuf_scale = IMB_allocFromBuffer(NULL, frect, w, h); IMB_scaleImBuf(ibuf_scale, rectw, recth); - glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, rectw, recth, GL_RGBA, - GL_FLOAT, ibuf_scale->rect_float); + glTexSubImage2D( + GL_TEXTURE_2D, 0, x, y, rectw, recth, GL_RGBA, + GL_FLOAT, ibuf_scale->rect_float); IMB_freeImBuf(ibuf_scale); } @@ -775,8 +788,9 @@ static bool gpu_check_scaled_image(ImBuf *ibuf, Image *ima, float *frect, int x, } } - glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, rectw, recth, GL_RGBA, - GL_UNSIGNED_BYTE, scalerect); + glTexSubImage2D( + GL_TEXTURE_2D, 0, x, y, rectw, recth, GL_RGBA, + GL_UNSIGNED_BYTE, scalerect); MEM_freeN(scalerect); } @@ -860,8 +874,9 @@ void GPU_paint_update_image(Image *ima, ImageUser *iuser, int x, int y, int w, i glPixelStorei(GL_UNPACK_SKIP_PIXELS, x); glPixelStorei(GL_UNPACK_SKIP_ROWS, y); - glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, w, h, GL_RGBA, - GL_UNSIGNED_BYTE, ibuf->rect); + glTexSubImage2D( + GL_TEXTURE_2D, 0, x, y, w, h, GL_RGBA, + GL_UNSIGNED_BYTE, ibuf->rect); glPixelStorei(GL_UNPACK_ROW_LENGTH, row_length); glPixelStorei(GL_UNPACK_SKIP_PIXELS, skip_pixels); @@ -913,8 +928,9 @@ void GPU_create_smoke(SmokeModifierData *smd, int highres) } /* density only */ else { - sds->tex = GPU_texture_create_3D(sds->res[0], sds->res[1], sds->res[2], - GPU_R8, smoke_get_density(sds->fluid), NULL); + sds->tex = GPU_texture_create_3D( + sds->res[0], sds->res[1], sds->res[2], + GPU_R8, smoke_get_density(sds->fluid), NULL); /* Swizzle the RGBA components to read the Red channel so * that the shader stay the same for colored and non color @@ -926,10 +942,12 @@ void GPU_create_smoke(SmokeModifierData *smd, int highres) glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_SWIZZLE_A, GL_RED); GPU_texture_unbind(sds->tex); } - sds->tex_flame = (smoke_has_fuel(sds->fluid)) ? - GPU_texture_create_3D(sds->res[0], sds->res[1], sds->res[2], - GPU_R8, smoke_get_flame(sds->fluid), NULL) : - NULL; + sds->tex_flame = ( + smoke_has_fuel(sds->fluid) ? + GPU_texture_create_3D( + sds->res[0], sds->res[1], sds->res[2], + GPU_R8, smoke_get_flame(sds->fluid), NULL) : + NULL); } else if (!sds->tex && highres) { /* rgba texture for color + density */ @@ -941,8 +959,9 @@ void GPU_create_smoke(SmokeModifierData *smd, int highres) } /* density only */ else { - sds->tex = GPU_texture_create_3D(sds->res_wt[0], sds->res_wt[1], sds->res_wt[2], - GPU_R8, smoke_turbulence_get_density(sds->wt), NULL); + sds->tex = GPU_texture_create_3D( + sds->res_wt[0], sds->res_wt[1], sds->res_wt[2], + GPU_R8, smoke_turbulence_get_density(sds->wt), NULL); /* Swizzle the RGBA components to read the Red channel so * that the shader stay the same for colored and non color @@ -954,14 +973,17 @@ void GPU_create_smoke(SmokeModifierData *smd, int highres) glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_SWIZZLE_A, GL_RED); GPU_texture_unbind(sds->tex); } - sds->tex_flame = (smoke_turbulence_has_fuel(sds->wt)) ? - GPU_texture_create_3D(sds->res_wt[0], sds->res_wt[1], sds->res_wt[2], - GPU_R8, smoke_turbulence_get_flame(sds->wt), NULL) : - NULL; + sds->tex_flame = ( + smoke_turbulence_has_fuel(sds->wt) ? 
+ GPU_texture_create_3D( + sds->res_wt[0], sds->res_wt[1], sds->res_wt[2], + GPU_R8, smoke_turbulence_get_flame(sds->wt), NULL) : + NULL); } - sds->tex_shadow = GPU_texture_create_3D(sds->res[0], sds->res[1], sds->res[2], - GPU_R8, sds->shadow, NULL); + sds->tex_shadow = GPU_texture_create_3D( + sds->res[0], sds->res[1], sds->res[2], + GPU_R8, sds->shadow, NULL); } #else // WITH_SMOKE (void)highres; @@ -971,6 +993,52 @@ void GPU_create_smoke(SmokeModifierData *smd, int highres) #endif // WITH_SMOKE } +void GPU_create_smoke_velocity(SmokeModifierData *smd) +{ +#ifdef WITH_SMOKE + if (smd->type & MOD_SMOKE_TYPE_DOMAIN) { + SmokeDomainSettings *sds = smd->domain; + + const float *vel_x = smoke_get_velocity_x(sds->fluid); + const float *vel_y = smoke_get_velocity_y(sds->fluid); + const float *vel_z = smoke_get_velocity_z(sds->fluid); + + if (ELEM(NULL, vel_x, vel_y, vel_z)) { + return; + } + + if (!sds->tex_velocity_x) { + sds->tex_velocity_x = GPU_texture_create_3D(sds->res[0], sds->res[1], sds->res[2], GPU_R16F, vel_x, NULL); + sds->tex_velocity_y = GPU_texture_create_3D(sds->res[0], sds->res[1], sds->res[2], GPU_R16F, vel_y, NULL); + sds->tex_velocity_z = GPU_texture_create_3D(sds->res[0], sds->res[1], sds->res[2], GPU_R16F, vel_z, NULL); + } + } +#else // WITH_SMOKE + smd->domain->tex_velocity_x = NULL; + smd->domain->tex_velocity_y = NULL; + smd->domain->tex_velocity_z = NULL; +#endif // WITH_SMOKE +} + +/* TODO Unify with the other GPU_free_smoke. */ +void GPU_free_smoke_velocity(SmokeModifierData *smd) +{ + if (smd->type & MOD_SMOKE_TYPE_DOMAIN && smd->domain) { + if (smd->domain->tex_velocity_x) + GPU_texture_free(smd->domain->tex_velocity_x); + + if (smd->domain->tex_velocity_y) + GPU_texture_free(smd->domain->tex_velocity_y); + + if (smd->domain->tex_velocity_z) + GPU_texture_free(smd->domain->tex_velocity_z); + + smd->domain->tex_velocity_x = NULL; + smd->domain->tex_velocity_y = NULL; + smd->domain->tex_velocity_z = NULL; + } +} + static LinkNode *image_free_queue = NULL; static void gpu_queue_image_for_free(Image *ima) @@ -1255,10 +1323,10 @@ void GPU_select_to_index_array(unsigned int *col, const unsigned int size) { #define INDEX_BUF_ARRAY(INDEX_FROM_BUF_BITS) \ for (i = size; i--; col++) { \ - if ((c = *col)) { \ - *col = INDEX_FROM_BUF_BITS(c); \ - } \ - } ((void)0) + if ((c = *col)) { \ + *col = INDEX_FROM_BUF_BITS(c); \ + } \ + } ((void)0) if (size > 0) { unsigned int i, c; diff --git a/source/blender/gpu/intern/gpu_element.c b/source/blender/gpu/intern/gpu_element.c new file mode 100644 index 00000000000..56a0c90d5b5 --- /dev/null +++ b/source/blender/gpu/intern/gpu_element.c @@ -0,0 +1,311 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. 
+ * + * Contributor(s): Blender Foundation + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/gpu/intern/gpu_element.c + * \ingroup gpu + * + * GPU element list (AKA index buffer) + */ + +#include "MEM_guardedalloc.h" + +#include "GPU_element.h" + +#include "gpu_context_private.h" + +#include <stdlib.h> + +#define KEEP_SINGLE_COPY 1 + +static GLenum convert_index_type_to_gl(GPUIndexBufType type) +{ + static const GLenum table[] = { + [GPU_INDEX_U8] = GL_UNSIGNED_BYTE, /* GL has this, Vulkan does not */ + [GPU_INDEX_U16] = GL_UNSIGNED_SHORT, + [GPU_INDEX_U32] = GL_UNSIGNED_INT + }; + return table[type]; +} + +uint GPU_indexbuf_size_get(const GPUIndexBuf *elem) +{ +#if GPU_TRACK_INDEX_RANGE + static const uint table[] = { + [GPU_INDEX_U8] = sizeof(GLubyte), /* GL has this, Vulkan does not */ + [GPU_INDEX_U16] = sizeof(GLushort), + [GPU_INDEX_U32] = sizeof(GLuint) + }; + return elem->index_len * table[elem->index_type]; +#else + return elem->index_len * sizeof(GLuint); +#endif +} + +void GPU_indexbuf_init_ex( + GPUIndexBufBuilder *builder, GPUPrimType prim_type, + uint index_len, uint vertex_len, bool use_prim_restart) +{ + builder->use_prim_restart = use_prim_restart; + builder->max_allowed_index = vertex_len - 1; + builder->max_index_len = index_len; + builder->index_len = 0; // start empty + builder->prim_type = prim_type; + builder->data = MEM_callocN(builder->max_index_len * sizeof(uint), "GPUIndexBuf data"); +} + +void GPU_indexbuf_init(GPUIndexBufBuilder *builder, GPUPrimType prim_type, uint prim_len, uint vertex_len) +{ + uint verts_per_prim = 0; + switch (prim_type) { + case GPU_PRIM_POINTS: + verts_per_prim = 1; + break; + case GPU_PRIM_LINES: + verts_per_prim = 2; + break; + case GPU_PRIM_TRIS: + verts_per_prim = 3; + break; + case GPU_PRIM_LINES_ADJ: + verts_per_prim = 4; + break; + default: +#if TRUST_NO_ONE + assert(false); +#endif + return; + } + + GPU_indexbuf_init_ex(builder, prim_type, prim_len * verts_per_prim, vertex_len, false); +} + +void GPU_indexbuf_add_generic_vert(GPUIndexBufBuilder *builder, uint v) +{ +#if TRUST_NO_ONE + assert(builder->data != NULL); + assert(builder->index_len < builder->max_index_len); + assert(v <= builder->max_allowed_index); +#endif + builder->data[builder->index_len++] = v; +} + +void GPU_indexbuf_add_primitive_restart(GPUIndexBufBuilder *builder) +{ +#if TRUST_NO_ONE + assert(builder->data != NULL); + assert(builder->index_len < builder->max_index_len); + assert(builder->use_prim_restart); +#endif + builder->data[builder->index_len++] = GPU_PRIM_RESTART; +} + +void GPU_indexbuf_add_point_vert(GPUIndexBufBuilder *builder, uint v) +{ +#if TRUST_NO_ONE + assert(builder->prim_type == GPU_PRIM_POINTS); +#endif + GPU_indexbuf_add_generic_vert(builder, v); +} + +void GPU_indexbuf_add_line_verts(GPUIndexBufBuilder *builder, uint v1, uint v2) +{ +#if TRUST_NO_ONE + assert(builder->prim_type == GPU_PRIM_LINES); + assert(v1 != v2); +#endif + GPU_indexbuf_add_generic_vert(builder, v1); + GPU_indexbuf_add_generic_vert(builder, v2); +} + +void GPU_indexbuf_add_tri_verts(GPUIndexBufBuilder *builder, uint v1, uint v2, uint v3) +{ +#if TRUST_NO_ONE + assert(builder->prim_type == GPU_PRIM_TRIS); + assert(v1 != v2 && v2 != v3 && v3 != v1); +#endif + GPU_indexbuf_add_generic_vert(builder, v1); + GPU_indexbuf_add_generic_vert(builder, v2); + GPU_indexbuf_add_generic_vert(builder, v3); +} + +void GPU_indexbuf_add_line_adj_verts(GPUIndexBufBuilder *builder, uint v1, uint v2, uint v3, uint v4) +{ +#if TRUST_NO_ONE + assert(builder->prim_type == 
GPU_PRIM_LINES_ADJ); + assert(v2 != v3); /* only the line need diff indices */ +#endif + GPU_indexbuf_add_generic_vert(builder, v1); + GPU_indexbuf_add_generic_vert(builder, v2); + GPU_indexbuf_add_generic_vert(builder, v3); + GPU_indexbuf_add_generic_vert(builder, v4); +} + +#if GPU_TRACK_INDEX_RANGE +/* Everything remains 32 bit while building to keep things simple. + * Find min/max after, then convert to smallest index type possible. */ + +static uint index_range(const uint values[], uint value_len, uint *min_out, uint *max_out) +{ + if (value_len == 0) { + *min_out = 0; + *max_out = 0; + return 0; + } + uint min_value = values[0]; + uint max_value = values[0]; + for (uint i = 1; i < value_len; ++i) { + const uint value = values[i]; + if (value == GPU_PRIM_RESTART) + continue; + else if (value < min_value) + min_value = value; + else if (value > max_value) + max_value = value; + } + *min_out = min_value; + *max_out = max_value; + return max_value - min_value; +} + +static void squeeze_indices_byte(GPUIndexBufBuilder *builder, GPUIndexBuf *elem) +{ + const uint *values = builder->data; + const uint index_len = elem->index_len; + + /* data will never be *larger* than builder->data... + * converting in place to avoid extra allocation */ + GLubyte *data = (GLubyte *)builder->data; + + if (elem->max_index > 0xFF) { + const uint base = elem->min_index; + elem->base_index = base; + elem->min_index = 0; + elem->max_index -= base; + for (uint i = 0; i < index_len; ++i) { + data[i] = (values[i] == GPU_PRIM_RESTART) ? 0xFF : (GLubyte)(values[i] - base); + } + } + else { + elem->base_index = 0; + for (uint i = 0; i < index_len; ++i) { + data[i] = (GLubyte)(values[i]); + } + } +} + +static void squeeze_indices_short(GPUIndexBufBuilder *builder, GPUIndexBuf *elem) +{ + const uint *values = builder->data; + const uint index_len = elem->index_len; + + /* data will never be *larger* than builder->data... + * converting in place to avoid extra allocation */ + GLushort *data = (GLushort *)builder->data; + + if (elem->max_index > 0xFFFF) { + const uint base = elem->min_index; + elem->base_index = base; + elem->min_index = 0; + elem->max_index -= base; + for (uint i = 0; i < index_len; ++i) { + data[i] = (values[i] == GPU_PRIM_RESTART) ? 0xFFFF : (GLushort)(values[i] - base); + } + } + else { + elem->base_index = 0; + for (uint i = 0; i < index_len; ++i) { + data[i] = (GLushort)(values[i]); + } + } +} + +#endif /* GPU_TRACK_INDEX_RANGE */ + +GPUIndexBuf *GPU_indexbuf_build(GPUIndexBufBuilder *builder) +{ + GPUIndexBuf *elem = MEM_callocN(sizeof(GPUIndexBuf), "GPUIndexBuf"); + GPU_indexbuf_build_in_place(builder, elem); + return elem; +} + +void GPU_indexbuf_build_in_place(GPUIndexBufBuilder *builder, GPUIndexBuf *elem) +{ +#if TRUST_NO_ONE + assert(builder->data != NULL); +#endif + elem->index_len = builder->index_len; + elem->use_prim_restart = builder->use_prim_restart; + +#if GPU_TRACK_INDEX_RANGE + uint range = index_range(builder->data, builder->index_len, &elem->min_index, &elem->max_index); + + /* count the primitive restart index. 
*/ + if (elem->use_prim_restart) { + range += 1; + } + + if (range <= 0xFF) { + elem->index_type = GPU_INDEX_U8; + squeeze_indices_byte(builder, elem); + } + else if (range <= 0xFFFF) { + elem->index_type = GPU_INDEX_U16; + squeeze_indices_short(builder, elem); + } + else { + elem->index_type = GPU_INDEX_U32; + elem->base_index = 0; + } + elem->gl_index_type = convert_index_type_to_gl(elem->index_type); +#endif + + if (elem->vbo_id == 0) { + elem->vbo_id = GPU_buf_alloc(); + } + /* send data to GPU */ + /* GL_ELEMENT_ARRAY_BUFFER changes the state of the last VAO bound, + * so we use the GL_ARRAY_BUFFER here to create a buffer without + * interfering in the VAO state. */ + glBindBuffer(GL_ARRAY_BUFFER, elem->vbo_id); + glBufferData(GL_ARRAY_BUFFER, GPU_indexbuf_size_get(elem), builder->data, GL_STATIC_DRAW); + + /* discard builder (one-time use) */ + MEM_freeN(builder->data); + builder->data = NULL; + /* other fields are safe to leave */ +} + +void GPU_indexbuf_use(GPUIndexBuf *elem) +{ + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, elem->vbo_id); +} + +void GPU_indexbuf_discard(GPUIndexBuf *elem) +{ + if (elem->vbo_id) { + GPU_buf_free(elem->vbo_id); + } + MEM_freeN(elem); +} diff --git a/source/blender/gpu/intern/gpu_extensions.c b/source/blender/gpu/intern/gpu_extensions.c index dff6cfb74a8..43081154e89 100644 --- a/source/blender/gpu/intern/gpu_extensions.c +++ b/source/blender/gpu/intern/gpu_extensions.c @@ -66,6 +66,7 @@ static struct GPUGlobal { GLint maxtexsize; + GLint maxtexlayers; GLint maxcubemapsize; GLint maxtextures; GLint maxubosize; @@ -96,6 +97,11 @@ int GPU_max_texture_size(void) return GG.maxtexsize; } +int GPU_max_texture_layers(void) +{ + return GG.maxtexlayers; +} + int GPU_max_textures(void) { return GG.maxtextures; @@ -142,6 +148,7 @@ void gpu_extensions_init(void) glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &GG.maxtextures); glGetIntegerv(GL_MAX_TEXTURE_SIZE, &GG.maxtexsize); + glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &GG.maxtexlayers); glGetIntegerv(GL_MAX_CUBE_MAP_TEXTURE_SIZE, &GG.maxcubemapsize); if (GLEW_EXT_texture_filter_anisotropic) diff --git a/source/blender/gpu/intern/gpu_framebuffer.c b/source/blender/gpu/intern/gpu_framebuffer.c index ffc72718e42..56abe040f32 100644 --- a/source/blender/gpu/intern/gpu_framebuffer.c +++ b/source/blender/gpu/intern/gpu_framebuffer.c @@ -42,9 +42,8 @@ #include "GPU_shader.h" #include "GPU_texture.h" -#include "intern/gpu_private.h" - -static ThreadLocal(void *) g_currentfb; +#include "gpu_private.h" +#include "gpu_context_private.h" typedef enum { GPU_FB_DEPTH_ATTACHMENT = 0, @@ -69,6 +68,7 @@ typedef enum { #define GPU_FB_ATTACHEMENT_SET_DIRTY(flag, type) (flag |= (1 << type)) struct GPUFrameBuffer { + GPUContext *ctx; GLuint object; GPUAttachment attachments[GPU_FB_MAX_ATTACHEMENT]; uint16_t dirty_flag; @@ -121,7 +121,7 @@ static GPUTexture *framebuffer_get_depth_tex(GPUFrameBuffer *fb) if (fb->attachments[GPU_FB_DEPTH_ATTACHMENT].tex) return fb->attachments[GPU_FB_DEPTH_ATTACHMENT].tex; else - return fb->attachments[GPU_FB_DEPTH_STENCIL_ATTACHMENT].tex;; + return fb->attachments[GPU_FB_DEPTH_STENCIL_ATTACHMENT].tex; } static GPUTexture *framebuffer_get_color_tex(GPUFrameBuffer *fb, int slot) @@ -167,22 +167,29 @@ static void gpu_print_framebuffer_error(GLenum status, char err_out[256]) void gpu_framebuffer_module_init(void) { - BLI_thread_local_create(g_currentfb); } void gpu_framebuffer_module_exit(void) { - BLI_thread_local_delete(g_currentfb); } -static uint gpu_framebuffer_current_get(void) +GPUFrameBuffer 
*GPU_framebuffer_active_get(void) { - return GET_UINT_FROM_POINTER(BLI_thread_local_get(g_currentfb)); + GPUContext *ctx = GPU_context_active_get(); + if (ctx) { + return gpu_context_active_framebuffer_get(ctx); + } + else { + return 0; + } } -static void gpu_framebuffer_current_set(uint object) +static void gpu_framebuffer_current_set(GPUFrameBuffer *fb) { - BLI_thread_local_set(g_currentfb, SET_UINT_IN_POINTER(object)); + GPUContext *ctx = GPU_context_active_get(); + if (ctx) { + gpu_context_active_framebuffer_set(ctx, fb); + } } /* GPUFrameBuffer */ @@ -196,7 +203,9 @@ GPUFrameBuffer *GPU_framebuffer_create(void) static void gpu_framebuffer_init(GPUFrameBuffer *fb) { - glGenFramebuffers(1, &fb->object); + fb->object = GPU_fbo_alloc(); + fb->ctx = GPU_context_active_get(); + gpu_context_add_framebuffer(fb->ctx, fb); } void GPU_framebuffer_free(GPUFrameBuffer *fb) @@ -207,11 +216,14 @@ void GPU_framebuffer_free(GPUFrameBuffer *fb) } } - /* This restores the framebuffer if it was bound */ - glDeleteFramebuffers(1, &fb->object); + if (fb->object != 0) { + /* This restores the framebuffer if it was bound */ + GPU_fbo_free(fb->object, fb->ctx); + gpu_context_remove_framebuffer(fb->ctx, fb); + } - if (gpu_framebuffer_current_get() == fb->object) { - gpu_framebuffer_current_set(0); + if (GPU_framebuffer_active_get() == fb) { + gpu_framebuffer_current_set(NULL); } MEM_freeN(fb); @@ -340,8 +352,9 @@ static void gpu_framebuffer_attachment_attach(GPUAttachment *attach, GPUAttachme if (attach->layer > -1) { if (GPU_texture_cube(attach->tex)) { - glFramebufferTexture2D(GL_FRAMEBUFFER, gl_attachment, GL_TEXTURE_CUBE_MAP_POSITIVE_X + attach->layer, - tex_bind, attach->mip); + glFramebufferTexture2D( + GL_FRAMEBUFFER, gl_attachment, GL_TEXTURE_CUBE_MAP_POSITIVE_X + attach->layer, + tex_bind, attach->mip); } else { glFramebufferTextureLayer(GL_FRAMEBUFFER, gl_attachment, tex_bind, attach->mip, attach->layer); @@ -363,7 +376,7 @@ static void gpu_framebuffer_update_attachments(GPUFrameBuffer *fb) GLenum gl_attachments[GPU_FB_MAX_COLOR_ATTACHMENT]; int numslots = 0; - BLI_assert(gpu_framebuffer_current_get() == fb->object); + BLI_assert(GPU_framebuffer_active_get() == fb); /* Update attachments */ for (GPUAttachmentType type = 0; type < GPU_FB_MAX_ATTACHEMENT; ++type) { @@ -407,10 +420,10 @@ void GPU_framebuffer_bind(GPUFrameBuffer *fb) if (fb->object == 0) gpu_framebuffer_init(fb); - if (gpu_framebuffer_current_get() != fb->object) + if (GPU_framebuffer_active_get() != fb) glBindFramebuffer(GL_FRAMEBUFFER, fb->object); - gpu_framebuffer_current_set(fb->object); + gpu_framebuffer_current_set(fb); if (fb->dirty_flag != 0) gpu_framebuffer_update_attachments(fb); @@ -431,20 +444,15 @@ void GPU_framebuffer_bind(GPUFrameBuffer *fb) void GPU_framebuffer_restore(void) { - if (gpu_framebuffer_current_get() != 0) { + if (GPU_framebuffer_active_get() != NULL) { glBindFramebuffer(GL_FRAMEBUFFER, 0); - gpu_framebuffer_current_set(0); + gpu_framebuffer_current_set(NULL); } } bool GPU_framebuffer_bound(GPUFrameBuffer *fb) { - return (fb->object == gpu_framebuffer_current_get()) && (fb->object != 0); -} - -unsigned int GPU_framebuffer_current_get(void) -{ - return gpu_framebuffer_current_get(); + return (fb == GPU_framebuffer_active_get()) && (fb->object != 0); } bool GPU_framebuffer_check_valid(GPUFrameBuffer *fb, char err_out[256]) @@ -518,7 +526,7 @@ void GPU_framebuffer_read_color( case 1: type = GL_RED; break; case 2: type = GL_RG; break; case 3: type = GL_RGB; break; - case 4: type = GL_RGBA; break; + case 4: type = 
GL_RGBA; break; default: BLI_assert(false && "wrong number of read channels"); return; @@ -535,7 +543,7 @@ void GPU_framebuffer_blit( { BLI_assert(blit_buffers != 0); - GLuint prev_fb = gpu_framebuffer_current_get(); + GPUFrameBuffer *prev_fb = GPU_framebuffer_active_get(); /* Framebuffers must be up to date. This simplify this function. */ if (fb_read->dirty_flag != 0 || fb_read->object == 0) { @@ -549,12 +557,14 @@ void GPU_framebuffer_blit( const bool do_depth = (blit_buffers & GPU_DEPTH_BIT); const bool do_stencil = (blit_buffers & GPU_STENCIL_BIT); - GPUTexture *read_tex = (do_depth || do_stencil) - ? framebuffer_get_depth_tex(fb_read) - : framebuffer_get_color_tex(fb_read, read_slot); - GPUTexture *write_tex = (do_depth || do_stencil) - ? framebuffer_get_depth_tex(fb_write) - : framebuffer_get_color_tex(fb_write, read_slot); + GPUTexture *read_tex = ( + (do_depth || do_stencil) ? + framebuffer_get_depth_tex(fb_read) : + framebuffer_get_color_tex(fb_read, read_slot)); + GPUTexture *write_tex = ( + (do_depth || do_stencil) ? + framebuffer_get_depth_tex(fb_write) : + framebuffer_get_color_tex(fb_write, read_slot)); if (do_depth) { BLI_assert(GPU_texture_depth(read_tex) && GPU_texture_depth(write_tex)); @@ -585,16 +595,17 @@ void GPU_framebuffer_blit( GLbitfield mask = convert_buffer_bits_to_gl(blit_buffers); - glBlitFramebuffer(0, 0, fb_read->width, fb_read->height, - 0, 0, fb_write->width, fb_write->height, - mask, GL_NEAREST); + glBlitFramebuffer( + 0, 0, fb_read->width, fb_read->height, + 0, 0, fb_write->width, fb_write->height, + mask, GL_NEAREST); /* Restore previous framebuffer */ - if (fb_write->object == prev_fb) { + if (fb_write == prev_fb) { GPU_framebuffer_bind(fb_write); /* To update drawbuffers */ } else { - glBindFramebuffer(GL_FRAMEBUFFER, prev_fb); + glBindFramebuffer(GL_FRAMEBUFFER, prev_fb->object); gpu_framebuffer_current_set(prev_fb); } } @@ -608,13 +619,13 @@ void GPU_framebuffer_recursive_downsample( void (*callback)(void *userData, int level), void *userData) { /* Framebuffer must be up to date and bound. This simplify this function. */ - if (gpu_framebuffer_current_get() != fb->object || fb->dirty_flag != 0 || fb->object == 0) { + if (GPU_framebuffer_active_get() != fb || fb->dirty_flag != 0 || fb->object == 0) { GPU_framebuffer_bind(fb); } /* HACK: We make the framebuffer appear not bound in order to * not trigger any error in GPU_texture_bind(). */ - GLuint prev_fb = gpu_framebuffer_current_get(); - gpu_framebuffer_current_set(0); + GPUFrameBuffer *prev_fb = GPU_framebuffer_active_get(); + gpu_framebuffer_current_set(NULL); int i; int current_dim[2] = {fb->width, fb->height}; @@ -679,7 +690,8 @@ GPUOffScreen *GPU_offscreen_create(int width, int height, int samples, bool dept ofs = MEM_callocN(sizeof(GPUOffScreen), "GPUOffScreen"); - ofs->color = GPU_texture_create_2D_multisample(width, height, + ofs->color = GPU_texture_create_2D_multisample( + width, height, (high_bitdepth) ? GPU_RGBA16F : GPU_RGBA8, NULL, samples, err_out); if (depth) { @@ -776,14 +788,16 @@ void GPU_offscreen_read_pixels(GPUOffScreen *ofs, int type, void *pixels) /* create texture for new 'fbo_blit' */ glGenTextures(1, &tex_blit); glBindTexture(GL_TEXTURE_2D, tex_blit); - glTexImage2D(GL_TEXTURE_2D, 0, (type == GL_FLOAT) ? GL_RGBA16F : GL_RGBA8, - w, h, 0, GL_RGBA, type, 0); + glTexImage2D( + GL_TEXTURE_2D, 0, (type == GL_FLOAT) ? 
GL_RGBA16F : GL_RGBA8, + w, h, 0, GL_RGBA, type, 0); /* write into new single-sample buffer */ glGenFramebuffers(1, &fbo_blit); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo_blit); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, tex_blit, 0); + glFramebufferTexture2D( + GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, tex_blit, 0); GLenum status = glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); if (status != GL_FRAMEBUFFER_COMPLETE) { diff --git a/source/blender/gpu/intern/gpu_immediate.c b/source/blender/gpu/intern/gpu_immediate.c index 5f22b7f9279..9674cf0b9f7 100644 --- a/source/blender/gpu/intern/gpu_immediate.c +++ b/source/blender/gpu/intern/gpu_immediate.c @@ -15,22 +15,144 @@ * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * - * The Original Code is Copyright (C) 2016 Blender Foundation. + * The Original Code is Copyright (C) 2016 by Mike Erwin. * All rights reserved. * - * The Original Code is: all of this file. - * - * Contributor(s): Mike Erwin + * Contributor(s): Blender Foundation * * ***** END GPL LICENSE BLOCK ***** */ -#include "GPU_immediate.h" -#include "GPU_matrix.h" +/** \file blender/gpu/intern/gpu_immediate.c + * \ingroup gpu + * + * GPU immediate mode work-alike + */ + #include "UI_resources.h" -#include "BLI_utildefines.h" +#include "GPU_attr_binding.h" +#include "GPU_immediate.h" + +#include "gpu_attr_binding_private.h" +#include "gpu_context_private.h" +#include "gpu_primitive_private.h" #include "gpu_shader_private.h" +#include "gpu_vertex_format_private.h" + +#include <string.h> +#include <stdlib.h> + +/* necessary functions from matrix API */ +extern void GPU_matrix_bind(const GPUShaderInterface *); +extern bool GPU_matrix_dirty_get(void); + +typedef struct { + /* TODO: organize this struct by frequency of change (run-time) */ + + GPUBatch *batch; + GPUContext *context; + + /* current draw call */ + GLubyte *buffer_data; + uint buffer_offset; + uint buffer_bytes_mapped; + uint vertex_len; + bool strict_vertex_len; + GPUPrimType prim_type; + + GPUVertFormat vertex_format; + + /* current vertex */ + uint vertex_idx; + GLubyte *vertex_data; + uint16_t unassigned_attrib_bits; /* which attributes of current vertex have not been given values? */ + + GLuint vbo_id; + GLuint vao_id; + + GLuint bound_program; + const GPUShaderInterface *shader_interface; + GPUAttrBinding attrib_binding; + uint16_t prev_enabled_attrib_bits; /* <-- only affects this VAO, so we're ok */ +} Immediate; + +/* size of internal buffer -- make this adjustable? 
*/ +#define IMM_BUFFER_SIZE (4 * 1024 * 1024) + +static bool initialized = false; +static Immediate imm; + +void immInit(void) +{ +#if TRUST_NO_ONE + assert(!initialized); +#endif + memset(&imm, 0, sizeof(Immediate)); + + imm.vbo_id = GPU_buf_alloc(); + glBindBuffer(GL_ARRAY_BUFFER, imm.vbo_id); + glBufferData(GL_ARRAY_BUFFER, IMM_BUFFER_SIZE, NULL, GL_DYNAMIC_DRAW); + + imm.prim_type = GPU_PRIM_NONE; + imm.strict_vertex_len = true; + + glBindBuffer(GL_ARRAY_BUFFER, 0); + initialized = true; +} + +void immActivate(void) +{ +#if TRUST_NO_ONE + assert(initialized); + assert(imm.prim_type == GPU_PRIM_NONE); /* make sure we're not between a Begin/End pair */ + assert(imm.vao_id == 0); +#endif + imm.vao_id = GPU_vao_alloc(); + imm.context = GPU_context_active_get(); +} + +void immDeactivate(void) +{ +#if TRUST_NO_ONE + assert(initialized); + assert(imm.prim_type == GPU_PRIM_NONE); /* make sure we're not between a Begin/End pair */ + assert(imm.vao_id != 0); +#endif + GPU_vao_free(imm.vao_id, imm.context); + imm.vao_id = 0; + imm.prev_enabled_attrib_bits = 0; +} + +void immDestroy(void) +{ + GPU_buf_free(imm.vbo_id); + initialized = false; +} + +GPUVertFormat *immVertexFormat(void) +{ + GPU_vertformat_clear(&imm.vertex_format); + return &imm.vertex_format; +} + +void immBindProgram(GLuint program, const GPUShaderInterface *shaderface) +{ +#if TRUST_NO_ONE + assert(imm.bound_program == 0); + assert(glIsProgram(program)); +#endif + + imm.bound_program = program; + imm.shader_interface = shaderface; + + if (!imm.vertex_format.packed) + VertexFormat_pack(&imm.vertex_format); + + glUseProgram(program); + get_attrib_locations(&imm.vertex_format, &imm.attrib_binding, shaderface); + GPU_matrix_bind(shaderface); +} void immBindBuiltinProgram(GPUBuiltinShader shader_id) { @@ -38,6 +160,718 @@ void immBindBuiltinProgram(GPUBuiltinShader shader_id) immBindProgram(shader->program, shader->interface); } +void immUnbindProgram(void) +{ +#if TRUST_NO_ONE + assert(imm.bound_program != 0); +#endif +#if PROGRAM_NO_OPTI + glUseProgram(0); +#endif + imm.bound_program = 0; +} + +#if TRUST_NO_ONE +static bool vertex_count_makes_sense_for_primitive(uint vertex_len, GPUPrimType prim_type) +{ + /* does vertex_len make sense for this primitive type? */ + if (vertex_len == 0) { + return false; + } + + switch (prim_type) { + case GPU_PRIM_POINTS: + return true; + case GPU_PRIM_LINES: + return vertex_len % 2 == 0; + case GPU_PRIM_LINE_STRIP: + case GPU_PRIM_LINE_LOOP: + return vertex_len >= 2; + case GPU_PRIM_LINE_STRIP_ADJ: + return vertex_len >= 4; + case GPU_PRIM_TRIS: + return vertex_len % 3 == 0; + case GPU_PRIM_TRI_STRIP: + case GPU_PRIM_TRI_FAN: + return vertex_len >= 3; + default: + return false; + } +} +#endif + +void immBegin(GPUPrimType prim_type, uint vertex_len) +{ +#if TRUST_NO_ONE + assert(initialized); + assert(imm.prim_type == GPU_PRIM_NONE); /* make sure we haven't already begun */ + assert(vertex_count_makes_sense_for_primitive(vertex_len, prim_type)); +#endif + imm.prim_type = prim_type; + imm.vertex_len = vertex_len; + imm.vertex_idx = 0; + imm.unassigned_attrib_bits = imm.attrib_binding.enabled_bits; + + /* how many bytes do we need for this draw call? */ + const uint bytes_needed = vertex_buffer_size(&imm.vertex_format, vertex_len); + +#if TRUST_NO_ONE + assert(bytes_needed <= IMM_BUFFER_SIZE); +#endif + + glBindBuffer(GL_ARRAY_BUFFER, imm.vbo_id); + + /* does the current buffer have enough room? 
*/ + const uint available_bytes = IMM_BUFFER_SIZE - imm.buffer_offset; + /* ensure vertex data is aligned */ + const uint pre_padding = padding(imm.buffer_offset, imm.vertex_format.stride); /* might waste a little space, but it's safe */ + if ((bytes_needed + pre_padding) <= available_bytes) { + imm.buffer_offset += pre_padding; + } + else { + /* orphan this buffer & start with a fresh one */ + /* this method works on all platforms, old & new */ + glBufferData(GL_ARRAY_BUFFER, IMM_BUFFER_SIZE, NULL, GL_DYNAMIC_DRAW); + + imm.buffer_offset = 0; + } + +/* printf("mapping %u to %u\n", imm.buffer_offset, imm.buffer_offset + bytes_needed - 1); */ + + imm.buffer_data = glMapBufferRange(GL_ARRAY_BUFFER, imm.buffer_offset, bytes_needed, + GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | (imm.strict_vertex_len ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)); + +#if TRUST_NO_ONE + assert(imm.buffer_data != NULL); +#endif + + imm.buffer_bytes_mapped = bytes_needed; + imm.vertex_data = imm.buffer_data; +} + +void immBeginAtMost(GPUPrimType prim_type, uint vertex_len) +{ +#if TRUST_NO_ONE + assert(vertex_len > 0); +#endif + + imm.strict_vertex_len = false; + immBegin(prim_type, vertex_len); +} + + +GPUBatch *immBeginBatch(GPUPrimType prim_type, uint vertex_len) +{ +#if TRUST_NO_ONE + assert(initialized); + assert(imm.prim_type == GPU_PRIM_NONE); /* make sure we haven't already begun */ + assert(vertex_count_makes_sense_for_primitive(vertex_len, prim_type)); +#endif + imm.prim_type = prim_type; + imm.vertex_len = vertex_len; + imm.vertex_idx = 0; + imm.unassigned_attrib_bits = imm.attrib_binding.enabled_bits; + + GPUVertBuf *verts = GPU_vertbuf_create_with_format(&imm.vertex_format); + GPU_vertbuf_data_alloc(verts, vertex_len); + + imm.buffer_bytes_mapped = GPU_vertbuf_size_get(verts); + imm.vertex_data = verts->data; + + imm.batch = GPU_batch_create_ex(prim_type, verts, NULL, GPU_BATCH_OWNS_VBO); + imm.batch->phase = GPU_BATCH_BUILDING; + + return imm.batch; +} + +GPUBatch *immBeginBatchAtMost(GPUPrimType prim_type, uint vertex_len) +{ + imm.strict_vertex_len = false; + return immBeginBatch(prim_type, vertex_len); +} + +static void immDrawSetup(void) +{ + /* set up VAO -- can be done during Begin or End really */ + glBindVertexArray(imm.vao_id); + + /* enable/disable vertex attribs as needed */ + if (imm.attrib_binding.enabled_bits != imm.prev_enabled_attrib_bits) { + for (uint loc = 0; loc < GPU_VERT_ATTR_MAX_LEN; ++loc) { + bool is_enabled = imm.attrib_binding.enabled_bits & (1 << loc); + bool was_enabled = imm.prev_enabled_attrib_bits & (1 << loc); + + if (is_enabled && !was_enabled) { + glEnableVertexAttribArray(loc); + } + else if (was_enabled && !is_enabled) { + glDisableVertexAttribArray(loc); + } + } + + imm.prev_enabled_attrib_bits = imm.attrib_binding.enabled_bits; + } + + const uint stride = imm.vertex_format.stride; + + for (uint a_idx = 0; a_idx < imm.vertex_format.attr_len; ++a_idx) { + const GPUVertAttr *a = imm.vertex_format.attribs + a_idx; + + const uint offset = imm.buffer_offset + a->offset; + const GLvoid *pointer = (const GLubyte *)0 + offset; + + const uint loc = read_attrib_location(&imm.attrib_binding, a_idx); + + switch (a->fetch_mode) { + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + glVertexAttribPointer(loc, a->comp_len, a->gl_comp_type, GL_FALSE, stride, pointer); + break; + case GPU_FETCH_INT_TO_FLOAT_UNIT: + glVertexAttribPointer(loc, a->comp_len, a->gl_comp_type, GL_TRUE, stride, pointer); + break; + case GPU_FETCH_INT: + glVertexAttribIPointer(loc, a->comp_len, 
a->gl_comp_type, stride, pointer); + } + } + + if (GPU_matrix_dirty_get()) { + GPU_matrix_bind(imm.shader_interface); + } +} + +void immEnd(void) +{ +#if TRUST_NO_ONE + assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ +#endif + + uint buffer_bytes_used; + if (imm.strict_vertex_len) { +#if TRUST_NO_ONE + assert(imm.vertex_idx == imm.vertex_len); /* with all vertices defined */ +#endif + buffer_bytes_used = imm.buffer_bytes_mapped; + } + else { +#if TRUST_NO_ONE + assert(imm.vertex_idx <= imm.vertex_len); +#endif + if (imm.vertex_idx == imm.vertex_len) { + buffer_bytes_used = imm.buffer_bytes_mapped; + } + else { +#if TRUST_NO_ONE + assert(imm.vertex_idx == 0 || vertex_count_makes_sense_for_primitive(imm.vertex_idx, imm.prim_type)); +#endif + imm.vertex_len = imm.vertex_idx; + buffer_bytes_used = vertex_buffer_size(&imm.vertex_format, imm.vertex_len); + /* unused buffer bytes are available to the next immBegin */ + } + /* tell OpenGL what range was modified so it doesn't copy the whole mapped range */ + glFlushMappedBufferRange(GL_ARRAY_BUFFER, 0, buffer_bytes_used); + } + + if (imm.batch) { + if (buffer_bytes_used != imm.buffer_bytes_mapped) { + GPU_vertbuf_data_resize(imm.batch->verts[0], imm.vertex_len); + /* TODO: resize only if vertex count is much smaller */ + } + GPU_batch_program_set(imm.batch, imm.bound_program, imm.shader_interface); + imm.batch->phase = GPU_BATCH_READY_TO_DRAW; + imm.batch = NULL; /* don't free, batch belongs to caller */ + } + else { + glUnmapBuffer(GL_ARRAY_BUFFER); + if (imm.vertex_len > 0) { + immDrawSetup(); + glDrawArrays(convert_prim_type_to_gl(imm.prim_type), 0, imm.vertex_len); + } + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindVertexArray(0); + /* prep for next immBegin */ + imm.buffer_offset += buffer_bytes_used; + } + + /* prep for next immBegin */ + imm.prim_type = GPU_PRIM_NONE; + imm.strict_vertex_len = true; +} + +static void setAttribValueBit(uint attrib_id) +{ + uint16_t mask = 1 << attrib_id; +#if TRUST_NO_ONE + assert(imm.unassigned_attrib_bits & mask); /* not already set */ +#endif + imm.unassigned_attrib_bits &= ~mask; +} + + +/* --- generic attribute functions --- */ + +void immAttrib1f(uint attrib_id, float x) +{ + GPUVertAttr *attrib = imm.vertex_format.attribs + attrib_id; +#if TRUST_NO_ONE + assert(attrib_id < imm.vertex_format.attr_len); + assert(attrib->comp_type == GPU_COMP_F32); + assert(attrib->comp_len == 1); + assert(imm.vertex_idx < imm.vertex_len); + assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ +#endif + setAttribValueBit(attrib_id); + + float *data = (float *)(imm.vertex_data + attrib->offset); +/* printf("%s %td %p\n", __FUNCTION__, (GLubyte*)data - imm.buffer_data, data); */ + + data[0] = x; +} + +void immAttrib2f(uint attrib_id, float x, float y) +{ + GPUVertAttr *attrib = imm.vertex_format.attribs + attrib_id; +#if TRUST_NO_ONE + assert(attrib_id < imm.vertex_format.attr_len); + assert(attrib->comp_type == GPU_COMP_F32); + assert(attrib->comp_len == 2); + assert(imm.vertex_idx < imm.vertex_len); + assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ +#endif + setAttribValueBit(attrib_id); + + float *data = (float *)(imm.vertex_data + attrib->offset); +/* printf("%s %td %p\n", __FUNCTION__, (GLubyte*)data - imm.buffer_data, data); */ + + data[0] = x; + data[1] = y; +} + +void immAttrib3f(uint attrib_id, float x, float y, float z) +{ + GPUVertAttr *attrib = imm.vertex_format.attribs + attrib_id; +#if TRUST_NO_ONE + 
assert(attrib_id < imm.vertex_format.attr_len); + assert(attrib->comp_type == GPU_COMP_F32); + assert(attrib->comp_len == 3); + assert(imm.vertex_idx < imm.vertex_len); + assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ +#endif + setAttribValueBit(attrib_id); + + float *data = (float *)(imm.vertex_data + attrib->offset); +/* printf("%s %td %p\n", __FUNCTION__, (GLubyte*)data - imm.buffer_data, data); */ + + data[0] = x; + data[1] = y; + data[2] = z; +} + +void immAttrib4f(uint attrib_id, float x, float y, float z, float w) +{ + GPUVertAttr *attrib = imm.vertex_format.attribs + attrib_id; +#if TRUST_NO_ONE + assert(attrib_id < imm.vertex_format.attr_len); + assert(attrib->comp_type == GPU_COMP_F32); + assert(attrib->comp_len == 4); + assert(imm.vertex_idx < imm.vertex_len); + assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ +#endif + setAttribValueBit(attrib_id); + + float *data = (float *)(imm.vertex_data + attrib->offset); +/* printf("%s %td %p\n", __FUNCTION__, (GLubyte*)data - imm.buffer_data, data); */ + + data[0] = x; + data[1] = y; + data[2] = z; + data[3] = w; +} + +void immAttrib1u(uint attrib_id, uint x) +{ + GPUVertAttr *attrib = imm.vertex_format.attribs + attrib_id; +#if TRUST_NO_ONE + assert(attrib_id < imm.vertex_format.attr_len); + assert(attrib->comp_type == GPU_COMP_U32); + assert(attrib->comp_len == 1); + assert(imm.vertex_idx < imm.vertex_len); + assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ +#endif + setAttribValueBit(attrib_id); + + uint *data = (uint *)(imm.vertex_data + attrib->offset); + + data[0] = x; +} + +void immAttrib2i(uint attrib_id, int x, int y) +{ + GPUVertAttr *attrib = imm.vertex_format.attribs + attrib_id; +#if TRUST_NO_ONE + assert(attrib_id < imm.vertex_format.attr_len); + assert(attrib->comp_type == GPU_COMP_I32); + assert(attrib->comp_len == 2); + assert(imm.vertex_idx < imm.vertex_len); + assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ +#endif + setAttribValueBit(attrib_id); + + int *data = (int *)(imm.vertex_data + attrib->offset); + + data[0] = x; + data[1] = y; +} + +void immAttrib2s(uint attrib_id, short x, short y) +{ + GPUVertAttr *attrib = imm.vertex_format.attribs + attrib_id; +#if TRUST_NO_ONE + assert(attrib_id < imm.vertex_format.attr_len); + assert(attrib->comp_type == GPU_COMP_I16); + assert(attrib->comp_len == 2); + assert(imm.vertex_idx < imm.vertex_len); + assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ +#endif + setAttribValueBit(attrib_id); + + short *data = (short *)(imm.vertex_data + attrib->offset); + + data[0] = x; + data[1] = y; +} + +void immAttrib2fv(uint attrib_id, const float data[2]) +{ + immAttrib2f(attrib_id, data[0], data[1]); +} + +void immAttrib3fv(uint attrib_id, const float data[3]) +{ + immAttrib3f(attrib_id, data[0], data[1], data[2]); +} + +void immAttrib4fv(uint attrib_id, const float data[4]) +{ + immAttrib4f(attrib_id, data[0], data[1], data[2], data[3]); +} + +void immAttrib3ub(uint attrib_id, unsigned char r, unsigned char g, unsigned char b) +{ + GPUVertAttr *attrib = imm.vertex_format.attribs + attrib_id; +#if TRUST_NO_ONE + assert(attrib_id < imm.vertex_format.attr_len); + assert(attrib->comp_type == GPU_COMP_U8); + assert(attrib->comp_len == 3); + assert(imm.vertex_idx < imm.vertex_len); + assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ +#endif + 
setAttribValueBit(attrib_id); + + GLubyte *data = imm.vertex_data + attrib->offset; +/* printf("%s %td %p\n", __FUNCTION__, data - imm.buffer_data, data); */ + + data[0] = r; + data[1] = g; + data[2] = b; +} + +void immAttrib4ub(uint attrib_id, unsigned char r, unsigned char g, unsigned char b, unsigned char a) +{ + GPUVertAttr *attrib = imm.vertex_format.attribs + attrib_id; +#if TRUST_NO_ONE + assert(attrib_id < imm.vertex_format.attr_len); + assert(attrib->comp_type == GPU_COMP_U8); + assert(attrib->comp_len == 4); + assert(imm.vertex_idx < imm.vertex_len); + assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ +#endif + setAttribValueBit(attrib_id); + + GLubyte *data = imm.vertex_data + attrib->offset; +/* printf("%s %td %p\n", __FUNCTION__, data - imm.buffer_data, data); */ + + data[0] = r; + data[1] = g; + data[2] = b; + data[3] = a; +} + +void immAttrib3ubv(uint attrib_id, const unsigned char data[3]) +{ + immAttrib3ub(attrib_id, data[0], data[1], data[2]); +} + +void immAttrib4ubv(uint attrib_id, const unsigned char data[4]) +{ + immAttrib4ub(attrib_id, data[0], data[1], data[2], data[3]); +} + +void immSkipAttrib(uint attrib_id) +{ +#if TRUST_NO_ONE + assert(attrib_id < imm.vertex_format.attr_len); + assert(imm.vertex_idx < imm.vertex_len); + assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ +#endif + setAttribValueBit(attrib_id); +} + +static void immEndVertex(void) /* and move on to the next vertex */ +{ +#if TRUST_NO_ONE + assert(imm.prim_type != GPU_PRIM_NONE); /* make sure we're between a Begin/End pair */ + assert(imm.vertex_idx < imm.vertex_len); +#endif + + /* have all attribs been assigned values? + * if not, copy value from previous vertex */ + if (imm.unassigned_attrib_bits) { +#if TRUST_NO_ONE + assert(imm.vertex_idx > 0); /* first vertex must have all attribs specified */ +#endif + for (uint a_idx = 0; a_idx < imm.vertex_format.attr_len; ++a_idx) { + if ((imm.unassigned_attrib_bits >> a_idx) & 1) { + const GPUVertAttr *a = imm.vertex_format.attribs + a_idx; + +/* printf("copying %s from vertex %u to %u\n", a->name, imm.vertex_idx - 1, imm.vertex_idx); */ + + GLubyte *data = imm.vertex_data + a->offset; + memcpy(data, data - imm.vertex_format.stride, a->sz); + /* TODO: consolidate copy of adjacent attributes */ + } + } + } + + imm.vertex_idx++; + imm.vertex_data += imm.vertex_format.stride; + imm.unassigned_attrib_bits = imm.attrib_binding.enabled_bits; +} + +void immVertex2f(uint attrib_id, float x, float y) +{ + immAttrib2f(attrib_id, x, y); + immEndVertex(); +} + +void immVertex3f(uint attrib_id, float x, float y, float z) +{ + immAttrib3f(attrib_id, x, y, z); + immEndVertex(); +} + +void immVertex4f(uint attrib_id, float x, float y, float z, float w) +{ + immAttrib4f(attrib_id, x, y, z, w); + immEndVertex(); +} + +void immVertex2i(uint attrib_id, int x, int y) +{ + immAttrib2i(attrib_id, x, y); + immEndVertex(); +} + +void immVertex2s(uint attrib_id, short x, short y) +{ + immAttrib2s(attrib_id, x, y); + immEndVertex(); +} + +void immVertex2fv(uint attrib_id, const float data[2]) +{ + immAttrib2f(attrib_id, data[0], data[1]); + immEndVertex(); +} + +void immVertex3fv(uint attrib_id, const float data[3]) +{ + immAttrib3f(attrib_id, data[0], data[1], data[2]); + immEndVertex(); +} + +void immVertex2iv(uint attrib_id, const int data[2]) +{ + immAttrib2i(attrib_id, data[0], data[1]); + immEndVertex(); +} + + +/* --- generic uniform functions --- */ + +#if 0 +# if TRUST_NO_ONE +# define GET_UNIFORM 
const GPUShaderInput* uniform = GPU_shaderinterface_uniform(imm.shader_interface, name); assert(uniform); +# else +# define GET_UNIFORM const GPUShaderInput* uniform = GPU_shaderinterface_uniform(imm.shader_interface, name); +# endif +#else + /* NOTE: It is possible to have uniform fully optimized out from the shader. + * In this case we can't assert failure or allow NULL-pointer dereference. + * TODO(sergey): How can we detect existing-but-optimized-out uniform but still + * catch typos in uniform names passed to immUniform*() functions? */ +# define GET_UNIFORM const GPUShaderInput* uniform = GPU_shaderinterface_uniform(imm.shader_interface, name); if (uniform == NULL) return; +#endif + +void immUniform1f(const char *name, float x) +{ + GET_UNIFORM + glUniform1f(uniform->location, x); +} + +void immUniform2f(const char *name, float x, float y) +{ + GET_UNIFORM + glUniform2f(uniform->location, x, y); +} + +void immUniform2fv(const char *name, const float data[2]) +{ + GET_UNIFORM + glUniform2fv(uniform->location, 1, data); +} + +void immUniform3f(const char *name, float x, float y, float z) +{ + GET_UNIFORM + glUniform3f(uniform->location, x, y, z); +} + +void immUniform3fv(const char *name, const float data[3]) +{ + GET_UNIFORM + glUniform3fv(uniform->location, 1, data); +} + +/* can increase this limit or move to another file */ +#define MAX_UNIFORM_NAME_LEN 60 + +void immUniformArray3fv(const char *bare_name, const float *data, int count) +{ + /* look up "name[0]" when given "name" */ + const size_t len = strlen(bare_name); +#if TRUST_NO_ONE + assert(len <= MAX_UNIFORM_NAME_LEN); +#endif + char name[MAX_UNIFORM_NAME_LEN]; + strcpy(name, bare_name); + name[len + 0] = '['; + name[len + 1] = '0'; + name[len + 2] = ']'; + name[len + 3] = '\0'; + + GET_UNIFORM + glUniform3fv(uniform->location, count, data); +} + +void immUniform4f(const char *name, float x, float y, float z, float w) +{ + GET_UNIFORM + glUniform4f(uniform->location, x, y, z, w); +} + +void immUniform4fv(const char *name, const float data[4]) +{ + GET_UNIFORM + glUniform4fv(uniform->location, 1, data); +} + +void immUniformArray4fv(const char *bare_name, const float *data, int count) +{ + /* look up "name[0]" when given "name" */ + const size_t len = strlen(bare_name); +#if TRUST_NO_ONE + assert(len <= MAX_UNIFORM_NAME_LEN); +#endif + char name[MAX_UNIFORM_NAME_LEN]; + strcpy(name, bare_name); + name[len + 0] = '['; + name[len + 1] = '0'; + name[len + 2] = ']'; + name[len + 3] = '\0'; + + GET_UNIFORM + glUniform4fv(uniform->location, count, data); +} + +void immUniformMatrix4fv(const char *name, const float data[4][4]) +{ + GET_UNIFORM + glUniformMatrix4fv(uniform->location, 1, GL_FALSE, (float *)data); +} + +void immUniform1i(const char *name, int x) +{ + GET_UNIFORM + glUniform1i(uniform->location, x); +} + +void immUniform4iv(const char *name, const int data[4]) +{ + GET_UNIFORM + glUniform4iv(uniform->location, 1, data); +} + +/* --- convenience functions for setting "uniform vec4 color" --- */ + +void immUniformColor4f(float r, float g, float b, float a) +{ + const GPUShaderInput *uniform = GPU_shaderinterface_uniform_builtin(imm.shader_interface, GPU_UNIFORM_COLOR); +#if TRUST_NO_ONE + assert(uniform != NULL); +#endif + glUniform4f(uniform->location, r, g, b, a); +} + +void immUniformColor4fv(const float rgba[4]) +{ + immUniformColor4f(rgba[0], rgba[1], rgba[2], rgba[3]); +} + +void immUniformColor3f(float r, float g, float b) +{ + immUniformColor4f(r, g, b, 1.0f); +} + +void immUniformColor3fv(const float rgb[3]) +{ + 
immUniformColor4f(rgb[0], rgb[1], rgb[2], 1.0f); +} + +void immUniformColor3fvAlpha(const float rgb[3], float a) +{ + immUniformColor4f(rgb[0], rgb[1], rgb[2], a); +} + +/* TODO: v-- treat as sRGB? --v */ + +void immUniformColor3ub(unsigned char r, unsigned char g, unsigned char b) +{ + const float scale = 1.0f / 255.0f; + immUniformColor4f(scale * r, scale * g, scale * b, 1.0f); +} + +void immUniformColor4ub(unsigned char r, unsigned char g, unsigned char b, unsigned char a) +{ + const float scale = 1.0f / 255.0f; + immUniformColor4f(scale * r, scale * g, scale * b, scale * a); +} + +void immUniformColor3ubv(const unsigned char rgb[3]) +{ + immUniformColor3ub(rgb[0], rgb[1], rgb[2]); +} + +void immUniformColor3ubvAlpha(const unsigned char rgb[3], unsigned char alpha) +{ + immUniformColor4ub(rgb[0], rgb[1], rgb[2], alpha); +} + +void immUniformColor4ubv(const unsigned char rgba[4]) +{ + immUniformColor4ub(rgba[0], rgba[1], rgba[2], rgba[3]); +} + void immUniformThemeColor(int color_id) { float color[4]; diff --git a/source/blender/gpu/intern/gpu_immediate_util.c b/source/blender/gpu/intern/gpu_immediate_util.c index 30672af9c02..8384ef3b5d0 100644 --- a/source/blender/gpu/intern/gpu_immediate_util.c +++ b/source/blender/gpu/intern/gpu_immediate_util.c @@ -18,8 +18,10 @@ * ***** END GPL LICENSE BLOCK ***** */ -/** \file source/blender/gpu/intern/gpu_immediate_util.c +/** \file blender/gpu/intern/gpu_immediate_util.c * \ingroup gpu + * + * GPU immediate mode drawing utilities */ #include <stdio.h> @@ -66,6 +68,72 @@ static const int cube_line_index[12][2] = { {6, 7}, }; +void immRectf(uint pos, float x1, float y1, float x2, float y2) +{ + immBegin(GPU_PRIM_TRI_FAN, 4); + immVertex2f(pos, x1, y1); + immVertex2f(pos, x2, y1); + immVertex2f(pos, x2, y2); + immVertex2f(pos, x1, y2); + immEnd(); +} + +void immRecti(uint pos, int x1, int y1, int x2, int y2) +{ + immBegin(GPU_PRIM_TRI_FAN, 4); + immVertex2i(pos, x1, y1); + immVertex2i(pos, x2, y1); + immVertex2i(pos, x2, y2); + immVertex2i(pos, x1, y2); + immEnd(); +} + +void immRectf_fast_with_color(uint pos, uint col, float x1, float y1, float x2, float y2, const float color[4]) +{ + immAttrib4fv(col, color); + immVertex2f(pos, x1, y1); + immAttrib4fv(col, color); + immVertex2f(pos, x2, y1); + immAttrib4fv(col, color); + immVertex2f(pos, x2, y2); + + immAttrib4fv(col, color); + immVertex2f(pos, x1, y1); + immAttrib4fv(col, color); + immVertex2f(pos, x2, y2); + immAttrib4fv(col, color); + immVertex2f(pos, x1, y2); +} + +void immRecti_fast_with_color(uint pos, uint col, int x1, int y1, int x2, int y2, const float color[4]) +{ + immAttrib4fv(col, color); + immVertex2i(pos, x1, y1); + immAttrib4fv(col, color); + immVertex2i(pos, x2, y1); + immAttrib4fv(col, color); + immVertex2i(pos, x2, y2); + + immAttrib4fv(col, color); + immVertex2i(pos, x1, y1); + immAttrib4fv(col, color); + immVertex2i(pos, x2, y2); + immAttrib4fv(col, color); + immVertex2i(pos, x1, y2); +} + +#if 0 /* more complete version in case we want that */ +void immRecti_complete(int x1, int y1, int x2, int y2, const float color[4]) +{ + GPUVertFormat *format = immVertexFormat(); + uint pos = add_attrib(format, "pos", GPU_COMP_I32, 2, GPU_FETCH_INT_TO_FLOAT); + immBindBuiltinProgram(GPU_SHADER_2D_UNIFORM_COLOR); + immUniformColor4fv(color); + immRecti(pos, x1, y1, x2, y2); + immUnbindProgram(); +} +#endif + /** * Pack color into 3 bytes * @@ -85,7 +153,7 @@ void imm_cpack(unsigned int x) } static void imm_draw_circle( - Gwn_PrimType prim_type, const uint shdr_pos, float x, float y, float 
rad_x, float rad_y, int nsegments) + GPUPrimType prim_type, const uint shdr_pos, float x, float y, float rad_x, float rad_y, int nsegments) { immBegin(prim_type, nsegments); for (int i = 0; i < nsegments; ++i) { @@ -107,7 +175,7 @@ static void imm_draw_circle( */ void imm_draw_circle_wire_2d(uint shdr_pos, float x, float y, float rad, int nsegments) { - imm_draw_circle(GWN_PRIM_LINE_LOOP, shdr_pos, x, y, rad, rad, nsegments); + imm_draw_circle(GPU_PRIM_LINE_LOOP, shdr_pos, x, y, rad, rad, nsegments); } /** @@ -122,23 +190,23 @@ void imm_draw_circle_wire_2d(uint shdr_pos, float x, float y, float rad, int nse */ void imm_draw_circle_fill_2d(uint shdr_pos, float x, float y, float rad, int nsegments) { - imm_draw_circle(GWN_PRIM_TRI_FAN, shdr_pos, x, y, rad, rad, nsegments); + imm_draw_circle(GPU_PRIM_TRI_FAN, shdr_pos, x, y, rad, rad, nsegments); } void imm_draw_circle_wire_aspect_2d(uint shdr_pos, float x, float y, float rad_x, float rad_y, int nsegments) { - imm_draw_circle(GWN_PRIM_LINE_LOOP, shdr_pos, x, y, rad_x, rad_y, nsegments); + imm_draw_circle(GPU_PRIM_LINE_LOOP, shdr_pos, x, y, rad_x, rad_y, nsegments); } void imm_draw_circle_fill_aspect_2d(uint shdr_pos, float x, float y, float rad_x, float rad_y, int nsegments) { - imm_draw_circle(GWN_PRIM_TRI_FAN, shdr_pos, x, y, rad_x, rad_y, nsegments); + imm_draw_circle(GPU_PRIM_TRI_FAN, shdr_pos, x, y, rad_x, rad_y, nsegments); } /** * \note We could have `imm_draw_lined_disk_partial` but currently there is no need. */ static void imm_draw_disk_partial( - Gwn_PrimType prim_type, unsigned pos, float x, float y, + GPUPrimType prim_type, unsigned pos, float x, float y, float rad_inner, float rad_outer, int nsegments, float start, float sweep) { /* shift & reverse angle, increase 'nsegments' to match gluPartialDisk */ @@ -175,11 +243,11 @@ void imm_draw_disk_partial_fill_2d( unsigned pos, float x, float y, float rad_inner, float rad_outer, int nsegments, float start, float sweep) { - imm_draw_disk_partial(GWN_PRIM_TRI_STRIP, pos, x, y, rad_inner, rad_outer, nsegments, start, sweep); + imm_draw_disk_partial(GPU_PRIM_TRI_STRIP, pos, x, y, rad_inner, rad_outer, nsegments, start, sweep); } static void imm_draw_circle_3D( - Gwn_PrimType prim_type, unsigned pos, float x, float y, + GPUPrimType prim_type, unsigned pos, float x, float y, float rad, int nsegments) { immBegin(prim_type, nsegments); @@ -192,26 +260,26 @@ static void imm_draw_circle_3D( void imm_draw_circle_wire_3d(unsigned pos, float x, float y, float rad, int nsegments) { - imm_draw_circle_3D(GWN_PRIM_LINE_LOOP, pos, x, y, rad, nsegments); + imm_draw_circle_3D(GPU_PRIM_LINE_LOOP, pos, x, y, rad, nsegments); } void imm_draw_circle_fill_3d(unsigned pos, float x, float y, float rad, int nsegments) { - imm_draw_circle_3D(GWN_PRIM_TRI_FAN, pos, x, y, rad, nsegments); + imm_draw_circle_3D(GPU_PRIM_TRI_FAN, pos, x, y, rad, nsegments); } /** -* Draw a lined box. -* -* \param pos The vertex attribute number for position. -* \param x1 left. -* \param y1 bottom. -* \param x2 right. -* \param y2 top. -*/ + * Draw a lined box. + * + * \param pos The vertex attribute number for position. + * \param x1 left. + * \param y1 bottom. + * \param x2 right. + * \param y2 top. 
+ */ void imm_draw_box_wire_2d(unsigned pos, float x1, float y1, float x2, float y2) { - immBegin(GWN_PRIM_LINE_LOOP, 4); + immBegin(GPU_PRIM_LINE_LOOP, 4); immVertex2f(pos, x1, y1); immVertex2f(pos, x1, y2); immVertex2f(pos, x2, y2); @@ -221,8 +289,8 @@ void imm_draw_box_wire_2d(unsigned pos, float x1, float y1, float x2, float y2) void imm_draw_box_wire_3d(unsigned pos, float x1, float y1, float x2, float y2) { - /* use this version when Gwn_VertFormat has a vec3 position */ - immBegin(GWN_PRIM_LINE_LOOP, 4); + /* use this version when GPUVertFormat has a vec3 position */ + immBegin(GPU_PRIM_LINE_LOOP, 4); immVertex3f(pos, x1, y1, 0.0f); immVertex3f(pos, x1, y2, 0.0f); immVertex3f(pos, x2, y2, 0.0f); @@ -235,7 +303,7 @@ void imm_draw_box_wire_3d(unsigned pos, float x1, float y1, float x2, float y2) */ void imm_draw_box_checker_2d(float x1, float y1, float x2, float y2) { - uint pos = GWN_vertformat_attr_add(immVertexFormat(), "pos", GWN_COMP_F32, 2, GWN_FETCH_FLOAT); + uint pos = GPU_vertformat_attr_add(immVertexFormat(), "pos", GPU_COMP_F32, 2, GPU_FETCH_FLOAT); immBindBuiltinProgram(GPU_SHADER_2D_CHECKER); immUniform4f("color1", 0.15f, 0.15f, 0.15f, 1.0f); @@ -255,7 +323,7 @@ void imm_draw_cube_fill_3d(uint pos, const float co[3], const float aspect[3]) madd_v3_v3v3v3(coords[i], co, cube_coords[i], aspect); } - immBegin(GWN_PRIM_TRIS, ARRAY_SIZE(cube_quad_index) * 3 * 2); + immBegin(GPU_PRIM_TRIS, ARRAY_SIZE(cube_quad_index) * 3 * 2); for (int i = 0; i < ARRAY_SIZE(cube_quad_index); i++) { immVertex3fv(pos, coords[cube_quad_index[i][0]]); immVertex3fv(pos, coords[cube_quad_index[i][1]]); @@ -276,7 +344,7 @@ void imm_draw_cube_wire_3d(uint pos, const float co[3], const float aspect[3]) madd_v3_v3v3v3(coords[i], co, cube_coords[i], aspect); } - immBegin(GWN_PRIM_LINES, ARRAY_SIZE(cube_line_index) * 2); + immBegin(GPU_PRIM_LINES, ARRAY_SIZE(cube_line_index) * 2); for (int i = 0; i < ARRAY_SIZE(cube_line_index); i++) { immVertex3fv(pos, coords[cube_line_index[i][0]]); immVertex3fv(pos, coords[cube_line_index[i][1]]); @@ -285,21 +353,21 @@ void imm_draw_cube_wire_3d(uint pos, const float co[3], const float aspect[3]) } /** -* Draw a cylinder. Replacement for gluCylinder. -* _warning_ : Slow, better use it only if you no other choices. -* -* \param pos The vertex attribute number for position. -* \param nor The vertex attribute number for normal. -* \param base Specifies the radius of the cylinder at z = 0. -* \param top Specifies the radius of the cylinder at z = height. -* \param height Specifies the height of the cylinder. -* \param slices Specifies the number of subdivisions around the z axis. -* \param stacks Specifies the number of subdivisions along the z axis. -*/ + * Draw a cylinder. Replacement for gluCylinder. + * _warning_ : Slow, better use it only if you no other choices. + * + * \param pos The vertex attribute number for position. + * \param nor The vertex attribute number for normal. + * \param base Specifies the radius of the cylinder at z = 0. + * \param top Specifies the radius of the cylinder at z = height. + * \param height Specifies the height of the cylinder. + * \param slices Specifies the number of subdivisions around the z axis. + * \param stacks Specifies the number of subdivisions along the z axis. 
+ */ void imm_draw_cylinder_fill_normal_3d( unsigned int pos, unsigned int nor, float base, float top, float height, int slices, int stacks) { - immBegin(GWN_PRIM_TRIS, 6 * slices * stacks); + immBegin(GPU_PRIM_TRIS, 6 * slices * stacks); for (int i = 0; i < slices; ++i) { const float angle1 = (float)(2 * M_PI) * ((float)i / (float)slices); const float angle2 = (float)(2 * M_PI) * ((float)(i + 1) / (float)slices); @@ -316,10 +384,10 @@ void imm_draw_cylinder_fill_normal_3d( float h1 = height * ((float)j / (float)stacks); float h2 = height * ((float)(j + 1) / (float)stacks); - float v1[3] = {r1 *cos2, r1 * sin2, h1}; - float v2[3] = {r2 *cos2, r2 * sin2, h2}; - float v3[3] = {r2 *cos1, r2 * sin1, h2}; - float v4[3] = {r1 *cos1, r1 * sin1, h1}; + float v1[3] = {r1 * cos2, r1 * sin2, h1}; + float v2[3] = {r2 * cos2, r2 * sin2, h2}; + float v3[3] = {r2 * cos1, r2 * sin1, h2}; + float v4[3] = {r1 * cos1, r1 * sin1, h1}; float n1[3], n2[3]; /* calc normals */ @@ -350,7 +418,7 @@ void imm_draw_cylinder_fill_normal_3d( void imm_draw_cylinder_wire_3d(unsigned int pos, float base, float top, float height, int slices, int stacks) { - immBegin(GWN_PRIM_LINES, 6 * slices * stacks); + immBegin(GPU_PRIM_LINES, 6 * slices * stacks); for (int i = 0; i < slices; ++i) { const float angle1 = (float)(2 * M_PI) * ((float)i / (float)slices); const float angle2 = (float)(2 * M_PI) * ((float)(i + 1) / (float)slices); @@ -387,7 +455,7 @@ void imm_draw_cylinder_wire_3d(unsigned int pos, float base, float top, float he void imm_draw_cylinder_fill_3d(unsigned int pos, float base, float top, float height, int slices, int stacks) { - immBegin(GWN_PRIM_TRIS, 6 * slices * stacks); + immBegin(GPU_PRIM_TRIS, 6 * slices * stacks); for (int i = 0; i < slices; ++i) { const float angle1 = (float)(2 * M_PI) * ((float)i / (float)slices); const float angle2 = (float)(2 * M_PI) * ((float)(i + 1) / (float)slices); diff --git a/source/blender/gpu/intern/gpu_init_exit.c b/source/blender/gpu/intern/gpu_init_exit.c index 78d4f491b66..55d0466c929 100644 --- a/source/blender/gpu/intern/gpu_init_exit.c +++ b/source/blender/gpu/intern/gpu_init_exit.c @@ -57,8 +57,6 @@ void GPU_init(void) gpu_extensions_init(); /* must come first */ - GPU_texture_orphans_init(); - GPU_material_orphans_init(); gpu_codegen_init(); gpu_framebuffer_module_init(); @@ -84,9 +82,6 @@ void GPU_exit(void) gpu_batch_exit(); - GPU_texture_orphans_exit(); - GPU_material_orphans_exit(); - if (G.debug & G_DEBUG_GPU) gpu_debug_exit(); diff --git a/source/blender/gpu/intern/gpu_material.c b/source/blender/gpu/intern/gpu_material.c index bd0e35f5ab6..b03df2c643c 100644 --- a/source/blender/gpu/intern/gpu_material.c +++ b/source/blender/gpu/intern/gpu_material.c @@ -36,32 +36,19 @@ #include "MEM_guardedalloc.h" -#include "DNA_lamp_types.h" #include "DNA_material_types.h" -#include "DNA_object_types.h" #include "DNA_scene_types.h" #include "DNA_world_types.h" #include "BLI_math.h" -#include "BLI_blenlib.h" +#include "BLI_listbase.h" #include "BLI_utildefines.h" -#include "BLI_rand.h" -#include "BLI_threads.h" - -#include "BKE_anim.h" -#include "BKE_colorband.h" -#include "BKE_colortools.h" -#include "BKE_global.h" -#include "BKE_image.h" -#include "BKE_layer.h" +#include "BLI_string.h" + #include "BKE_main.h" #include "BKE_node.h" #include "BKE_scene.h" -#include "IMB_imbuf_types.h" - -#include "GPU_extensions.h" -#include "GPU_framebuffer.h" #include "GPU_material.h" #include "GPU_shader.h" #include "GPU_texture.h" @@ -75,10 +62,13 @@ # include "BKE_DerivedMesh.h" #endif 
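To see how the immediate-mode pieces in the gpu_immediate.c and gpu_immediate_util.c hunks fit together, a minimal caller would look roughly like the sketch below. This is illustrative only and not part of the patch; it is assembled from functions the patch itself adds (immVertexFormat, GPU_vertformat_attr_add, immBindBuiltinProgram, immUniformColor4fv, immRectf, immUnbindProgram), and the pairing with GPU_SHADER_2D_UNIFORM_COLOR follows the commented-out immRecti_complete example above, so treat that choice as an assumption about typical usage.

/* hypothetical caller -- illustrative sketch, not part of this patch */
static void draw_solid_rect_example(float x1, float y1, float x2, float y2, const float color[4])
{
	/* declare the vertex layout: a single 2-component float position attribute */
	GPUVertFormat *format = immVertexFormat();
	uint pos = GPU_vertformat_attr_add(format, "pos", GPU_COMP_F32, 2, GPU_FETCH_FLOAT);

	/* binding packs the format and resolves attribute locations against the shader interface */
	immBindBuiltinProgram(GPU_SHADER_2D_UNIFORM_COLOR);
	immUniformColor4fv(color);

	/* immRectf() expands to immBegin(GPU_PRIM_TRI_FAN, 4), four immVertex2f() calls and immEnd() */
	immRectf(pos, x1, y1, x2, y2);

	immUnbindProgram();
}

Internally, immBegin() maps a slice of a single shared VBO of IMM_BUFFER_SIZE (4 MiB), padding the offset to the vertex stride and orphaning the buffer when it runs out of room, so callers never allocate or track vertex buffers themselves.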
-static ListBase g_orphaned_mat = {NULL, NULL}; -static ThreadMutex g_orphan_lock; - /* Structs */ +#define MAX_COLOR_BAND 128 + +typedef struct GPUColorBandBuilder { + float pixels[MAX_COLOR_BAND][CM_TABLE + 1][4]; + int current_layer; +} GPUColorBandBuilder; struct GPUMaterial { Scene *scene; /* DEPRECATED was only usefull for lamps */ @@ -125,6 +115,9 @@ struct GPUMaterial { */ int domain; + /* Only used by Eevee to know which bsdf are used. */ + int flag; + /* Used by 2.8 pipeline */ GPUUniformBuffer *ubo; /* UBOs for shader uniforms. */ @@ -137,6 +130,13 @@ struct GPUMaterial { short int sss_falloff; float sss_sharpness; bool sss_dirty; + + GPUTexture *coba_tex; /* 1D Texture array containing all color bands. */ + GPUColorBandBuilder *coba_builder; + +#ifndef NDEBUG + char name[64]; +#endif }; enum { @@ -147,6 +147,47 @@ enum { /* Functions */ +/* Returns the adress of the future pointer to coba_tex */ +GPUTexture **gpu_material_ramp_texture_row_set(GPUMaterial *mat, int size, float *pixels, float *row) +{ + /* In order to put all the colorbands into one 1D array texture, + * we need them to be the same size. */ + BLI_assert(size == CM_TABLE + 1); + + if (mat->coba_builder == NULL) { + mat->coba_builder = MEM_mallocN(sizeof(GPUColorBandBuilder), "GPUColorBandBuilder"); + mat->coba_builder->current_layer = 0; + } + + int layer = mat->coba_builder->current_layer; + *row = (float)layer; + + if (*row == MAX_COLOR_BAND) { + printf("Too many color band in shader! Remove some Curve, Black Body or Color Ramp Node.\n"); + } + else { + float *dst = (float *)mat->coba_builder->pixels[layer]; + memcpy(dst, pixels, sizeof(float) * (CM_TABLE + 1) * 4); + mat->coba_builder->current_layer += 1; + } + + return &mat->coba_tex; +} + +static void gpu_material_ramp_texture_build(GPUMaterial *mat) +{ + if (mat->coba_builder == NULL) + return; + + GPUColorBandBuilder *builder = mat->coba_builder; + + mat->coba_tex = GPU_texture_create_1D_array(CM_TABLE + 1, builder->current_layer, GPU_RGBA16F, + (float *)builder->pixels, NULL); + + MEM_freeN(builder); + mat->coba_builder = NULL; +} + static void gpu_material_free_single(GPUMaterial *material) { /* Cancel / wait any pending lazy compilation. */ @@ -155,64 +196,33 @@ static void gpu_material_free_single(GPUMaterial *material) GPU_pass_free_nodes(&material->nodes); GPU_inputs_free(&material->inputs); - if (material->pass) + if (material->pass != NULL) { GPU_pass_release(material->pass); - + } if (material->ubo != NULL) { GPU_uniformbuffer_free(material->ubo); } - if (material->sss_tex_profile != NULL) { GPU_texture_free(material->sss_tex_profile); } - if (material->sss_profile != NULL) { GPU_uniformbuffer_free(material->sss_profile); } + if (material->coba_tex != NULL) { + GPU_texture_free(material->coba_tex); + } } void GPU_material_free(ListBase *gpumaterial) { for (LinkData *link = gpumaterial->first; link; link = link->next) { GPUMaterial *material = link->data; - - /* TODO(fclem): Check if the thread has an ogl context. 
*/ - if (BLI_thread_is_main()) { - gpu_material_free_single(material); - MEM_freeN(material); - } - else { - BLI_mutex_lock(&g_orphan_lock); - BLI_addtail(&g_orphaned_mat, BLI_genericNodeN(material)); - BLI_mutex_unlock(&g_orphan_lock); - } + gpu_material_free_single(material); + MEM_freeN(material); } BLI_freelistN(gpumaterial); } -void GPU_material_orphans_init(void) -{ - BLI_mutex_init(&g_orphan_lock); -} - -void GPU_material_orphans_delete(void) -{ - BLI_mutex_lock(&g_orphan_lock); - LinkData *link; - while ((link = BLI_pophead(&g_orphaned_mat))) { - gpu_material_free_single((GPUMaterial *)link->data); - MEM_freeN(link->data); - MEM_freeN(link); - } - BLI_mutex_unlock(&g_orphan_lock); -} - -void GPU_material_orphans_exit(void) -{ - GPU_material_orphans_delete(); - BLI_mutex_end(&g_orphan_lock); -} - GPUBuiltin GPU_get_material_builtins(GPUMaterial *material) { return material->builtins; @@ -608,6 +618,16 @@ bool GPU_material_use_domain_volume(GPUMaterial *mat) return (mat->domain & GPU_DOMAIN_VOLUME); } +void GPU_material_flag_set(GPUMaterial *mat, GPUMatFlag flag) +{ + mat->flag |= flag; +} + +bool GPU_material_flag_get(GPUMaterial *mat, GPUMatFlag flag) +{ + return (mat->flag & flag); +} + GPUMaterial *GPU_material_from_nodetree_find( ListBase *gpumaterials, const void *engine_type, int options) { @@ -630,7 +650,7 @@ GPUMaterial *GPU_material_from_nodetree_find( */ GPUMaterial *GPU_material_from_nodetree( Scene *scene, struct bNodeTree *ntree, ListBase *gpumaterials, const void *engine_type, int options, - const char *vert_code, const char *geom_code, const char *frag_lib, const char *defines) + const char *vert_code, const char *geom_code, const char *frag_lib, const char *defines, const char *name) { LinkData *link; bool has_volume_output, has_surface_output; @@ -643,8 +663,17 @@ GPUMaterial *GPU_material_from_nodetree( mat->scene = scene; mat->engine_type = engine_type; mat->options = options; +#ifndef NDEBUG + BLI_snprintf(mat->name, sizeof(mat->name), "%s", name); +#else + UNUSED_VARS(name); +#endif + + /* localize tree to create links for reroute and mute */ + bNodeTree *localtree = ntreeLocalize(ntree); + ntreeGPUMaterialNodes(localtree, mat, &has_surface_output, &has_volume_output); - ntreeGPUMaterialNodes(ntree, mat, &has_surface_output, &has_volume_output); + gpu_material_ramp_texture_build(mat); if (has_surface_output) { mat->domain |= GPU_DOMAIN_SURFACE; @@ -659,14 +688,15 @@ GPUMaterial *GPU_material_from_nodetree( GPU_nodes_prune(&mat->nodes, mat->outlink); GPU_nodes_get_vertex_attributes(&mat->nodes, &mat->attribs); /* Create source code and search pass cache for an already compiled version. */ - mat->pass = GPU_generate_pass_new(mat, - mat->outlink, - &mat->attribs, - &mat->nodes, - vert_code, - geom_code, - frag_lib, - defines); + mat->pass = GPU_generate_pass_new( + mat, + mat->outlink, + &mat->attribs, + &mat->nodes, + vert_code, + geom_code, + frag_lib, + defines); if (mat->pass == NULL) { /* We had a cache hit and the shader has already failed to compile. */ @@ -688,6 +718,11 @@ GPUMaterial *GPU_material_from_nodetree( mat->status = GPU_MAT_FAILED; } + /* Only free after GPU_pass_shader_get where GPUUniformBuffer + * read data from the local tree. 
*/ + ntreeFreeTree(localtree); + MEM_freeN(localtree); + /* note that even if building the shader fails in some way, we still keep * it to avoid trying to compile again and again, and simply do not use * the actual shader on drawing */ @@ -707,7 +742,12 @@ void GPU_material_compile(GPUMaterial *mat) /* NOTE: The shader may have already been compiled here since we are * sharing GPUShader across GPUMaterials. In this case it's a no-op. */ - GPU_pass_compile(mat->pass); +#ifndef NDEBUG + GPU_pass_compile(mat->pass, mat->name); +#else + GPU_pass_compile(mat->pass, __func__); +#endif + GPUShader *sh = GPU_pass_shader_get(mat->pass); if (sh != NULL) { diff --git a/source/blender/gpu/intern/gpu_matrix.c b/source/blender/gpu/intern/gpu_matrix.c index b6214f2778b..0fa4a158c00 100644 --- a/source/blender/gpu/intern/gpu_matrix.c +++ b/source/blender/gpu/intern/gpu_matrix.c @@ -29,7 +29,7 @@ * \ingroup gpu */ -#include "../../../intern/gawain/gawain/gwn_shader_interface.h" +#include "GPU_shader_interface.h" #define SUPPRESS_GENERIC_MATRIX_API #define USE_GPU_PY_MATRIX_API /* only so values are declared */ @@ -86,7 +86,7 @@ static MatrixState state = { #define ProjectionStack state.projection_stack #define Projection ProjectionStack.stack[ProjectionStack.top] -void gpuMatrixReset(void) +void GPU_matrix_reset(void) { state.model_view_stack.top = 0; state.projection_stack.top = 0; @@ -110,7 +110,7 @@ static void checkmat(cosnt float *m) } } -#define CHECKMAT(m) checkmat((const float*)m) +#define CHECKMAT(m) checkmat((const float *)m) #else @@ -119,76 +119,76 @@ static void checkmat(cosnt float *m) #endif -void gpuPushMatrix(void) +void GPU_matrix_push(void) { BLI_assert(ModelViewStack.top + 1 < MATRIX_STACK_DEPTH); ModelViewStack.top++; copy_m4_m4(ModelView, ModelViewStack.stack[ModelViewStack.top - 1]); } -void gpuPopMatrix(void) +void GPU_matrix_pop(void) { BLI_assert(ModelViewStack.top > 0); ModelViewStack.top--; state.dirty = true; } -void gpuPushProjectionMatrix(void) +void GPU_matrix_push_projection(void) { BLI_assert(ProjectionStack.top + 1 < MATRIX_STACK_DEPTH); ProjectionStack.top++; copy_m4_m4(Projection, ProjectionStack.stack[ProjectionStack.top - 1]); } -void gpuPopProjectionMatrix(void) +void GPU_matrix_pop_projection(void) { BLI_assert(ProjectionStack.top > 0); ProjectionStack.top--; state.dirty = true; } -void gpuLoadMatrix(const float m[4][4]) +void GPU_matrix_set(const float m[4][4]) { copy_m4_m4(ModelView, m); CHECKMAT(ModelView3D); state.dirty = true; } -void gpuLoadIdentityProjectionMatrix(void) +void GPU_matrix_identity_projection_set(void) { unit_m4(Projection); CHECKMAT(Projection3D); state.dirty = true; } -void gpuLoadProjectionMatrix(const float m[4][4]) +void GPU_matrix_projection_set(const float m[4][4]) { copy_m4_m4(Projection, m); CHECKMAT(Projection3D); state.dirty = true; } -void gpuLoadIdentity(void) +void GPU_matrix_identity_set(void) { unit_m4(ModelView); state.dirty = true; } -void gpuTranslate2f(float x, float y) +void GPU_matrix_translate_2f(float x, float y) { Mat4 m; unit_m4(m); m[3][0] = x; m[3][1] = y; - gpuMultMatrix(m); + GPU_matrix_mul(m); } -void gpuTranslate2fv(const float vec[2]) +void GPU_matrix_translate_2fv(const float vec[2]) { - gpuTranslate2f(vec[0], vec[1]); + GPU_matrix_translate_2f(vec[0], vec[1]); } -void gpuTranslate3f(float x, float y, float z) +void GPU_matrix_translate_3f(float x, float y, float z) { #if 1 translate_m4(ModelView, x, y, z); @@ -199,61 +199,61 @@ void gpuTranslate3f(float x, float y, float z) m[3][0] = x; m[3][1] = y; m[3][2] = 
z; - gpuMultMatrix(m); + GPU_matrix_mul(m); #endif state.dirty = true; } -void gpuTranslate3fv(const float vec[3]) +void GPU_matrix_translate_3fv(const float vec[3]) { - gpuTranslate3f(vec[0], vec[1], vec[2]); + GPU_matrix_translate_3f(vec[0], vec[1], vec[2]); } -void gpuScaleUniform(float factor) +void GPU_matrix_scale_1f(float factor) { Mat4 m; scale_m4_fl(m, factor); - gpuMultMatrix(m); + GPU_matrix_mul(m); } -void gpuScale2f(float x, float y) +void GPU_matrix_scale_2f(float x, float y) { Mat4 m = {{0.0f}}; m[0][0] = x; m[1][1] = y; m[2][2] = 1.0f; m[3][3] = 1.0f; - gpuMultMatrix(m); + GPU_matrix_mul(m); } -void gpuScale2fv(const float vec[2]) +void GPU_matrix_scale_2fv(const float vec[2]) { - gpuScale2f(vec[0], vec[1]); + GPU_matrix_scale_2f(vec[0], vec[1]); } -void gpuScale3f(float x, float y, float z) +void GPU_matrix_scale_3f(float x, float y, float z) { Mat4 m = {{0.0f}}; m[0][0] = x; m[1][1] = y; m[2][2] = z; m[3][3] = 1.0f; - gpuMultMatrix(m); + GPU_matrix_mul(m); } -void gpuScale3fv(const float vec[3]) +void GPU_matrix_scale_3fv(const float vec[3]) { - gpuScale3f(vec[0], vec[1], vec[2]); + GPU_matrix_scale_3f(vec[0], vec[1], vec[2]); } -void gpuMultMatrix(const float m[4][4]) +void GPU_matrix_mul(const float m[4][4]) { mul_m4_m4_post(ModelView, m); CHECKMAT(ModelView); state.dirty = true; } -void gpuRotate2D(float deg) +void GPU_matrix_rotate_2d(float deg) { /* essentially RotateAxis('Z') * TODO: simpler math for 2D case @@ -261,20 +261,20 @@ void gpuRotate2D(float deg) rotate_m4(ModelView, 'Z', DEG2RADF(deg)); } -void gpuRotate3f(float deg, float x, float y, float z) +void GPU_matrix_rotate_3f(float deg, float x, float y, float z) { const float axis[3] = {x, y, z}; - gpuRotate3fv(deg, axis); + GPU_matrix_rotate_3fv(deg, axis); } -void gpuRotate3fv(float deg, const float axis[3]) +void GPU_matrix_rotate_3fv(float deg, const float axis[3]) { Mat4 m; axis_angle_to_mat4(m, axis, DEG2RADF(deg)); - gpuMultMatrix(m); + GPU_matrix_mul(m); } -void gpuRotateAxis(float deg, char axis) +void GPU_matrix_rotate_axis(float deg, char axis) { /* rotate_m4 works in place */ rotate_m4(ModelView, axis, DEG2RADF(deg)); @@ -398,14 +398,14 @@ static void mat4_look_from_origin(float m[4][4], float lookdir[3], float camup[3 state.dirty = true; } -void gpuOrtho(float left, float right, float bottom, float top, float near, float far) +void GPU_matrix_ortho_set(float left, float right, float bottom, float top, float near, float far) { mat4_ortho_set(Projection, left, right, bottom, top, near, far); CHECKMAT(Projection); state.dirty = true; } -void gpuOrtho2D(float left, float right, float bottom, float top) +void GPU_matrix_ortho_2d_set(float left, float right, float bottom, float top) { Mat4 m; mat4_ortho_set(m, left, right, bottom, top, -1.0f, 1.0f); @@ -413,21 +413,21 @@ void gpuOrtho2D(float left, float right, float bottom, float top) state.dirty = true; } -void gpuFrustum(float left, float right, float bottom, float top, float near, float far) +void GPU_matrix_frustum_set(float left, float right, float bottom, float top, float near, float far) { mat4_frustum_set(Projection, left, right, bottom, top, near, far); CHECKMAT(Projection); state.dirty = true; } -void gpuPerspective(float fovy, float aspect, float near, float far) +void GPU_matrix_perspective_set(float fovy, float aspect, float near, float far) { float half_height = tanf(fovy * (float)(M_PI / 360.0)) * near; float half_width = half_height * aspect; - gpuFrustum(-half_width, +half_width, -half_height, +half_height, near, far); + 
GPU_matrix_frustum_set(-half_width, +half_width, -half_height, +half_height, near, far); } -void gpuLookAt(float eyeX, float eyeY, float eyeZ, float centerX, float centerY, float centerZ, float upX, float upY, float upZ) +void GPU_matrix_look_at(float eyeX, float eyeY, float eyeZ, float centerX, float centerY, float centerZ, float upX, float upY, float upZ) { Mat4 cm; float lookdir[3]; @@ -439,11 +439,11 @@ void gpuLookAt(float eyeX, float eyeY, float eyeZ, float centerX, float centerY, mat4_look_from_origin(cm, lookdir, camup); - gpuMultMatrix(cm); - gpuTranslate3f(-eyeX, -eyeY, -eyeZ); + GPU_matrix_mul(cm); + GPU_matrix_translate_3f(-eyeX, -eyeY, -eyeZ); } -void gpuProject(const float world[3], const float model[4][4], const float proj[4][4], const int view[4], float win[3]) +void GPU_matrix_project(const float world[3], const float model[4][4], const float proj[4][4], const int view[4], float win[3]) { float v[4]; @@ -459,7 +459,7 @@ void gpuProject(const float world[3], const float model[4][4], const float proj[ win[2] = (v[2] + 1) * 0.5f; } -bool gpuUnProject(const float win[3], const float model[4][4], const float proj[4][4], const int view[4], float world[3]) +bool GPU_matrix_unproject(const float win[3], const float model[4][4], const float proj[4][4], const int view[4], float world[3]) { float pm[4][4]; float in[4]; @@ -497,7 +497,7 @@ bool gpuUnProject(const float win[3], const float model[4][4], const float proj[ return true; } -const float (*gpuGetModelViewMatrix(float m[4][4]))[4] +const float (*GPU_matrix_model_view_get(float m[4][4]))[4] { if (m) { copy_m4_m4(m, ModelView); @@ -508,7 +508,7 @@ const float (*gpuGetModelViewMatrix(float m[4][4]))[4] } } -const float (*gpuGetProjectionMatrix(float m[4][4]))[4] +const float (*GPU_matrix_projection_get(float m[4][4]))[4] { if (m) { copy_m4_m4(m, Projection); @@ -519,7 +519,7 @@ const float (*gpuGetProjectionMatrix(float m[4][4]))[4] } } -const float (*gpuGetModelViewProjectionMatrix(float m[4][4]))[4] +const float (*GPU_matrix_model_view_projection_get(float m[4][4]))[4] { if (m == NULL) { static Mat4 temp; @@ -530,14 +530,14 @@ const float (*gpuGetModelViewProjectionMatrix(float m[4][4]))[4] return m; } -const float (*gpuGetNormalMatrix(float m[3][3]))[3] +const float (*GPU_matrix_normal_get(float m[3][3]))[3] { if (m == NULL) { static Mat3 temp3; m = temp3; } - copy_m3_m4(m, (const float (*)[4])gpuGetModelViewMatrix(NULL)); + copy_m3_m4(m, (const float (*)[4])GPU_matrix_model_view_get(NULL)); invert_m3(m); transpose_m3(m); @@ -545,40 +545,40 @@ const float (*gpuGetNormalMatrix(float m[3][3]))[3] return m; } -const float (*gpuGetNormalMatrixInverse(float m[3][3]))[3] +const float (*GPU_matrix_normal_inverse_get(float m[3][3]))[3] { if (m == NULL) { static Mat3 temp3; m = temp3; } - gpuGetNormalMatrix(m); + GPU_matrix_normal_get(m); invert_m3(m); return m; } -void gpuBindMatrices(const Gwn_ShaderInterface *shaderface) +void GPU_matrix_bind(const GPUShaderInterface *shaderface) { /* set uniform values to matrix stack values * call this before a draw call if desired matrices are dirty * call glUseProgram before this, as glUniform expects program to be bound */ - const Gwn_ShaderInput *MV = GWN_shaderinterface_uniform_builtin(shaderface, GWN_UNIFORM_MODELVIEW); - const Gwn_ShaderInput *P = GWN_shaderinterface_uniform_builtin(shaderface, GWN_UNIFORM_PROJECTION); - const Gwn_ShaderInput *MVP = GWN_shaderinterface_uniform_builtin(shaderface, GWN_UNIFORM_MVP); + const GPUShaderInput *MV = GPU_shaderinterface_uniform_builtin(shaderface, 
GPU_UNIFORM_MODELVIEW); + const GPUShaderInput *P = GPU_shaderinterface_uniform_builtin(shaderface, GPU_UNIFORM_PROJECTION); + const GPUShaderInput *MVP = GPU_shaderinterface_uniform_builtin(shaderface, GPU_UNIFORM_MVP); - const Gwn_ShaderInput *N = GWN_shaderinterface_uniform_builtin(shaderface, GWN_UNIFORM_NORMAL); - const Gwn_ShaderInput *MV_inv = GWN_shaderinterface_uniform_builtin(shaderface, GWN_UNIFORM_MODELVIEW_INV); - const Gwn_ShaderInput *P_inv = GWN_shaderinterface_uniform_builtin(shaderface, GWN_UNIFORM_PROJECTION_INV); + const GPUShaderInput *N = GPU_shaderinterface_uniform_builtin(shaderface, GPU_UNIFORM_NORMAL); + const GPUShaderInput *MV_inv = GPU_shaderinterface_uniform_builtin(shaderface, GPU_UNIFORM_MODELVIEW_INV); + const GPUShaderInput *P_inv = GPU_shaderinterface_uniform_builtin(shaderface, GPU_UNIFORM_PROJECTION_INV); if (MV) { #if DEBUG_MATRIX_BIND puts("setting MV matrix"); #endif - glUniformMatrix4fv(MV->location, 1, GL_FALSE, (const float *)gpuGetModelViewMatrix(NULL)); + glUniformMatrix4fv(MV->location, 1, GL_FALSE, (const float *)GPU_matrix_model_view_get(NULL)); } if (P) { @@ -586,7 +586,7 @@ void gpuBindMatrices(const Gwn_ShaderInterface *shaderface) puts("setting P matrix"); #endif - glUniformMatrix4fv(P->location, 1, GL_FALSE, (const float *)gpuGetProjectionMatrix(NULL)); + glUniformMatrix4fv(P->location, 1, GL_FALSE, (const float *)GPU_matrix_projection_get(NULL)); } if (MVP) { @@ -594,7 +594,7 @@ void gpuBindMatrices(const Gwn_ShaderInterface *shaderface) puts("setting MVP matrix"); #endif - glUniformMatrix4fv(MVP->location, 1, GL_FALSE, (const float *)gpuGetModelViewProjectionMatrix(NULL)); + glUniformMatrix4fv(MVP->location, 1, GL_FALSE, (const float *)GPU_matrix_model_view_projection_get(NULL)); } if (N) { @@ -602,19 +602,19 @@ void gpuBindMatrices(const Gwn_ShaderInterface *shaderface) puts("setting normal matrix"); #endif - glUniformMatrix3fv(N->location, 1, GL_FALSE, (const float *)gpuGetNormalMatrix(NULL)); + glUniformMatrix3fv(N->location, 1, GL_FALSE, (const float *)GPU_matrix_normal_get(NULL)); } if (MV_inv) { Mat4 m; - gpuGetModelViewMatrix(m); + GPU_matrix_model_view_get(m); invert_m4(m); glUniformMatrix4fv(MV_inv->location, 1, GL_FALSE, (const float *)m); } if (P_inv) { Mat4 m; - gpuGetProjectionMatrix(m); + GPU_matrix_projection_get(m); invert_m4(m); glUniformMatrix4fv(P_inv->location, 1, GL_FALSE, (const float *)m); } @@ -622,7 +622,7 @@ void gpuBindMatrices(const Gwn_ShaderInterface *shaderface) state.dirty = false; } -bool gpuMatricesDirty(void) +bool GPU_matrix_dirty_get(void) { return state.dirty; } diff --git a/source/blender/gpu/intern/gpu_primitive.c b/source/blender/gpu/intern/gpu_primitive.c new file mode 100644 index 00000000000..189d17f2dd2 --- /dev/null +++ b/source/blender/gpu/intern/gpu_primitive.c @@ -0,0 +1,84 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. + * + * Contributor(s): Blender Foundation + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/gpu/intern/gpu_primitive.c + * \ingroup gpu + * + * GPU geometric primitives + */ + +#include "GPU_primitive.h" +#include "gpu_primitive_private.h" + +GPUPrimClass GPU_primtype_class(GPUPrimType prim_type) +{ + static const GPUPrimClass classes[] = { + [GPU_PRIM_POINTS] = GPU_PRIM_CLASS_POINT, + [GPU_PRIM_LINES] = GPU_PRIM_CLASS_LINE, + [GPU_PRIM_LINE_STRIP] = GPU_PRIM_CLASS_LINE, + [GPU_PRIM_LINE_LOOP] = GPU_PRIM_CLASS_LINE, + [GPU_PRIM_TRIS] = GPU_PRIM_CLASS_SURFACE, + [GPU_PRIM_TRI_STRIP] = GPU_PRIM_CLASS_SURFACE, + [GPU_PRIM_TRI_FAN] = GPU_PRIM_CLASS_SURFACE, + + [GPU_PRIM_LINES_ADJ] = GPU_PRIM_CLASS_LINE, + [GPU_PRIM_LINE_STRIP_ADJ] = GPU_PRIM_CLASS_LINE, + [GPU_PRIM_TRIS_ADJ] = GPU_PRIM_CLASS_SURFACE, + + [GPU_PRIM_NONE] = GPU_PRIM_CLASS_NONE + }; + + return classes[prim_type]; +} + +bool GPU_primtype_belongs_to_class(GPUPrimType prim_type, GPUPrimClass prim_class) +{ + if (prim_class == GPU_PRIM_CLASS_NONE && prim_type == GPU_PRIM_NONE) { + return true; + } + return prim_class & GPU_primtype_class(prim_type); +} + +GLenum convert_prim_type_to_gl(GPUPrimType prim_type) +{ +#if TRUST_NO_ONE + assert(prim_type != GPU_PRIM_NONE); +#endif + static const GLenum table[] = { + [GPU_PRIM_POINTS] = GL_POINTS, + [GPU_PRIM_LINES] = GL_LINES, + [GPU_PRIM_LINE_STRIP] = GL_LINE_STRIP, + [GPU_PRIM_LINE_LOOP] = GL_LINE_LOOP, + [GPU_PRIM_TRIS] = GL_TRIANGLES, + [GPU_PRIM_TRI_STRIP] = GL_TRIANGLE_STRIP, + [GPU_PRIM_TRI_FAN] = GL_TRIANGLE_FAN, + + [GPU_PRIM_LINES_ADJ] = GL_LINES_ADJACENCY, + [GPU_PRIM_LINE_STRIP_ADJ] = GL_LINE_STRIP_ADJACENCY, + [GPU_PRIM_TRIS_ADJ] = GL_TRIANGLES_ADJACENCY, + }; + + return table[prim_type]; +} diff --git a/source/blender/gpu/intern/gpu_primitive_private.h b/source/blender/gpu/intern/gpu_primitive_private.h new file mode 100644 index 00000000000..d057f29fdc5 --- /dev/null +++ b/source/blender/gpu/intern/gpu_primitive_private.h @@ -0,0 +1,37 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. 
+ * + * Contributor(s): Blender Foundation + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/gpu/intern/gpu_primitive_private.h + * \ingroup gpu + * + * GPU geometric primitives + */ + +#ifndef __GPU_PRIMITIVE_PRIVATE_H__ +#define __GPU_PRIMITIVE_PRIVATE_H__ + +GLenum convert_prim_type_to_gl(GPUPrimType); + +#endif /* __GPU_PRIMITIVE_PRIVATE_H__ */ diff --git a/source/blender/gpu/intern/gpu_private.h b/source/blender/gpu/intern/gpu_private.h index 996ba9c63a1..df55f7922b3 100644 --- a/source/blender/gpu/intern/gpu_private.h +++ b/source/blender/gpu/intern/gpu_private.h @@ -25,6 +25,9 @@ #ifndef __GPU_PRIVATE_H__ #define __GPU_PRIVATE_H__ +struct GPUContext; +struct GPUFrameBuffer; + /* call this before running any of the functions below */ void gpu_extensions_init(void); void gpu_extensions_exit(void); diff --git a/source/blender/gpu/intern/gpu_select.c b/source/blender/gpu/intern/gpu_select.c index 7023e44d289..1c0e7ed4c1c 100644 --- a/source/blender/gpu/intern/gpu_select.c +++ b/source/blender/gpu/intern/gpu_select.c @@ -242,7 +242,7 @@ bool GPU_select_is_cached(void) const uint *GPU_select_buffer_near(const uint *buffer, int hits) { const uint *buffer_near = NULL; - uint depth_min = (uint)-1; + uint depth_min = (uint) - 1; for (int i = 0; i < hits; i++) { if (buffer[1] < depth_min) { BLI_assert(buffer[3] != -1); diff --git a/source/blender/gpu/intern/gpu_shader.c b/source/blender/gpu/intern/gpu_shader.c index 8c978be81c1..67ae1414b66 100644 --- a/source/blender/gpu/intern/gpu_shader.c +++ b/source/blender/gpu/intern/gpu_shader.c @@ -31,6 +31,7 @@ #include "BLI_math_base.h" #include "BLI_math_vector.h" #include "BLI_path_util.h" +#include "BLI_string.h" #include "BKE_appdir.h" #include "BKE_global.h" @@ -38,6 +39,7 @@ #include "DNA_space_types.h" #include "GPU_extensions.h" +#include "GPU_context.h" #include "GPU_matrix.h" #include "GPU_shader.h" #include "GPU_texture.h" @@ -161,9 +163,20 @@ extern char datatoc_gpu_shader_vsm_store_frag_glsl[]; extern char datatoc_gpu_shader_sep_gaussian_blur_vert_glsl[]; extern char datatoc_gpu_shader_sep_gaussian_blur_frag_glsl[]; +extern char datatoc_gpu_shader_gpencil_stroke_vert_glsl[]; +extern char datatoc_gpu_shader_gpencil_stroke_frag_glsl[]; +extern char datatoc_gpu_shader_gpencil_stroke_geom_glsl[]; + +extern char datatoc_gpu_shader_gpencil_fill_vert_glsl[]; +extern char datatoc_gpu_shader_gpencil_fill_frag_glsl[]; + /* cache of built-in shaders (each is created on first use) */ static GPUShader *builtin_shaders[GPU_NUM_BUILTIN_SHADERS] = { NULL }; +#ifndef NDEBUG +static uint g_shaderid = 0; +#endif + typedef struct { const char *vert; const char *frag; @@ -214,8 +227,9 @@ static void gpu_shader_standard_extensions(char defines[MAX_EXT_DEFINE_LENGTH]) } } -static void gpu_shader_standard_defines(char defines[MAX_DEFINE_LENGTH], - bool use_opensubdiv) +static void gpu_shader_standard_defines( + char defines[MAX_DEFINE_LENGTH], + bool use_opensubdiv) { /* some useful defines to detect GPU type */ if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_ANY, GPU_DRIVER_ANY)) { @@ -244,11 +258,12 @@ static void gpu_shader_standard_defines(char defines[MAX_DEFINE_LENGTH], * a global typedef which we don't have better place to define * in yet. 
*/ - strcat(defines, "struct VertexData {\n" - " vec4 position;\n" - " vec3 normal;\n" - " vec2 uv;" - "};\n"); + strcat(defines, + "struct VertexData {\n" + " vec4 position;\n" + " vec3 normal;\n" + " vec2 uv;" + "};\n"); } #else UNUSED_VARS(use_opensubdiv); @@ -257,21 +272,25 @@ static void gpu_shader_standard_defines(char defines[MAX_DEFINE_LENGTH], return; } -GPUShader *GPU_shader_create(const char *vertexcode, - const char *fragcode, - const char *geocode, - const char *libcode, - const char *defines) +GPUShader *GPU_shader_create( + const char *vertexcode, + const char *fragcode, + const char *geocode, + const char *libcode, + const char *defines, + const char *shname) { - return GPU_shader_create_ex(vertexcode, - fragcode, - geocode, - libcode, - defines, - GPU_SHADER_FLAGS_NONE, - GPU_SHADER_TFB_NONE, - NULL, - 0); + return GPU_shader_create_ex( + vertexcode, + fragcode, + geocode, + libcode, + defines, + GPU_SHADER_FLAGS_NONE, + GPU_SHADER_TFB_NONE, + NULL, + 0, + shname); } #define DEBUG_SHADER_NONE "" @@ -321,15 +340,17 @@ static void gpu_dump_shaders(const char **code, const int num_shaders, const cha printf("Shader file written to disk: %s\n", shader_path); } -GPUShader *GPU_shader_create_ex(const char *vertexcode, - const char *fragcode, - const char *geocode, - const char *libcode, - const char *defines, - const int flags, - const GPUShaderTFBType tf_type, - const char **tf_names, - const int tf_count) +GPUShader *GPU_shader_create_ex( + const char *vertexcode, + const char *fragcode, + const char *geocode, + const char *libcode, + const char *defines, + const int flags, + const GPUShaderTFBType tf_type, + const char **tf_names, + const int tf_count, + const char *shname) { #ifdef WITH_OPENSUBDIV bool use_opensubdiv = (flags & GPU_SHADER_FLAGS_SPECIAL_OPENSUBDIV) != 0; @@ -347,6 +368,12 @@ GPUShader *GPU_shader_create_ex(const char *vertexcode, shader = MEM_callocN(sizeof(GPUShader), "GPUShader"); gpu_dump_shaders(NULL, 0, DEBUG_SHADER_NONE); +#ifndef NDEBUG + BLI_snprintf(shader->name, sizeof(shader->name), "%s_%u", shname, g_shaderid++); +#else + UNUSED_VARS(shname); +#endif + if (vertexcode) shader->vertex = glCreateShader(GL_VERTEX_SHADER); if (fragcode) @@ -366,8 +393,9 @@ GPUShader *GPU_shader_create_ex(const char *vertexcode, return NULL; } - gpu_shader_standard_defines(standard_defines, - use_opensubdiv); + gpu_shader_standard_defines( + standard_defines, + use_opensubdiv); gpu_shader_standard_extensions(standard_extensions); if (vertexcode) { @@ -410,12 +438,13 @@ GPUShader *GPU_shader_create_ex(const char *vertexcode, #ifdef WITH_OPENSUBDIV /* TODO(sergey): Move to fragment shader source code generation. */ if (use_opensubdiv) { - source[num_source++] = + source[num_source++] = ( "#ifdef USE_OPENSUBDIV\n" "in block {\n" " VertexData v;\n" "} inpt;\n" - "#endif\n"; + "#endif\n" + ); } #endif @@ -496,24 +525,26 @@ GPUShader *GPU_shader_create_ex(const char *vertexcode, return NULL; } - shader->interface = GWN_shaderinterface_create(shader->program); + shader->interface = GPU_shaderinterface_create(shader->program); #ifdef WITH_OPENSUBDIV /* TODO(sergey): Find a better place for this. 
*/ if (use_opensubdiv) { if (GLEW_VERSION_4_1) { - glProgramUniform1i(shader->program, - GWN_shaderinterface_uniform(shader->interface, "FVarDataOffsetBuffer")->location, - 30); /* GL_TEXTURE30 */ - - glProgramUniform1i(shader->program, - GWN_shaderinterface_uniform(shader->interface, "FVarDataBuffer")->location, - 31); /* GL_TEXTURE31 */ + glProgramUniform1i( + shader->program, + GPU_shaderinterface_uniform(shader->interface, "FVarDataOffsetBuffer")->location, + 30); /* GL_TEXTURE30 */ + + glProgramUniform1i( + shader->program, + GPU_shaderinterface_uniform(shader->interface, "FVarDataBuffer")->location, + 31); /* GL_TEXTURE31 */ } else { glUseProgram(shader->program); - glUniform1i(GWN_shaderinterface_uniform(shader->interface, "FVarDataOffsetBuffer")->location, 30); - glUniform1i(GWN_shaderinterface_uniform(shader->interface, "FVarDataBuffer")->location, 31); + glUniform1i(GPU_shaderinterface_uniform(shader->interface, "FVarDataOffsetBuffer")->location, 30); + glUniform1i(GPU_shaderinterface_uniform(shader->interface, "FVarDataBuffer")->location, 31); glUseProgram(0); } } @@ -532,7 +563,7 @@ void GPU_shader_bind(GPUShader *shader) BLI_assert(shader && shader->program); glUseProgram(shader->program); - gpuBindMatrices(shader->interface); + GPU_matrix_bind(shader->interface); } void GPU_shader_unbind(void) @@ -563,6 +594,10 @@ void GPU_shader_transform_feedback_disable(GPUShader *UNUSED(shader)) void GPU_shader_free(GPUShader *shader) { +#if 0 /* Would be nice to have, but for now the Deferred compilation + * does not have a GPUContext. */ + BLI_assert(GPU_context_active_get() != NULL); +#endif BLI_assert(shader); if (shader->vertex) @@ -575,7 +610,7 @@ void GPU_shader_free(GPUShader *shader) glDeleteProgram(shader->program); if (shader->interface) - GWN_shaderinterface_discard(shader->interface); + GPU_shaderinterface_discard(shader->interface); MEM_freeN(shader); } @@ -583,14 +618,14 @@ void GPU_shader_free(GPUShader *shader) int GPU_shader_get_uniform(GPUShader *shader, const char *name) { BLI_assert(shader && shader->program); - const Gwn_ShaderInput *uniform = GWN_shaderinterface_uniform(shader->interface, name); + const GPUShaderInput *uniform = GPU_shaderinterface_uniform(shader->interface, name); return uniform ? uniform->location : -1; } int GPU_shader_get_builtin_uniform(GPUShader *shader, int builtin) { BLI_assert(shader && shader->program); - const Gwn_ShaderInput *uniform = GWN_shaderinterface_uniform_builtin(shader->interface, builtin); + const GPUShaderInput *uniform = GPU_shaderinterface_uniform_builtin(shader->interface, builtin); return uniform ? uniform->location : -1; } @@ -598,7 +633,7 @@ int GPU_shader_get_uniform_block(GPUShader *shader, const char *name) { BLI_assert(shader && shader->program); - const Gwn_ShaderInput *ubo = GWN_shaderinterface_ubo(shader->interface, name); + const GPUShaderInput *ubo = GPU_shaderinterface_ubo(shader->interface, name); return ubo ? ubo->location : -1; } @@ -675,7 +710,7 @@ void GPU_shader_uniform_texture(GPUShader *UNUSED(shader), int location, GPUText int GPU_shader_get_attribute(GPUShader *shader, const char *name) { BLI_assert(shader && shader->program); - const Gwn_ShaderInput *attrib = GWN_shaderinterface_attr(shader->interface, name); + const GPUShaderInput *attrib = GPU_shaderinterface_attr(shader->interface, name); return attrib ? 
attrib->location : -1; } @@ -727,6 +762,10 @@ GPUShader *GPU_shader_get_builtin_shader(GPUBuiltinShader shader) [GPU_SHADER_2D_IMAGE_MULTISAMPLE_4] = { datatoc_gpu_shader_2D_vert_glsl, datatoc_gpu_shader_image_multisample_resolve_frag_glsl }, [GPU_SHADER_2D_IMAGE_MULTISAMPLE_8] = { datatoc_gpu_shader_2D_vert_glsl, datatoc_gpu_shader_image_multisample_resolve_frag_glsl }, [GPU_SHADER_2D_IMAGE_MULTISAMPLE_16] = { datatoc_gpu_shader_2D_vert_glsl, datatoc_gpu_shader_image_multisample_resolve_frag_glsl }, + [GPU_SHADER_2D_IMAGE_MULTISAMPLE_2_DEPTH_TEST] = { datatoc_gpu_shader_2D_vert_glsl, datatoc_gpu_shader_image_multisample_resolve_frag_glsl }, + [GPU_SHADER_2D_IMAGE_MULTISAMPLE_4_DEPTH_TEST] = { datatoc_gpu_shader_2D_vert_glsl, datatoc_gpu_shader_image_multisample_resolve_frag_glsl }, + [GPU_SHADER_2D_IMAGE_MULTISAMPLE_8_DEPTH_TEST] = { datatoc_gpu_shader_2D_vert_glsl, datatoc_gpu_shader_image_multisample_resolve_frag_glsl }, + [GPU_SHADER_2D_IMAGE_MULTISAMPLE_16_DEPTH_TEST] = { datatoc_gpu_shader_2D_vert_glsl, datatoc_gpu_shader_image_multisample_resolve_frag_glsl }, [GPU_SHADER_2D_IMAGE_INTERLACE] = { datatoc_gpu_shader_2D_image_vert_glsl, datatoc_gpu_shader_image_interlace_frag_glsl }, @@ -853,6 +892,13 @@ GPUShader *GPU_shader_get_builtin_shader(GPUBuiltinShader shader) datatoc_gpu_shader_2D_nodelink_frag_glsl }, [GPU_SHADER_2D_NODELINK_INST] = { datatoc_gpu_shader_2D_nodelink_vert_glsl, datatoc_gpu_shader_2D_nodelink_frag_glsl }, + + [GPU_SHADER_GPENCIL_STROKE] = { datatoc_gpu_shader_gpencil_stroke_vert_glsl, + datatoc_gpu_shader_gpencil_stroke_frag_glsl, + datatoc_gpu_shader_gpencil_stroke_geom_glsl }, + + [GPU_SHADER_GPENCIL_FILL] = { datatoc_gpu_shader_gpencil_fill_vert_glsl, + datatoc_gpu_shader_gpencil_fill_frag_glsl }, }; if (builtin_shaders[shader] == NULL) { @@ -862,15 +908,31 @@ GPUShader *GPU_shader_get_builtin_shader(GPUBuiltinShader shader) case GPU_SHADER_2D_IMAGE_MULTISAMPLE_2: defines = "#define SAMPLES 2\n"; break; + case GPU_SHADER_2D_IMAGE_MULTISAMPLE_2_DEPTH_TEST: + defines = "#define SAMPLES 2\n" + "#define USE_DEPTH\n"; + break; case GPU_SHADER_2D_IMAGE_MULTISAMPLE_4: defines = "#define SAMPLES 4\n"; break; + case GPU_SHADER_2D_IMAGE_MULTISAMPLE_4_DEPTH_TEST: + defines = "#define SAMPLES 4\n" + "#define USE_DEPTH\n"; + break; case GPU_SHADER_2D_IMAGE_MULTISAMPLE_8: defines = "#define SAMPLES 8\n"; break; + case GPU_SHADER_2D_IMAGE_MULTISAMPLE_8_DEPTH_TEST: + defines = "#define SAMPLES 8\n" + "#define USE_DEPTH\n"; + break; case GPU_SHADER_2D_IMAGE_MULTISAMPLE_16: defines = "#define SAMPLES 16\n"; break; + case GPU_SHADER_2D_IMAGE_MULTISAMPLE_16_DEPTH_TEST: + defines = "#define SAMPLES 16\n" + "#define USE_DEPTH\n"; + break; case GPU_SHADER_2D_WIDGET_BASE_INST: case GPU_SHADER_2D_NODELINK_INST: defines = "#define USE_INSTANCE\n"; @@ -903,22 +965,25 @@ GPUShader *GPU_shader_get_builtin_shader(GPUBuiltinShader shader) if (shader == GPU_SHADER_EDGES_FRONT_BACK_PERSP && !GLEW_VERSION_3_2) { /* TODO: remove after switch to core profile (maybe) */ - static const GPUShaderStages legacy_fancy_edges = - { datatoc_gpu_shader_edges_front_back_persp_legacy_vert_glsl, - datatoc_gpu_shader_flat_color_alpha_test_0_frag_glsl }; + static const GPUShaderStages legacy_fancy_edges = { + datatoc_gpu_shader_edges_front_back_persp_legacy_vert_glsl, + datatoc_gpu_shader_flat_color_alpha_test_0_frag_glsl, + }; stages = &legacy_fancy_edges; } if (shader == GPU_SHADER_3D_LINE_DASHED_UNIFORM_COLOR && !GLEW_VERSION_3_2) { /* Dashed need geometry shader, which are not supported by legacy OpenGL, 
fallback to solid lines. */ /* TODO: remove after switch to core profile (maybe) */ - static const GPUShaderStages legacy_dashed_lines = { datatoc_gpu_shader_3D_line_dashed_uniform_color_legacy_vert_glsl, - datatoc_gpu_shader_2D_line_dashed_frag_glsl }; + static const GPUShaderStages legacy_dashed_lines = { + datatoc_gpu_shader_3D_line_dashed_uniform_color_legacy_vert_glsl, + datatoc_gpu_shader_2D_line_dashed_frag_glsl, + }; stages = &legacy_dashed_lines; } /* common case */ - builtin_shaders[shader] = GPU_shader_create(stages->vert, stages->frag, stages->geom, NULL, defines); + builtin_shaders[shader] = GPU_shader_create(stages->vert, stages->frag, stages->geom, NULL, defines, __func__); } return builtin_shaders[shader]; diff --git a/source/blender/gpu/intern/gpu_shader_interface.c b/source/blender/gpu/intern/gpu_shader_interface.c new file mode 100644 index 00000000000..f6bbc228ae9 --- /dev/null +++ b/source/blender/gpu/intern/gpu_shader_interface.c @@ -0,0 +1,368 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. 
+ * + * Contributor(s): Blender Foundation + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/gpu/intern/gpu_shader_interface.c + * \ingroup gpu + * + * GPU shader interface (C --> GLSL) + */ + +#include "MEM_guardedalloc.h" + +#include "GPU_shader_interface.h" + +#include "gpu_batch_private.h" +#include "gpu_context_private.h" + +#include <stdlib.h> +#include <stddef.h> +#include <string.h> + +#define DEBUG_SHADER_INTERFACE 0 + +#if DEBUG_SHADER_INTERFACE +# include <stdio.h> +#endif + +static const char *BuiltinUniform_name(GPUUniformBuiltin u) +{ + static const char *names[] = { + [GPU_UNIFORM_NONE] = NULL, + + [GPU_UNIFORM_MODEL] = "ModelMatrix", + [GPU_UNIFORM_VIEW] = "ViewMatrix", + [GPU_UNIFORM_MODELVIEW] = "ModelViewMatrix", + [GPU_UNIFORM_PROJECTION] = "ProjectionMatrix", + [GPU_UNIFORM_VIEWPROJECTION] = "ViewProjectionMatrix", + [GPU_UNIFORM_MVP] = "ModelViewProjectionMatrix", + + [GPU_UNIFORM_MODEL_INV] = "ModelMatrixInverse", + [GPU_UNIFORM_VIEW_INV] = "ViewMatrixInverse", + [GPU_UNIFORM_MODELVIEW_INV] = "ModelViewMatrixInverse", + [GPU_UNIFORM_PROJECTION_INV] = "ProjectionMatrixInverse", + [GPU_UNIFORM_VIEWPROJECTION_INV] = "ViewProjectionMatrixInverse", + + [GPU_UNIFORM_NORMAL] = "NormalMatrix", + [GPU_UNIFORM_WORLDNORMAL] = "WorldNormalMatrix", + [GPU_UNIFORM_CAMERATEXCO] = "CameraTexCoFactors", + [GPU_UNIFORM_ORCO] = "OrcoTexCoFactors", + + [GPU_UNIFORM_COLOR] = "color", + [GPU_UNIFORM_EYE] = "eye", + [GPU_UNIFORM_CALLID] = "callId", + + [GPU_UNIFORM_CUSTOM] = NULL, + [GPU_NUM_UNIFORMS] = NULL, + }; + + return names[u]; +} + +GPU_INLINE bool match(const char *a, const char *b) +{ + return strcmp(a, b) == 0; +} + +GPU_INLINE uint hash_string(const char *str) +{ + uint i = 0, c; + while ((c = *str++)) { + i = i * 37 + c; + } + return i; +} + +GPU_INLINE void set_input_name( + GPUShaderInterface *shaderface, GPUShaderInput *input, + const char *name, uint32_t name_len) +{ + input->name_offset = shaderface->name_buffer_offset; + input->name_hash = hash_string(name); + shaderface->name_buffer_offset += name_len + 1; /* include NULL terminator */ +} + +GPU_INLINE void shader_input_to_bucket( + GPUShaderInput *input, + GPUShaderInput *buckets[GPU_NUM_SHADERINTERFACE_BUCKETS]) +{ + const uint bucket_index = input->name_hash % GPU_NUM_SHADERINTERFACE_BUCKETS; + input->next = buckets[bucket_index]; + buckets[bucket_index] = input; +} + +GPU_INLINE const GPUShaderInput *buckets_lookup( + GPUShaderInput *const buckets[GPU_NUM_SHADERINTERFACE_BUCKETS], + const char *name_buffer, const char *name) +{ + const uint name_hash = hash_string(name); + const uint bucket_index = name_hash % GPU_NUM_SHADERINTERFACE_BUCKETS; + const GPUShaderInput *input = buckets[bucket_index]; + if (input == NULL) { + /* Requested uniform is not found at all. */ + return NULL; + } + /* Optimization bit: if there is no hash collision detected when constructing shader interface + * it means we can only request the single possible uniform. Surely, it's possible we request + * uniform which causes hash collision, but that will be detected in debug builds. */ + if (input->next == NULL) { + if (name_hash == input->name_hash) { +#if TRUST_NO_ONE + assert(match(name_buffer + input->name_offset, name)); +#endif + return input; + } + return NULL; + } + /* Work through possible collisions. 
*/ + const GPUShaderInput *next = input; + while (next != NULL) { + input = next; + next = input->next; + if (input->name_hash != name_hash) { + continue; + } + if (match(name_buffer + input->name_offset, name)) { + return input; + } + } + return NULL; /* not found */ +} + +GPU_INLINE void buckets_free(GPUShaderInput *buckets[GPU_NUM_SHADERINTERFACE_BUCKETS]) +{ + for (uint bucket_index = 0; bucket_index < GPU_NUM_SHADERINTERFACE_BUCKETS; ++bucket_index) { + GPUShaderInput *input = buckets[bucket_index]; + while (input != NULL) { + GPUShaderInput *input_next = input->next; + MEM_freeN(input); + input = input_next; + } + } +} + +static bool setup_builtin_uniform(GPUShaderInput *input, const char *name) +{ + /* TODO: reject DOUBLE, IMAGE, ATOMIC_COUNTER gl_types */ + + /* detect built-in uniforms (name must match) */ + for (GPUUniformBuiltin u = GPU_UNIFORM_NONE + 1; u < GPU_UNIFORM_CUSTOM; ++u) { + const char *builtin_name = BuiltinUniform_name(u); + if (match(name, builtin_name)) { + input->builtin_type = u; + return true; + } + } + input->builtin_type = GPU_UNIFORM_CUSTOM; + return false; +} + +static const GPUShaderInput *add_uniform(GPUShaderInterface *shaderface, const char *name) +{ + GPUShaderInput *input = MEM_mallocN(sizeof(GPUShaderInput), "GPUShaderInput Unif"); + + input->location = glGetUniformLocation(shaderface->program, name); + + uint name_len = strlen(name); + shaderface->name_buffer = MEM_reallocN(shaderface->name_buffer, shaderface->name_buffer_offset + name_len + 1); /* include NULL terminator */ + char *name_buffer = shaderface->name_buffer + shaderface->name_buffer_offset; + strcpy(name_buffer, name); + + set_input_name(shaderface, input, name, name_len); + setup_builtin_uniform(input, name); + + shader_input_to_bucket(input, shaderface->uniform_buckets); + if (input->builtin_type != GPU_UNIFORM_NONE && + input->builtin_type != GPU_UNIFORM_CUSTOM) + { + shaderface->builtin_uniforms[input->builtin_type] = input; + } +#if DEBUG_SHADER_INTERFACE + printf("GPUShaderInterface %p, program %d, uniform[] '%s' at location %d\n", + shaderface, + shaderface->program, + name, + input->location); +#endif + return input; +} + +GPUShaderInterface *GPU_shaderinterface_create(int32_t program) +{ + GPUShaderInterface *shaderface = MEM_callocN(sizeof(GPUShaderInterface), "GPUShaderInterface"); + shaderface->program = program; + +#if DEBUG_SHADER_INTERFACE + printf("%s {\n", __func__); /* enter function */ + printf("GPUShaderInterface %p, program %d\n", shaderface, program); +#endif + + GLint max_attrib_name_len, attr_len; + glGetProgramiv(program, GL_ACTIVE_ATTRIBUTE_MAX_LENGTH, &max_attrib_name_len); + glGetProgramiv(program, GL_ACTIVE_ATTRIBUTES, &attr_len); + + GLint max_ubo_name_len, ubo_len; + glGetProgramiv(program, GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH, &max_ubo_name_len); + glGetProgramiv(program, GL_ACTIVE_UNIFORM_BLOCKS, &ubo_len); + + const uint32_t name_buffer_len = attr_len * max_attrib_name_len + ubo_len * max_ubo_name_len; + shaderface->name_buffer = MEM_mallocN(name_buffer_len, "name_buffer"); + + /* Attributes */ + for (uint32_t i = 0; i < attr_len; ++i) { + GPUShaderInput *input = MEM_mallocN(sizeof(GPUShaderInput), "GPUShaderInput Attr"); + GLsizei remaining_buffer = name_buffer_len - shaderface->name_buffer_offset; + char *name = shaderface->name_buffer + shaderface->name_buffer_offset; + GLsizei name_len = 0; + + glGetActiveAttrib(program, i, remaining_buffer, &name_len, &input->size, &input->gl_type, name); + + /* remove "[0]" from array name */ + if 
(name[name_len - 1] == ']') { + name[name_len - 3] = '\0'; + name_len -= 3; + } + + /* TODO: reject DOUBLE gl_types */ + + input->location = glGetAttribLocation(program, name); + + set_input_name(shaderface, input, name, name_len); + + shader_input_to_bucket(input, shaderface->attrib_buckets); + +#if DEBUG_SHADER_INTERFACE + printf("attrib[%u] '%s' at location %d\n", i, name, input->location); +#endif + } + /* Uniform Blocks */ + for (uint32_t i = 0; i < ubo_len; ++i) { + GPUShaderInput *input = MEM_mallocN(sizeof(GPUShaderInput), "GPUShaderInput UBO"); + GLsizei remaining_buffer = name_buffer_len - shaderface->name_buffer_offset; + char *name = shaderface->name_buffer + shaderface->name_buffer_offset; + GLsizei name_len = 0; + + glGetActiveUniformBlockName(program, i, remaining_buffer, &name_len, name); + + input->location = i; + + set_input_name(shaderface, input, name, name_len); + + shader_input_to_bucket(input, shaderface->ubo_buckets); + +#if DEBUG_SHADER_INTERFACE + printf("ubo '%s' at location %d\n", name, input->location); +#endif + } + /* Builtin Uniforms */ + for (GPUUniformBuiltin u = GPU_UNIFORM_NONE + 1; u < GPU_UNIFORM_CUSTOM; ++u) { + const char *builtin_name = BuiltinUniform_name(u); + if (glGetUniformLocation(program, builtin_name) != -1) { + add_uniform((GPUShaderInterface *)shaderface, builtin_name); + } + } + /* Batches ref buffer */ + shaderface->batches_len = GPU_SHADERINTERFACE_REF_ALLOC_COUNT; + shaderface->batches = MEM_callocN(shaderface->batches_len * sizeof(GPUBatch *), "GPUShaderInterface batches"); + + return shaderface; +} + +void GPU_shaderinterface_discard(GPUShaderInterface *shaderface) +{ + /* Free memory used by buckets and has entries. */ + buckets_free(shaderface->uniform_buckets); + buckets_free(shaderface->attrib_buckets); + buckets_free(shaderface->ubo_buckets); + /* Free memory used by name_buffer. */ + MEM_freeN(shaderface->name_buffer); + /* Remove this interface from all linked Batches vao cache. */ + for (int i = 0; i < shaderface->batches_len; ++i) { + if (shaderface->batches[i] != NULL) { + gpu_batch_remove_interface_ref(shaderface->batches[i], shaderface); + } + } + MEM_freeN(shaderface->batches); + /* Free memory used by shader interface by its self. */ + MEM_freeN(shaderface); +} + +const GPUShaderInput *GPU_shaderinterface_uniform(const GPUShaderInterface *shaderface, const char *name) +{ + /* TODO: Warn if we find a matching builtin, since these can be looked up much quicker. */ + const GPUShaderInput *input = buckets_lookup(shaderface->uniform_buckets, shaderface->name_buffer, name); + /* If input is not found add it so it's found next time. */ + if (input == NULL) { + input = add_uniform((GPUShaderInterface *)shaderface, name); + } + return (input->location != -1) ? 
input : NULL; +} + +const GPUShaderInput *GPU_shaderinterface_uniform_builtin( + const GPUShaderInterface *shaderface, GPUUniformBuiltin builtin) +{ +#if TRUST_NO_ONE + assert(builtin != GPU_UNIFORM_NONE); + assert(builtin != GPU_UNIFORM_CUSTOM); + assert(builtin != GPU_NUM_UNIFORMS); +#endif + return shaderface->builtin_uniforms[builtin]; +} + +const GPUShaderInput *GPU_shaderinterface_ubo(const GPUShaderInterface *shaderface, const char *name) +{ + return buckets_lookup(shaderface->ubo_buckets, shaderface->name_buffer, name); +} + +const GPUShaderInput *GPU_shaderinterface_attr(const GPUShaderInterface *shaderface, const char *name) +{ + return buckets_lookup(shaderface->attrib_buckets, shaderface->name_buffer, name); +} + +void GPU_shaderinterface_add_batch_ref(GPUShaderInterface *shaderface, GPUBatch *batch) +{ + int i; /* find first unused slot */ + for (i = 0; i < shaderface->batches_len; ++i) { + if (shaderface->batches[i] == NULL) { + break; + } + } + if (i == shaderface->batches_len) { + /* Not enough place, realloc the array. */ + i = shaderface->batches_len; + shaderface->batches_len += GPU_SHADERINTERFACE_REF_ALLOC_COUNT; + shaderface->batches = MEM_recallocN(shaderface->batches, sizeof(GPUBatch *) * shaderface->batches_len); + } + shaderface->batches[i] = batch; +} + +void GPU_shaderinterface_remove_batch_ref(GPUShaderInterface *shaderface, GPUBatch *batch) +{ + for (int i = 0; i < shaderface->batches_len; ++i) { + if (shaderface->batches[i] == batch) { + shaderface->batches[i] = NULL; + break; /* cannot have duplicates */ + } + } +} diff --git a/source/blender/gpu/intern/gpu_shader_private.h b/source/blender/gpu/intern/gpu_shader_private.h index de5439c5638..69c0c41cef4 100644 --- a/source/blender/gpu/intern/gpu_shader_private.h +++ b/source/blender/gpu/intern/gpu_shader_private.h @@ -26,7 +26,7 @@ #define __GPU_SHADER_PRIVATE_H__ #include "GPU_glew.h" -#include "gawain/gwn_shader_interface.h" +#include "GPU_shader_interface.h" struct GPUShader { GLuint program; /* handle for full program (links shader stages below) */ @@ -35,9 +35,12 @@ struct GPUShader { GLuint geometry; /* handle for geometry shader */ GLuint fragment; /* handle for fragment shader */ - Gwn_ShaderInterface *interface; /* cached uniform & attrib interface for shader */ + GPUShaderInterface *interface; /* cached uniform & attrib interface for shader */ int feedback_transform_type; +#ifndef NDEBUG + char name[64]; +#endif }; #endif /* __GPU_SHADER_PRIVATE_H__ */ diff --git a/source/blender/gpu/intern/gpu_state.c b/source/blender/gpu/intern/gpu_state.c index 588d61640bd..68d846ccfba 100644 --- a/source/blender/gpu/intern/gpu_state.c +++ b/source/blender/gpu/intern/gpu_state.c @@ -66,10 +66,11 @@ void GPU_blend_set_func_separate( GPUBlendFunction src_rgb, GPUBlendFunction dst_rgb, GPUBlendFunction src_alpha, GPUBlendFunction dst_alpha) { - glBlendFuncSeparate(gpu_get_gl_blendfunction(src_rgb), - gpu_get_gl_blendfunction(dst_rgb), - gpu_get_gl_blendfunction(src_alpha), - gpu_get_gl_blendfunction(dst_alpha)); + glBlendFuncSeparate( + gpu_get_gl_blendfunction(src_rgb), + gpu_get_gl_blendfunction(dst_rgb), + gpu_get_gl_blendfunction(src_alpha), + gpu_get_gl_blendfunction(dst_alpha)); } void GPU_depth_test(bool enable) diff --git a/source/blender/gpu/intern/gpu_texture.c b/source/blender/gpu/intern/gpu_texture.c index d9248e06dfb..2ccc9f10269 100644 --- a/source/blender/gpu/intern/gpu_texture.c +++ b/source/blender/gpu/intern/gpu_texture.c @@ -38,22 +38,22 @@ #include "BKE_global.h" #include "GPU_batch.h" +#include 
"GPU_context.h" #include "GPU_debug.h" #include "GPU_draw.h" #include "GPU_extensions.h" -#include "GPU_framebuffer.h" #include "GPU_glew.h" +#include "GPU_framebuffer.h" #include "GPU_texture.h" +#include "gpu_context_private.h" + static struct GPUTextureGlobal { GPUTexture *invalid_tex_1D; /* texture used in place of invalid textures (not loaded correctly, missing) */ GPUTexture *invalid_tex_2D; GPUTexture *invalid_tex_3D; } GG = {NULL, NULL, NULL}; -static ListBase g_orphaned_tex = {NULL, NULL}; -static ThreadMutex g_orphan_lock; - /* Maximum number of FBOs a texture can be attached to. */ #define GPU_TEX_MAX_FBO_ATTACHED 8 @@ -160,9 +160,12 @@ static int gpu_get_component_count(GPUTextureFormat format) /* Definitely not complete, edit according to the gl specification. */ static void gpu_validate_data_format(GPUTextureFormat tex_format, GPUDataFormat data_format) { - if (ELEM(tex_format, GPU_DEPTH_COMPONENT24, - GPU_DEPTH_COMPONENT16, - GPU_DEPTH_COMPONENT32F)) + (void)data_format; + + if (ELEM(tex_format, + GPU_DEPTH_COMPONENT24, + GPU_DEPTH_COMPONENT16, + GPU_DEPTH_COMPONENT32F)) { BLI_assert(data_format == GPU_DATA_FLOAT); } @@ -196,9 +199,10 @@ static void gpu_validate_data_format(GPUTextureFormat tex_format, GPUDataFormat static GPUDataFormat gpu_get_data_format_from_tex_format(GPUTextureFormat tex_format) { - if (ELEM(tex_format, GPU_DEPTH_COMPONENT24, - GPU_DEPTH_COMPONENT16, - GPU_DEPTH_COMPONENT32F)) + if (ELEM(tex_format, + GPU_DEPTH_COMPONENT24, + GPU_DEPTH_COMPONENT16, + GPU_DEPTH_COMPONENT32F)) { return GPU_DATA_FLOAT; } @@ -232,9 +236,10 @@ static GPUDataFormat gpu_get_data_format_from_tex_format(GPUTextureFormat tex_fo /* Definitely not complete, edit according to the gl specification. */ static GLenum gpu_get_gl_dataformat(GPUTextureFormat data_type, GPUTextureFormatFlag *format_flag) { - if (ELEM(data_type, GPU_DEPTH_COMPONENT24, - GPU_DEPTH_COMPONENT16, - GPU_DEPTH_COMPONENT32F)) + if (ELEM(data_type, + GPU_DEPTH_COMPONENT24, + GPU_DEPTH_COMPONENT16, + GPU_DEPTH_COMPONENT32F)) { *format_flag |= GPU_FORMAT_DEPTH; return GL_DEPTH_COMPONENT; @@ -429,6 +434,11 @@ static bool gpu_texture_try_alloc( glTexImage2D(proxy, 0, internalformat, tex->w, tex->h, 0, data_format, data_type, NULL); break; case GL_PROXY_TEXTURE_2D_ARRAY: + /* HACK: Some driver wrongly check GL_PROXY_TEXTURE_2D_ARRAY as a GL_PROXY_TEXTURE_3D + * checking all dimensions against GPU_max_texture_layers (see T55888). 
*/ + return (tex->w > 0) && (tex->w <= GPU_max_texture_size()) && + (tex->h > 0) && (tex->h <= GPU_max_texture_size()) && + (tex->d > 0) && (tex->d <= GPU_max_texture_layers()); case GL_PROXY_TEXTURE_3D: glTexImage3D(proxy, 0, internalformat, tex->w, tex->h, tex->d, 0, data_format, data_type, NULL); break; @@ -532,7 +542,7 @@ GPUTexture *GPU_texture_create_nD( gpu_texture_memory_footprint_add(tex); /* Generate Texture object */ - glGenTextures(1, &tex->bindcode); + tex->bindcode = GPU_tex_alloc(); if (!tex->bindcode) { if (err_out) @@ -665,7 +675,7 @@ static GPUTexture *GPU_texture_cube_create( gpu_texture_memory_footprint_add(tex); /* Generate Texture object */ - glGenTextures(1, &tex->bindcode); + tex->bindcode = GPU_tex_alloc(); if (!tex->bindcode) { if (err_out) @@ -749,7 +759,7 @@ GPUTexture *GPU_texture_create_buffer(GPUTextureFormat tex_format, const GLuint } /* Generate Texture object */ - glGenTextures(1, &tex->bindcode); + tex->bindcode = GPU_tex_alloc(); if (!tex->bindcode) { fprintf(stderr, "GPUTexture: texture create failed\n"); @@ -861,6 +871,13 @@ GPUTexture *GPU_texture_create_1D( return GPU_texture_create_nD(w, 0, 0, 1, pixels, tex_format, data_format, 0, false, err_out); } +GPUTexture *GPU_texture_create_1D_array( + int w, int h, GPUTextureFormat tex_format, const float *pixels, char err_out[256]) +{ + GPUDataFormat data_format = gpu_get_data_format_from_tex_format(tex_format); + return GPU_texture_create_nD(w, h, 0, 1, pixels, tex_format, data_format, 0, false, err_out); +} + GPUTexture *GPU_texture_create_2D( int w, int h, GPUTextureFormat tex_format, const float *pixels, char err_out[256]) { @@ -911,28 +928,28 @@ GPUTexture *GPU_texture_create_cube( tex_format, GPU_DATA_FLOAT, err_out); } -GPUTexture *GPU_texture_create_from_vertbuf(Gwn_VertBuf *vert) +GPUTexture *GPU_texture_create_from_vertbuf(GPUVertBuf *vert) { - Gwn_VertFormat *format = &vert->format; - Gwn_VertAttr *attr = &format->attribs[0]; + GPUVertFormat *format = &vert->format; + GPUVertAttr *attr = &format->attribs[0]; /* Detect incompatible cases (not supported by texture buffers) */ BLI_assert(format->attr_len == 1 && vert->vbo_id != 0); BLI_assert(attr->comp_len != 3); /* Not until OGL 4.0 */ - BLI_assert(attr->comp_type != GWN_COMP_I10); - BLI_assert(attr->fetch_mode != GWN_FETCH_INT_TO_FLOAT); + BLI_assert(attr->comp_type != GPU_COMP_I10); + BLI_assert(attr->fetch_mode != GPU_FETCH_INT_TO_FLOAT); unsigned int byte_per_comp = attr->sz / attr->comp_len; - bool is_uint = ELEM(attr->comp_type, GWN_COMP_U8, GWN_COMP_U16, GWN_COMP_U32); + bool is_uint = ELEM(attr->comp_type, GPU_COMP_U8, GPU_COMP_U16, GPU_COMP_U32); /* Cannot fetch signed int or 32bit ints as normalized float. */ - if (attr->fetch_mode == GWN_FETCH_INT_TO_FLOAT_UNIT) { + if (attr->fetch_mode == GPU_FETCH_INT_TO_FLOAT_UNIT) { BLI_assert(is_uint || byte_per_comp <= 2); } GPUTextureFormat data_type; switch (attr->fetch_mode) { - case GWN_FETCH_FLOAT: + case GPU_FETCH_FLOAT: switch (attr->comp_len) { case 1: data_type = GPU_R32F; break; case 2: data_type = GPU_RG32F; break; @@ -940,7 +957,7 @@ GPUTexture *GPU_texture_create_from_vertbuf(Gwn_VertBuf *vert) default: data_type = GPU_RGBA32F; break; } break; - case GWN_FETCH_INT: + case GPU_FETCH_INT: switch (attr->comp_len) { case 1: switch (byte_per_comp) { @@ -965,7 +982,7 @@ GPUTexture *GPU_texture_create_from_vertbuf(Gwn_VertBuf *vert) break; } break; - case GWN_FETCH_INT_TO_FLOAT_UNIT: + case GPU_FETCH_INT_TO_FLOAT_UNIT: switch (attr->comp_len) { case 1: data_type = (byte_per_comp == 1) ? 
GPU_R8 : GPU_R16; break; case 2: data_type = (byte_per_comp == 1) ? GPU_RG8 : GPU_RG16; break; @@ -1161,8 +1178,9 @@ void GPU_texture_bind(GPUTexture *tex, int number) if ((G.debug & G_DEBUG)) { for (int i = 0; i < GPU_TEX_MAX_FBO_ATTACHED; ++i) { if (tex->fb[i] && GPU_framebuffer_bound(tex->fb[i])) { - fprintf(stderr, "Feedback loop warning!: Attempting to bind " - "texture attached to current framebuffer!\n"); + fprintf(stderr, + "Feedback loop warning!: Attempting to bind " + "texture attached to current framebuffer!\n"); BLI_assert(0); /* Should never happen! */ break; } @@ -1297,17 +1315,6 @@ void GPU_texture_filters(GPUTexture *tex, GPUFilterFunction min_filter, GPUFilte glTexParameteri(tex->target_base, GL_TEXTURE_MAG_FILTER, gpu_get_gl_filterfunction(mag_filter)); } - -static void gpu_texture_delete(GPUTexture *tex) -{ - if (tex->bindcode) - glDeleteTextures(1, &tex->bindcode); - - gpu_texture_memory_footprint_remove(tex); - - MEM_freeN(tex); -} - void GPU_texture_free(GPUTexture *tex) { tex->refcount--; @@ -1322,38 +1329,13 @@ void GPU_texture_free(GPUTexture *tex) } } - /* TODO(fclem): Check if the thread has an ogl context. */ - if (BLI_thread_is_main()) { - gpu_texture_delete(tex); - } - else { - BLI_mutex_lock(&g_orphan_lock); - BLI_addtail(&g_orphaned_tex, BLI_genericNodeN(tex)); - BLI_mutex_unlock(&g_orphan_lock); - } - } -} + if (tex->bindcode) + GPU_tex_free(tex->bindcode); -void GPU_texture_orphans_init(void) -{ - BLI_mutex_init(&g_orphan_lock); -} + gpu_texture_memory_footprint_remove(tex); -void GPU_texture_orphans_delete(void) -{ - BLI_mutex_lock(&g_orphan_lock); - LinkData *link; - while ((link = BLI_pophead(&g_orphaned_tex))) { - gpu_texture_delete((GPUTexture *)link->data); - MEM_freeN(link); + MEM_freeN(tex); } - BLI_mutex_unlock(&g_orphan_lock); -} - -void GPU_texture_orphans_exit(void) -{ - GPU_texture_orphans_delete(); - BLI_mutex_end(&g_orphan_lock); } void GPU_texture_ref(GPUTexture *tex) diff --git a/source/blender/gpu/intern/gpu_uniformbuffer.c b/source/blender/gpu/intern/gpu_uniformbuffer.c index 1e39b2ea5b7..3f5706c1f7b 100644 --- a/source/blender/gpu/intern/gpu_uniformbuffer.c +++ b/source/blender/gpu/intern/gpu_uniformbuffer.c @@ -35,6 +35,7 @@ #include "BLI_blenlib.h" #include "gpu_codegen.h" +#include "gpu_context_private.h" #include "GPU_extensions.h" #include "GPU_glew.h" @@ -88,7 +89,7 @@ GPUUniformBuffer *GPU_uniformbuffer_create(int size, const void *data, char err_ ubo->bindpoint = -1; /* Generate Buffer object */ - glGenBuffers(1, &ubo->bindcode); + ubo->bindcode = GPU_buf_alloc(); if (!ubo->bindcode) { if (err_out) @@ -127,7 +128,7 @@ GPUUniformBuffer *GPU_uniformbuffer_dynamic_create(ListBase *inputs, char err_ou ubo->flag = GPU_UBO_FLAG_DIRTY; /* Generate Buffer object. 
*/ - glGenBuffers(1, &ubo->buffer.bindcode); + ubo->buffer.bindcode = GPU_buf_alloc(); if (!ubo->buffer.bindcode) { if (err_out) @@ -158,9 +159,8 @@ GPUUniformBuffer *GPU_uniformbuffer_dynamic_create(ListBase *inputs, char err_ou float *offset = ubo->data; for (LinkData *link = inputs->first; link; link = link->next) { GPUInput *input = link->data; - const GPUType gputype = get_padded_gpu_type(link); - memcpy(offset, input->dynamicvec, gputype * sizeof(float)); - offset += gputype; + memcpy(offset, input->dynamicvec, input->type * sizeof(float)); + offset += get_padded_gpu_type(link); } /* Note since we may create the UBOs in the CPU in a different thread than the main drawing one, @@ -190,7 +190,7 @@ void GPU_uniformbuffer_free(GPUUniformBuffer *ubo) gpu_uniformbuffer_dynamic_free(ubo); } - glDeleteBuffers(1, &ubo->bindcode); + GPU_buf_free(ubo->bindcode); MEM_freeN(ubo); } diff --git a/source/blender/gpu/intern/gpu_vertex_buffer.c b/source/blender/gpu/intern/gpu_vertex_buffer.c new file mode 100644 index 00000000000..05100b8a23f --- /dev/null +++ b/source/blender/gpu/intern/gpu_vertex_buffer.c @@ -0,0 +1,272 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. + * + * Contributor(s): Blender Foundation, Clément Foucault + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/gpu/intern/gpu_vertex_buffer.c + * \ingroup gpu + * + * GPU vertex buffer + */ + +#include "MEM_guardedalloc.h" + +#include "GPU_vertex_buffer.h" + +#include "gpu_context_private.h" +#include "gpu_vertex_format_private.h" + +#include <stdlib.h> +#include <string.h> + +#define KEEP_SINGLE_COPY 1 + +static uint vbo_memory_usage; + +static GLenum convert_usage_type_to_gl(GPUUsageType type) +{ + static const GLenum table[] = { + [GPU_USAGE_STREAM] = GL_STREAM_DRAW, + [GPU_USAGE_STATIC] = GL_STATIC_DRAW, + [GPU_USAGE_DYNAMIC] = GL_DYNAMIC_DRAW + }; + return table[type]; +} + +GPUVertBuf *GPU_vertbuf_create(GPUUsageType usage) +{ + GPUVertBuf *verts = MEM_mallocN(sizeof(GPUVertBuf), "GPUVertBuf"); + GPU_vertbuf_init(verts, usage); + return verts; +} + +GPUVertBuf *GPU_vertbuf_create_with_format_ex(const GPUVertFormat *format, GPUUsageType usage) +{ + GPUVertBuf *verts = GPU_vertbuf_create(usage); + GPU_vertformat_copy(&verts->format, format); + if (!format->packed) { + VertexFormat_pack(&verts->format); + } + return verts; + + /* this function might seem redundant, but there is potential for memory savings here... 
*/ + /* TODO: implement those memory savings */ +} + +void GPU_vertbuf_init(GPUVertBuf *verts, GPUUsageType usage) +{ + memset(verts, 0, sizeof(GPUVertBuf)); + verts->usage = usage; + verts->dirty = true; +} + +void GPU_vertbuf_init_with_format_ex(GPUVertBuf *verts, const GPUVertFormat *format, GPUUsageType usage) +{ + GPU_vertbuf_init(verts, usage); + GPU_vertformat_copy(&verts->format, format); + if (!format->packed) { + VertexFormat_pack(&verts->format); + } +} + +void GPU_vertbuf_discard(GPUVertBuf *verts) +{ + if (verts->vbo_id) { + GPU_buf_free(verts->vbo_id); +#if VRAM_USAGE + vbo_memory_usage -= GPU_vertbuf_size_get(verts); +#endif + } + if (verts->data) { + MEM_freeN(verts->data); + } + MEM_freeN(verts); +} + +uint GPU_vertbuf_size_get(const GPUVertBuf *verts) +{ + return vertex_buffer_size(&verts->format, verts->vertex_len); +} + +/* create a new allocation, discarding any existing data */ +void GPU_vertbuf_data_alloc(GPUVertBuf *verts, uint v_len) +{ + GPUVertFormat *format = &verts->format; + if (!format->packed) { + VertexFormat_pack(format); + } +#if TRUST_NO_ONE + /* catch any unnecessary use */ + assert(verts->vertex_alloc != v_len || verts->data == NULL); +#endif + /* only create the buffer the 1st time */ + if (verts->vbo_id == 0) { + verts->vbo_id = GPU_buf_alloc(); + } + /* discard previous data if any */ + if (verts->data) { + MEM_freeN(verts->data); + } +#if VRAM_USAGE + uint new_size = vertex_buffer_size(&verts->format, v_len); + vbo_memory_usage += new_size - GPU_vertbuf_size_get(verts); +#endif + verts->dirty = true; + verts->vertex_len = verts->vertex_alloc = v_len; + verts->data = MEM_mallocN(sizeof(GLubyte) * GPU_vertbuf_size_get(verts), "GPUVertBuf data"); +} + +/* resize buffer keeping existing data */ +void GPU_vertbuf_data_resize(GPUVertBuf *verts, uint v_len) +{ +#if TRUST_NO_ONE + assert(verts->data != NULL); + assert(verts->vertex_alloc != v_len); +#endif + +#if VRAM_USAGE + uint new_size = vertex_buffer_size(&verts->format, v_len); + vbo_memory_usage += new_size - GPU_vertbuf_size_get(verts); +#endif + verts->dirty = true; + verts->vertex_len = verts->vertex_alloc = v_len; + verts->data = MEM_reallocN(verts->data, sizeof(GLubyte) * GPU_vertbuf_size_get(verts)); +} + +/* Set vertex count but does not change allocation. + * Only this many verts will be uploaded to the GPU and rendered. + * This is usefull for streaming data. 
*/ +void GPU_vertbuf_vertex_count_set(GPUVertBuf *verts, uint v_len) +{ +#if TRUST_NO_ONE + assert(verts->data != NULL); /* only for dynamic data */ + assert(v_len <= verts->vertex_alloc); +#endif + +#if VRAM_USAGE + uint new_size = vertex_buffer_size(&verts->format, v_len); + vbo_memory_usage += new_size - GPU_vertbuf_size_get(verts); +#endif + verts->vertex_len = v_len; +} + +void GPU_vertbuf_attr_set(GPUVertBuf *verts, uint a_idx, uint v_idx, const void *data) +{ + const GPUVertFormat *format = &verts->format; + const GPUVertAttr *a = format->attribs + a_idx; + +#if TRUST_NO_ONE + assert(a_idx < format->attr_len); + assert(v_idx < verts->vertex_alloc); + assert(verts->data != NULL); +#endif + verts->dirty = true; + memcpy((GLubyte *)verts->data + a->offset + v_idx * format->stride, data, a->sz); +} + +void GPU_vertbuf_attr_fill(GPUVertBuf *verts, uint a_idx, const void *data) +{ + const GPUVertFormat *format = &verts->format; + const GPUVertAttr *a = format->attribs + a_idx; + +#if TRUST_NO_ONE + assert(a_idx < format->attr_len); +#endif + const uint stride = a->sz; /* tightly packed input data */ + + GPU_vertbuf_attr_fill_stride(verts, a_idx, stride, data); +} + +void GPU_vertbuf_attr_fill_stride(GPUVertBuf *verts, uint a_idx, uint stride, const void *data) +{ + const GPUVertFormat *format = &verts->format; + const GPUVertAttr *a = format->attribs + a_idx; + +#if TRUST_NO_ONE + assert(a_idx < format->attr_len); + assert(verts->data != NULL); +#endif + verts->dirty = true; + const uint vertex_len = verts->vertex_len; + + if (format->attr_len == 1 && stride == format->stride) { + /* we can copy it all at once */ + memcpy(verts->data, data, vertex_len * a->sz); + } + else { + /* we must copy it per vertex */ + for (uint v = 0; v < vertex_len; ++v) { + memcpy((GLubyte *)verts->data + a->offset + v * format->stride, (const GLubyte *)data + v * stride, a->sz); + } + } +} + +void GPU_vertbuf_attr_get_raw_data(GPUVertBuf *verts, uint a_idx, GPUVertBufRaw *access) +{ + const GPUVertFormat *format = &verts->format; + const GPUVertAttr *a = format->attribs + a_idx; + +#if TRUST_NO_ONE + assert(a_idx < format->attr_len); + assert(verts->data != NULL); +#endif + + verts->dirty = true; + + access->size = a->sz; + access->stride = format->stride; + access->data = (GLubyte *)verts->data + a->offset; + access->data_init = access->data; +#if TRUST_NO_ONE + access->_data_end = access->data_init + (size_t)(verts->vertex_alloc * format->stride); +#endif +} + +static void VertBuffer_upload_data(GPUVertBuf *verts) +{ + uint buffer_sz = GPU_vertbuf_size_get(verts); + + /* orphan the vbo to avoid sync */ + glBufferData(GL_ARRAY_BUFFER, buffer_sz, NULL, convert_usage_type_to_gl(verts->usage)); + /* upload data */ + glBufferSubData(GL_ARRAY_BUFFER, 0, buffer_sz, verts->data); + + if (verts->usage == GPU_USAGE_STATIC) { + MEM_freeN(verts->data); + verts->data = NULL; + } + verts->dirty = false; +} + +void GPU_vertbuf_use(GPUVertBuf *verts) +{ + glBindBuffer(GL_ARRAY_BUFFER, verts->vbo_id); + if (verts->dirty) { + VertBuffer_upload_data(verts); + } +} + +uint GPU_vertbuf_get_memory_usage(void) +{ + return vbo_memory_usage; +} diff --git a/source/blender/gpu/intern/gpu_vertex_format.c b/source/blender/gpu/intern/gpu_vertex_format.c new file mode 100644 index 00000000000..eef4945d9ef --- /dev/null +++ b/source/blender/gpu/intern/gpu_vertex_format.c @@ -0,0 +1,312 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the 
GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. + * + * Contributor(s): Blender Foundation, Clément Foucault + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/gpu/intern/gpu_vertex_format.c + * \ingroup gpu + * + * GPU vertex format + */ + +#include "GPU_vertex_format.h" +#include "gpu_vertex_format_private.h" +#include <stddef.h> +#include <string.h> + +#define PACK_DEBUG 0 + +#if PACK_DEBUG +# include <stdio.h> +#endif + +void GPU_vertformat_clear(GPUVertFormat *format) +{ +#if TRUST_NO_ONE + memset(format, 0, sizeof(GPUVertFormat)); +#else + format->attr_len = 0; + format->packed = false; + format->name_offset = 0; + format->name_len = 0; + + for (unsigned i = 0; i < GPU_VERT_ATTR_MAX_LEN; i++) { + format->attribs[i].name_len = 0; + } +#endif +} + +void GPU_vertformat_copy(GPUVertFormat *dest, const GPUVertFormat *src) +{ + /* copy regular struct fields */ + memcpy(dest, src, sizeof(GPUVertFormat)); + + for (unsigned i = 0; i < dest->attr_len; i++) { + for (unsigned j = 0; j < dest->attribs[i].name_len; j++) { + dest->attribs[i].name[j] = (char *)dest + (src->attribs[i].name[j] - ((char *)src)); + } + } +} + +static GLenum convert_comp_type_to_gl(GPUVertCompType type) +{ + static const GLenum table[] = { + [GPU_COMP_I8] = GL_BYTE, + [GPU_COMP_U8] = GL_UNSIGNED_BYTE, + [GPU_COMP_I16] = GL_SHORT, + [GPU_COMP_U16] = GL_UNSIGNED_SHORT, + [GPU_COMP_I32] = GL_INT, + [GPU_COMP_U32] = GL_UNSIGNED_INT, + + [GPU_COMP_F32] = GL_FLOAT, + + [GPU_COMP_I10] = GL_INT_2_10_10_10_REV + }; + return table[type]; +} + +static unsigned comp_sz(GPUVertCompType type) +{ +#if TRUST_NO_ONE + assert(type <= GPU_COMP_F32); /* other types have irregular sizes (not bytes) */ +#endif + const GLubyte sizes[] = {1, 1, 2, 2, 4, 4, 4}; + return sizes[type]; +} + +static unsigned attrib_sz(const GPUVertAttr *a) +{ + if (a->comp_type == GPU_COMP_I10) { + return 4; /* always packed as 10_10_10_2 */ + } + return a->comp_len * comp_sz(a->comp_type); +} + +static unsigned attrib_align(const GPUVertAttr *a) +{ + if (a->comp_type == GPU_COMP_I10) { + return 4; /* always packed as 10_10_10_2 */ + } + unsigned c = comp_sz(a->comp_type); + if (a->comp_len == 3 && c <= 2) { + return 4 * c; /* AMD HW can't fetch these well, so pad it out (other vendors too?) 
*/ + } + else { + return c; /* most fetches are ok if components are naturally aligned */ + } +} + +unsigned vertex_buffer_size(const GPUVertFormat *format, unsigned vertex_len) +{ +#if TRUST_NO_ONE + assert(format->packed && format->stride > 0); +#endif + return format->stride * vertex_len; +} + +static const char *copy_attrib_name(GPUVertFormat *format, const char *name) +{ + /* strncpy does 110% of what we need; let's do exactly 100% */ + char *name_copy = format->names + format->name_offset; + unsigned available = GPU_VERT_ATTR_NAMES_BUF_LEN - format->name_offset; + bool terminated = false; + + for (unsigned i = 0; i < available; ++i) { + const char c = name[i]; + name_copy[i] = c; + if (c == '\0') { + terminated = true; + format->name_offset += (i + 1); + break; + } + } +#if TRUST_NO_ONE + assert(terminated); + assert(format->name_offset <= GPU_VERT_ATTR_NAMES_BUF_LEN); +#else + (void)terminated; +#endif + return name_copy; +} + +unsigned GPU_vertformat_attr_add( + GPUVertFormat *format, const char *name, + GPUVertCompType comp_type, unsigned comp_len, GPUVertFetchMode fetch_mode) +{ +#if TRUST_NO_ONE + assert(format->name_len < GPU_VERT_ATTR_MAX_LEN); /* there's room for more */ + assert(format->attr_len < GPU_VERT_ATTR_MAX_LEN); /* there's room for more */ + assert(!format->packed); /* packed means frozen/locked */ + assert((comp_len >= 1 && comp_len <= 4) || comp_len == 8 || comp_len == 12 || comp_len == 16); + + switch (comp_type) { + case GPU_COMP_F32: + /* float type can only kept as float */ + assert(fetch_mode == GPU_FETCH_FLOAT); + break; + case GPU_COMP_I10: + /* 10_10_10 format intended for normals (xyz) or colors (rgb) + * extra component packed.w can be manually set to { -2, -1, 0, 1 } */ + assert(comp_len == 3 || comp_len == 4); + assert(fetch_mode == GPU_FETCH_INT_TO_FLOAT_UNIT); /* not strictly required, may relax later */ + break; + default: + /* integer types can be kept as int or converted/normalized to float */ + assert(fetch_mode != GPU_FETCH_FLOAT); + /* only support float matrices (see Batch_update_program_bindings) */ + assert(comp_len != 8 && comp_len != 12 && comp_len != 16); + } +#endif + format->name_len++; /* multiname support */ + + const unsigned attrib_id = format->attr_len++; + GPUVertAttr *attrib = format->attribs + attrib_id; + + attrib->name[attrib->name_len++] = copy_attrib_name(format, name); + attrib->comp_type = comp_type; + attrib->gl_comp_type = convert_comp_type_to_gl(comp_type); + attrib->comp_len = (comp_type == GPU_COMP_I10) ? 4 : comp_len; /* system needs 10_10_10_2 to be 4 or BGRA */ + attrib->sz = attrib_sz(attrib); + attrib->offset = 0; /* offsets & stride are calculated later (during pack) */ + attrib->fetch_mode = fetch_mode; + + return attrib_id; +} + +void GPU_vertformat_alias_add(GPUVertFormat *format, const char *alias) +{ + GPUVertAttr *attrib = format->attribs + (format->attr_len - 1); +#if TRUST_NO_ONE + assert(format->name_len < GPU_VERT_ATTR_MAX_LEN); /* there's room for more */ + assert(attrib->name_len < GPU_VERT_ATTR_MAX_NAMES); +#endif + format->name_len++; /* multiname support */ + attrib->name[attrib->name_len++] = copy_attrib_name(format, alias); +} + +unsigned padding(unsigned offset, unsigned alignment) +{ + const unsigned mod = offset % alignment; + return (mod == 0) ? 
0 : (alignment - mod); +} + +#if PACK_DEBUG +static void show_pack(unsigned a_idx, unsigned sz, unsigned pad) +{ + const char c = 'A' + a_idx; + for (unsigned i = 0; i < pad; ++i) { + putchar('-'); + } + for (unsigned i = 0; i < sz; ++i) { + putchar(c); + } +} +#endif + +void VertexFormat_pack(GPUVertFormat *format) +{ + /* For now, attributes are packed in the order they were added, + * making sure each attrib is naturally aligned (add padding where necessary) + * Later we can implement more efficient packing w/ reordering + * (keep attrib ID order, adjust their offsets to reorder in buffer). */ + + /* TODO: realloc just enough to hold the final combo string. And just enough to + * hold used attribs, not all 16. */ + + GPUVertAttr *a0 = format->attribs + 0; + a0->offset = 0; + unsigned offset = a0->sz; + +#if PACK_DEBUG + show_pack(0, a0->sz, 0); +#endif + + for (unsigned a_idx = 1; a_idx < format->attr_len; ++a_idx) { + GPUVertAttr *a = format->attribs + a_idx; + unsigned mid_padding = padding(offset, attrib_align(a)); + offset += mid_padding; + a->offset = offset; + offset += a->sz; + +#if PACK_DEBUG + show_pack(a_idx, a->sz, mid_padding); +#endif + } + + unsigned end_padding = padding(offset, attrib_align(a0)); + +#if PACK_DEBUG + show_pack(0, 0, end_padding); + putchar('\n'); +#endif + format->stride = offset + end_padding; + format->packed = true; +} + + +/* OpenGL ES packs in a different order as desktop GL but component conversion is the same. + * Of the code here, only struct GPUPackedNormal needs to change. */ + +#define SIGNED_INT_10_MAX 511 +#define SIGNED_INT_10_MIN -512 + +static int clampi(int x, int min_allowed, int max_allowed) +{ +#if TRUST_NO_ONE + assert(min_allowed <= max_allowed); +#endif + if (x < min_allowed) { + return min_allowed; + } + else if (x > max_allowed) { + return max_allowed; + } + else { + return x; + } +} + +static int quantize(float x) +{ + int qx = x * 511.0f; + return clampi(qx, SIGNED_INT_10_MIN, SIGNED_INT_10_MAX); +} + +static int convert_i16(short x) +{ + /* 16-bit signed --> 10-bit signed */ + /* TODO: round? */ + return x >> 6; +} + +GPUPackedNormal GPU_normal_convert_i10_v3(const float data[3]) +{ + GPUPackedNormal n = { .x = quantize(data[0]), .y = quantize(data[1]), .z = quantize(data[2]) }; + return n; +} + +GPUPackedNormal GPU_normal_convert_i10_s3(const short data[3]) +{ + GPUPackedNormal n = { .x = convert_i16(data[0]), .y = convert_i16(data[1]), .z = convert_i16(data[2]) }; + return n; +} diff --git a/source/blender/gpu/intern/gpu_vertex_format_private.h b/source/blender/gpu/intern/gpu_vertex_format_private.h new file mode 100644 index 00000000000..e4fe61e8697 --- /dev/null +++ b/source/blender/gpu/intern/gpu_vertex_format_private.h @@ -0,0 +1,39 @@ +/* + * ***** BEGIN GPL LICENSE BLOCK ***** + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * + * The Original Code is Copyright (C) 2016 by Mike Erwin. + * All rights reserved. + * + * Contributor(s): Blender Foundation + * + * ***** END GPL LICENSE BLOCK ***** + */ + +/** \file blender/gpu/intern/gpu_vertex_format_private.h + * \ingroup gpu + * + * GPU vertex format + */ + +#ifndef __GPU_VERTEX_FORMAT_PRIVATE_H__ +#define __GPU_VERTEX_FORMAT_PRIVATE_H__ + +void VertexFormat_pack(GPUVertFormat *format); +uint padding(uint offset, uint alignment); +uint vertex_buffer_size(const GPUVertFormat *format, uint vertex_len); + +#endif /* __GPU_VERTEX_FORMAT_PRIVATE_H__ */ diff --git a/source/blender/gpu/intern/gpu_viewport.c b/source/blender/gpu/intern/gpu_viewport.c index 0bf215f31a8..5d495779ba1 100644 --- a/source/blender/gpu/intern/gpu_viewport.c +++ b/source/blender/gpu/intern/gpu_viewport.c @@ -539,9 +539,9 @@ void GPU_viewport_draw_to_screen(GPUViewport *viewport, const rcti *rect) glUniform1i(GPU_shader_get_uniform(shader, "image"), 0); glUniform4f(GPU_shader_get_uniform(shader, "rect_icon"), halfx, halfy, 1.0f + halfx, 1.0f + halfy); glUniform4f(GPU_shader_get_uniform(shader, "rect_geom"), x1, y1, x2, y2); - glUniform4f(GPU_shader_get_builtin_uniform(shader, GWN_UNIFORM_COLOR), 1.0f, 1.0f, 1.0f, 1.0f); + glUniform4f(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_COLOR), 1.0f, 1.0f, 1.0f, 1.0f); - GWN_draw_primitive(GWN_PRIM_TRI_STRIP, 4); + GPU_draw_primitive(GPU_PRIM_TRI_STRIP, 4); GPU_texture_unbind(color); } |
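The new gpu_primitive.c maps every primitive type to a point/line/surface class, and GPU_primtype_belongs_to_class() tests membership with a bitwise AND, which implies the GPU_PRIM_CLASS_* values are distinct bits that can be combined into a mask. A minimal, hypothetical caller under that assumption (the enum values themselves live in GPU_primitive.h and are not part of this patch):

#include <stdbool.h>
#include "GPU_primitive.h"

/* Accept only primitives that rasterize as points or lines, e.g. for a
 * wireframe-only code path. The combined mask only works if the
 * GPU_PRIM_CLASS_* values are single-bit flags, as the bitwise test in
 * GPU_primtype_belongs_to_class() suggests. */
static bool primtype_is_wire(GPUPrimType prim_type)
{
	return GPU_primtype_belongs_to_class(
	        prim_type, GPU_PRIM_CLASS_POINT | GPU_PRIM_CLASS_LINE);
}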
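gpu_shader_interface.c finds uniforms, attributes and UBOs by name through small per-interface hash tables: each name is stored once in a shared name buffer, each input keeps only the name's hash and offset, and collisions are chained per bucket. A standalone sketch of just the hashing/bucketing step, with a hypothetical bucket count standing in for GPU_NUM_SHADERINTERFACE_BUCKETS (defined in GPU_shader_interface.h, outside this patch):

#include <stdint.h>
#include <stdio.h>

#define NUM_BUCKETS 257  /* hypothetical stand-in for GPU_NUM_SHADERINTERFACE_BUCKETS */

/* Same multiplicative string hash as hash_string() in gpu_shader_interface.c. */
static uint32_t name_hash(const char *str)
{
	uint32_t i = 0, c;
	while ((c = *str++)) {
		i = i * 37 + c;
	}
	return i;
}

int main(void)
{
	/* A lookup computes the hash once, picks a bucket, and only walks the
	 * collision chain when more than one input landed in that bucket. */
	const char *names[] = { "ModelViewProjectionMatrix", "color", "pos" };
	for (int i = 0; i < 3; ++i) {
		uint32_t h = name_hash(names[i]);
		printf("%-28s hash=%u bucket=%u\n", names[i], (unsigned)h, (unsigned)(h % NUM_BUCKETS));
	}
	return 0;
}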
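The new gpu_vertex_format.c / gpu_vertex_buffer.c pair replaces the old Gwn_* vertex path: a format is declared attribute by attribute, packed with natural alignment on first use, and the buffer is filled on the CPU before GPU_vertbuf_use() uploads it. A minimal sketch of the intended calling pattern, built only from functions added in this patch (the attribute names and triangle data are illustrative):

#include "GPU_vertex_format.h"
#include "GPU_vertex_buffer.h"

/* Build a small static vertex buffer with a vec2 position and a vec4 color.
 * With the packing rules above, "pos" spans bytes 0-7 and "color" bytes 8-23
 * of each vertex: both are naturally aligned, so the stride packs to 24 bytes
 * with no padding. */
static GPUVertBuf *build_triangle_vbo(void)
{
	GPUVertFormat format;
	GPU_vertformat_clear(&format);
	const unsigned int pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 2, GPU_FETCH_FLOAT);
	const unsigned int col = GPU_vertformat_attr_add(&format, "color", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);

	GPUVertBuf *vbo = GPU_vertbuf_create_with_format_ex(&format, GPU_USAGE_STATIC);
	GPU_vertbuf_data_alloc(vbo, 3);

	const float verts[3][2] = { { -1.0f, -1.0f }, { 1.0f, -1.0f }, { 0.0f, 1.0f } };
	const float white[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
	for (unsigned int v = 0; v < 3; ++v) {
		GPU_vertbuf_attr_set(vbo, pos, v, verts[v]);
		GPU_vertbuf_attr_set(vbo, col, v, white);
	}
	/* Nothing is uploaded yet; GPU_vertbuf_use() (or a batch draw) uploads the
	 * data lazily and frees the CPU copy for GPU_USAGE_STATIC buffers. */
	return vbo;
}

Because VertexFormat_pack() runs lazily inside GPU_vertbuf_create_with_format_ex(), callers never deal with offsets or stride directly.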
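GPU_normal_convert_i10_v3() quantizes a float normal for the 10_10_10_2 vertex formats by scaling each component by 511, truncating, and clamping to the signed 10-bit range [-512, 511], so 1.0 maps to 511, -1.0 to -511 and 0.5 to 255. A small sanity check of that mapping, assuming GPUPackedNormal exposes signed x/y/z bit-fields as its use in this patch suggests:

#include <assert.h>
#include "GPU_vertex_format.h"

int main(void)
{
	const float n[3] = { 1.0f, -1.0f, 0.5f };
	const GPUPackedNormal packed = GPU_normal_convert_i10_v3(n);

	/* Each component is n * 511 truncated toward zero, then clamped to
	 * [-512, 511]; the 2-bit w component is left for the caller to set. */
	assert(packed.x == 511);
	assert(packed.y == -511);
	assert(packed.z == 255);
	(void)packed;
	return 0;
}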