10 files changed, 2401 insertions, 0 deletions
diff --git a/intern/gawain/src/attrib_binding.c b/intern/gawain/src/attrib_binding.c
new file mode 100644
index 00000000000..bb42aaf66eb
--- /dev/null
+++ b/intern/gawain/src/attrib_binding.c
@@ -0,0 +1,69 @@
+
+// Gawain vertex attribute binding
+//
+// This code is part of the Gawain library, with modifications
+// specific to integration with Blender.
+//
+// Copyright 2016 Mike Erwin
+//
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of
+// the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#include "attrib_binding.h"
+
+#if MAX_VERTEX_ATTRIBS != 16
+  #error "attrib binding code assumes MAX_VERTEX_ATTRIBS = 16"
+#endif
+
+void clear_AttribBinding(AttribBinding* binding)
+	{
+	binding->enabled_bits = 0; // no attrib is marked as bound
+	binding->loc_bits = 0; // every packed 4-bit location slot reads as 0
+	}
+
+unsigned read_attrib_location(const AttribBinding* binding, unsigned a_idx)
+	{
+#if TRUST_NO_ONE
+	assert(a_idx < MAX_VERTEX_ATTRIBS);
+	assert(binding->enabled_bits & (1 << a_idx));
+#endif
+	const unsigned slot_shift = 4 * a_idx; // each attrib owns one 4-bit slot of loc_bits
+	return (binding->loc_bits >> slot_shift) & 0xF;
+	}
+
+static void write_attrib_location(AttribBinding* binding, unsigned a_idx, unsigned location)
+	{
+#if TRUST_NO_ONE
+	assert(a_idx < MAX_VERTEX_ATTRIBS);
+	assert(location < MAX_VERTEX_ATTRIBS);
+#endif
+	// store 'location' in attrib a_idx's 4-bit slot of loc_bits and flag that attrib as enabled
+	const unsigned shift = 4 * a_idx;
+	const uint64_t mask = ((uint64_t)0xF) << shift;
+	// overwrite this attrib's previous location (widen before shifting: shift reaches 60, too far for a 32-bit unsigned)
+	binding->loc_bits = (binding->loc_bits & ~mask) | (((uint64_t)location << shift) & mask);
+	// mark this attrib as enabled
+	binding->enabled_bits |= 1 << a_idx;
+	}
+
+void get_attrib_locations(const VertexFormat* format, AttribBinding* binding, GLuint program)
+	{
+#if TRUST_NO_ONE
+	assert(glIsProgram(program));
+#endif
+	// Rebuild 'binding' from scratch: look up every attrib of 'format' by name in 'program'.
+	clear_AttribBinding(binding);
+
+	for (unsigned a_idx = 0; a_idx < format->attrib_ct; ++a_idx)
+		{
+		const Attrib* a = format->attribs + a_idx;
+		GLint loc = glGetAttribLocation(program, a->name); // -1 when the program has no such active attrib
+
+#if TRUST_NO_ONE
+		assert(loc != -1);
+		// TODO: make this a recoverable runtime error? indicates mismatch between vertex format and program
+#endif
+		// NOTE(review): with TRUST_NO_ONE off, a loc of -1 is passed on converted to unsigned
+		write_attrib_location(binding, a_idx, loc);
+		}
+	}
diff --git a/intern/gawain/src/batch.c b/intern/gawain/src/batch.c
new file mode 100644
index 00000000000..cac34d445bb
--- /dev/null
+++ b/intern/gawain/src/batch.c
@@ -0,0 +1,405 @@
+
+// Gawain geometry batch
+//
+// This code is part of the Gawain library, with modifications
+// specific to integration with Blender.
+//
+// Copyright 2016 Mike Erwin
+//
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of
+// the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#include "batch.h"
+#include "buffer_id.h"
+#include <stdlib.h>
+
+// necessary functions from matrix API
+extern void gpuBindMatrices(GLuint program);
+extern bool gpuMatricesDirty(void); // how best to use this here?
+
+Batch* Batch_create(PrimitiveType prim_type, VertexBuffer* verts, ElementList* elem)
+	{
+	// zero-filled allocation: fields that Batch_init does not set (vao_id, program, ...) start as 0
+	Batch* new_batch = calloc(1, sizeof *new_batch);
+	Batch_init(new_batch, prim_type, verts, elem);
+
+	return new_batch;
+	}
+
+void Batch_init(Batch* batch, PrimitiveType prim_type, VertexBuffer* verts, ElementList* elem)
+	{
+#if TRUST_NO_ONE
+	assert(verts != NULL);
+	// assert(prim_type == PRIM_POINTS || prim_type == PRIM_LINES || prim_type == PRIM_TRIANGLES);
+	// we will allow other primitive types in a future update
+#endif
+	// slot 0 holds the given VertexBuffer; the remaining slots start out empty
+	for (int slot = BATCH_MAX_VBO_CT - 1; slot > 0; --slot)
+		batch->verts[slot] = NULL;
+	batch->verts[0] = verts;
+	batch->prim_type = prim_type;
+	batch->elem = elem;
+	batch->phase = READY_TO_DRAW;
+	}
+
+void Batch_discard(Batch* batch)
+	{
+	if (batch->vao_id != 0) // vao_id is assigned by Batch_prime; 0 means there is no VAO to release
+		vao_id_free(batch->vao_id);
+
+	free(batch);
+	}
+
+void Batch_discard_all(Batch* batch)
+	{
+	// Discard a Batch together with everything it references.
+	// vertex buffers fill the slots in order from 0, so the first NULL ends the list
+	for (int slot = 0; slot < BATCH_MAX_VBO_CT && batch->verts[slot] != NULL; ++slot)
+		VertexBuffer_discard(batch->verts[slot]);
+
+	// the element list is optional
+	if (batch->elem != NULL)
+		ElementList_discard(batch->elem);
+
+	// finally release the VAO (if any) and the Batch itself
+	Batch_discard(batch);
+	}
+
+int Batch_add_VertexBuffer(Batch* batch, VertexBuffer* verts)
+	{ // store 'verts' in the first free slot; returns that slot's index, or -1 if every slot is taken
+	for (unsigned v = 0; v < BATCH_MAX_VBO_CT; ++v)
+		{
+		if (batch->verts[v] == NULL)
+			{
+#if TRUST_NO_ONE
+			// for now all VertexBuffers must have same vertex_ct
+			assert(verts->vertex_ct == batch->verts[0]->vertex_ct);
+			// in the near future we will enable instanced attribs which have their own vertex_ct
+#endif
+			batch->verts[v] = verts;
+			// TODO: mark dirty so we can keep attrib bindings up-to-date
+			return v;
+			}
+		}
+	
+	// we only make it this far if there is no room for another VertexBuffer
+#if TRUST_NO_ONE
+	assert(false);
+#endif
+	return -1;
+	}
+
+void Batch_set_program(Batch* batch, GLuint program)
+	{
+#if TRUST_NO_ONE
+	assert(glIsProgram(program));
+#endif
+	// remember the program; program_dirty makes the next draw refresh the attrib bindings
+	batch->program = program;
+	batch->program_dirty = true;
+
+	Batch_use_program(batch); // hack! to make Batch_Uniform* simpler
+	}
+
+static void Batch_update_program_bindings(Batch* batch)
+	{ // re-point the vertex attrib arrays at this batch's vertex buffers, using locations from batch->program
+	// disable all as a precaution
+	// why are we not using prev_enabled_attrib_bits?? see immediate.c
+	for (unsigned a_idx = 0; a_idx < MAX_VERTEX_ATTRIBS; ++a_idx)
+		glDisableVertexAttribArray(a_idx);
+
+	for (int v = 0; v < BATCH_MAX_VBO_CT; ++v)
+		{
+		VertexBuffer* verts = batch->verts[v];
+		if (verts == NULL)
+			break; // slots are filled in order, so the first empty one ends the list
+
+		const VertexFormat* format = &verts->format;
+
+		const unsigned attrib_ct = format->attrib_ct;
+		const unsigned stride = format->stride;
+
+		VertexBuffer_use(verts);
+
+		for (unsigned a_idx = 0; a_idx < attrib_ct; ++a_idx)
+			{
+			const Attrib* a = format->attribs + a_idx;
+
+			const GLvoid* pointer = (const GLubyte*)0 + a->offset; // byte offset expressed as a pointer
+
+			const GLint loc = glGetAttribLocation(batch->program, a->name);
+
+			if (loc == -1) continue; // program has no active attrib with this name
+
+			glEnableVertexAttribArray(loc);
+
+			switch (a->fetch_mode)
+				{
+				case KEEP_FLOAT:
+				case CONVERT_INT_TO_FLOAT:
+					glVertexAttribPointer(loc, a->comp_ct, a->comp_type, GL_FALSE, stride, pointer);
+					break;
+				case NORMALIZE_INT_TO_FLOAT:
+					glVertexAttribPointer(loc, a->comp_ct, a->comp_type, GL_TRUE, stride, pointer);
+					break;
+				case KEEP_INT:
+					glVertexAttribIPointer(loc, a->comp_ct, a->comp_type, stride, pointer);
+				}
+			}
+		}
+
+	batch->program_dirty = false;
+	}
+
+void Batch_use_program(Batch* batch)
+	{
+	// NOTE: use_program & done_using_program are fragile, depend on staying in sync with
+	//       the GL context's active program. use_program doesn't mark other programs as "not used".
+	// TODO: make not fragile (somehow)
+
+	if (batch->program_in_use)
+		return; // this batch already made its program current
+
+	glUseProgram(batch->program);
+	batch->program_in_use = true;
+	}
+
+void Batch_done_using_program(Batch* batch)
+	{
+	// nothing to undo unless this batch made its program current
+	if (!batch->program_in_use)
+		return;
+	glUseProgram(0);
+	batch->program_in_use = false;
+	}
+
+void Batch_Uniform1i(Batch* batch, const char* name, int value)
+	{
+	const GLint uniform_loc = glGetUniformLocation(batch->program, name);
+	// -1 means the batch's program has no active uniform with this name
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1);
+#endif
+	// glUniform* targets the program currently in use (Batch_set_program makes it current)
+	glUniform1i(uniform_loc, value);
+	}
+
+void Batch_Uniform1b(Batch* batch, const char* name, bool value)
+	{
+	const GLint uniform_loc = glGetUniformLocation(batch->program, name);
+	// -1 means the batch's program has no active uniform with this name
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1);
+#endif
+	// booleans are uploaded as an int holding GL_TRUE or GL_FALSE
+	glUniform1i(uniform_loc, value ? GL_TRUE : GL_FALSE);
+	}
+
+void Batch_Uniform2f(Batch* batch, const char* name, float x, float y)
+	{
+	const GLint uniform_loc = glGetUniformLocation(batch->program, name);
+	// -1 means the batch's program has no active uniform with this name
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1);
+#endif
+	// glUniform* targets the program currently in use (Batch_set_program makes it current)
+	glUniform2f(uniform_loc, x, y);
+	}
+
+void Batch_Uniform3f(Batch* batch, const char* name, float x, float y, float z)
+	{
+	const GLint uniform_loc = glGetUniformLocation(batch->program, name);
+	// -1 means the batch's program has no active uniform with this name
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1);
+#endif
+	// glUniform* targets the program currently in use (Batch_set_program makes it current)
+	glUniform3f(uniform_loc, x, y, z);
+	}
+
+void Batch_Uniform4f(Batch* batch, const char* name, float x, float y, float z, float w)
+	{
+	const GLint uniform_loc = glGetUniformLocation(batch->program, name);
+	// -1 means the batch's program has no active uniform with this name
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1);
+#endif
+	// glUniform* targets the program currently in use (Batch_set_program makes it current)
+	glUniform4f(uniform_loc, x, y, z, w);
+	}
+
+void Batch_Uniform1f(Batch* batch, const char* name, float x)
+	{
+	const GLint uniform_loc = glGetUniformLocation(batch->program, name);
+	// -1 means the batch's program has no active uniform with this name
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1);
+#endif
+	// glUniform* targets the program currently in use (Batch_set_program makes it current)
+	glUniform1f(uniform_loc, x);
+	}
+
+void Batch_Uniform3fv(Batch* batch, const char* name, const float data[3])
+	{
+	const GLint uniform_loc = glGetUniformLocation(batch->program, name);
+	// -1 means the batch's program has no active uniform with this name
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1);
+#endif
+	// upload a single vec3 (count = 1) read from 'data'
+	glUniform3fv(uniform_loc, 1, data);
+	}
+
+void Batch_Uniform4fv(Batch* batch, const char* name, const float data[4])
+	{
+	const GLint uniform_loc = glGetUniformLocation(batch->program, name);
+	// -1 means the batch's program has no active uniform with this name
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1);
+#endif
+	// upload a single vec4 (count = 1) read from 'data'
+	glUniform4fv(uniform_loc, 1, data);
+	}
+
+static void Batch_prime(Batch* batch)
+	{ // first-time setup: allocate this batch's VAO, bind it, then "use" each of the batch's buffers
+	batch->vao_id = vao_id_alloc();
+	glBindVertexArray(batch->vao_id);
+
+	for (int v = 0; v < BATCH_MAX_VBO_CT; ++v)
+		{
+		if (batch->verts[v] == NULL)
+			break; // first empty slot ends the list
+		VertexBuffer_use(batch->verts[v]);
+		}
+
+	if (batch->elem)
+		ElementList_use(batch->elem);
+
+	// vertex attribs and element list remain bound to this VAO
+	}
+
+void Batch_draw(Batch* batch)
+	{
+#if TRUST_NO_ONE
+	assert(batch->phase == READY_TO_DRAW);
+	assert(glIsProgram(batch->program));
+#endif
+	// bind this batch's VAO, creating it on the first draw
+	if (batch->vao_id)
+		glBindVertexArray(batch->vao_id);
+	else
+		Batch_prime(batch);
+
+	if (batch->program_dirty) // program was (re)set since the attrib pointers were last specified
+		Batch_update_program_bindings(batch);
+
+	Batch_use_program(batch);
+
+	gpuBindMatrices(batch->program);
+
+	if (batch->elem)
+		{
+		const ElementList* el = batch->elem;
+
+#if TRACK_INDEX_RANGE
+		if (el->base_index)
+			glDrawRangeElementsBaseVertex(batch->prim_type, el->min_index, el->max_index, el->index_ct, el->index_type, 0, el->base_index);
+		else
+			glDrawRangeElements(batch->prim_type, el->min_index, el->max_index, el->index_ct, el->index_type, 0);
+#else
+		glDrawElements(batch->prim_type, el->index_ct, GL_UNSIGNED_INT, 0);
+#endif
+		}
+	else
+		glDrawArrays(batch->prim_type, 0, batch->verts[0]->vertex_ct); // no element list: draw every vertex of the first buffer
+
+	Batch_done_using_program(batch);
+	glBindVertexArray(0);
+	}
+
+
+
+// clement : temp stuff
+void Batch_draw_stupid(Batch* batch)
+{ // like Batch_draw, but without the asserts, the program use/unuse and the matrix binding
+	if (batch->vao_id)
+		glBindVertexArray(batch->vao_id);
+	else
+		Batch_prime(batch);
+
+	if (batch->program_dirty) // program was (re)set since the attrib pointers were last specified
+		Batch_update_program_bindings(batch);
+
+	// Batch_use_program(batch);
+
+	//gpuBindMatrices(batch->program);
+
+	if (batch->elem)
+		{
+		const ElementList* el = batch->elem;
+
+#if TRACK_INDEX_RANGE
+		if (el->base_index)
+			glDrawRangeElementsBaseVertex(batch->prim_type, el->min_index, el->max_index, el->index_ct, el->index_type, 0, el->base_index);
+		else
+			glDrawRangeElements(batch->prim_type, el->min_index, el->max_index, el->index_ct, el->index_type, 0);
+#else
+		glDrawElements(batch->prim_type, el->index_ct, GL_UNSIGNED_INT, 0);
+#endif
+		}
+	else
+		glDrawArrays(batch->prim_type, 0, batch->verts[0]->vertex_ct); // no element list: draw every vertex of the first buffer
+
+	// Batch_done_using_program(batch);
+	glBindVertexArray(0);
+}
+
+// clement : temp stuff
+void Batch_draw_stupid_instanced(Batch* batch, unsigned int instance_vbo, int instance_count,
+                                 int attrib_nbr, int attrib_stride, int attrib_size[16], int attrib_loc[16])
+{ // instanced variant of Batch_draw_stupid: per-instance float attribs are read from instance_vbo
+	if (batch->vao_id)
+		glBindVertexArray(batch->vao_id);
+	else
+		Batch_prime(batch);
+
+	if (batch->program_dirty)
+		Batch_update_program_bindings(batch);
+
+	glBindBuffer(GL_ARRAY_BUFFER, instance_vbo);
+	int ptr_ofs = 0; // running offset into one instance's data, counted in floats
+	for (int i = 0; i < attrib_nbr; ++i) {
+		int size = attrib_size[i]; // float count of this attrib
+		int loc = attrib_loc[i]; // first location of this attrib
+		int atr_ofs = 0;
+		while (size > 0) { // attribs wider than 4 floats are split over consecutive locations, 4 floats each
+			glEnableVertexAttribArray(loc + atr_ofs);
+			glVertexAttribPointer(loc + atr_ofs, (size > 4) ? 4 : size, GL_FLOAT, GL_FALSE,
+			                      sizeof(float) * attrib_stride, (GLvoid*)(sizeof(float) * ptr_ofs));
+			glVertexAttribDivisor(loc + atr_ofs, 1); // advance once per instance
+			atr_ofs++;
+			ptr_ofs += (size > 4) ? 4 : size;
+			size -= 4;
+		}
+	}
+	glBindBuffer(GL_ARRAY_BUFFER, 0);
+	// Batch_use_program(batch);
+	//gpuBindMatrices(batch->program);
+
+	if (batch->elem)
+		{
+		const ElementList* el = batch->elem;
+#if TRACK_INDEX_RANGE
+		glDrawElementsInstancedBaseVertex(batch->prim_type, el->index_ct, el->index_type, 0, instance_count, el->base_index); // honor squeezed index type & base, as Batch_draw does
+#else
+		glDrawElementsInstanced(batch->prim_type, el->index_ct, GL_UNSIGNED_INT, 0, instance_count);
+#endif
+		}
+	else
+		glDrawArraysInstanced(batch->prim_type, 0, batch->verts[0]->vertex_ct, instance_count);
+
+	// Batch_done_using_program(batch);
+	glBindVertexArray(0);
+}
+
diff --git a/intern/gawain/src/buffer_id.cpp b/intern/gawain/src/buffer_id.cpp
new file mode 100644
index 00000000000..450656c4ebf
--- /dev/null
+++ b/intern/gawain/src/buffer_id.cpp
@@ -0,0 +1,115 @@
+
+// Gawain buffer IDs
+//
+// This code is part of the Gawain library, with modifications
+// specific to integration with Blender.
+//
+// Copyright 2016 Mike Erwin
+//
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of
+// the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#include "buffer_id.h"
+#include <mutex>
+#include <vector>
+
+#define ORPHAN_DEBUG 0
+
+#if ORPHAN_DEBUG
+	#include <cstdio>
+#endif
+
+static std::vector<GLuint> orphaned_buffer_ids; // buffer IDs freed off the main thread, awaiting deletion
+static std::vector<GLuint> orphaned_vao_ids; // same, for vertex array IDs
+
+static std::mutex orphan_mutex; // locked around every access to the two lists above
+
+extern "C" {
+extern int BLI_thread_is_main(void); // Blender-specific function
+}
+
+static bool thread_is_main()
+	{
+	// "main" here means the GL context's thread
+	return BLI_thread_is_main(); // int result from the C function, converted to bool
+	}
+
+GLuint buffer_id_alloc()
+	{ // returns a newly generated buffer ID; also deletes any buffer IDs orphaned by other threads
+#if TRUST_NO_ONE
+	assert(thread_is_main());
+#endif
+
+	// delete orphaned IDs (queued by buffer_id_free when called off the main thread)
+	orphan_mutex.lock();
+	if (!orphaned_buffer_ids.empty())
+		{
+		const auto orphaned_buffer_ct = (unsigned)orphaned_buffer_ids.size();
+#if ORPHAN_DEBUG
+		printf("deleting %u orphaned VBO%s\n", orphaned_buffer_ct, orphaned_buffer_ct == 1 ? "" : "s");
+#endif
+		glDeleteBuffers(orphaned_buffer_ct, orphaned_buffer_ids.data());
+		orphaned_buffer_ids.clear();
+		}
+	orphan_mutex.unlock();
+
+	GLuint new_buffer_id = 0;
+	glGenBuffers(1, &new_buffer_id);
+	return new_buffer_id;
+	}
+
+void buffer_id_free(GLuint buffer_id)
+	{ // delete the buffer right away on the main thread, otherwise queue its ID for later deletion
+	if (thread_is_main())
+		glDeleteBuffers(1, &buffer_id);
+	else
+		{
+		// add this ID to the orphaned list (emptied by the next buffer_id_alloc)
+		orphan_mutex.lock();
+#if ORPHAN_DEBUG
+		printf("orphaning VBO %u\n", buffer_id);
+#endif
+		orphaned_buffer_ids.emplace_back(buffer_id);
+		orphan_mutex.unlock();
+		}
+	}
+
+GLuint vao_id_alloc()
+	{ // returns a newly generated vertex array ID; also deletes any VAO IDs orphaned by other threads
+#if TRUST_NO_ONE
+	assert(thread_is_main());
+#endif
+
+	// delete orphaned IDs (queued by vao_id_free when called off the main thread)
+	orphan_mutex.lock();
+	if (!orphaned_vao_ids.empty())
+		{
+		const auto orphaned_vao_ct = (unsigned)orphaned_vao_ids.size();
+#if ORPHAN_DEBUG
+		printf("deleting %u orphaned VAO%s\n", orphaned_vao_ct, orphaned_vao_ct == 1 ? "" : "s");
+#endif
+		glDeleteVertexArrays(orphaned_vao_ct, orphaned_vao_ids.data());
+		orphaned_vao_ids.clear();
+		}
+	orphan_mutex.unlock();
+
+	GLuint new_vao_id = 0;
+	glGenVertexArrays(1, &new_vao_id);
+	return new_vao_id;
+	}
+
+void vao_id_free(GLuint vao_id)
+	{ // delete the VAO right away on the main thread, otherwise queue its ID for later deletion
+	if (thread_is_main())
+		glDeleteVertexArrays(1, &vao_id);
+	else
+		{
+		// add this ID to the orphaned list (emptied by the next vao_id_alloc)
+		orphan_mutex.lock();
+#if ORPHAN_DEBUG
+		printf("orphaning VAO %u\n", vao_id);
+#endif
+		orphaned_vao_ids.emplace_back(vao_id);
+		orphan_mutex.unlock();
+		}
+	}
diff --git a/intern/gawain/src/element.c b/intern/gawain/src/element.c
new file mode 100644
index 00000000000..3c3ca1c7626
--- /dev/null
+++ b/intern/gawain/src/element.c
@@ -0,0 +1,283 @@
+
+// Gawain element list (AKA index buffer)
+//
+// This code is part of the Gawain library, with modifications
+// specific to integration with Blender.
+//
+// Copyright 2016 Mike Erwin
+//
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of
+// the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#include "element.h"
+#include "buffer_id.h"
+#include <stdlib.h>
+
+#define KEEP_SINGLE_COPY 1
+
+unsigned ElementList_size(const ElementList* elem)
+	{ // size of the index data in bytes: index count times bytes per index
+#if TRACK_INDEX_RANGE
+	switch (elem->index_type) // with range tracking an index is stored in 8, 16 or 32 bits
+		{
+		case INDEX_U8: return elem->index_ct * sizeof(GLubyte);
+		case INDEX_U16: return elem->index_ct * sizeof(GLushort);
+		case INDEX_U32: return elem->index_ct * sizeof(GLuint);
+		default:
+	#if TRUST_NO_ONE
+			assert(false);
+	#endif
+			return 0; // unknown index type
+		}
+
+#else
+	return elem->index_ct * sizeof(GLuint); // without range tracking every index is a GLuint
+#endif
+	}
+
+static void ElementList_prime(ElementList* elem)
+	{ // first use: create the GL buffer, leave it bound, and upload the index data
+	elem->vbo_id = buffer_id_alloc();
+	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, elem->vbo_id);
+	// fill with delicious data & send to GPU the first time only
+	glBufferData(GL_ELEMENT_ARRAY_BUFFER, ElementList_size(elem), elem->data, GL_STATIC_DRAW);
+
+#if KEEP_SINGLE_COPY
+	// now that GL has a copy, discard original
+	free(elem->data);
+	elem->data = NULL;
+#endif
+	}
+
+void ElementList_use(ElementList* elem)
+	{
+	if (elem->vbo_id == 0)
+		ElementList_prime(elem); // no GL buffer yet: create, bind and fill it
+	else
+		glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, elem->vbo_id);
+	}
+
+void ElementListBuilder_init(ElementListBuilder* builder, PrimitiveType prim_type, unsigned prim_ct, unsigned vertex_ct)
+	{ // prepare 'builder' to collect the indices of up to prim_ct primitives; valid indices are 0 .. vertex_ct - 1
+	unsigned verts_per_prim = 0;
+	switch (prim_type)
+		{
+		case PRIM_POINTS:
+			verts_per_prim = 1;
+			break;
+		case PRIM_LINES:
+			verts_per_prim = 2;
+			break;
+		case PRIM_TRIANGLES:
+			verts_per_prim = 3;
+			break;
+		default:
+#if TRUST_NO_ONE
+			assert(false);
+#endif
+			return; // unsupported primitive type: builder is left untouched
+		}
+
+	builder->max_allowed_index = vertex_ct - 1;
+	builder->max_index_ct = prim_ct * verts_per_prim;
+	builder->index_ct = 0; // start empty
+	builder->prim_type = prim_type;
+	builder->data = calloc(builder->max_index_ct, sizeof(unsigned)); // room for every index, zero-filled
+	}
+
+void add_generic_vertex(ElementListBuilder* builder, unsigned v)
+	{ // append index 'v' to the builder, whatever its primitive type
+#if TRUST_NO_ONE
+	assert(builder->data != NULL);
+	assert(builder->index_ct < builder->max_index_ct);
+	assert(v <= builder->max_allowed_index);
+#endif
+
+	builder->data[builder->index_ct++] = v; // no bounds check unless TRUST_NO_ONE is on
+	}
+
+void add_point_vertex(ElementListBuilder* builder, unsigned v)
+	{ // append one point; the builder must have been initialized with PRIM_POINTS
+#if TRUST_NO_ONE
+	assert(builder->prim_type == PRIM_POINTS);
+#endif
+
+	add_generic_vertex(builder, v);
+	}
+
+void add_line_vertices(ElementListBuilder* builder, unsigned v1, unsigned v2)
+	{ // append one line segment; the builder must have been initialized with PRIM_LINES
+#if TRUST_NO_ONE
+	assert(builder->prim_type == PRIM_LINES);
+	assert(v1 != v2); // both ends on the same vertex is rejected
+#endif
+
+	add_generic_vertex(builder, v1);
+	add_generic_vertex(builder, v2);
+	}
+
+void add_triangle_vertices(ElementListBuilder* builder, unsigned v1, unsigned v2, unsigned v3)
+	{ // append one triangle; the builder must have been initialized with PRIM_TRIANGLES
+#if TRUST_NO_ONE
+	assert(builder->prim_type == PRIM_TRIANGLES);
+	assert(v1 != v2 && v2 != v3 && v3 != v1); // all three corners must be different vertices
+#endif
+
+	add_generic_vertex(builder, v1);
+	add_generic_vertex(builder, v2);
+	add_generic_vertex(builder, v3);
+	}
+
+#if TRACK_INDEX_RANGE
+// Everything remains 32 bit while building to keep things simple.
+// Find min/max after, then convert to smallest index type possible.
+
+static unsigned index_range(const unsigned values[], unsigned value_ct, unsigned* min_out, unsigned* max_out)
+	{ // writes the smallest & largest value through the out params, returns largest - smallest
+	unsigned min_value = (value_ct > 0) ? values[0] : 0; // empty list: report min = max = 0 without reading values[0]
+	unsigned max_value = min_value;
+	for (unsigned i = 1; i < value_ct; ++i)
+		{
+		const unsigned value = values[i];
+		if (value < min_value)
+			min_value = value;
+		else if (value > max_value)
+			max_value = value;
+		}
+	*min_out = min_value;
+	*max_out = max_value;
+	return max_value - min_value;
+	}
+
+static void squeeze_indices_byte(const unsigned values[], ElementList* elem)
+	{ // copy the 32 bit 'values' into a newly allocated 8 bit array, stored in elem->data
+	const unsigned index_ct = elem->index_ct;
+	GLubyte* data = malloc(index_ct * sizeof(GLubyte));
+
+	if (elem->max_index > 0xFF)
+		{ // largest index does not fit in a byte: store indices relative to min_index
+		const unsigned base = elem->min_index;
+
+		elem->base_index = base;
+		elem->min_index = 0;
+		elem->max_index -= base;
+
+		for (unsigned i = 0; i < index_ct; ++i)
+			data[i] = (GLubyte)(values[i] - base);
+		}
+	else
+		{ // every index already fits: copy unchanged
+		elem->base_index = 0;
+
+		for (unsigned i = 0; i < index_ct; ++i)
+			data[i] = (GLubyte)(values[i]);
+		}
+
+	elem->data = data;
+	}
+
+static void squeeze_indices_short(const unsigned values[], ElementList* elem)
+	{ // copy the 32 bit 'values' into a newly allocated 16 bit array, stored in elem->data
+	const unsigned index_ct = elem->index_ct;
+	GLushort* data = malloc(index_ct * sizeof(GLushort));
+
+	if (elem->max_index > 0xFFFF)
+		{ // largest index does not fit in 16 bits: store indices relative to min_index
+		const unsigned base = elem->min_index;
+
+		elem->base_index = base;
+		elem->min_index = 0;
+		elem->max_index -= base;
+
+		for (unsigned i = 0; i < index_ct; ++i)
+			data[i] = (GLushort)(values[i] - base);
+		}
+	else
+		{ // every index already fits: copy unchanged
+		elem->base_index = 0;
+
+		for (unsigned i = 0; i < index_ct; ++i)
+			data[i] = (GLushort)(values[i]);
+		}
+
+	elem->data = data;
+	}
+
+#endif // TRACK_INDEX_RANGE
+
+ElementList* ElementList_build(ElementListBuilder* builder)
+	{ // heap-allocating variant of ElementList_build_in_place; release the result with ElementList_discard
+	ElementList* elem = calloc(1, sizeof(ElementList));
+	ElementList_build_in_place(builder, elem);
+	return elem;
+	}
+
+void ElementList_build_in_place(ElementListBuilder* builder, ElementList* elem)
+	{ // fill 'elem' from the indices collected in 'builder'; the builder's data is consumed
+#if TRUST_NO_ONE
+	assert(builder->data != NULL);
+#endif
+
+	elem->index_ct = builder->index_ct;
+
+#if TRACK_INDEX_RANGE
+	const unsigned range = index_range(builder->data, builder->index_ct, &elem->min_index, &elem->max_index);
+
+	if (range <= 0xFF)
+		{
+		elem->index_type = INDEX_U8;
+		squeeze_indices_byte(builder->data, elem); // allocates a separate, narrower copy
+		}
+	else if (range <= 0xFFFF)
+		{
+		elem->index_type = INDEX_U16;
+		squeeze_indices_short(builder->data, elem); // allocates a separate, narrower copy
+		}
+	else
+		{
+		elem->index_type = INDEX_U32;
+		elem->base_index = 0;
+
+		if (builder->index_ct < builder->max_index_ct)
+			{
+			builder->data = realloc(builder->data, builder->index_ct * sizeof(unsigned)); // shrink to the part actually used
+			// TODO: realloc only if index_ct is much smaller than max_index_ct
+			}
+
+		elem->data = builder->data; // elem takes over the builder's array
+		}
+#else
+	if (builder->index_ct < builder->max_index_ct)
+		{
+		builder->data = realloc(builder->data, builder->index_ct * sizeof(unsigned)); // shrink to the part actually used
+		// TODO: realloc only if index_ct is much smaller than max_index_ct
+		}
+
+	elem->data = builder->data; // elem takes over the builder's array
+#endif
+
+	// elem->data will never be *larger* than builder->data... how about converting
+	// in place to avoid extra allocation?
+
+	elem->vbo_id = 0; // GL buffer is created lazily by ElementList_use
+	// TODO: create GL buffer object directly, based on an input flag
+
+	// discard builder (one-time use)
+	if (builder->data != elem->data) // only free the array if elem did not take it over
+		free(builder->data);
+	builder->data = NULL;
+	// other fields are safe to leave
+	}
+
+void ElementList_discard(ElementList* elem)
+	{ // release the GL buffer and/or the CPU-side index data, then the ElementList itself
+	if (elem->vbo_id)
+		buffer_id_free(elem->vbo_id);
+#if KEEP_SINGLE_COPY
+	else // with KEEP_SINGLE_COPY the data was already freed when the GL buffer was created
+#endif
+	if (elem->data)
+		free(elem->data);
+
+	free(elem);
+	}
diff --git a/intern/gawain/src/imm_util.c b/intern/gawain/src/imm_util.c
new file mode 100644
index 00000000000..74caeb6fd3a
--- /dev/null
+++ b/intern/gawain/src/imm_util.c
@@ -0,0 +1,46 @@
+
+// Gawain immediate mode drawing utilities
+//
+// This code is part of the Gawain library, with modifications
+// specific to integration with Blender.
+//
+// Copyright 2016 Mike Erwin
+//
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of
+// the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#include "imm_util.h"
+#include "immediate.h"
+
+
+void immRectf(unsigned pos, float x1, float y1, float x2, float y2)
+{ // emit the rectangle with corners (x1, y1) and (x2, y2) as a 4-vertex triangle fan on attrib 'pos'
+	immBegin(PRIM_TRIANGLE_FAN, 4);
+	immVertex2f(pos, x1, y1);
+	immVertex2f(pos, x2, y1);
+	immVertex2f(pos, x2, y2);
+	immVertex2f(pos, x1, y2);
+	immEnd();
+}
+
+void immRecti(unsigned pos, int x1, int y1, int x2, int y2)
+{ // integer-coordinate version of immRectf: same corner order, same 4-vertex triangle fan
+	immBegin(PRIM_TRIANGLE_FAN, 4);
+	immVertex2i(pos, x1, y1);
+	immVertex2i(pos, x2, y1);
+	immVertex2i(pos, x2, y2);
+	immVertex2i(pos, x1, y2);
+	immEnd();
+}
+
+#if 0 // more complete version in case we want that
+void immRecti_complete(int x1, int y1, int x2, int y2, const float color[4])
+{
+	VertexFormat *format = immVertexFormat();
+	unsigned pos = add_attrib(format, "pos", COMP_I32, 2, CONVERT_INT_TO_FLOAT);
+	immBindBuiltinProgram(GPU_SHADER_2D_UNIFORM_COLOR);
+	immUniformColor4fv(color);
+	immRecti(pos, x1, y1, x2, y2);
+	immUnbindProgram();
+}
+#endif
diff --git a/intern/gawain/src/immediate.c b/intern/gawain/src/immediate.c
new file mode 100644
index 00000000000..4e584dcb3cb
--- /dev/null
+++ b/intern/gawain/src/immediate.c
@@ -0,0 +1,884 @@
+
+// Gawain immediate mode work-alike
+//
+// This code is part of the Gawain library, with modifications
+// specific to integration with Blender.
+//
+// Copyright 2016 Mike Erwin
+//
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of
+// the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#include "immediate.h"
+#include "attrib_binding.h"
+#include "buffer_id.h"
+#include <string.h>
+
+// necessary functions from matrix API
+extern void gpuBindMatrices(GLuint program);
+extern bool gpuMatricesDirty(void);
+
+typedef struct {
+	// TODO: organize this struct by frequency of change (run-time)
+
+#if IMM_BATCH_COMBO
+	Batch* batch; // batch being filled between immBeginBatch and the matching end
+#endif
+
+	// current draw call
+	GLubyte* buffer_data; // start of the mapped range for this draw call
+	unsigned buffer_offset; // byte offset into the VBO where this draw call's data starts
+	unsigned buffer_bytes_mapped;
+	unsigned vertex_ct; // vertex count passed to immBegin
+	bool strict_vertex_ct; // false after immBeginAtMost: fewer vertices than vertex_ct are allowed
+	PrimitiveType prim_type; // PRIM_NONE when not between a Begin/End pair
+
+	VertexFormat vertex_format;
+
+	// current vertex
+	unsigned vertex_idx;
+	GLubyte* vertex_data;
+	uint16_t unassigned_attrib_bits; // which attributes of current vertex have not been given values?
+
+	GLuint vbo_id; // the immediate-mode buffer, IMM_BUFFER_SIZE bytes
+	GLuint vao_id; // 0 while deactivated
+	
+	GLuint bound_program; // 0 when no program is bound
+	AttribBinding attrib_binding;
+	uint16_t prev_enabled_attrib_bits; // <-- only affects this VAO, so we're ok
+} Immediate;
+
+// size of internal buffer -- make this adjustable?
+#define IMM_BUFFER_SIZE (4 * 1024 * 1024)
+
+static bool initialized = false;
+static Immediate imm;
+
+void immInit(void)
+	{ // one-time setup of the immediate-mode state: creates the VBO, then activates (creates the VAO)
+#if TRUST_NO_ONE
+	assert(!initialized);
+#endif
+
+	memset(&imm, 0, sizeof(Immediate));
+
+	imm.vbo_id = buffer_id_alloc();
+	glBindBuffer(GL_ARRAY_BUFFER, imm.vbo_id);
+	glBufferData(GL_ARRAY_BUFFER, IMM_BUFFER_SIZE, NULL, GL_DYNAMIC_DRAW); // allocate storage only, no initial data
+
+#if APPLE_LEGACY
+	glBufferParameteriAPPLE(GL_ARRAY_BUFFER, GL_BUFFER_SERIALIZED_MODIFY_APPLE, GL_FALSE);
+	glBufferParameteriAPPLE(GL_ARRAY_BUFFER, GL_BUFFER_FLUSHING_UNMAP_APPLE, GL_FALSE);
+#endif
+
+	imm.prim_type = PRIM_NONE; // not between a Begin/End pair
+	imm.strict_vertex_ct = true;
+
+	glBindBuffer(GL_ARRAY_BUFFER, 0);
+	initialized = true;
+
+	immActivate();
+	}
+
+void immActivate(void)
+	{ // allocate the VAO used for immediate-mode drawing; counterpart of immDeactivate
+#if TRUST_NO_ONE
+	assert(initialized);
+	assert(imm.prim_type == PRIM_NONE); // make sure we're not between a Begin/End pair
+	assert(imm.vao_id == 0); // must not already be active
+#endif
+
+	imm.vao_id = vao_id_alloc();
+	}
+
+void immDeactivate(void)
+	{ // release the VAO allocated by immActivate and forget which attribs it had enabled
+#if TRUST_NO_ONE
+	assert(initialized);
+	assert(imm.prim_type == PRIM_NONE); // make sure we're not between a Begin/End pair
+	assert(imm.vao_id != 0); // must currently be active
+#endif
+
+	vao_id_free(imm.vao_id);
+	imm.vao_id = 0;
+	imm.prev_enabled_attrib_bits = 0; // the next VAO starts with no attrib arrays enabled by us
+	}
+
+void immDestroy(void)
+	{ // undo immInit: release the VAO and the VBO, and allow immInit to be called again
+	immDeactivate();
+	buffer_id_free(imm.vbo_id);
+	initialized = false;
+	}
+
+VertexFormat* immVertexFormat(void)
+	{ // returns the shared immediate-mode vertex format, cleared and ready to receive attribs
+	VertexFormat_clear(&imm.vertex_format);
+	return &imm.vertex_format;
+	}
+
+void immBindProgram(GLuint program)
+	{ // make 'program' current and look up the locations of the current vertex format's attribs in it
+#if TRUST_NO_ONE
+	assert(imm.bound_program == 0); // the previous program must have been unbound first
+	assert(glIsProgram(program));
+#endif
+
+	if (!imm.vertex_format.packed)
+		VertexFormat_pack(&imm.vertex_format);
+
+	glUseProgram(program);
+	get_attrib_locations(&imm.vertex_format, &imm.attrib_binding, program);
+	imm.bound_program = program;
+
+	gpuBindMatrices(program);
+	}
+
+void immUnbindProgram(void)
+	{ // counterpart of immBindProgram: no program is current afterwards
+#if TRUST_NO_ONE
+	assert(imm.bound_program != 0);
+#endif
+
+	glUseProgram(0);
+	imm.bound_program = 0;
+	}
+
+#if TRUST_NO_ONE
+static bool vertex_count_makes_sense_for_primitive(unsigned vertex_ct, PrimitiveType prim_type)
+	{
+	// does vertex_ct make sense for this primitive type?
+	if (vertex_ct == 0)
+		return false; // zero vertices is rejected for every primitive type
+
+	switch (prim_type)
+		{
+		case PRIM_POINTS:
+			return true;
+		case PRIM_LINES:
+			return vertex_ct % 2 == 0; // whole line segments only
+		case PRIM_LINE_STRIP:
+		case PRIM_LINE_LOOP:
+			return vertex_ct >= 2;
+		case PRIM_LINE_STRIP_ADJACENCY:
+			return vertex_ct >= 4;
+		case PRIM_TRIANGLES:
+			return vertex_ct % 3 == 0; // whole triangles only
+		case PRIM_TRIANGLE_STRIP:
+		case PRIM_TRIANGLE_FAN:
+			return vertex_ct >= 3;
+  #ifdef WITH_GL_PROFILE_COMPAT
+		case PRIM_QUADS:
+			return vertex_ct % 4 == 0; // whole quads only
+  #endif
+		default:
+			return false; // any other primitive type is rejected
+		}
+	}
+#endif
+
+void immBegin(PrimitiveType prim_type, unsigned vertex_ct)
+	{
+#if TRUST_NO_ONE
+	assert(initialized);
+	assert(imm.prim_type == PRIM_NONE); // make sure we haven't already begun
+	assert(vertex_count_makes_sense_for_primitive(vertex_ct, prim_type));
+#endif
+	// Start a draw call of vertex_ct vertices: reserve & map room for them in the immediate-mode VBO.
+	imm.prim_type = prim_type;
+	imm.vertex_ct = vertex_ct;
+	imm.vertex_idx = 0;
+	imm.unassigned_attrib_bits = imm.attrib_binding.enabled_bits;
+
+	// how many bytes do we need for this draw call?
+	const unsigned bytes_needed = vertex_buffer_size(&imm.vertex_format, vertex_ct);
+
+#if TRUST_NO_ONE
+	assert(bytes_needed <= IMM_BUFFER_SIZE);
+#endif
+
+	glBindBuffer(GL_ARRAY_BUFFER, imm.vbo_id);
+
+	// does the current buffer have enough room?
+	const unsigned available_bytes = IMM_BUFFER_SIZE - imm.buffer_offset;
+	// ensure vertex data is aligned
+	const unsigned pre_padding = padding(imm.buffer_offset, imm.vertex_format.stride); // might waste a little space, but it's safe
+	if ((bytes_needed + pre_padding) <= available_bytes)
+		imm.buffer_offset += pre_padding;
+	else
+		{
+		// orphan this buffer & start with a fresh one
+#if APPLE_LEGACY
+		glBufferData(GL_ARRAY_BUFFER, IMM_BUFFER_SIZE, NULL, GL_DYNAMIC_DRAW);
+#else
+		if (GLEW_VERSION_4_3 || GLEW_ARB_invalidate_subdata)
+			glInvalidateBufferData(imm.vbo_id);
+		else
+			glBufferData(GL_ARRAY_BUFFER, IMM_BUFFER_SIZE, NULL, GL_DYNAMIC_DRAW); // re-specify storage; mapping with only the INVALIDATE bit is a GL error
+#endif
+
+		imm.buffer_offset = 0;
+		}
+
+//	printf("mapping %u to %u\n", imm.buffer_offset, imm.buffer_offset + bytes_needed - 1);
+
+#if APPLE_LEGACY
+	imm.buffer_data = (GLubyte*)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY) + imm.buffer_offset; // cast first: no arithmetic on void*
+#else
+	imm.buffer_data = glMapBufferRange(GL_ARRAY_BUFFER, imm.buffer_offset, bytes_needed,
+	                                   GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | (imm.strict_vertex_ct ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT));
+#endif
+
+#if TRUST_NO_ONE
+	assert(imm.buffer_data != NULL);
+#endif
+
+	imm.buffer_bytes_mapped = bytes_needed;
+	imm.vertex_data = imm.buffer_data; // first vertex is written at the start of the mapped range
+	}
+
+void immBeginAtMost(PrimitiveType prim_type, unsigned vertex_ct)
+	{ // like immBegin, but vertex_ct is only an upper bound on the vertices that will be supplied
+#if TRUST_NO_ONE
+	assert(vertex_ct > 0);
+#endif
+
+	imm.strict_vertex_ct = false; // must be set before immBegin, which reads it when mapping the buffer
+	immBegin(prim_type, vertex_ct);
+	}
+
+#if IMM_BATCH_COMBO
+
+Batch* immBeginBatch(PrimitiveType prim_type, unsigned vertex_ct)
+	{ // like immBegin, but vertices are written into a new VertexBuffer owned by a new Batch, which is returned
+#if TRUST_NO_ONE
+	assert(initialized);
+	assert(imm.prim_type == PRIM_NONE); // make sure we haven't already begun
+	assert(vertex_count_makes_sense_for_primitive(vertex_ct, prim_type));
+#endif
+
+	imm.prim_type = prim_type;
+	imm.vertex_ct = vertex_ct;
+	imm.vertex_idx = 0;
+	imm.unassigned_attrib_bits = imm.attrib_binding.enabled_bits;
+
+	VertexBuffer* verts = VertexBuffer_create_with_format(&imm.vertex_format);
+	VertexBuffer_allocate_data(verts, vertex_ct);
+
+	imm.buffer_bytes_mapped = VertexBuffer_size(verts);
+	imm.vertex_data = verts->data; // vertices go straight into the VertexBuffer's own storage
+
+	imm.batch = Batch_create(prim_type, verts, NULL);
+	imm.batch->phase = BUILDING; // Batch_create leaves it READY_TO_DRAW; it is not drawable yet
+
+	Batch_set_program(imm.batch, imm.bound_program);
+
+	return imm.batch;
+	}
+
+Batch* immBeginBatchAtMost(PrimitiveType prim_type, unsigned vertex_ct)
+	{ // like immBeginBatch, but vertex_ct is only an upper bound on the vertices that will be supplied
+	imm.strict_vertex_ct = false;
+	return immBeginBatch(prim_type, vertex_ct);
+	}
+
+#endif // IMM_BATCH_COMBO
+
+static void immDrawSetup(void)
+	{
+	// set up VAO -- can be done during Begin or End really
+	glBindVertexArray(imm.vao_id);
+	// NOTE(review): enabled_bits is set per attrib index (write_attrib_location) but read per location below -- confirm these always agree
+	// enable/disable vertex attribs as needed
+	if (imm.attrib_binding.enabled_bits != imm.prev_enabled_attrib_bits)
+		{
+		for (unsigned loc = 0; loc < MAX_VERTEX_ATTRIBS; ++loc)
+			{
+			bool is_enabled = imm.attrib_binding.enabled_bits & (1 << loc);
+			bool was_enabled = imm.prev_enabled_attrib_bits & (1 << loc);
+
+			if (is_enabled && !was_enabled)
+				{
+//				printf("enabling attrib %u\n", loc);
+				glEnableVertexAttribArray(loc);
+				}
+			else if (was_enabled && !is_enabled)
+				{
+//				printf("disabling attrib %u\n", loc);
+				glDisableVertexAttribArray(loc);
+				}
+			}
+
+		imm.prev_enabled_attrib_bits = imm.attrib_binding.enabled_bits;
+		}
+
+	const unsigned stride = imm.vertex_format.stride;
+	// point every attrib at its data inside the immediate-mode VBO for this draw call
+	for (unsigned a_idx = 0; a_idx < imm.vertex_format.attrib_ct; ++a_idx)
+		{
+		const Attrib* a = imm.vertex_format.attribs + a_idx;
+
+		const unsigned offset = imm.buffer_offset + a->offset; // start of this draw call's data + attrib offset within a vertex
+		const GLvoid* pointer = (const GLubyte*)0 + offset; // byte offset expressed as a pointer
+
+		const unsigned loc = read_attrib_location(&imm.attrib_binding, a_idx);
+
+//		printf("specifying attrib %u '%s' with offset %u, stride %u\n", loc, a->name, offset, stride);
+
+		switch (a->fetch_mode)
+			{
+			case KEEP_FLOAT:
+			case CONVERT_INT_TO_FLOAT:
+				glVertexAttribPointer(loc, a->comp_ct, a->comp_type, GL_FALSE, stride, pointer);
+				break;
+			case NORMALIZE_INT_TO_FLOAT:
+				glVertexAttribPointer(loc, a->comp_ct, a->comp_type, GL_TRUE, stride, pointer);
+				break;
+			case KEEP_INT:
+				glVertexAttribIPointer(loc, a->comp_ct, a->comp_type, stride, pointer);
+			}
+		}
+
+	if (gpuMatricesDirty()) // re-send matrices only if they changed
+		gpuBindMatrices(imm.bound_program);
+	}
+
+void immEnd(void)
+	{
+#if TRUST_NO_ONE
+	assert(imm.prim_type != PRIM_NONE); // make sure we're between a Begin/End pair
+#endif
+	// Finish the current Begin/End pair: work out how many bytes were written, then either complete the batch being built or unmap the buffer and draw.
+	unsigned buffer_bytes_used;
+	if (imm.strict_vertex_ct)
+		{
+#if TRUST_NO_ONE
+		assert(imm.vertex_idx == imm.vertex_ct); // with all vertices defined
+#endif
+		buffer_bytes_used = imm.buffer_bytes_mapped;
+		}
+	else
+		{
+#if TRUST_NO_ONE
+		assert(imm.vertex_idx <= imm.vertex_ct);
+#endif
+		// printf("used %u of %u verts,", imm.vertex_idx, imm.vertex_ct);
+		if (imm.vertex_idx == imm.vertex_ct)
+			{
+			buffer_bytes_used = imm.buffer_bytes_mapped;
+			}
+		else
+			{
+#if TRUST_NO_ONE
+			assert(imm.vertex_idx == 0 || vertex_count_makes_sense_for_primitive(imm.vertex_idx, imm.prim_type));
+#endif
+			imm.vertex_ct = imm.vertex_idx;
+			buffer_bytes_used = vertex_buffer_size(&imm.vertex_format, imm.vertex_ct);
+			// unused buffer bytes are available to the next immBegin
+			// printf(" %u of %u bytes\n", buffer_bytes_used, imm.buffer_bytes_mapped);
+			}
+#if !APPLE_LEGACY
+		// tell OpenGL what range was modified so it doesn't copy the whole mapped range
+		// NOTE(review): this flush also runs when imm.batch is set (immBeginBatchAtMost), where no buffer mapping is visible -- confirm that is intended
+		glFlushMappedBufferRange(GL_ARRAY_BUFFER, 0, buffer_bytes_used);
+#endif
+		}
+
+#if IMM_BATCH_COMBO
+	if (imm.batch)
+		{
+		if (buffer_bytes_used != imm.buffer_bytes_mapped) // fewer vertices than allocated: shrink the batch's vertex buffer
+			{
+			VertexBuffer_resize_data(imm.batch->verts[0], imm.vertex_ct);
+			// TODO: resize only if vertex count is much smaller
+			}
+
+		imm.batch->phase = READY_TO_DRAW;
+		imm.batch = NULL; // don't free, batch belongs to caller
+		}
+	else
+#endif
+		{
+#if APPLE_LEGACY
+		// tell OpenGL what range was modified so it doesn't copy the whole buffer
+		// printf("flushing %u to %u\n", imm.buffer_offset, imm.buffer_offset + buffer_bytes_used - 1);
+		glFlushMappedBufferRangeAPPLE(GL_ARRAY_BUFFER, imm.buffer_offset, buffer_bytes_used);
+#endif
+		glUnmapBuffer(GL_ARRAY_BUFFER);
+		// nothing to draw if no vertices were emitted (allowed only in the "at most" case)
+		if (imm.vertex_ct > 0)
+			{
+			immDrawSetup();
+			glDrawArrays(imm.prim_type, 0, imm.vertex_ct);
+			}
+
+		glBindBuffer(GL_ARRAY_BUFFER, 0);
+		glBindVertexArray(0);
+
+		// prep for next immBegin
+		imm.buffer_offset += buffer_bytes_used;
+		}
+
+	// prep for next immBegin
+	imm.prim_type = PRIM_NONE;
+	imm.strict_vertex_ct = true;
+	}
+
+static void setAttribValueBit(unsigned attrib_id)
+	{
+	// one bit per attribute; a set bit means "no value given yet for the current vertex"
+	const uint16_t attrib_bit = 1 << attrib_id;
+#if TRUST_NO_ONE
+	assert((imm.unassigned_attrib_bits & attrib_bit) != 0); // not already set
+#endif
+	// clear the bit: this attribute now has a value
+	imm.unassigned_attrib_bits = imm.unassigned_attrib_bits & ~attrib_bit;
+	}
+
+
+// --- generic attribute functions ---
+
+void immAttrib1f(unsigned attrib_id, float x)
+	{
+	// store one float as the value of attribute attrib_id for the vertex being built
+	Attrib* a = &imm.vertex_format.attribs[attrib_id];
+#if TRUST_NO_ONE
+	assert(attrib_id < imm.vertex_format.attrib_ct);
+	assert(a->comp_type == COMP_F32);
+	assert(a->comp_ct == 1);
+	assert(imm.vertex_idx < imm.vertex_ct);
+	assert(imm.prim_type != PRIM_NONE); // make sure we're between a Begin/End pair
+#endif
+
+	setAttribValueBit(attrib_id); // record that this attrib has a value for the current vertex
+
+	// imm.vertex_data addresses the current vertex; the attribute sits a->offset bytes into it
+	float* dst = (float*)(imm.vertex_data + a->offset);
+
+	*dst = x;
+	}
+
+void immAttrib2f(unsigned attrib_id, float x, float y)
+	{
+	// store two floats as the value of attribute attrib_id for the vertex being built
+	Attrib* a = &imm.vertex_format.attribs[attrib_id];
+#if TRUST_NO_ONE
+	assert(attrib_id < imm.vertex_format.attrib_ct);
+	assert(a->comp_type == COMP_F32);
+	assert(a->comp_ct == 2);
+	assert(imm.vertex_idx < imm.vertex_ct);
+	assert(imm.prim_type != PRIM_NONE); // make sure we're between a Begin/End pair
+#endif
+
+	setAttribValueBit(attrib_id); // record that this attrib has a value for the current vertex
+
+	// imm.vertex_data addresses the current vertex; the attribute sits a->offset bytes into it
+	float* dst = (float*)(imm.vertex_data + a->offset);
+
+	dst[0] = x;
+	dst[1] = y;
+	}
+
+void immAttrib3f(unsigned attrib_id, float x, float y, float z)
+	{
+	// store three floats as the value of attribute attrib_id for the vertex being built
+	Attrib* a = &imm.vertex_format.attribs[attrib_id];
+#if TRUST_NO_ONE
+	assert(attrib_id < imm.vertex_format.attrib_ct);
+	assert(a->comp_type == COMP_F32);
+	assert(a->comp_ct == 3);
+	assert(imm.vertex_idx < imm.vertex_ct);
+	assert(imm.prim_type != PRIM_NONE); // make sure we're between a Begin/End pair
+#endif
+
+	setAttribValueBit(attrib_id); // record that this attrib has a value for the current vertex
+
+	// imm.vertex_data addresses the current vertex; the attribute sits a->offset bytes into it
+	float* dst = (float*)(imm.vertex_data + a->offset);
+
+	dst[0] = x;
+	dst[1] = y;
+	dst[2] = z;
+	}
+
+void immAttrib4f(unsigned attrib_id, float x, float y, float z, float w)
+	{
+	// store four floats as the value of attribute attrib_id for the vertex being built
+	Attrib* a = &imm.vertex_format.attribs[attrib_id];
+#if TRUST_NO_ONE
+	assert(attrib_id < imm.vertex_format.attrib_ct);
+	assert(a->comp_type == COMP_F32);
+	assert(a->comp_ct == 4);
+	assert(imm.vertex_idx < imm.vertex_ct);
+	assert(imm.prim_type != PRIM_NONE); // make sure we're between a Begin/End pair
+#endif
+
+	setAttribValueBit(attrib_id); // record that this attrib has a value for the current vertex
+
+	// imm.vertex_data addresses the current vertex; the attribute sits a->offset bytes into it
+	float* dst = (float*)(imm.vertex_data + a->offset);
+
+	dst[0] = x;
+	dst[1] = y;
+	dst[2] = z;
+	dst[3] = w;
+	}
+
+void immAttrib2i(unsigned attrib_id, int x, int y)
+	{
+	// store two ints as the value of attribute attrib_id for the vertex being built
+	Attrib* a = &imm.vertex_format.attribs[attrib_id];
+#if TRUST_NO_ONE
+	assert(attrib_id < imm.vertex_format.attrib_ct);
+	assert(a->comp_type == COMP_I32);
+	assert(a->comp_ct == 2);
+	assert(imm.vertex_idx < imm.vertex_ct);
+	assert(imm.prim_type != PRIM_NONE); // make sure we're between a Begin/End pair
+#endif
+
+	setAttribValueBit(attrib_id); // record that this attrib has a value for the current vertex
+
+	int* dst = (int*)(imm.vertex_data + a->offset); // attribute's slot inside the current vertex
+
+	dst[0] = x;
+	dst[1] = y;
+	}
+
+void immAttrib2s(unsigned attrib_id, short x, short y)
+	{
+	// store two shorts as the value of attribute attrib_id for the vertex being built
+	Attrib* a = &imm.vertex_format.attribs[attrib_id];
+#if TRUST_NO_ONE
+	assert(attrib_id < imm.vertex_format.attrib_ct);
+	assert(a->comp_type == COMP_I16);
+	assert(a->comp_ct == 2);
+	assert(imm.vertex_idx < imm.vertex_ct);
+	assert(imm.prim_type != PRIM_NONE); // make sure we're between a Begin/End pair
+#endif
+
+	setAttribValueBit(attrib_id); // record that this attrib has a value for the current vertex
+
+	short* dst = (short*)(imm.vertex_data + a->offset); // attribute's slot inside the current vertex
+
+	dst[0] = x;
+	dst[1] = y;
+	}
+
+void immAttrib3fv(unsigned attrib_id, const float data[3])
+	{ // array form of immAttrib3f: reads data[0..2]
+	immAttrib3f(attrib_id, data[0], data[1], data[2]);
+	}
+
+void immAttrib4fv(unsigned attrib_id, const float data[4])
+	{ // array form of immAttrib4f: reads data[0..3]
+	immAttrib4f(attrib_id, data[0], data[1], data[2], data[3]);
+	}
+
+void immAttrib3ub(unsigned attrib_id, unsigned char r, unsigned char g, unsigned char b)
+	{
+	// store three unsigned bytes as the value of attribute attrib_id for the vertex being built
+	Attrib* a = &imm.vertex_format.attribs[attrib_id];
+#if TRUST_NO_ONE
+	assert(attrib_id < imm.vertex_format.attrib_ct);
+	assert(a->comp_type == COMP_U8);
+	assert(a->comp_ct == 3);
+	assert(imm.vertex_idx < imm.vertex_ct);
+	assert(imm.prim_type != PRIM_NONE); // make sure we're between a Begin/End pair
+#endif
+
+	setAttribValueBit(attrib_id); // record that this attrib has a value for the current vertex
+
+	// imm.vertex_data addresses the current vertex; the attribute sits a->offset bytes into it
+	GLubyte* dst = imm.vertex_data + a->offset;
+
+	dst[0] = r;
+	dst[1] = g;
+	dst[2] = b;
+	}
+
+void immAttrib4ub(unsigned attrib_id, unsigned char r, unsigned char g, unsigned char b, unsigned char a)
+	{
+	// store four unsigned bytes as the value of attribute attrib_id for the vertex being built
+	Attrib* attr = &imm.vertex_format.attribs[attrib_id];
+#if TRUST_NO_ONE
+	assert(attrib_id < imm.vertex_format.attrib_ct);
+	assert(attr->comp_type == COMP_U8);
+	assert(attr->comp_ct == 4);
+	assert(imm.vertex_idx < imm.vertex_ct);
+	assert(imm.prim_type != PRIM_NONE); // make sure we're between a Begin/End pair
+#endif
+
+	setAttribValueBit(attrib_id); // record that this attrib has a value for the current vertex
+
+	// imm.vertex_data addresses the current vertex; the attribute sits attr->offset bytes into it
+	GLubyte* dst = imm.vertex_data + attr->offset;
+
+	dst[0] = r;
+	dst[1] = g;
+	dst[2] = b;
+	dst[3] = a;
+	}
+
+void immAttrib3ubv(unsigned attrib_id, const unsigned char data[3])
+	{ // array form of immAttrib3ub: reads data[0..2]
+	immAttrib3ub(attrib_id, data[0], data[1], data[2]);
+	}
+
+void immAttrib4ubv(unsigned attrib_id, const unsigned char data[4])
+	{ // array form of immAttrib4ub: reads data[0..3]
+	immAttrib4ub(attrib_id, data[0], data[1], data[2], data[3]);
+	}
+
+void immSkipAttrib(unsigned attrib_id)
+	{
+#if TRUST_NO_ONE
+	assert(attrib_id < imm.vertex_format.attrib_ct);
+	assert(imm.vertex_idx < imm.vertex_ct);
+	assert(imm.prim_type != PRIM_NONE); // make sure we're between a Begin/End pair
+#endif
+	// Mark the attribute as assigned without writing any data, so immEndVertex will not copy the previous vertex's value into it.
+	setAttribValueBit(attrib_id);
+	}
+
+static void immEndVertex(void) // and move on to the next vertex
+	{
+#if TRUST_NO_ONE
+	assert(imm.prim_type != PRIM_NONE); // make sure we're between a Begin/End pair
+	assert(imm.vertex_idx < imm.vertex_ct);
+#endif
+	// Complete the current vertex, then advance the write position and reset the per-vertex "unassigned" bits.
+	// have all attribs been assigned values?
+	// if not, copy value from previous vertex
+	if (imm.unassigned_attrib_bits)
+		{
+#if TRUST_NO_ONE
+		assert(imm.vertex_idx > 0); // first vertex must have all attribs specified
+#endif
+		// bit a_idx still set means attribute a_idx got no value for this vertex
+		for (unsigned a_idx = 0; a_idx < imm.vertex_format.attrib_ct; ++a_idx)
+			{
+			if ((imm.unassigned_attrib_bits >> a_idx) & 1)
+				{
+				const Attrib* a = imm.vertex_format.attribs + a_idx;
+
+//				printf("copying %s from vertex %u to %u\n", a->name, imm.vertex_idx - 1, imm.vertex_idx);
+				// the same attribute of the previous vertex lives exactly one stride earlier
+				GLubyte* data = imm.vertex_data + a->offset;
+				memcpy(data, data - imm.vertex_format.stride, a->sz);
+				// TODO: consolidate copy of adjacent attributes
+				}
+			}
+		}
+	// step to the next vertex; every enabled attribute starts out unassigned again
+	imm.vertex_idx++;
+	imm.vertex_data += imm.vertex_format.stride;
+	imm.unassigned_attrib_bits = imm.attrib_binding.enabled_bits;
+	}
+
+void immVertex2f(unsigned attrib_id, float x, float y)
+	{
+	immAttrib2f(attrib_id, x, y); // set this attribute...
+	immEndVertex(); // ...then complete the vertex and advance to the next one
+	}
+
+void immVertex3f(unsigned attrib_id, float x, float y, float z)
+	{
+	immAttrib3f(attrib_id, x, y, z); // set this attribute...
+	immEndVertex(); // ...then complete the vertex and advance to the next one
+	}
+
+void immVertex2i(unsigned attrib_id, int x, int y)
+	{
+	immAttrib2i(attrib_id, x, y); // set this attribute...
+	immEndVertex(); // ...then complete the vertex and advance to the next one
+	}
+
+void immVertex2s(unsigned attrib_id, short x, short y)
+	{
+	immAttrib2s(attrib_id, x, y); // set this attribute...
+	immEndVertex(); // ...then complete the vertex and advance to the next one
+	}
+
+void immVertex2fv(unsigned attrib_id, const float data[2])
+	{
+	immAttrib2f(attrib_id, data[0], data[1]); // array form: set this attribute from data[0..1]...
+	immEndVertex(); // ...then complete the vertex and advance to the next one
+	}
+
+void immVertex3fv(unsigned attrib_id, const float data[3])
+	{
+	immAttrib3f(attrib_id, data[0], data[1], data[2]); // array form: set this attribute from data[0..2]...
+	immEndVertex(); // ...then complete the vertex and advance to the next one
+	}
+
+void immVertex2iv(unsigned attrib_id, const int data[2])
+	{
+	immAttrib2i(attrib_id, data[0], data[1]); // array form: set this attribute from data[0..1]...
+	immEndVertex(); // ...then complete the vertex and advance to the next one
+	}
+
+
+// --- generic uniform functions ---
+
+void immUniform1f(const char* name, float x)
+	{
+	// find the uniform by name in the bound program, then set its float value
+	const int uniform_loc = glGetUniformLocation(imm.bound_program, name);
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1); // -1: the program has no active uniform with this name
+#endif
+
+	glUniform1f(uniform_loc, x);
+	}
+
+void immUniform2f(const char* name, float x, float y)
+	{
+	// find the uniform by name in the bound program, then set its two float components
+	const int uniform_loc = glGetUniformLocation(imm.bound_program, name);
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1); // -1: the program has no active uniform with this name
+#endif
+
+	glUniform2f(uniform_loc, x, y);
+	}
+
+void immUniform2fv(const char* name, const float data[2])
+	{
+	// find the uniform by name in the bound program, then set one vec2 from data[0..1]
+	const int uniform_loc = glGetUniformLocation(imm.bound_program, name);
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1); // -1: the program has no active uniform with this name
+#endif
+
+	glUniform2fv(uniform_loc, 1, data);
+	}
+
+void immUniform3f(const char* name, float x, float y, float z)
+	{
+	// find the uniform by name in the bound program, then set its three float components
+	const int uniform_loc = glGetUniformLocation(imm.bound_program, name);
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1); // -1: the program has no active uniform with this name
+#endif
+
+	glUniform3f(uniform_loc, x, y, z);
+	}
+
+void immUniform3fv(const char* name, const float data[3])
+	{
+	// find the uniform by name in the bound program, then set one vec3 from data[0..2]
+	const int uniform_loc = glGetUniformLocation(imm.bound_program, name);
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1); // -1: the program has no active uniform with this name
+#endif
+
+	glUniform3fv(uniform_loc, 1, data);
+	}
+
+void immUniformArray3fv(const char* name, const float *data, int count)
+	{
+	// find the uniform by name in the bound program, then upload `count` vec3 values read from data
+	const int uniform_loc = glGetUniformLocation(imm.bound_program, name);
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1); // -1: the program has no active uniform with this name
+	assert(count > 0);
+#endif
+
+	glUniform3fv(uniform_loc, count, data);
+	}
+
+void immUniform4f(const char* name, float x, float y, float z, float w)
+	{
+	// find the uniform by name in the bound program, then set its four float components
+	const int uniform_loc = glGetUniformLocation(imm.bound_program, name);
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1); // -1: the program has no active uniform with this name
+#endif
+
+	glUniform4f(uniform_loc, x, y, z, w);
+	}
+
+void immUniform4fv(const char* name, const float data[4])
+	{
+	// find the uniform by name in the bound program, then set one vec4 from data[0..3]
+	const int uniform_loc = glGetUniformLocation(imm.bound_program, name);
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1); // -1: the program has no active uniform with this name
+#endif
+
+	glUniform4fv(uniform_loc, 1, data);
+	}
+
+void immUniformMatrix4fv(const char* name, const float data[4][4])
+	{
+	// find the uniform by name in the bound program, then upload one 4x4 matrix without transposing (GL_FALSE)
+	int loc = glGetUniformLocation(imm.bound_program, name);
+#if TRUST_NO_ONE
+	assert(loc != -1); // -1: the program has no active uniform with this name
+#endif
+	// flatten the 4x4 array to a pointer to its 16 floats; keep the const qualifier, the data is only read
+	glUniformMatrix4fv(loc, 1, GL_FALSE, (const float*)data);
+	}
+
+void immUniform1i(const char* name, int x)
+	{
+	// find the uniform by name in the bound program, then set its integer value
+	const int uniform_loc = glGetUniformLocation(imm.bound_program, name);
+#if TRUST_NO_ONE
+	assert(uniform_loc != -1); // -1: the program has no active uniform with this name
+#endif
+
+	glUniform1i(uniform_loc, x);
+	}
+
+
+// --- convenience functions for setting "uniform vec4 color" ---
+
+void immUniformColor4f(float r, float g, float b, float a)
+	{ // set the uniform named "color" of the bound program from four floats
+	immUniform4f("color", r, g, b, a);
+	}
+
+void immUniformColor4fv(const float rgba[4])
+	{ // set the uniform named "color" from rgba[0..3]
+	immUniform4fv("color", rgba);
+	}
+
+void immUniformColor3f(float r, float g, float b)
+	{ // set the uniform named "color"; alpha is fixed at 1.0
+	immUniform4f("color", r, g, b, 1.0f);
+	}
+
+void immUniformColor3fv(const float rgb[3])
+	{ // set the uniform named "color" from rgb[0..2]; alpha is fixed at 1.0
+	immUniform4f("color", rgb[0], rgb[1], rgb[2], 1.0f);
+	}
+
+void immUniformColor3fvAlpha(const float rgb[3], float a)
+	{ // set the uniform named "color" from rgb[0..2] plus a separately supplied alpha
+	immUniform4f("color", rgb[0], rgb[1], rgb[2], a);
+	}
+
+// TODO: v-- treat as sRGB? --v
+
+void immUniformColor3ub(unsigned char r, unsigned char g, unsigned char b)
+	{
+	const float scale = 1.0f / 255.0f; // maps byte values 0..255 onto 0.0..1.0
+	immUniform4f("color", scale * r, scale * g, scale * b, 1.0f); // alpha is fixed at 1.0
+	}
+
+void immUniformColor4ub(unsigned char r, unsigned char g, unsigned char b, unsigned char a)
+	{
+	const float scale = 1.0f / 255.0f; // maps byte values 0..255 onto 0.0..1.0
+	immUniform4f("color", scale * r, scale * g, scale * b, scale * a); // alpha is scaled like the colour channels
+	}
+
+void immUniformColor3ubv(const unsigned char rgb[3])
+	{ // array form of immUniformColor3ub: reads rgb[0..2]
+	immUniformColor3ub(rgb[0], rgb[1], rgb[2]);
+	}
+
+void immUniformColor4ubv(const unsigned char rgba[4])
+	{ // array form of immUniformColor4ub: reads rgba[0..3]
+	immUniformColor4ub(rgba[0], rgba[1], rgba[2], rgba[3]);
+	}
diff --git a/intern/gawain/src/primitive.c b/intern/gawain/src/primitive.c
new file mode 100644
index 00000000000..95472c289e8
--- /dev/null
+++ b/intern/gawain/src/primitive.c
@@ -0,0 +1,41 @@
+
+// Gawain geometric primitives
+//
+// This code is part of the Gawain library, with modifications
+// specific to integration with Blender.
+//
+// Copyright 2017 Mike Erwin
+//
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of
+// the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#include "primitive.h"
+
+PrimitiveClass prim_class_of_type(PrimitiveType prim_type)
+	{ // look up which class (none / point / line / surface) a primitive type belongs to
+	static const PrimitiveClass classes[] =
+		{
+		[PRIM_NONE] = PRIM_CLASS_NONE,
+		[PRIM_POINTS] = PRIM_CLASS_POINT,
+		[PRIM_LINES] = PRIM_CLASS_LINE,
+		[PRIM_LINE_STRIP] = PRIM_CLASS_LINE,
+		[PRIM_LINE_LOOP] = PRIM_CLASS_LINE,
+		[PRIM_TRIANGLES] = PRIM_CLASS_SURFACE,
+		[PRIM_TRIANGLE_STRIP] = PRIM_CLASS_SURFACE,
+		[PRIM_TRIANGLE_FAN] = PRIM_CLASS_SURFACE,
+
+#ifdef WITH_GL_PROFILE_COMPAT
+		[PRIM_QUADS] = PRIM_CLASS_SURFACE,
+#endif
+		};
+	// NOTE(review): no bounds check -- prim_type must be one of the types listed in the table
+	return classes[prim_type];
+	}
+
+bool prim_type_belongs_to_class(PrimitiveType prim_type, PrimitiveClass prim_class)
+	{
+	// "no primitive" belongs to "no class"; decided up front, before the bitwise test
+	const bool both_none = (prim_type == PRIM_NONE) && (prim_class == PRIM_CLASS_NONE);
+	// otherwise the type belongs if prim_class shares a bit with the type's own class
+	return both_none ? true : (prim_class & prim_class_of_type(prim_type));
+	}
diff --git a/intern/gawain/src/shader_interface.c b/intern/gawain/src/shader_interface.c
new file mode 100644
index 00000000000..93a1283d895
--- /dev/null
+++ b/intern/gawain/src/shader_interface.c
@@ -0,0 +1,140 @@
+
+// Gawain shader interface (C --> GLSL)
+//
+// This code is part of the Gawain library, with modifications
+// specific to integration with Blender.
+//
+// Copyright 2017 Mike Erwin
+//
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of
+// the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#include "shader_interface.h"
+#include <stdlib.h>
+#include <stddef.h>
+
+#define DEBUG_SHADER_INTERFACE 0
+
+#if DEBUG_SHADER_INTERFACE
+ #include <stdio.h>
+#endif
+
+#if 0
+
+static const char* BuiltinUniform_name(BuiltinUniform u)
+	{ // GLSL name of a built-in uniform, or NULL for NONE / CUSTOM (currently compiled out by the enclosing #if 0)
+	static const char* names[] =
+		{
+		[UNIFORM_NONE] = NULL,
+
+		[UNIFORM_MODELVIEW_3D] = "ModelViewMatrix",
+		[UNIFORM_PROJECTION_3D] = "ProjectionMatrix",
+		[UNIFORM_MVP_3D] = "ModelViewProjectionMatrix",
+		[UNIFORM_NORMAL_3D] = "NormalMatrix",
+		[UNIFORM_INV_NORMAL_3D] = "InverseNormalMatrix",
+		// the 2D variants use the same GLSL names as their 3D counterparts
+		[UNIFORM_MODELVIEW_2D] = "ModelViewMatrix",
+		[UNIFORM_PROJECTION_2D] = "ProjectionMatrix",
+		[UNIFORM_MVP_2D] = "ModelViewProjectionMatrix",
+
+		[UNIFORM_COLOR] = "color",
+
+		[UNIFORM_CUSTOM] = NULL
+		};
+	// NOTE(review): u indexes the table directly, with no bounds check
+	return names[u];
+	}
+
+#endif
+
+static bool setup_builtin_uniform(ShaderInput* input, const char* name)
+	{
+	// TODO: reject DOUBLE, IMAGE, ATOMIC_COUNTER gl_types
+	// Currently a stub: records the name, tags the input as UNIFORM_CUSTOM and returns false, meaning no built-in was matched.
+	// TODO: detect built-in uniforms (gl_type and name must match)
+	//       if a match is found, use BuiltinUniform_name so name buffer space can be reclaimed
+	input->name = name;
+	input->builtin_type = UNIFORM_CUSTOM;
+	return false;
+	}
+
+ShaderInterface* ShaderInterface_create(GLint program)
+	{
+#if DEBUG_SHADER_INTERFACE
+	printf("%s {\n", __func__); // enter function
+#endif
+	// Query the program's active uniforms and attributes and record each one's name, size, GL type and location. Returns NULL if allocation fails.
+	GLint uniform_ct, attrib_ct;
+	glGetProgramiv(program, GL_ACTIVE_UNIFORMS, &uniform_ct);
+	glGetProgramiv(program, GL_ACTIVE_ATTRIBUTES, &attrib_ct);
+	const GLint input_ct = uniform_ct + attrib_ct;
+	// worst case for the name buffer: every name is as long as the longest of its kind
+	GLint max_uniform_name_len, max_attrib_name_len;
+	glGetProgramiv(program, GL_ACTIVE_UNIFORM_MAX_LENGTH, &max_uniform_name_len);
+	glGetProgramiv(program, GL_ACTIVE_ATTRIBUTE_MAX_LENGTH, &max_attrib_name_len);
+	const uint32_t name_buffer_len = uniform_ct * max_uniform_name_len + attrib_ct * max_attrib_name_len;
+	// NOTE(review): uniform_ct / attrib_ct are never stored into shaderface in this function -- confirm whether the struct has count fields to fill in
+	// allocate enough space for input counts, details for each input, and a buffer for name strings
+	ShaderInterface* shaderface = calloc(1, offsetof(ShaderInterface, inputs) + input_ct * sizeof(ShaderInput) + name_buffer_len);
+	if (shaderface == NULL)
+		return NULL; // out of memory: nothing was allocated, so there is nothing to clean up
+	char* name_buffer = (char*)shaderface + offsetof(ShaderInterface, inputs) + input_ct * sizeof(ShaderInput);
+	uint32_t name_buffer_offset = 0;
+	for (uint32_t i = 0; i < uniform_ct; ++i)
+		{
+		ShaderInput* input = shaderface->inputs + i;
+		GLsizei remaining_buffer = name_buffer_len - name_buffer_offset;
+		char* name = name_buffer + name_buffer_offset;
+		GLsizei name_len = 0;
+
+		glGetActiveUniform(program, i, remaining_buffer, &name_len, &input->size, &input->gl_type, name);
+
+		if (setup_builtin_uniform(input, name))
+			; // reclaim space from name buffer (don't advance offset)
+		else
+			name_buffer_offset += name_len + 1; // include NULL terminator
+
+		input->location = glGetUniformLocation(program, name);
+
+#if DEBUG_SHADER_INTERFACE
+		printf("uniform[%u] '%s' at location %d\n", i, name, input->location);
+#endif
+		}
+	// attributes are stored after the uniforms in the same inputs array
+	for (uint32_t i = 0; i < attrib_ct; ++i)
+		{
+		ShaderInput* input = shaderface->inputs + uniform_ct + i;
+		GLsizei remaining_buffer = name_buffer_len - name_buffer_offset;
+		char* name = name_buffer + name_buffer_offset;
+		GLsizei name_len = 0;
+
+		glGetActiveAttrib(program, i, remaining_buffer, &name_len, &input->size, &input->gl_type, name);
+
+		// TODO: reject DOUBLE gl_types
+
+		input->name = name;
+		name_buffer_offset += name_len + 1; // include NULL terminator
+
+		input->location = glGetAttribLocation(program, name);
+
+#if DEBUG_SHADER_INTERFACE
+		printf("attrib[%u] '%s' at location %d\n", i, name, input->location);
+#endif
+		}
+
+	// TODO: realloc shaderface to shrink name buffer
+	//       each input->name will need adjustment (except static built-in names)
+
+#if DEBUG_SHADER_INTERFACE
+	printf("using %u of %u bytes from name buffer\n", name_buffer_offset, name_buffer_len);
+	printf("}\n"); // exit function
+#endif
+
+	return shaderface;
+	}
+
+void ShaderInterface_discard(ShaderInterface* shaderface)
+	{
+	// inputs and name strings live in the same single allocation made by ShaderInterface_create, so one free releases everything
+	free(shaderface);
+	}
diff --git a/intern/gawain/src/vertex_buffer.c b/intern/gawain/src/vertex_buffer.c
new file mode 100644
index 00000000000..827703403e3
--- /dev/null
+++ b/intern/gawain/src/vertex_buffer.c
@@ -0,0 +1,170 @@
+
+// Gawain vertex buffer
+//
+// This code is part of the Gawain library, with modifications
+// specific to integration with Blender.
+//
+// Copyright 2016 Mike Erwin
+//
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of
+// the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#include "vertex_buffer.h"
+#include "buffer_id.h"
+#include <stdlib.h>
+#include <string.h>
+
+#define KEEP_SINGLE_COPY 1
+
+VertexBuffer* VertexBuffer_create(void)
+	{
+	VertexBuffer* fresh = malloc(sizeof *fresh); // heap-allocated; VertexBuffer_discard frees it
+	VertexBuffer_init(fresh); // start out with every field zeroed
+	return fresh;
+	}
+
+VertexBuffer* VertexBuffer_create_with_format(const VertexFormat* format)
+	{
+	// Create a vertex buffer holding its own copy of format; the copy is packed if the source format was not.
+	// this function might seem redundant, but there is potential for memory savings here...
+	// TODO: implement those memory savings
+	VertexBuffer* verts = VertexBuffer_create();
+	VertexFormat_copy(&verts->format, format);
+	if (!format->packed)
+		VertexFormat_pack(&verts->format);
+	return verts;
+	}
+
+void VertexBuffer_init(VertexBuffer* verts)
+	{ // reset every field of an already allocated VertexBuffer to zero
+	memset(verts, 0, sizeof(VertexBuffer));
+	}
+
+void VertexBuffer_init_with_format(VertexBuffer* verts, const VertexFormat* format)
+	{ // in-place counterpart of VertexBuffer_create_with_format, for a VertexBuffer the caller already owns
+	VertexBuffer_init(verts);
+	VertexFormat_copy(&verts->format, format);
+	if (!format->packed) // the source is left untouched; only the buffer's own copy gets packed
+		VertexFormat_pack(&verts->format);
+	}
+
+void VertexBuffer_discard(VertexBuffer* verts)
+	{
+	if (verts->vbo_id) // a GL buffer exists: release its id
+		buffer_id_free(verts->vbo_id);
+#if KEEP_SINGLE_COPY
+	else // with a single copy, VertexBuffer_prime already freed the main-memory data once the GL buffer was created
+#endif
+	if (verts->data)
+		free(verts->data);
+	// finally release the VertexBuffer struct itself
+	free(verts);
+	}
+
+unsigned VertexBuffer_size(const VertexBuffer* verts)
+	{ // size in bytes of the buffer's vertex data: its format's stride times its vertex count
+	return vertex_buffer_size(&verts->format, verts->vertex_ct);
+	}
+
+void VertexBuffer_allocate_data(VertexBuffer* verts, unsigned v_ct)
+	{
+	VertexFormat* format = &verts->format;
+	if (!format->packed) // the size calculation below needs a packed format
+		VertexFormat_pack(format);
+
+	verts->vertex_ct = v_ct;
+	// NOTE(review): the malloc result is not checked, and any previous verts->data is overwritten without being freed
+	// Data initially lives in main memory. Will be transferred to VRAM when we "prime" it.
+	verts->data = malloc(VertexBuffer_size(verts));
+	}
+
+void VertexBuffer_resize_data(VertexBuffer* verts, unsigned v_ct) // change the vertex count of a buffer still in main memory
+	{
+#if TRUST_NO_ONE
+	assert(verts->vertex_ct != v_ct); // allow this?
+	assert(verts->data != NULL); // has already been allocated
+	assert(verts->vbo_id == 0); // has not been sent to VRAM
+#endif
+	void* resized = realloc(verts->data, vertex_buffer_size(&verts->format, v_ct)); // TODO: skip realloc if v_ct < existing vertex count
+	if (resized == NULL && v_ct > 0) // a zero-size request may legitimately return NULL, so only v_ct > 0 is a failure
+		return; // out of memory: the old buffer and vertex count stay valid instead of being lost
+	verts->vertex_ct = v_ct; // extra space will be reclaimed, and never sent to VRAM (see VertexBuffer_prime)
+	verts->data = resized;
+	}
+
+void setAttrib(VertexBuffer* verts, unsigned a_idx, unsigned v_idx, const void* data)
+	{
+	// copy one attribute value (a->sz bytes read from data) into vertex v_idx of the main-memory buffer
+	const VertexFormat* fmt = &verts->format;
+	const Attrib* attrib = &fmt->attribs[a_idx];
+#if TRUST_NO_ONE
+	assert(a_idx < fmt->attrib_ct);
+	assert(v_idx < verts->vertex_ct);
+	assert(verts->data != NULL); // data must be in main mem
+#endif
+	GLubyte* dest = (GLubyte*)verts->data + attrib->offset + v_idx * fmt->stride; // attribute's slot inside vertex v_idx
+	memcpy(dest, data, attrib->sz);
+	}
+
+void fillAttrib(VertexBuffer* verts, unsigned a_idx, const void* data)
+	{
+	const VertexFormat* format = &verts->format;
+	const Attrib* a = format->attribs + a_idx;
+
+#if TRUST_NO_ONE
+	assert(a_idx < format->attrib_ct);
+#endif
+	// Fill attribute a_idx of every vertex from an array whose elements sit back to back, a->sz bytes apart.
+	const unsigned stride = a->sz; // tightly packed input data
+
+	fillAttribStride(verts, a_idx, stride, data);
+	}
+
+void fillAttribStride(VertexBuffer* verts, unsigned a_idx, unsigned stride, const void* data)
+	{
+	const VertexFormat* format = &verts->format;
+	const Attrib* a = format->attribs + a_idx;
+	// Fill attribute a_idx of every vertex from an input array whose elements are `stride` bytes apart.
+#if TRUST_NO_ONE
+	assert(a_idx < format->attrib_ct);
+	assert(verts->data != NULL); // data must be in main mem
+#endif
+
+	const unsigned vertex_ct = verts->vertex_ct;
+	GLubyte* dest = (GLubyte*)verts->data + a->offset; // this attribute's slot in the first vertex
+	if (vertex_ct > 0 && format->attrib_ct == 1 && stride == format->stride)
+		{
+		// same layout on both sides, so copy it all at once: full strides for all but the last vertex, then just its a->sz bytes
+		memcpy(dest, data, (vertex_ct - 1) * stride + a->sz);
+		}
+	else
+		{
+		// we must copy it per vertex
+		for (unsigned v = 0; v < vertex_ct; ++v)
+			memcpy(dest + v * format->stride, (const GLubyte*)data + v * stride, a->sz);
+		}
+	}
+
+static void VertexBuffer_prime(VertexBuffer* verts)
+	{
+	const VertexFormat* format = &verts->format;
+	// Create the GL buffer for this VertexBuffer, leave it bound to GL_ARRAY_BUFFER, and upload the vertex data.
+	verts->vbo_id = buffer_id_alloc();
+	glBindBuffer(GL_ARRAY_BUFFER, verts->vbo_id);
+	// fill with delicious data & send to GPU the first time only
+	glBufferData(GL_ARRAY_BUFFER, vertex_buffer_size(format, verts->vertex_ct), verts->data, GL_STATIC_DRAW);
+
+#if KEEP_SINGLE_COPY
+	// now that GL has a copy, discard original
+	free(verts->data);
+	verts->data = NULL;
+#endif
+	}
+
+void VertexBuffer_use(VertexBuffer* verts)
+	{
+	if (verts->vbo_id == 0)
+		VertexBuffer_prime(verts); // first use: create the GL buffer, bind it and upload the data
+	else
+		glBindBuffer(GL_ARRAY_BUFFER, verts->vbo_id); // already uploaded: just bind it
+	}
diff --git a/intern/gawain/src/vertex_format.c b/intern/gawain/src/vertex_format.c
new file mode 100644
index 00000000000..d42398f3a4f
--- /dev/null
+++ b/intern/gawain/src/vertex_format.c
@@ -0,0 +1,248 @@
+
+// Gawain vertex format
+//
+// This code is part of the Gawain library, with modifications
+// specific to integration with Blender.
+//
+// Copyright 2016 Mike Erwin
+//
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of
+// the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+#include "vertex_format.h"
+#include <stdlib.h>
+#include <string.h>
+
+#define PACK_DEBUG 0
+
+#if PACK_DEBUG
+  #include <stdio.h>
+#endif
+
+// Return a format to its empty, unpacked state so attributes can be added again.
+void VertexFormat_clear(VertexFormat* format)
+	{
+#if TRUST_NO_ONE
+	// paranoid build: wipe the entire struct, not just the bookkeeping
+	memset(format, 0, sizeof(*format));
+#else
+	// resetting the bookkeeping is enough; old attrib and name data is simply left in place
+	format->name_offset = 0;
+	format->attrib_ct = 0;
+	format->packed = false;
+#endif
+	}
+
+// Make dest an independent copy of src.
+//   dest : format to overwrite
+//   src  : format to copy from; left untouched
+void VertexFormat_copy(VertexFormat* dest, const VertexFormat* src)
+	{
+	if (dest == src)
+		return; // nothing to do, and memcpy must not be given overlapping regions
+
+	// copy regular struct fields
+	memcpy(dest, src, sizeof(VertexFormat));
+
+	// Each attrib name is a pointer into its format's name storage (that is what
+	// copy_attrib_name returns), so after the raw copy the names still refer to src.
+	// Re-aim them at the same position inside dest's storage so dest does not depend
+	// on src staying alive and unchanged.
+	for (unsigned i = 0; i < dest->attrib_ct; ++i)
+		dest->attribs[i].name = dest->names + (src->attribs[i].name - src->names);
+	}
+
+// Size in bytes of one component of the given type.
+// Valid only for types whose values lie in the consecutive range GL_BYTE .. GL_FLOAT.
+static unsigned comp_sz(VertexCompType type)
+	{
+#if TRUST_NO_ONE
+	assert(type >= GL_BYTE && type <= GL_FLOAT);
+#endif
+
+	// indexed by distance from GL_BYTE
+	static const GLubyte bytes_per_comp[] = {
+		1, 1, // GL_BYTE, GL_UNSIGNED_BYTE
+		2, 2, // GL_SHORT, GL_UNSIGNED_SHORT
+		4, 4, // GL_INT, GL_UNSIGNED_INT
+		4     // GL_FLOAT
+	};
+
+	return bytes_per_comp[type - GL_BYTE];
+	}
+
+// Size in bytes of one attribute as stored in a vertex.
+static unsigned attrib_sz(const Attrib *a)
+	{
+#if USE_10_10_10
+	// always packed as 10_10_10_2, i.e. 4 bytes whatever the component count says
+	if (a->comp_type == COMP_I10)
+		return 4;
+#endif
+
+	const unsigned bytes_per_comp = comp_sz(a->comp_type);
+	return a->comp_ct * bytes_per_comp;
+	}
+
+// Alignment in bytes that an attribute's offset within a vertex must respect.
+static unsigned attrib_align(const Attrib *a)
+	{
+#if USE_10_10_10
+	// always packed as 10_10_10_2, handled as one 4-byte unit
+	if (a->comp_type == COMP_I10)
+		return 4;
+#endif
+
+	const unsigned comp_bytes = comp_sz(a->comp_type);
+	const bool small_triple = (a->comp_ct == 3) && (comp_bytes <= 2);
+
+	// 3 components of 1 or 2 bytes each: AMD HW can't fetch these well, so align them as
+	// if there were 4 components (other vendors too?).
+	// Everything else: most fetches are ok if components are naturally aligned.
+	return small_triple ? 4 * comp_bytes : comp_bytes;
+	}
+
+// Number of bytes needed to store vertex_ct vertices laid out in the given format.
+// The format must already be packed, since the stride is only computed during packing.
+unsigned vertex_buffer_size(const VertexFormat* format, unsigned vertex_ct)
+	{
+#if TRUST_NO_ONE
+	assert(format->packed && format->stride > 0);
+#endif
+
+	const unsigned bytes_per_vertex = format->stride;
+	return bytes_per_vertex * vertex_ct;
+	}
+
+// Append a copy of name, terminator included, to the format's name storage.
+//   format : owner of the storage; its name_offset is advanced past the stored copy
+//   name   : NUL-terminated attribute name
+// Returns a pointer to the stored copy, which lives inside format->names.
+// A name that does not fit trips the TRUST_NO_ONE asserts. In builds without them the name
+// is stored truncated but still NUL-terminated, so readers never run past the storage.
+static const char* copy_attrib_name(VertexFormat* format, const char* name)
+	{
+	// strncpy does 110% of what we need; let's do exactly 100%
+	char* name_copy = format->names + format->name_offset;
+	const unsigned available = VERTEX_ATTRIB_NAMES_BUFFER_LEN - format->name_offset;
+	bool terminated = false;
+
+	for (unsigned i = 0; i < available; ++i)
+		{
+		const char c = name[i];
+		name_copy[i] = c;
+		if (c == '\0')
+			{
+			terminated = true;
+			format->name_offset += (i + 1);
+			break;
+			}
+		}
+
+#if TRUST_NO_ONE
+	assert(terminated);
+	assert(format->name_offset <= VERTEX_ATTRIB_NAMES_BUFFER_LEN);
+#endif
+
+	if (!terminated)
+		{
+		if (available == 0)
+			{
+			// Storage is completely full, so name_copy points one past its end. The last
+			// byte of a full storage is the terminator of the previous name; returning its
+			// address yields an empty string that is still inside the storage.
+			--name_copy;
+			}
+		else
+			{
+			// ran out of room mid-name: cut it off at the last byte and mark storage full
+			name_copy[available - 1] = '\0';
+			format->name_offset = VERTEX_ATTRIB_NAMES_BUFFER_LEN;
+			}
+		}
+
+	return name_copy;
+	}
+
+// Append a new attribute to a format that has not been packed yet.
+//   format     : format to extend
+//   name       : attribute name; a copy is kept in the format's own name storage
+//   comp_type  : type of each component
+//   comp_ct    : number of components, 1 to 4
+//   fetch_mode : must be compatible with comp_type (checked under TRUST_NO_ONE)
+// Returns the index of the new attribute within the format.
+unsigned add_attrib(VertexFormat* format, const char* name, VertexCompType comp_type, unsigned comp_ct, VertexFetchMode fetch_mode)
+	{
+#if TRUST_NO_ONE
+	assert(format->attrib_ct < MAX_VERTEX_ATTRIBS); // there's room for more
+	assert(!format->packed); // packed means frozen/locked
+	assert(comp_ct >= 1 && comp_ct <= 4);
+	if (comp_type == COMP_F32)
+		{
+		// float type can only be kept as float
+		assert(fetch_mode == KEEP_FLOAT);
+		}
+ #if USE_10_10_10
+	else if (comp_type == COMP_I10)
+		{
+		// 10_10_10 format intended for normals (xyz) or colors (rgb)
+		// extra component packed.w can be manually set to { -2, -1, 0, 1 }
+		assert(comp_ct == 3 || comp_ct == 4);
+		assert(fetch_mode == NORMALIZE_INT_TO_FLOAT); // not strictly required, may relax later
+		}
+ #endif
+	else
+		{
+		// integer types can be kept as int or converted/normalized to float
+		assert(fetch_mode != KEEP_FLOAT);
+		}
+#endif
+
+	// claim the next free slot; its index doubles as the attribute's ID
+	const unsigned new_idx = format->attrib_ct++;
+	Attrib* slot = &format->attribs[new_idx];
+
+	slot->name = copy_attrib_name(format, name);
+	slot->comp_type = comp_type;
+#if USE_10_10_10
+	// system needs 10_10_10_2 to be 4 or BGRA
+	slot->comp_ct = (comp_type != COMP_I10) ? comp_ct : 4;
+#else
+	slot->comp_ct = comp_ct;
+#endif
+	slot->sz = attrib_sz(slot); // reads comp_type and comp_ct, so it has to follow them
+	slot->fetch_mode = fetch_mode;
+	slot->offset = 0; // offsets & stride are calculated later (during pack)
+
+	return new_idx;
+	}
+
+// Number of bytes to add to offset to reach the next multiple of alignment
+// (0 when offset already is one). alignment must be non-zero.
+unsigned padding(unsigned offset, unsigned alignment)
+	{
+	const unsigned overshoot = offset % alignment;
+
+	// the outer modulo folds the "already aligned" case (alignment - 0) back to 0
+	return (alignment - overshoot) % alignment;
+	}
+
+#if PACK_DEBUG
+// Debug print of one attribute's place in the layout: a '-' for each padding byte,
+// followed by one letter ('A' + a_idx) for each data byte.
+static void show_pack(unsigned a_idx, unsigned sz, unsigned pad)
+	{
+	const char letter = 'A' + a_idx;
+	unsigned remaining;
+
+	for (remaining = pad; remaining > 0; --remaining)
+		putchar('-');
+	for (remaining = sz; remaining > 0; --remaining)
+		putchar(letter);
+	}
+#endif
+
+// Freeze a format: give every attribute its byte offset within a vertex, compute the
+// vertex stride, and mark the format as packed.
+// An empty format is a caller error (asserted under TRUST_NO_ONE); without asserts it is
+// packed with a stride of 0.
+void VertexFormat_pack(VertexFormat* format)
+	{
+	// for now, attributes are packed in the order they were added,
+	// making sure each attrib is naturally aligned (add padding where necessary)
+
+	// later we can implement more efficient packing w/ reordering
+	// (keep attrib ID order, adjust their offsets to reorder in buffer)
+
+	// TODO:
+	// realloc just enough to hold the final combo string. And just enough to
+	// hold used attribs, not all 16.
+
+#if TRUST_NO_ONE
+	assert(format->attrib_ct > 0); // nothing to lay out otherwise
+#endif
+
+	if (format->attrib_ct == 0)
+		{
+		// attribs[0] holds no meaningful data for an empty format, so don't read it
+		format->stride = 0;
+		format->packed = true;
+		return;
+		}
+
+	Attrib* a0 = format->attribs + 0;
+	a0->offset = 0;
+	unsigned offset = a0->sz;
+	unsigned max_align = attrib_align(a0); // strictest alignment seen so far
+
+#if PACK_DEBUG
+	show_pack(0, a0->sz, 0);
+#endif
+
+	for (unsigned a_idx = 1; a_idx < format->attrib_ct; ++a_idx)
+		{
+		Attrib* a = format->attribs + a_idx;
+		const unsigned align = attrib_align(a);
+		unsigned mid_padding = padding(offset, align);
+		offset += mid_padding;
+		a->offset = offset;
+		offset += a->sz;
+
+		if (align > max_align)
+			max_align = align;
+
+#if PACK_DEBUG
+		show_pack(a_idx, a->sz, mid_padding);
+#endif
+		}
+
+	// Round the stride up to the strictest alignment of any attribute, not just the first
+	// one's. Otherwise an attrib that is aligned in vertex 0 can land misaligned in later
+	// vertices (e.g. byte, float, byte would give a stride of 9). attrib_align only yields
+	// 1, 2, 4 or 8, so a multiple of the largest is a multiple of all of them.
+	unsigned end_padding = padding(offset, max_align);
+
+#if PACK_DEBUG
+	show_pack(0, 0, end_padding);
+	putchar('\n');
+#endif
+
+	format->stride = offset + end_padding;
+	format->packed = true;
+	}
+
+
+#if USE_10_10_10
+
+// OpenGL ES packs in a different order than desktop GL, but component conversion is the same.
+// Of the code here, only struct PackedNormal needs to change.
+
+#define SIGNED_INT_10_MAX  511
+#define SIGNED_INT_10_MIN -512
+
+// Limit x to the inclusive range [min_allowed, max_allowed].
+// The lower bound is tested first, then the upper one.
+static int clampi(int x, int min_allowed, int max_allowed)
+	{
+#if TRUST_NO_ONE
+	assert(min_allowed <= max_allowed);
+#endif
+
+	return (x < min_allowed) ? min_allowed
+	     : (x > max_allowed) ? max_allowed
+	     : x;
+	}
+
+// Convert a float to a signed 10-bit integer: scale by 511, drop the fraction, and clamp
+// to [SIGNED_INT_10_MIN, SIGNED_INT_10_MAX]. NaN maps to 0.
+static int quantize(float x)
+	{
+	float scaled = x * 511.0f;
+
+	// Converting a float to int is undefined when the value is NaN or does not fit in an
+	// int, so bring it into the 10-bit range while it is still a float. For values that
+	// did fit, this gives the same result as converting first and clamping afterwards.
+	if (scaled != scaled) // true only for NaN
+		scaled = 0.0f;
+	else if (scaled < (float)SIGNED_INT_10_MIN)
+		scaled = (float)SIGNED_INT_10_MIN;
+	else if (scaled > (float)SIGNED_INT_10_MAX)
+		scaled = (float)SIGNED_INT_10_MAX;
+
+	const int qx = (int)scaled; // truncates toward zero
+	return clampi(qx, SIGNED_INT_10_MIN, SIGNED_INT_10_MAX);
+	}
+
+// Pack three floats into a PackedNormal, quantizing each component on its own.
+// Members of PackedNormal not named in the initializer are zero-initialized.
+PackedNormal convert_i10_v3(const float data[3])
+	{
+	const int qx = quantize(data[0]);
+	const int qy = quantize(data[1]);
+	const int qz = quantize(data[2]);
+
+	const PackedNormal packed = { .x = qx, .y = qy, .z = qz };
+	return packed;
+	}
+
+#endif // USE_10_10_10