Merge branch 'master' into sculpt-mode-features

author: Pablo Dobarro <pablodp606@gmail.com> 2019-08-20 20:13:25 +0300
committer: Pablo Dobarro <pablodp606@gmail.com> 2019-08-20 20:13:25 +0300
commit: 8c509bb69cc9d473236e51b1ba51b74176286223 (patch)
tree: 3c5694ae0c89b1814791b5b5dc79958826b918fa /source/blender/gpu
parent: d6d51674a2fee3a1110d241b96d31480ce440cf1 (diff)
parent: a942d97b7971dc0e7add44e3e9ba1c02fb914f7d (diff)
27 files changed, 665 insertions, 408 deletions
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt
index f30eff1484b..fb7d3c1ace8 100644
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -116,6 +116,7 @@ set(SRC
   intern/gpu_batch_private.h
   intern/gpu_codegen.h
   intern/gpu_context_private.h
+  intern/gpu_matrix_private.h
   intern/gpu_primitive_private.h
   intern/gpu_private.h
   intern/gpu_select_private.h
@@ -221,9 +222,6 @@ data_to_c_simple(shaders/gpu_shader_2D_edituvs_edges_vert.glsl SRC)
 data_to_c_simple(shaders/gpu_shader_2D_edituvs_faces_vert.glsl SRC)
 data_to_c_simple(shaders/gpu_shader_2D_edituvs_stretch_vert.glsl SRC)
 
-data_to_c_simple(shaders/gpu_shader_3D_selection_id_vert.glsl SRC)
-data_to_c_simple(shaders/gpu_shader_selection_id_frag.glsl SRC)
-
 data_to_c_simple(shaders/gpu_shader_text_simple_vert.glsl SRC)
 data_to_c_simple(shaders/gpu_shader_text_simple_geom.glsl SRC)
 data_to_c_simple(shaders/gpu_shader_text_vert.glsl SRC)
diff --git a/source/blender/gpu/GPU_batch.h b/source/blender/gpu/GPU_batch.h
index 5b0cab220c0..175033f70d9 100644
--- a/source/blender/gpu/GPU_batch.h
+++ b/source/blender/gpu/GPU_batch.h
@@ -40,7 +40,7 @@ typedef enum {
   GPU_BATCH_READY_TO_DRAW,
 } GPUBatchPhase;
 
-#define GPU_BATCH_VBO_MAX_LEN 4
+#define GPU_BATCH_VBO_MAX_LEN 6
 #define GPU_BATCH_VAO_STATIC_LEN 3
 #define GPU_BATCH_VAO_DYN_ALLOC_COUNT 16
 
@@ -105,8 +105,9 @@ void GPU_batch_copy(GPUBatch *batch_dst, GPUBatch *batch_src);
 #define GPU_batch_create(prim, verts, elem) GPU_batch_create_ex(prim, verts, elem, 0)
 #define GPU_batch_init(batch, prim, verts, elem) GPU_batch_init_ex(batch, prim, verts, elem, 0)
 
-void GPU_batch_clear(
-    GPUBatch *); /* Same as discard but does not free. (does not clal free callback) */
+/* Same as discard but does not free. (does not call free callback). */
+void GPU_batch_clear(GPUBatch *);
+
 void GPU_batch_discard(GPUBatch *); /* verts & elem are not discarded */
 
 void GPU_batch_vao_cache_clear(GPUBatch *);
@@ -114,6 +115,7 @@ void GPU_batch_vao_cache_clear(GPUBatch *);
 void GPU_batch_callback_free_set(GPUBatch *, void (*callback)(GPUBatch *, void *), void *);
 
 void GPU_batch_instbuf_set(GPUBatch *, GPUVertBuf *, bool own_vbo); /* Instancing */
+void GPU_batch_elembuf_set(GPUBatch *batch, GPUIndexBuf *elem, bool own_ibo);
 
 int GPU_batch_vertbuf_add_ex(GPUBatch *, GPUVertBuf *, bool own_vbo);
 
diff --git a/source/blender/gpu/GPU_element.h b/source/blender/gpu/GPU_element.h
index 4ac89d2658b..75caf4cbd6a 100644
--- a/source/blender/gpu/GPU_element.h
+++ b/source/blender/gpu/GPU_element.h
@@ -36,14 +36,19 @@ typedef enum {
 } GPUIndexBufType;
 
 typedef struct GPUIndexBuf {
+  uint index_start;
   uint index_len;
+  bool is_subrange;
 #if GPU_TRACK_INDEX_RANGE
   GPUIndexBufType index_type;
   uint32_t gl_index_type;
   uint base_index;
 #endif
   uint32_t ibo_id; /* 0 indicates not yet sent to VRAM */
-  void *data;      /* non-NULL indicates not yet sent to VRAM */
+  union {
+    void *data;              /* non-NULL indicates not yet sent to VRAM */
+    struct GPUIndexBuf *src; /* if is_subrange is true, this is the source buffer. */
+  };
 } GPUIndexBuf;
 
 void GPU_indexbuf_use(GPUIndexBuf *);
@@ -71,9 +76,21 @@ void GPU_indexbuf_add_line_verts(GPUIndexBufBuilder *, uint v1, uint v2);
 void GPU_indexbuf_add_tri_verts(GPUIndexBufBuilder *, uint v1, uint v2, uint v3);
 void GPU_indexbuf_add_line_adj_verts(GPUIndexBufBuilder *, uint v1, uint v2, uint v3, uint v4);
 
+void GPU_indexbuf_set_point_vert(GPUIndexBufBuilder *builder, uint elem, uint v1);
+void GPU_indexbuf_set_line_verts(GPUIndexBufBuilder *builder, uint elem, uint v1, uint v2);
+void GPU_indexbuf_set_tri_verts(GPUIndexBufBuilder *builder, uint elem, uint v1, uint v2, uint v3);
+
+/* Skip primitive rendering at the given index. */
+void GPU_indexbuf_set_point_restart(GPUIndexBufBuilder *builder, uint elem);
+void GPU_indexbuf_set_line_restart(GPUIndexBufBuilder *builder, uint elem);
+void GPU_indexbuf_set_tri_restart(GPUIndexBufBuilder *builder, uint elem);
+
 GPUIndexBuf *GPU_indexbuf_build(GPUIndexBufBuilder *);
 void GPU_indexbuf_build_in_place(GPUIndexBufBuilder *, GPUIndexBuf *);
 
+/* Create a subrange of an existing indexbuffer. */
+GPUIndexBuf *GPU_indexbuf_create_subrange(GPUIndexBuf *ibo, uint start, uint length);
+
 void GPU_indexbuf_discard(GPUIndexBuf *);
 
 int GPU_indexbuf_primitive_len(GPUPrimType prim_type);
diff --git a/source/blender/gpu/GPU_extensions.h b/source/blender/gpu/GPU_extensions.h
index d0abf671fcd..023cbb804d9 100644
--- a/source/blender/gpu/GPU_extensions.h
+++ b/source/blender/gpu/GPU_extensions.h
@@ -43,6 +43,7 @@ int GPU_max_ubo_binds(void);
 int GPU_max_ubo_size(void);
 float GPU_max_line_width(void);
 void GPU_get_dfdy_factors(float fac[2]);
+bool GPU_arb_base_instance_is_supported(void);
 bool GPU_mip_render_workaround(void);
 bool GPU_depth_blitting_workaround(void);
 bool GPU_unused_fb_slot_workaround(void);
diff --git a/source/blender/gpu/GPU_framebuffer.h b/source/blender/gpu/GPU_framebuffer.h
index b919a3dd8f3..7d0f8b0bcbf 100644
--- a/source/blender/gpu/GPU_framebuffer.h
+++ b/source/blender/gpu/GPU_framebuffer.h
@@ -71,7 +71,7 @@ GPUFrameBuffer *GPU_framebuffer_active_get(void);
     } \
   } while (0)
 
-/* Framebuffer setup : You need to call GPU_framebuffer_bind for theses
+/* Framebuffer setup : You need to call GPU_framebuffer_bind for these
  * to be effective. */
 
 void GPU_framebuffer_texture_attach(GPUFrameBuffer *fb, struct GPUTexture *tex, int slot, int mip);
diff --git a/source/blender/gpu/GPU_matrix.h b/source/blender/gpu/GPU_matrix.h
index 61622c40ff0..a424f3180de 100644
--- a/source/blender/gpu/GPU_matrix.h
+++ b/source/blender/gpu/GPU_matrix.h
@@ -52,12 +52,12 @@ void GPU_matrix_translate_3f(float x, float y, float z);
 void GPU_matrix_translate_3fv(const float vec[3]);
 void GPU_matrix_scale_3f(float x, float y, float z);
 void GPU_matrix_scale_3fv(const float vec[3]);
-void GPU_matrix_rotate_3f(float deg,
-                          float x,
-                          float y,
-                          float z); /* axis of rotation should be a unit vector */
-void GPU_matrix_rotate_3fv(float deg,
-                           const float axis[3]);   /* axis of rotation should be a unit vector */
+
+/* Axis of rotation should be a unit vector. */
+void GPU_matrix_rotate_3f(float deg, float x, float y, float z);
+/* Axis of rotation should be a unit vector. */
+void GPU_matrix_rotate_3fv(float deg, const float axis[3]);
+
 void GPU_matrix_rotate_axis(float deg, char axis); /* TODO: enum for axis? */
 
 void GPU_matrix_look_at(float eyeX,
diff --git a/source/blender/gpu/GPU_shader.h b/source/blender/gpu/GPU_shader.h
index cae2392e503..f4a94c7759a 100644
--- a/source/blender/gpu/GPU_shader.h
+++ b/source/blender/gpu/GPU_shader.h
@@ -355,11 +355,8 @@ typedef enum eGPUBuiltinShader {
   GPU_SHADER_2D_UV_FACES,
   GPU_SHADER_2D_UV_FACES_STRETCH_AREA,
   GPU_SHADER_2D_UV_FACES_STRETCH_ANGLE,
-  /* Selection */
-  GPU_SHADER_3D_FLAT_SELECT_ID,
-  GPU_SHADER_3D_UNIFORM_SELECT_ID,
 } eGPUBuiltinShader;
-#define GPU_SHADER_BUILTIN_LEN (GPU_SHADER_3D_UNIFORM_SELECT_ID + 1)
+#define GPU_SHADER_BUILTIN_LEN (GPU_SHADER_2D_UV_FACES_STRETCH_ANGLE + 1)
 
 /** Support multiple configurations. */
 typedef enum eGPUShaderConfig {
@@ -399,7 +396,9 @@ void GPU_shader_free_builtin_shaders(void);
 
 /* Vertex attributes for shaders */
 
-#define GPU_MAX_ATTR 32
+/* Hardware limit is 16. Position attribute is always needed so we reduce to 15.
+ * This makes sure the GPUVertexFormat name buffer does not overflow. */
+#define GPU_MAX_ATTR 15
 
 typedef struct GPUVertAttrLayers {
   struct {
diff --git a/source/blender/gpu/GPU_vertex_buffer.h b/source/blender/gpu/GPU_vertex_buffer.h
index 3e178e193dc..2d728422c42 100644
--- a/source/blender/gpu/GPU_vertex_buffer.h
+++ b/source/blender/gpu/GPU_vertex_buffer.h
@@ -87,9 +87,10 @@ void GPU_vertbuf_data_len_set(GPUVertBuf *, uint v_len);
  * should not be a problem. */
 
 void GPU_vertbuf_attr_set(GPUVertBuf *, uint a_idx, uint v_idx, const void *data);
-void GPU_vertbuf_attr_fill(GPUVertBuf *,
-                           uint a_idx,
-                           const void *data); /* tightly packed, non interleaved input data */
+
+/* Tightly packed, non interleaved input data. */
+void GPU_vertbuf_attr_fill(GPUVertBuf *, uint a_idx, const void *data);
+
 void GPU_vertbuf_attr_fill_stride(GPUVertBuf *, uint a_idx, uint stride, const void *data);
 
 /* For low level access only */
diff --git a/source/blender/gpu/GPU_vertex_format.h b/source/blender/gpu/GPU_vertex_format.h
index 68608a98a79..8c22e3e1104 100644
--- a/source/blender/gpu/GPU_vertex_format.h
+++ b/source/blender/gpu/GPU_vertex_format.h
@@ -31,9 +31,11 @@
 #include "BLI_assert.h"
 
 #define GPU_VERT_ATTR_MAX_LEN 16
-#define GPU_VERT_ATTR_MAX_NAMES 5
-#define GPU_VERT_ATTR_NAME_AVERAGE_LEN 11
-#define GPU_VERT_ATTR_NAMES_BUF_LEN ((GPU_VERT_ATTR_NAME_AVERAGE_LEN + 1) * GPU_VERT_ATTR_MAX_LEN)
+#define GPU_VERT_ATTR_MAX_NAMES 6
+#define GPU_VERT_ATTR_NAMES_BUF_LEN 256
+#define GPU_VERT_FORMAT_MAX_NAMES 63 /* More than enough, actual max is ~30. */
+/* Computed as GPU_VERT_ATTR_NAMES_BUF_LEN / 30 (actual max format name). */
+#define GPU_MAX_SAFE_ATTRIB_NAME 12
 
 typedef enum {
   GPU_COMP_I8,
@@ -80,14 +82,16 @@ BLI_STATIC_ASSERT(GPU_VERT_ATTR_NAMES_BUF_LEN <= 256,
 typedef struct GPUVertFormat {
   /** 0 to 16 (GPU_VERT_ATTR_MAX_LEN). */
   uint attr_len : 5;
-  /** Total count of active vertex attribute. */
-  uint name_len : 5;
+  /** Total count of active vertex attribute names. (max GPU_VERT_FORMAT_MAX_NAMES) */
+  uint name_len : 6;
   /** Stride in bytes, 1 to 1024. */
   uint stride : 11;
   /** Has the format been packed. */
   uint packed : 1;
   /** Current offset in names[]. */
   uint name_offset : 8;
+  /** Store each attrib in one contiguous buffer region. */
+  uint deinterleaved : 1;
 
   GPUVertAttr attrs[GPU_VERT_ATTR_MAX_LEN];
   char names[GPU_VERT_ATTR_NAMES_BUF_LEN];
@@ -104,6 +108,8 @@ uint GPU_vertformat_attr_add(
     GPUVertFormat *, const char *name, GPUVertCompType, uint comp_len, GPUVertFetchMode);
 void GPU_vertformat_alias_add(GPUVertFormat *, const char *alias);
 
+void GPU_vertformat_deinterleave(GPUVertFormat *format);
+
 int GPU_vertformat_attr_id_get(const GPUVertFormat *, const char *name);
 
 BLI_INLINE const char *GPU_vertformat_attr_name_get(const GPUVertFormat *format,
@@ -113,6 +119,8 @@ BLI_INLINE const char *GPU_vertformat_attr_name_get(const GPUVertFormat *format,
   return format->names + attr->names[n_idx];
 }
 
+void GPU_vertformat_safe_attrib_name(const char *attrib_name, char *r_safe_name, uint max_len);
+
 /* format conversion */
 
 typedef struct GPUPackedNormal {
@@ -122,7 +130,59 @@ typedef struct GPUPackedNormal {
   int w : 2; /* 0 by default, can manually set to { -2, -1, 0, 1 } */
 } GPUPackedNormal;
 
-GPUPackedNormal GPU_normal_convert_i10_v3(const float data[3]);
-GPUPackedNormal GPU_normal_convert_i10_s3(const short data[3]);
+/* OpenGL ES packs in a different order than desktop GL, but component conversion is the same.
+ * Of the code here, only struct GPUPackedNormal needs to change. */
+
+#define SIGNED_INT_10_MAX 511
+#define SIGNED_INT_10_MIN -512
+
+BLI_INLINE int clampi(int x, int min_allowed, int max_allowed)
+{
+#if TRUST_NO_ONE
+  assert(min_allowed <= max_allowed);
+#endif
+  if (x < min_allowed) {
+    return min_allowed;
+  }
+  else if (x > max_allowed) {
+    return max_allowed;
+  }
+  else {
+    return x;
+  }
+}
+
+BLI_INLINE int gpu_convert_normalized_f32_to_i10(float x)
+{
+  int qx = x * 511.0f;
+  return clampi(qx, SIGNED_INT_10_MIN, SIGNED_INT_10_MAX);
+}
+
+BLI_INLINE int gpu_convert_i16_to_i10(short x)
+{
+  /* 16-bit signed --> 10-bit signed */
+  /* TODO: round? */
+  return x >> 6;
+}
+
+BLI_INLINE GPUPackedNormal GPU_normal_convert_i10_v3(const float data[3])
+{
+  GPUPackedNormal n = {
+      gpu_convert_normalized_f32_to_i10(data[0]),
+      gpu_convert_normalized_f32_to_i10(data[1]),
+      gpu_convert_normalized_f32_to_i10(data[2]),
+  };
+  return n;
+}
+
+BLI_INLINE GPUPackedNormal GPU_normal_convert_i10_s3(const short data[3])
+{
+  GPUPackedNormal n = {
+      gpu_convert_i16_to_i10(data[0]),
+      gpu_convert_i16_to_i10(data[1]),
+      gpu_convert_i16_to_i10(data[2]),
+  };
+  return n;
+}
 
 #endif /* __GPU_VERTEX_FORMAT_H__ */
diff --git a/source/blender/gpu/intern/gpu_batch.c b/source/blender/gpu/intern/gpu_batch.c
index ba3c7f68518..583551e3e58 100644
--- a/source/blender/gpu/intern/gpu_batch.c
+++ b/source/blender/gpu/intern/gpu_batch.c
@@ -28,6 +28,7 @@
 
 #include "GPU_batch.h"
 #include "GPU_batch_presets.h"
+#include "GPU_extensions.h"
 #include "GPU_matrix.h"
 #include "GPU_shader.h"
 
@@ -181,6 +182,25 @@ void GPU_batch_instbuf_set(GPUBatch *batch, GPUVertBuf *inst, bool own_vbo)
   }
 }
 
+void GPU_batch_elembuf_set(GPUBatch *batch, GPUIndexBuf *elem, bool own_ibo)
+{
+  BLI_assert(elem != NULL);
+  /* redo the bindings */
+  GPU_batch_vao_cache_clear(batch);
+
+  if (batch->elem != NULL && (batch->owns_flag & GPU_BATCH_OWNS_INDEX)) {
+    GPU_indexbuf_discard(batch->elem);
+  }
+  batch->elem = elem;
+
+  if (own_ibo) {
+    batch->owns_flag |= GPU_BATCH_OWNS_INDEX;
+  }
+  else {
+    batch->owns_flag &= ~GPU_BATCH_OWNS_INDEX;
+  }
+}
+
 /* Returns the index of verts in the batch. */
 int GPU_batch_vertbuf_add_ex(GPUBatch *batch, GPUVertBuf *verts, bool own_vbo)
 {
@@ -361,13 +381,23 @@ static void create_bindings(GPUVertBuf *verts,
   const GPUVertFormat *format = &verts->format;
 
   const uint attr_len = format->attr_len;
-  const uint stride = format->stride;
+  uint stride = format->stride;
+  uint offset = 0;
 
   GPU_vertbuf_use(verts);
 
   for (uint a_idx = 0; a_idx < attr_len; ++a_idx) {
     const GPUVertAttr *a = &format->attrs[a_idx];
-    const GLvoid *pointer = (const GLubyte *)0 + a->offset + v_first * stride;
+
+    if (format->deinterleaved) {
+      offset += ((a_idx == 0) ? 0 : format->attrs[a_idx - 1].sz) * verts->vertex_len;
+      stride = a->sz;
+    }
+    else {
+      offset = a->offset;
+    }
+
+    const GLvoid *pointer = (const GLubyte *)0 + offset + v_first * stride;
 
     for (uint n_idx = 0; n_idx < a->name_len; ++n_idx) {
       const char *name = GPU_vertformat_attr_name_get(format, a, n_idx);
@@ -418,8 +448,11 @@ static void create_bindings(GPUVertBuf *verts,
 
 static void batch_update_program_bindings(GPUBatch *batch, uint v_first)
 {
-  for (int v = 0; v < GPU_BATCH_VBO_MAX_LEN && batch->verts[v] != NULL; ++v) {
-    create_bindings(batch->verts[v], batch->interface, (batch->inst) ? 0 : v_first, false);
+  /* Reverse order so the first VBOs take precedence (in terms of attrib override). */
+  for (int v = GPU_BATCH_VBO_MAX_LEN - 1; v > -1; --v) {
+    if (batch->verts[v] != NULL) {
+      create_bindings(batch->verts[v], batch->interface, (batch->inst) ? 0 : v_first, false);
+    }
   }
   if (batch->inst) {
     create_bindings(batch->inst, batch->interface, v_first, true);
@@ -549,10 +582,10 @@ static void *elem_offset(const GPUIndexBuf *el, int v_first)
 {
 #if GPU_TRACK_INDEX_RANGE
   if (el->index_type == GPU_INDEX_U16) {
-    return (GLushort *)0 + v_first;
+    return (GLushort *)0 + v_first + el->index_start;
   }
 #endif
-  return (GLuint *)0 + v_first;
+  return (GLuint *)0 + v_first + el->index_start;
 }
 
 /* Use when drawing with GPU_batch_draw_advanced */
@@ -599,7 +632,7 @@ void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_fi
     i_count = (batch->inst) ? batch->inst->vertex_len : 1;
   }
 
-  if (!GLEW_ARB_base_instance) {
+  if (!GPU_arb_base_instance_is_supported()) {
     if (i_first > 0 && i_count > 0) {
       /* If using offset drawing with instancing, we must
        * use the default VAO and redo bindings. */
@@ -624,7 +657,7 @@ void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_fi
 #endif
     void *v_first_ofs = elem_offset(el, v_first);
 
-    if (GLEW_ARB_base_instance) {
+    if (GPU_arb_base_instance_is_supported()) {
       glDrawElementsInstancedBaseVertexBaseInstance(
           batch->gl_prim_type, v_count, index_type, v_first_ofs, i_count, base_index, i_first);
     }
@@ -637,7 +670,7 @@ void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_fi
 #ifdef __APPLE__
     glDisable(GL_PRIMITIVE_RESTART);
 #endif
-    if (GLEW_ARB_base_instance) {
+    if (GPU_arb_base_instance_is_supported()) {
       glDrawArraysInstancedBaseInstance(batch->gl_prim_type, v_first, v_count, i_count, i_first);
     }
     else {
diff --git a/source/blender/gpu/intern/gpu_codegen.c b/source/blender/gpu/intern/gpu_codegen.c
index 0c751808489..0e15fdd000b 100644
--- a/source/blender/gpu/intern/gpu_codegen.c
+++ b/source/blender/gpu/intern/gpu_codegen.c
@@ -46,6 +46,7 @@
 #include "GPU_shader.h"
 #include "GPU_texture.h"
 #include "GPU_uniformbuffer.h"
+#include "GPU_vertex_format.h"
 
 #include "BLI_sys_types.h" /* for intptr_t support */
 
@@ -929,12 +930,15 @@ static char *code_generate_fragment(GPUMaterial *material,
   /* XXX This cannot go into gpu_shader_material.glsl because main()
    * would be parsed and generate error */
   /* Old glsl mode compat. */
+  /* TODO(fclem): This is only used by the world shader now. Get rid of it? */
   BLI_dynstr_append(ds, "#ifndef NODETREE_EXEC\n");
   BLI_dynstr_append(ds, "out vec4 fragColor;\n");
   BLI_dynstr_append(ds, "void main()\n");
   BLI_dynstr_append(ds, "{\n");
   BLI_dynstr_append(ds, "\tClosure cl = nodetree_exec();\n");
-  BLI_dynstr_append(ds, "\tfragColor = vec4(cl.radiance, cl.opacity);\n");
+  BLI_dynstr_append(ds,
+                    "\tfragColor = vec4(cl.radiance, "
+                    "saturate(1.0 - avg(cl.transmittance)));\n");
   BLI_dynstr_append(ds, "}\n");
   BLI_dynstr_append(ds, "#endif\n\n");
 
@@ -1008,19 +1012,24 @@ static char *code_generate_vertex(ListBase *nodes, const char *vert_code, bool u
               ds, "#define att%d %s\n", input->attr_id, attr_prefix_get(input->attr_type));
         }
         else {
-          uint hash = BLI_ghashutil_strhash_p(input->attr_name);
+          char attr_safe_name[GPU_MAX_SAFE_ATTRIB_NAME];
+          GPU_vertformat_safe_attrib_name(
+              input->attr_name, attr_safe_name, GPU_MAX_SAFE_ATTRIB_NAME);
           BLI_dynstr_appendf(ds,
-                             "DEFINE_ATTR(%s, %s%u);\n",
+                             "DEFINE_ATTR(%s, %s%s);\n",
                              GPU_DATATYPE_STR[input->type],
                              attr_prefix_get(input->attr_type),
-                             hash);
-          BLI_dynstr_appendf(
-              ds, "#define att%d %s%u\n", input->attr_id, attr_prefix_get(input->attr_type), hash);
+                             attr_safe_name);
+          BLI_dynstr_appendf(ds,
+                             "#define att%d %s%s\n",
+                             input->attr_id,
+                             attr_prefix_get(input->attr_type),
+                             attr_safe_name);
           /* Auto attribute can be vertex color byte buffer.
            * We need to know and convert them to linear space in VS. */
           if (input->attr_type == CD_AUTO_FROM_NAME) {
-            BLI_dynstr_appendf(ds, "uniform bool ba%u;\n", hash);
-            BLI_dynstr_appendf(ds, "#define att%d_is_srgb ba%u\n", input->attr_id, hash);
+            BLI_dynstr_appendf(ds, "uniform bool ba%s;\n", attr_safe_name);
+            BLI_dynstr_appendf(ds, "#define att%d_is_srgb ba%s\n", input->attr_id, attr_safe_name);
           }
         }
         BLI_dynstr_appendf(ds,
@@ -2138,7 +2147,7 @@ static bool gpu_pass_shader_validate(GPUPass *pass, GPUShader *shader)
 
 bool GPU_pass_compile(GPUPass *pass, const char *shname)
 {
-  bool sucess = true;
+  bool success = true;
   if (!pass->compiled) {
     GPUShader *shader = GPU_shader_create(
         pass->vertexcode, pass->fragmentcode, pass->geometrycode, NULL, pass->defines, shname);
@@ -2146,7 +2155,7 @@ bool GPU_pass_compile(GPUPass *pass, const char *shname)
     /* NOTE: Some drivers / gpu allows more active samplers than the opengl limit.
      * We need to make sure to count active samplers to avoid undefined behavior. */
     if (!gpu_pass_shader_validate(pass, shader)) {
-      sucess = false;
+      success = false;
       if (shader != NULL) {
         fprintf(stderr, "GPUShader: error: too many samplers in shader.\n");
         GPU_shader_free(shader);
@@ -2169,7 +2178,7 @@ bool GPU_pass_compile(GPUPass *pass, const char *shname)
     MEM_SAFE_FREE(pass->binary.content);
   }
 
-  return sucess;
+  return success;
 }
 
 void GPU_pass_release(GPUPass *pass)
diff --git a/source/blender/gpu/intern/gpu_context.cpp b/source/blender/gpu/intern/gpu_context.cpp
index a0e03e61d5d..17b86e3eec8 100644
--- a/source/blender/gpu/intern/gpu_context.cpp
+++ b/source/blender/gpu/intern/gpu_context.cpp
@@ -36,6 +36,7 @@
 
 #include "gpu_batch_private.h"
 #include "gpu_context_private.h"
+#include "gpu_matrix_private.h"
 
 #include <vector>
 #include <string.h>
@@ -71,6 +72,7 @@ struct GPUContext {
   std::unordered_set<GPUFrameBuffer *>
       framebuffers; /* Framebuffers that have FBO from this context */
 #endif
+  struct GPUMatrixState *matrix_state;
   std::vector<GLuint> orphaned_vertarray_ids;
   std::vector<GLuint> orphaned_framebuffer_ids;
   std::mutex orphans_mutex; /* todo: try spinlock instead */
@@ -88,12 +90,7 @@ struct GPUContext {
   }
 };
 
-#if defined(_MSC_VER) && (_MSC_VER == 1800)
-#  define thread_local __declspec(thread)
-thread_local GPUContext *active_ctx = NULL;
-#else
 static thread_local GPUContext *active_ctx = NULL;
-#endif
 
 static void orphans_add(GPUContext *ctx, std::vector<GLuint> *orphan_list, GLuint id)
 {
@@ -106,9 +103,11 @@ static void orphans_add(GPUContext *ctx, std::vector<GLuint> *orphan_list, GLuin
 
 static void orphans_clear(GPUContext *ctx)
 {
-  BLI_assert(ctx); /* need at least an active context */
-  BLI_assert(pthread_equal(pthread_self(),
-                           ctx->thread)); /* context has been activated by another thread! */
+  /* need at least an active context */
+  BLI_assert(ctx);
+
+  /* Fails if the context has been activated by another thread. */
+  BLI_assert(pthread_equal(pthread_self(), ctx->thread));
 
   ctx->orphans_mutex.lock();
   if (!ctx->orphaned_vertarray_ids.empty()) {
@@ -144,6 +143,7 @@ GPUContext *GPU_context_create(GLuint default_framebuffer)
   GPUContext *ctx = new GPUContext;
   glGenVertexArrays(1, &ctx->default_vao);
   ctx->default_framebuffer = default_framebuffer;
+  ctx->matrix_state = GPU_matrix_state_create();
   GPU_context_active_set(ctx);
   return ctx;
 }
@@ -164,6 +164,7 @@ void GPU_context_discard(GPUContext *ctx)
     /* this removes the array entry */
     GPU_batch_vao_cache_clear(*ctx->batches.begin());
   }
+  GPU_matrix_state_discard(ctx->matrix_state);
   glDeleteVertexArrays(1, &ctx->default_vao);
   delete ctx;
   active_ctx = NULL;
@@ -338,3 +339,9 @@ GPUFrameBuffer *gpu_context_active_framebuffer_get(GPUContext *ctx)
 {
   return ctx->current_fbo;
 }
+
+struct GPUMatrixState *gpu_context_active_matrix_state_get()
+{
+  BLI_assert(active_ctx);
+  return active_ctx->matrix_state;
+}
diff --git a/source/blender/gpu/intern/gpu_context_private.h b/source/blender/gpu/intern/gpu_context_private.h
index 6825b67d2c8..c9379e5433f 100644
--- a/source/blender/gpu/intern/gpu_context_private.h
+++ b/source/blender/gpu/intern/gpu_context_private.h
@@ -59,6 +59,8 @@ void gpu_context_remove_framebuffer(GPUContext *ctx, struct GPUFrameBuffer *fb);
 void gpu_context_active_framebuffer_set(GPUContext *ctx, struct GPUFrameBuffer *fb);
 struct GPUFrameBuffer *gpu_context_active_framebuffer_get(GPUContext *ctx);
 
+struct GPUMatrixState *gpu_context_active_matrix_state_get(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/source/blender/gpu/intern/gpu_element.c b/source/blender/gpu/intern/gpu_element.c
index 50e7df96503..6c9331b4903 100644
--- a/source/blender/gpu/intern/gpu_element.c
+++ b/source/blender/gpu/intern/gpu_element.c
@@ -162,6 +162,100 @@ void GPU_indexbuf_add_line_adj_verts(
   GPU_indexbuf_add_generic_vert(builder, v4);
 }
 
+void GPU_indexbuf_set_point_vert(GPUIndexBufBuilder *builder, uint elem, uint v1)
+{
+  BLI_assert(builder->prim_type == GPU_PRIM_POINTS);
+  BLI_assert(elem < builder->max_index_len);
+  builder->data[elem++] = v1;
+  if (builder->index_len < elem) {
+    builder->index_len = elem;
+  }
+}
+
+void GPU_indexbuf_set_line_verts(GPUIndexBufBuilder *builder, uint elem, uint v1, uint v2)
+{
+  BLI_assert(builder->prim_type == GPU_PRIM_LINES);
+  BLI_assert(v1 != v2);
+  BLI_assert(v1 <= builder->max_allowed_index);
+  BLI_assert(v2 <= builder->max_allowed_index);
+  BLI_assert((elem + 1) * 2 <= builder->max_index_len);
+  uint idx = elem * 2;
+  builder->data[idx++] = v1;
+  builder->data[idx++] = v2;
+  if (builder->index_len < idx) {
+    builder->index_len = idx;
+  }
+}
+
+void GPU_indexbuf_set_tri_verts(GPUIndexBufBuilder *builder, uint elem, uint v1, uint v2, uint v3)
+{
+  BLI_assert(builder->prim_type == GPU_PRIM_TRIS);
+  BLI_assert(v1 != v2 && v2 != v3 && v3 != v1);
+  BLI_assert(v1 <= builder->max_allowed_index);
+  BLI_assert(v2 <= builder->max_allowed_index);
+  BLI_assert(v3 <= builder->max_allowed_index);
+  BLI_assert((elem + 1) * 3 <= builder->max_index_len);
+  uint idx = elem * 3;
+  builder->data[idx++] = v1;
+  builder->data[idx++] = v2;
+  builder->data[idx++] = v3;
+  if (builder->index_len < idx) {
+    builder->index_len = idx;
+  }
+}
+
+void GPU_indexbuf_set_point_restart(GPUIndexBufBuilder *builder, uint elem)
+{
+  BLI_assert(builder->prim_type == GPU_PRIM_POINTS);
+  BLI_assert(elem < builder->max_index_len);
+  builder->data[elem++] = RESTART_INDEX;
+  if (builder->index_len < elem) {
+    builder->index_len = elem;
+  }
+}
+
+void GPU_indexbuf_set_line_restart(GPUIndexBufBuilder *builder, uint elem)
+{
+  BLI_assert(builder->prim_type == GPU_PRIM_LINES);
+  BLI_assert((elem + 1) * 2 <= builder->max_index_len);
+  uint idx = elem * 2;
+  builder->data[idx++] = RESTART_INDEX;
+  builder->data[idx++] = RESTART_INDEX;
+  if (builder->index_len < idx) {
+    builder->index_len = idx;
+  }
+}
+
+void GPU_indexbuf_set_tri_restart(GPUIndexBufBuilder *builder, uint elem)
+{
+  BLI_assert(builder->prim_type == GPU_PRIM_TRIS);
+  BLI_assert((elem + 1) * 3 <= builder->max_index_len);
+  uint idx = elem * 3;
+  builder->data[idx++] = RESTART_INDEX;
+  builder->data[idx++] = RESTART_INDEX;
+  builder->data[idx++] = RESTART_INDEX;
+  if (builder->index_len < idx) {
+    builder->index_len = idx;
+  }
+}
+
+GPUIndexBuf *GPU_indexbuf_create_subrange(GPUIndexBuf *elem_src, uint start, uint length)
+{
+  GPUIndexBuf *elem = MEM_callocN(sizeof(GPUIndexBuf), "GPUIndexBuf");
+  BLI_assert(elem_src && !elem_src->is_subrange);
+  BLI_assert(start + length <= elem_src->index_len);
+#if GPU_TRACK_INDEX_RANGE
+  elem->index_type = elem_src->index_type;
+  elem->gl_index_type = elem_src->gl_index_type;
+  elem->base_index = elem_src->base_index;
+#endif
+  elem->is_subrange = true;
+  elem->src = elem_src;
+  elem->index_start = start;
+  elem->index_len = length;
+  return elem;
+}
+
 #if GPU_TRACK_INDEX_RANGE
 /* Everything remains 32 bit while building to keep things simple.
  * Find min/max after, then convert to smallest index type possible. */
@@ -271,6 +365,10 @@ static void indexbuf_upload_data(GPUIndexBuf *elem)
 
 void GPU_indexbuf_use(GPUIndexBuf *elem)
 {
+  if (elem->is_subrange) {
+    GPU_indexbuf_use(elem->src);
+    return;
+  }
   if (elem->ibo_id == 0) {
     elem->ibo_id = GPU_buf_alloc();
   }
@@ -285,7 +383,7 @@ void GPU_indexbuf_discard(GPUIndexBuf *elem)
   if (elem->ibo_id) {
     GPU_buf_free(elem->ibo_id);
   }
-  if (elem->data) {
+  if (!elem->is_subrange && elem->data) {
     MEM_freeN(elem->data);
   }
   MEM_freeN(elem);
diff --git a/source/blender/gpu/intern/gpu_extensions.c b/source/blender/gpu/intern/gpu_extensions.c
index ba5cf214a42..5839b34cd19 100644
--- a/source/blender/gpu/intern/gpu_extensions.c
+++ b/source/blender/gpu/intern/gpu_extensions.c
@@ -77,6 +77,9 @@ static struct GPUGlobal {
    * number is factor on screen and second is off-screen */
   float dfdyfactors[2];
   float max_anisotropy;
+  /* Some Intel drivers have limited support for `GLEW_ARB_base_instance` so in
+   * these cases it is best to indicate that it is not supported. See T67951 */
+  bool glew_arb_base_instance_is_supported;
   /* Some Intel drivers have issues with using mips as framebuffer targets if
    * GL_TEXTURE_MAX_LEVEL is higher than the target mip.
    * We need a workaround in this cases. */
@@ -197,6 +200,11 @@ void GPU_get_dfdy_factors(float fac[2])
   copy_v2_v2(fac, GG.dfdyfactors);
 }
 
+bool GPU_arb_base_instance_is_supported(void)
+{
+  return GG.glew_arb_base_instance_is_supported;
+}
+
 bool GPU_mip_render_workaround(void)
 {
   return GG.mip_render_workaround;
@@ -301,6 +309,7 @@ void gpu_extensions_init(void)
   }
   else if ((strstr(renderer, "Mesa DRI R")) ||
            (strstr(renderer, "Radeon") && strstr(vendor, "X.Org")) ||
+           (strstr(renderer, "AMD") && strstr(vendor, "X.Org")) ||
            (strstr(renderer, "Gallium ") && strstr(renderer, " on ATI ")) ||
            (strstr(renderer, "Gallium ") && strstr(renderer, " on AMD "))) {
     GG.device = GPU_DEVICE_ATI;
@@ -343,6 +352,7 @@ void gpu_extensions_init(void)
   GG.os = GPU_OS_UNIX;
 #endif
 
+  GG.glew_arb_base_instance_is_supported = GLEW_ARB_base_instance;
   gpu_detect_mip_render_workaround();
 
   if (G.debug & G_DEBUG_GPU_FORCE_WORKAROUNDS) {
@@ -378,11 +388,11 @@ void gpu_extensions_init(void)
       GG.dfdyfactors[1] = 1.0;
     }
 
-    if (strstr(version, "Build 10.18.10.3379") || strstr(version, "Build 10.18.10.3574") ||
-        strstr(version, "Build 10.18.10.4252") || strstr(version, "Build 10.18.10.4358") ||
-        strstr(version, "Build 10.18.10.4653") || strstr(version, "Build 10.18.10.5069") ||
-        strstr(version, "Build 10.18.14.4264") || strstr(version, "Build 10.18.14.4432") ||
-        strstr(version, "Build 10.18.14.5067")) {
+    if (strstr(version, "Build 10.18.10.3") || strstr(version, "Build 10.18.10.4") ||
+        strstr(version, "Build 10.18.14.4") || strstr(version, "Build 10.18.14.5")) {
+      /* Maybe not all of these drivers have problems with `GLEW_ARB_base_instance`.
+       * But it's hard to test each case. */
+      GG.glew_arb_base_instance_is_supported = false;
       GG.context_local_shaders_workaround = true;
     }
   }
diff --git a/source/blender/gpu/intern/gpu_immediate.c b/source/blender/gpu/intern/gpu_immediate.c
index 6b5c4836e83..0e3019ad122 100644
--- a/source/blender/gpu/intern/gpu_immediate.c
+++ b/source/blender/gpu/intern/gpu_immediate.c
@@ -220,8 +220,10 @@ void immBegin(GPUPrimType prim_type, uint vertex_len)
   /* does the current buffer have enough room? */
   const uint available_bytes = IMM_BUFFER_SIZE - imm.buffer_offset;
   /* ensure vertex data is aligned */
-  const uint pre_padding = padding(
-      imm.buffer_offset, imm.vertex_format.stride); /* might waste a little space, but it's safe */
+
+  /* Might waste a little space, but it's safe. */
+  const uint pre_padding = padding(imm.buffer_offset, imm.vertex_format.stride);
+
   if ((bytes_needed + pre_padding) <= available_bytes) {
     imm.buffer_offset += pre_padding;
   }
diff --git a/source/blender/gpu/intern/gpu_material.c b/source/blender/gpu/intern/gpu_material.c
index 6f1b8d2d0c6..20b91c0c95d 100644
--- a/source/blender/gpu/intern/gpu_material.c
+++ b/source/blender/gpu/intern/gpu_material.c
@@ -733,7 +733,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
 
 void GPU_material_compile(GPUMaterial *mat)
 {
-  bool sucess;
+  bool success;
 
   BLI_assert(mat->status == GPU_MAT_QUEUED);
   BLI_assert(mat->pass);
@@ -741,12 +741,12 @@ void GPU_material_compile(GPUMaterial *mat)
   /* NOTE: The shader may have already been compiled here since we are
    * sharing GPUShader across GPUMaterials. In this case it's a no-op. */
 #ifndef NDEBUG
-  sucess = GPU_pass_compile(mat->pass, mat->name);
+  success = GPU_pass_compile(mat->pass, mat->name);
 #else
-  sucess = GPU_pass_compile(mat->pass, __func__);
+  success = GPU_pass_compile(mat->pass, __func__);
 #endif
 
-  if (sucess) {
+  if (success) {
     GPUShader *sh = GPU_pass_shader_get(mat->pass);
     if (sh != NULL) {
       mat->status = GPU_MAT_SUCCESS;
diff --git a/source/blender/gpu/intern/gpu_matrix.c b/source/blender/gpu/intern/gpu_matrix.c
index 58ca800a92c..fb0dffb58d1 100644
--- a/source/blender/gpu/intern/gpu_matrix.c
+++ b/source/blender/gpu/intern/gpu_matrix.c
@@ -23,6 +23,9 @@
 
 #include "GPU_shader_interface.h"
 
+#include "gpu_context_private.h"
+#include "gpu_matrix_private.h"
+
 #define SUPPRESS_GENERIC_MATRIX_API
 #define USE_GPU_PY_MATRIX_API /* only so values are declared */
 #include "GPU_matrix.h"
@@ -32,6 +35,8 @@
 #include "BLI_math_rotation.h"
 #include "BLI_math_vector.h"
 
+#include "MEM_guardedalloc.h"
+
 #define DEBUG_MATRIX_BIND 0
 
 #define MATRIX_STACK_DEPTH 32
@@ -44,7 +49,7 @@ typedef struct MatrixStack {
   uint top;
 } MatrixStack;
 
-typedef struct {
+typedef struct GPUMatrixState {
   MatrixStack model_view_stack;
   MatrixStack projection_stack;
 
@@ -56,8 +61,16 @@ typedef struct {
    * TODO: separate Model from View transform? Batches/objects have model,
    * camera/eye has view & projection
    */
-} MatrixState;
+} GPUMatrixState;
+
+#define ModelViewStack gpu_context_active_matrix_state_get()->model_view_stack
+#define ModelView ModelViewStack.stack[ModelViewStack.top]
 
+#define ProjectionStack gpu_context_active_matrix_state_get()->projection_stack
+#define Projection ProjectionStack.stack[ProjectionStack.top]
+
+GPUMatrixState *GPU_matrix_state_create(void)
+{
 #define MATRIX_4X4_IDENTITY \
   { \
     {1.0f, 0.0f, 0.0f, 0.0f}, {0.0f, 1.0f, 0.0f, 0.0f}, {0.0f, 0.0f, 1.0f, 0.0f}, \
@@ -66,27 +79,36 @@ typedef struct {
     } \
   }
 
-static MatrixState state = {
-    .model_view_stack = {{MATRIX_4X4_IDENTITY}, 0},
-    .projection_stack = {{MATRIX_4X4_IDENTITY}, 0},
-    .dirty = true,
-};
+  GPUMatrixState *state = MEM_mallocN(sizeof(*state), __func__);
+  const MatrixStack identity_stack = {{MATRIX_4X4_IDENTITY}, 0};
+
+  state->model_view_stack = state->projection_stack = identity_stack;
+  state->dirty = true;
 
 #undef MATRIX_4X4_IDENTITY
 
-#define ModelViewStack state.model_view_stack
-#define ModelView ModelViewStack.stack[ModelViewStack.top]
+  return state;
+}
 
-#define ProjectionStack state.projection_stack
-#define Projection ProjectionStack.stack[ProjectionStack.top]
+void GPU_matrix_state_discard(GPUMatrixState *state)
+{
+  MEM_freeN(state);
+}
+
+static void gpu_matrix_state_active_set_dirty(bool value)
+{
+  GPUMatrixState *state = gpu_context_active_matrix_state_get();
+  state->dirty = value;
+}
 
 void GPU_matrix_reset(void)
 {
-  state.model_view_stack.top = 0;
-  state.projection_stack.top = 0;
+  GPUMatrixState *state = gpu_context_active_matrix_state_get();
+  state->model_view_stack.top = 0;
+  state->projection_stack.top = 0;
   unit_m4(ModelView);
   unit_m4(Projection);
-  state.dirty = true;
+  gpu_matrix_state_active_set_dirty(true);
 }
 
 #ifdef WITH_GPU_SAFETY
@@ -123,7 +145,7 @@ void GPU_matrix_pop(void)
 {
   BLI_assert(ModelViewStack.top > 0);
   ModelViewStack.top--;
-  state.dirty = true;
+  gpu_matrix_state_active_set_dirty(true);
 }
 
 void GPU_matrix_push_projection(void)
@@ -137,34 +159,34 @@ void GPU_matrix_pop_projection(void)
 {
   BLI_assert(ProjectionStack.top > 0);
   ProjectionStack.top--;
-  state.dirty = true;
+  gpu_matrix_state_active_set_dirty(true);
 }
 
 void GPU_matrix_set(const float m[4][4])
 {
   copy_m4_m4(ModelView, m);
   CHECKMAT(ModelView3D);
-  state.dirty = true;
+  gpu_matrix_state_active_set_dirty(true);
 }
 
 void GPU_matrix_identity_projection_set(void)
 {
   unit_m4(Projection);
   CHECKMAT(Projection3D);
-  state.dirty = true;
+  gpu_matrix_state_active_set_dirty(true);
 }
 
 void GPU_matrix_projection_set(const float m[4][4])
 {
   copy_m4_m4(Projection, m);
   CHECKMAT(Projection3D);
-  state.dirty = true;
+  gpu_matrix_state_active_set_dirty(true);
 }
 
 void GPU_matrix_identity_set(void)
 {
   unit_m4(ModelView);
-  state.dirty = true;
+  gpu_matrix_state_active_set_dirty(true);
 }
 
 void GPU_matrix_translate_2f(float x, float y)
@@ -194,7 +216,7 @@ void GPU_matrix_translate_3f(float x, float y, float z)
   m[3][2] = z;
   GPU_matrix_mul(m);
 #endif
-  state.dirty = true;
+  gpu_matrix_state_active_set_dirty(true);
 }
 
 void GPU_matrix_translate_3fv(const float vec[3])
@@ -243,7 +265,7 @@ void GPU_matrix_mul(const float m[4][4])
 {
   mul_m4_m4_post(ModelView, m);
   CHECKMAT(ModelView);
-  state.dirty = true;
+  gpu_matrix_state_active_set_dirty(true);
 }
 
 void GPU_matrix_rotate_2d(float deg)
@@ -272,7 +294,7 @@ void GPU_matrix_rotate_axis(float deg, char axis)
   /* rotate_m4 works in place */
   rotate_m4(ModelView, axis, DEG2RADF(deg));
   CHECKMAT(ModelView);
-  state.dirty = true;
+  gpu_matrix_state_active_set_dirty(true);
 }
 
 static void mat4_ortho_set(
@@ -298,7 +320,7 @@ static void mat4_ortho_set(
   m[2][3] = 0.0f;
   m[3][3] = 1.0f;
 
-  state.dirty = true;
+  gpu_matrix_state_active_set_dirty(true);
 }
 
 static void mat4_frustum_set(
@@ -324,7 +346,7 @@ static void mat4_frustum_set(
   m[2][3] = -1.0f;
   m[3][3] = 0.0f;
 
-  state.dirty = true;
+  gpu_matrix_state_active_set_dirty(true);
 }
 
 static void mat4_look_from_origin(float m[4][4], float lookdir[3], float camup[3])
@@ -389,14 +411,14 @@ static void mat4_look_from_origin(float m[4][4], float lookdir[3], float camup[3
   m[2][3] = 0.0f;
   m[3][3] = 1.0f;
 
-  state.dirty = true;
+  gpu_matrix_state_active_set_dirty(true);
 }
 
 void GPU_matrix_ortho_set(float left, float right, float bottom, float top, float near, float far)
 {
   mat4_ortho_set(Projection, left, right, bottom, top, near, far);
   CHECKMAT(Projection);
-  state.dirty = true;
+  gpu_matrix_state_active_set_dirty(true);
 }
 
 void GPU_matrix_ortho_2d_set(float left, float right, float bottom, float top)
@@ -404,7 +426,7 @@ void GPU_matrix_ortho_2d_set(float left, float right, float bottom, float top)
   Mat4 m;
   mat4_ortho_set(m, left, right, bottom, top, -1.0f, 1.0f);
   CHECKMAT(Projection2D);
-  state.dirty = true;
+  gpu_matrix_state_active_set_dirty(true);
 }
 
 void GPU_matrix_frustum_set(
@@ -412,7 +434,7 @@ void GPU_matrix_frustum_set(
 {
   mat4_frustum_set(Projection, left, right, bottom, top, near, far);
   CHECKMAT(Projection);
-  state.dirty = true;
+  gpu_matrix_state_active_set_dirty(true);
 }
 
 void GPU_matrix_perspective_set(float fovy, float aspect, float near, float far)
@@ -678,12 +700,13 @@ void GPU_matrix_bind(const GPUShaderInterface *shaderface)
     glUniformMatrix4fv(P_inv->location, 1, GL_FALSE, (const float *)m);
   }
 
-  state.dirty = false;
+  gpu_matrix_state_active_set_dirty(false);
 }
 
 bool GPU_matrix_dirty_get(void)
 {
-  return state.dirty;
+  GPUMatrixState *state = gpu_context_active_matrix_state_get();
+  return state->dirty;
 }
 
 /* -------------------------------------------------------------------- */
@@ -695,12 +718,14 @@ BLI_STATIC_ASSERT(GPU_PY_MATRIX_STACK_LEN + 1 == MATRIX_STACK_DEPTH, "define mis
 
 int GPU_matrix_stack_level_get_model_view(void)
 {
-  return (int)state.model_view_stack.top;
+  GPUMatrixState *state = gpu_context_active_matrix_state_get();
+  return (int)state->model_view_stack.top;
 }
 
 int GPU_matrix_stack_level_get_projection(void)
 {
-  return (int)state.projection_stack.top;
+  GPUMatrixState *state = gpu_context_active_matrix_state_get();
+  return (int)state->projection_stack.top;
 }
 
 /** \} */
diff --git a/source/blender/gpu/intern/gpu_matrix_private.h b/source/blender/gpu/intern/gpu_matrix_private.h
new file mode 100644
index 00000000000..862ef065481
--- /dev/null
+++ b/source/blender/gpu/intern/gpu_matrix_private.h
@@ -0,0 +1,35 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#ifndef __GPU_MATRIX_PRIVATE_H__
+#define __GPU_MATRIX_PRIVATE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct GPUMatrixState *GPU_matrix_state_create(void);
+void GPU_matrix_state_discard(struct GPUMatrixState *state);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __GPU_MATRIX_PRIVATE_H__ */
diff --git a/source/blender/gpu/intern/gpu_shader.c b/source/blender/gpu/intern/gpu_shader.c
index c142d8ccba2..3e930d19696 100644
--- a/source/blender/gpu/intern/gpu_shader.c
+++ b/source/blender/gpu/intern/gpu_shader.c
@@ -139,9 +139,6 @@ extern char datatoc_gpu_shader_2D_edituvs_edges_vert_glsl[];
 extern char datatoc_gpu_shader_2D_edituvs_faces_vert_glsl[];
 extern char datatoc_gpu_shader_2D_edituvs_stretch_vert_glsl[];
 
-extern char datatoc_gpu_shader_3D_selection_id_vert_glsl[];
-extern char datatoc_gpu_shader_selection_id_frag_glsl[];
-
 extern char datatoc_gpu_shader_2D_line_dashed_uniform_color_vert_glsl[];
 extern char datatoc_gpu_shader_2D_line_dashed_frag_glsl[];
 extern char datatoc_gpu_shader_2D_line_dashed_geom_glsl[];
@@ -1312,18 +1309,6 @@ static const GPUShaderStages builtin_shader_stages[GPU_SHADER_BUILTIN_LEN] = {
             .defs = "#define STRETCH_ANGLE\n",
         },
 
-    [GPU_SHADER_3D_FLAT_SELECT_ID] =
-        {
-            .vert = datatoc_gpu_shader_3D_selection_id_vert_glsl,
-            .frag = datatoc_gpu_shader_selection_id_frag_glsl,
-        },
-    [GPU_SHADER_3D_UNIFORM_SELECT_ID] =
-        {
-            .vert = datatoc_gpu_shader_3D_selection_id_vert_glsl,
-            .frag = datatoc_gpu_shader_selection_id_frag_glsl,
-            .defs = "#define UNIFORM_ID\n",
-        },
-
     [GPU_SHADER_GPENCIL_STROKE] =
         {
             .vert = datatoc_gpu_shader_gpencil_stroke_vert_glsl,
@@ -1370,9 +1355,7 @@ GPUShader *GPU_shader_get_builtin_shader_with_config(eGPUBuiltinShader shader,
                       GPU_SHADER_3D_GROUNDLINE,
                       GPU_SHADER_3D_GROUNDPOINT,
                       GPU_SHADER_DISTANCE_LINES,
-                      GPU_SHADER_INSTANCE_EDGES_VARIYING_COLOR,
-                      GPU_SHADER_3D_FLAT_SELECT_ID,
-                      GPU_SHADER_3D_UNIFORM_SELECT_ID) ||
+                      GPU_SHADER_INSTANCE_EDGES_VARIYING_COLOR) ||
                  ELEM(shader,
                       GPU_SHADER_3D_FLAT_COLOR,
                       GPU_SHADER_3D_LINE_DASHED_UNIFORM_COLOR,
diff --git a/source/blender/gpu/intern/gpu_texture.c b/source/blender/gpu/intern/gpu_texture.c
index dab17fcd72a..955b11036ef 100644
--- a/source/blender/gpu/intern/gpu_texture.c
+++ b/source/blender/gpu/intern/gpu_texture.c
@@ -1598,7 +1598,7 @@ void GPU_texture_generate_mipmap(GPUTexture *tex)
 
   if (GPU_texture_depth(tex)) {
     /* Some drivers have bugs when using glGenerateMipmap with depth textures (see T56789).
-     * In this case we just create a complete texture with mipmaps manually without downsampling.
+     * In this case we just create a complete texture with mipmaps manually without down-sampling.
      * You must initialize the texture levels using other methods like
      * GPU_framebuffer_recursive_downsample(). */
     int levels = 1 + floor(log2(max_ii(tex->w, tex->h)));
diff --git a/source/blender/gpu/intern/gpu_vertex_format.c b/source/blender/gpu/intern/gpu_vertex_format.c
index e745c525df6..66e5e254734 100644
--- a/source/blender/gpu/intern/gpu_vertex_format.c
+++ b/source/blender/gpu/intern/gpu_vertex_format.c
@@ -31,6 +31,8 @@
 #include <string.h>
 
 #include "BLI_utildefines.h"
+#include "BLI_string.h"
+#include "BLI_ghash.h"
 
 #define PACK_DEBUG 0
 
@@ -149,9 +151,9 @@ uint GPU_vertformat_attr_add(GPUVertFormat *format,
                              GPUVertFetchMode fetch_mode)
 {
 #if TRUST_NO_ONE
-  assert(format->name_len < GPU_VERT_ATTR_MAX_LEN); /* there's room for more */
-  assert(format->attr_len < GPU_VERT_ATTR_MAX_LEN); /* there's room for more */
-  assert(!format->packed);                          /* packed means frozen/locked */
+  assert(format->name_len < GPU_VERT_FORMAT_MAX_NAMES); /* there's room for more */
+  assert(format->attr_len < GPU_VERT_ATTR_MAX_LEN);     /* there's room for more */
+  assert(!format->packed);                              /* packed means frozen/locked */
   assert((comp_len >= 1 && comp_len <= 4) || comp_len == 8 || comp_len == 12 || comp_len == 16);
 
   switch (comp_type) {
@@ -163,8 +165,10 @@ uint GPU_vertformat_attr_add(GPUVertFormat *format,
       /* 10_10_10 format intended for normals (xyz) or colors (rgb)
        * extra component packed.w can be manually set to { -2, -1, 0, 1 } */
       assert(comp_len == 3 || comp_len == 4);
-      assert(fetch_mode ==
-             GPU_FETCH_INT_TO_FLOAT_UNIT); /* not strictly required, may relax later */
+
+      /* Not strictly required, may relax later. */
+      assert(fetch_mode == GPU_FETCH_INT_TO_FLOAT_UNIT);
+
       break;
     default:
       /* integer types can be kept as int or converted/normalized to float */
@@ -195,7 +199,7 @@ void GPU_vertformat_alias_add(GPUVertFormat *format, const char *alias)
 {
   GPUVertAttr *attr = &format->attrs[format->attr_len - 1];
 #if TRUST_NO_ONE
-  assert(format->name_len < GPU_VERT_ATTR_MAX_LEN); /* there's room for more */
+  assert(format->name_len < GPU_VERT_FORMAT_MAX_NAMES); /* there's room for more */
   assert(attr->name_len < GPU_VERT_ATTR_MAX_NAMES);
 #endif
   format->name_len++; /* multiname support */
@@ -216,6 +220,79 @@ int GPU_vertformat_attr_id_get(const GPUVertFormat *format, const char *name)
   return -1;
 }
 
+/* Encode 8 original bytes into 11 safe bytes. */
+static void safe_bytes(char out[11], const char data[8])
+{
+  char safe_chars[63] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
+
+  uint64_t in = *(uint64_t *)data;
+  for (int i = 0; i < 11; i++) {
+    /* Encoding in base63 */
+    out[i] = safe_chars[in % 63lu];
+    in /= 63lu;
+  }
+}
+
+/* Warning: Always add a prefix to the result of this function as
+ * the generated string can start with a number and not be a valid attribute name. */
+void GPU_vertformat_safe_attrib_name(const char *attrib_name,
+                                     char *r_safe_name,
+                                     uint UNUSED(max_len))
+{
+  char data[8] = {0};
+  uint len = strlen(attrib_name);
+
+  if (len > 8) {
+    /* Start with the first 4 chars of the name. */
+    for (int i = 0; i < 4; i++) {
+      data[i] = attrib_name[i];
+    }
+    /* We use a hash to identify each data layer based on its name.
+     * NOTE: This is still prone to hash collision but the risks are very low. */
+    /* Start hashing after the first 4 chars. */
+    *(uint *)&data[4] = BLI_ghashutil_strhash_p_murmur(attrib_name + 4);
+  }
+  else {
+    /* Copy the whole name. Collision is barely possible
+     * (hash would have to be equal to the last 4 bytes). */
+    for (int i = 0; i < 8 && attrib_name[i] != '\0'; i++) {
+      data[i] = attrib_name[i];
+    }
+  }
+  /* Convert to safe bytes characters. */
+  safe_bytes(r_safe_name, data);
+  /* End the string */
+  r_safe_name[11] = '\0';
+
+  BLI_assert(GPU_MAX_SAFE_ATTRIB_NAME >= 12);
+#if 0 /* For debugging */
+  printf("%s > %lx > %s\n", attrib_name, *(uint64_t *)data, r_safe_name);
+#endif
+}
+
+/* Make attribute layout non-interleaved.
+ * Warning! This does not change data layout!
+ * Use direct buffer access to fill the data.
+ * This is for advanced usage.
+ *
+ * Deinterleaved data means all attrib data for each attrib
+ * is stored contiguously like this:
+ * 000011112222
+ * instead of:
+ * 012012012012
+ *
+ * Note this is per attrib deinterleaving, NOT per component.
+ *  */
+void GPU_vertformat_deinterleave(GPUVertFormat *format)
+{
+  /* Ideally we should change the stride and offset here. This would allow
+   * us to use GPU_vertbuf_attr_set / GPU_vertbuf_attr_fill. But since
+   * we use only 11 bits for attr->offset this limits the size of the
+   * buffer considerably. So instead we do the conversion when creating
+   * bindings in create_bindings(). */
+  format->deinterleaved = true;
+}
+
 uint padding(uint offset, uint alignment)
 {
   const uint mod = offset % alignment;
@@ -389,58 +466,3 @@ void GPU_vertformat_from_interface(GPUVertFormat *format, const GPUShaderInterfa
     }
   }
 }
-
-/* OpenGL ES packs in a different order as desktop GL but component conversion is the same.
- * Of the code here, only struct GPUPackedNormal needs to change. */
-
-#define SIGNED_INT_10_MAX 511
-#define SIGNED_INT_10_MIN -512
-
-static int clampi(int x, int min_allowed, int max_allowed)
-{
-#if TRUST_NO_ONE
-  assert(min_allowed <= max_allowed);
-#endif
-  if (x < min_allowed) {
-    return min_allowed;
-  }
-  else if (x > max_allowed) {
-    return max_allowed;
-  }
-  else {
-    return x;
-  }
-}
-
-static int quantize(float x)
-{
-  int qx = x * 511.0f;
-  return clampi(qx, SIGNED_INT_10_MIN, SIGNED_INT_10_MAX);
-}
-
-static int convert_i16(short x)
-{
-  /* 16-bit signed --> 10-bit signed */
-  /* TODO: round? */
-  return x >> 6;
-}
-
-GPUPackedNormal GPU_normal_convert_i10_v3(const float data[3])
-{
-  GPUPackedNormal n = {
-      .x = quantize(data[0]),
-      .y = quantize(data[1]),
-      .z = quantize(data[2]),
-  };
-  return n;
-}
-
-GPUPackedNormal GPU_normal_convert_i10_s3(const short data[3])
-{
-  GPUPackedNormal n = {
-      .x = convert_i16(data[0]),
-      .y = convert_i16(data[1]),
-      .z = convert_i16(data[2]),
-  };
-  return n;
-}
diff --git a/source/blender/gpu/intern/gpu_vertex_format_private.h b/source/blender/gpu/intern/gpu_vertex_format_private.h
index a850d17a1dd..13459101669 100644
--- a/source/blender/gpu/intern/gpu_vertex_format_private.h
+++ b/source/blender/gpu/intern/gpu_vertex_format_private.h
@@ -27,6 +27,7 @@
 #define __GPU_VERTEX_FORMAT_PRIVATE_H__
 
 void VertexFormat_pack(GPUVertFormat *format);
+void VertexFormat_deinterleave(GPUVertFormat *format, uint vertex_len);
 uint padding(uint offset, uint alignment);
 uint vertex_buffer_size(const GPUVertFormat *format, uint vertex_len);
 
diff --git a/source/blender/gpu/shaders/gpu_shader_2D_edituvs_stretch_vert.glsl b/source/blender/gpu/shaders/gpu_shader_2D_edituvs_stretch_vert.glsl
index 810784e2fbc..0ce5504dfa8 100644
--- a/source/blender/gpu/shaders/gpu_shader_2D_edituvs_stretch_vert.glsl
+++ b/source/blender/gpu/shaders/gpu_shader_2D_edituvs_stretch_vert.glsl
@@ -8,7 +8,7 @@ in vec2 pos;
 in float stretch;
 #else
 
-in vec4 uv_adj;
+in vec2 uv_angles;
 in float angle;
 #endif
 
@@ -52,6 +52,11 @@ vec3 weight_to_rgb(float weight)
 
 #define M_PI 3.1415926535897932
 
+vec2 angle_to_v2(float angle)
+{
+  return vec2(cos(angle), sin(angle));
+}
+
 /* Adapted from BLI_math_vector.h */
 float angle_normalized_v2v2(vec2 v1, vec2 v2)
 {
@@ -69,7 +74,9 @@ void main()
   gl_Position = ModelViewProjectionMatrix * vec4(pos, 0.0, 1.0);
 
 #ifdef STRETCH_ANGLE
-  float uv_angle = angle_normalized_v2v2(uv_adj.xy, uv_adj.zw) / M_PI;
+  vec2 v1 = angle_to_v2(uv_angles.x * M_PI);
+  vec2 v2 = angle_to_v2(uv_angles.y * M_PI);
+  float uv_angle = angle_normalized_v2v2(v1, v2) / M_PI;
   float stretch = 1.0 - abs(uv_angle - angle);
   stretch = stretch;
   stretch = 1.0 - stretch * stretch;
diff --git a/source/blender/gpu/shaders/gpu_shader_3D_selection_id_vert.glsl b/source/blender/gpu/shaders/gpu_shader_3D_selection_id_vert.glsl
deleted file mode 100644
index 0d58909efd8..00000000000
--- a/source/blender/gpu/shaders/gpu_shader_3D_selection_id_vert.glsl
+++ /dev/null
@@ -1,26 +0,0 @@
-
-uniform mat4 ModelViewProjectionMatrix;
-
-in vec3 pos;
-
-#ifndef UNIFORM_ID
-uniform uint offset;
-in uint color;
-
-flat out uint id;
-#endif
-
-void main()
-{
-#ifndef UNIFORM_ID
-  id = offset + color;
-#endif
-
-  vec4 pos_4d = vec4(pos, 1.0);
-  gl_Position = ModelViewProjectionMatrix * pos_4d;
-
-#ifdef USE_WORLD_CLIP_PLANES
-  /* Warning: ModelMatrix is typically used but select drawing is different. */
-  world_clip_planes_calc_clip_distance(pos);
-#endif
-}
diff --git a/source/blender/gpu/shaders/gpu_shader_material.glsl b/source/blender/gpu/shaders/gpu_shader_material.glsl
index 6149409774a..1529279ca03 100644
--- a/source/blender/gpu/shaders/gpu_shader_material.glsl
+++ b/source/blender/gpu/shaders/gpu_shader_material.glsl
@@ -250,187 +250,156 @@ void camera(vec3 co, out vec3 outview, out float outdepth, out float outdist)
   outview = normalize(co);
 }
 
-void math_add(float val1, float val2, out float outval)
+void math_add(float a, float b, out float result)
 {
-  outval = val1 + val2;
+  result = a + b;
 }
 
-void math_subtract(float val1, float val2, out float outval)
+void math_subtract(float a, float b, out float result)
 {
-  outval = val1 - val2;
+  result = a - b;
 }
 
-void math_multiply(float val1, float val2, out float outval)
+void math_multiply(float a, float b, out float result)
 {
-  outval = val1 * val2;
+  result = a * b;
 }
 
-void math_divide(float val1, float val2, out float outval)
+void math_divide(float a, float b, out float result)
 {
-  if (val2 == 0.0) {
-    outval = 0.0;
+  result = (b != 0.0) ? a / b : 0.0;
+}
+
+void math_power(float a, float b, out float result)
+{
+  if (a >= 0.0) {
+    result = compatible_pow(a, b);
   }
   else {
-    outval = val1 / val2;
+    float fraction = mod(abs(b), 1.0);
+    if (fraction > 0.999 || fraction < 0.001) {
+      result = compatible_pow(a, floor(b + 0.5));
+    }
+    else {
+      result = 0.0;
+    }
   }
 }
 
-void math_sine(float val, out float outval)
+void math_logarithm(float a, float b, out float result)
 {
-  outval = sin(val);
+  result = (a > 0.0 && b > 0.0) ? log2(a) / log2(b) : 0.0;
 }
 
-void math_cosine(float val, out float outval)
+void math_sqrt(float a, float b, out float result)
 {
-  outval = cos(val);
+  result = (a > 0.0) ? sqrt(a) : 0.0;
 }
 
-void math_tangent(float val, out float outval)
+void math_absolute(float a, float b, out float result)
 {
-  outval = tan(val);
+  result = abs(a);
 }
 
-void math_asin(float val, out float outval)
+void math_minimum(float a, float b, out float result)
 {
-  if (val <= 1.0 && val >= -1.0) {
-    outval = asin(val);
-  }
-  else {
-    outval = 0.0;
-  }
+  result = min(a, b);
 }
 
-void math_acos(float val, out float outval)
+void math_maximum(float a, float b, out float result)
 {
-  if (val <= 1.0 && val >= -1.0) {
-    outval = acos(val);
-  }
-  else {
-    outval = 0.0;
-  }
+  result = max(a, b);
 }
 
-void math_atan(float val, out float outval)
+void math_less_than(float a, float b, out float result)
 {
-  outval = atan(val);
+  result = (a < b) ? 1.0 : 0.0;
 }
 
-void math_pow(float val1, float val2, out float outval)
+void math_greater_than(float a, float b, out float result)
 {
-  if (val1 >= 0.0) {
-    outval = compatible_pow(val1, val2);
-  }
-  else {
-    float val2_mod_1 = mod(abs(val2), 1.0);
-
-    if (val2_mod_1 > 0.999 || val2_mod_1 < 0.001) {
-      outval = compatible_pow(val1, floor(val2 + 0.5));
-    }
-    else {
-      outval = 0.0;
-    }
-  }
+  result = (a > b) ? 1.0 : 0.0;
 }
 
-void math_log(float val1, float val2, out float outval)
+void math_round(float a, float b, out float result)
 {
-  if (val1 > 0.0 && val2 > 0.0) {
-    outval = log2(val1) / log2(val2);
-  }
-  else {
-    outval = 0.0;
-  }
+  result = floor(a + 0.5);
 }
 
-void math_max(float val1, float val2, out float outval)
+void math_floor(float a, float b, out float result)
 {
-  outval = max(val1, val2);
+  result = floor(a);
 }
 
-void math_min(float val1, float val2, out float outval)
+void math_ceil(float a, float b, out float result)
 {
-  outval = min(val1, val2);
+  result = ceil(a);
 }
 
-void math_round(float val, out float outval)
+void math_fraction(float a, float b, out float result)
 {
-  outval = floor(val + 0.5);
+  result = a - floor(a);
 }
 
-void math_less_than(float val1, float val2, out float outval)
+/* Change sign to match C convention: GLSL mod() yields a result with the sign of the divisor.
+ * See https://www.khronos.org/registry/OpenGL-Refpages/gl4/html/mod.xhtml
+ */
+void math_modulo(float a, float b, out float result)
 {
-  if (val1 < val2) {
-    outval = 1.0;
-  }
-  else {
-    outval = 0.0;
-  }
+  result = (b != 0.0) ? sign(a) * mod(abs(a), b) : 0.0;
 }
 
-void math_greater_than(float val1, float val2, out float outval)
+void math_sine(float a, float b, out float result)
 {
-  if (val1 > val2) {
-    outval = 1.0;
-  }
-  else {
-    outval = 0.0;
-  }
+  result = sin(a);
 }
 
-void math_modulo(float val1, float val2, out float outval)
+void math_cosine(float a, float b, out float result)
 {
-  if (val2 == 0.0) {
-    outval = 0.0;
-  }
-  else {
-    outval = mod(val1, val2);
-  }
+  result = cos(a);
+}
 
-  /* change sign to match C convention, mod in GLSL will take absolute for negative numbers,
-   * see https://www.opengl.org/sdk/docs/man/html/mod.xhtml */
-  outval = (val1 > 0.0) ? outval : outval - val2;
+void math_tangent(float a, float b, out float result)
+{
+  result = tan(a);
 }
 
-void math_abs(float val1, out float outval)
+void math_arcsine(float a, float b, out float result)
 {
-  outval = abs(val1);
+  result = (a <= 1.0 && a >= -1.0) ? asin(a) : 0.0;
 }
 
-void math_atan2(float val1, float val2, out float outval)
+void math_arccosine(float a, float b, out float result)
 {
-  outval = atan(val1, val2);
+  result = (a <= 1.0 && a >= -1.0) ? acos(a) : 0.0;
 }
 
-void math_floor(float val, out float outval)
+void math_arctangent(float a, float b, out float result)
 {
-  outval = floor(val);
+  result = atan(a);
 }
 
-void math_ceil(float val, out float outval)
+void math_arctan2(float a, float b, out float result)
 {
-  outval = ceil(val);
+  result = atan(a, b);
 }
 
-void math_fract(float val, out float outval)
+void squeeze(float val, float width, float center, out float outval)
 {
-  outval = val - floor(val);
+  outval = 1.0 / (1.0 + pow(2.71828183, -((val - center) * width)));
 }
 
-void math_sqrt(float val, out float outval)
+void map_range(
+    float value, float fromMin, float fromMax, float toMin, float toMax, out float result)
 {
-  if (val > 0.0) {
-    outval = sqrt(val);
+  if (fromMax != fromMin) {
+    result = toMin + ((value - fromMin) / (fromMax - fromMin)) * (toMax - toMin);
   }
   else {
-    outval = 0.0;
+    result = 0.0;
   }
 }
 
-void squeeze(float val, float width, float center, out float outval)
-{
-  outval = 1.0 / (1.0 + pow(2.71828183, -((val - center) * width)));
-}
-
 void vec_math_add(vec3 v1, vec3 v2, out vec3 outvec, out float outval)
 {
   outvec = v1 + v2;
@@ -962,9 +931,9 @@ void clamp_vec3(vec3 vec, vec3 min, vec3 max, out vec3 out_vec)
   out_vec = clamp(vec, min, max);
 }
 
-void clamp_val(float value, float min, float max, out float out_value)
+void clamp_value(float value, float min, float max, out float result)
 {
-  out_value = clamp(value, min, max);
+  result = clamp(value, min, max);
 }
 
 void hue_sat(float hue, float sat, float value, float fac, vec4 col, out vec4 outcol)
@@ -1230,10 +1199,9 @@ vec3 principled_sheen(float NV, vec3 basecol_tint, float sheen_tint)
 void node_bsdf_diffuse(vec4 color, float roughness, vec3 N, out Closure result)
 {
   N = normalize(N);
-  vec3 vN = mat3(ViewMatrix) * N;
   result = CLOSURE_DEFAULT;
-  result.ssr_normal = normal_encode(vN, viewCameraVec);
   eevee_closure_diffuse(N, color.rgb, 1.0, result.radiance);
+  closure_load_ssr_data(vec3(0.0), 0.0, N, viewCameraVec, -1, result);
   result.radiance *= color.rgb;
 }
 
@@ -1245,9 +1213,7 @@ void node_bsdf_glossy(vec4 color, float roughness, vec3 N, float ssr_id, out Clo
   vec3 vN = mat3(ViewMatrix) * N;
   result = CLOSURE_DEFAULT;
   result.radiance = out_spec * color.rgb;
-  result.ssr_data = vec4(ssr_spec * color.rgb, roughness);
-  result.ssr_normal = normal_encode(vN, viewCameraVec);
-  result.ssr_id = int(ssr_id);
+  closure_load_ssr_data(ssr_spec * color.rgb, roughness, N, viewCameraVec, int(ssr_id), result);
 }
 
 void node_bsdf_anisotropic(vec4 color,
@@ -1276,9 +1242,8 @@ void node_bsdf_glass(
   vec3 vN = mat3(ViewMatrix) * N;
   result = CLOSURE_DEFAULT;
   result.radiance = mix(out_refr, out_spec, fresnel);
-  result.ssr_data = vec4(ssr_spec * color.rgb * fresnel, roughness);
-  result.ssr_normal = normal_encode(vN, viewCameraVec);
-  result.ssr_id = int(ssr_id);
+  closure_load_ssr_data(
+      ssr_spec * color.rgb * fresnel, roughness, N, viewCameraVec, int(ssr_id), result);
 }
 
 void node_bsdf_toon(vec4 color, float size, float tsmooth, vec3 N, out Closure result)
@@ -1343,7 +1308,7 @@ void node_bsdf_principled(vec4 base_color,
 
   vec3 mixed_ss_base_color = mix(diffuse, subsurface_color.rgb, subsurface);
 
-  float sss_scalef = dot(sss_scale, vec3(1.0 / 3.0)) * subsurface;
+  float sss_scalef = avg(sss_scale) * subsurface;
   eevee_closure_principled(N,
                            mixed_ss_base_color,
                            f0,
@@ -1367,28 +1332,34 @@ void node_bsdf_principled(vec4 base_color,
                                           vec3(1.0); /* Simulate 2 transmission event */
   out_refr *= refr_color * (1.0 - fresnel) * transmission;
 
-  vec3 vN = mat3(ViewMatrix) * N;
   result = CLOSURE_DEFAULT;
   result.radiance = out_spec + out_refr;
   result.radiance += out_diff * out_sheen; /* Coarse approx. */
+
+  closure_load_ssr_data(ssr_spec * alpha, roughness, N, viewCameraVec, int(ssr_id), result);
+
+  vec3 sss_radiance = (out_diff + out_trans) * alpha;
 #  ifndef USE_SSS
-  result.radiance += (out_diff + out_trans) * mixed_ss_base_color * (1.0 - transmission);
-#  endif
-  result.ssr_data = vec4(ssr_spec, roughness);
-  result.ssr_normal = normal_encode(vN, viewCameraVec);
-  result.ssr_id = int(ssr_id);
-#  ifdef USE_SSS
-  result.sss_data.a = sss_scalef;
-  result.sss_data.rgb = out_diff + out_trans;
+  result.radiance += sss_radiance * mixed_ss_base_color * (1.0 - transmission);
+#  else
 #    ifdef USE_SSS_ALBEDO
-  result.sss_albedo.rgb = mixed_ss_base_color;
+  vec3 sss_albedo = mixed_ss_base_color;
 #    else
-  result.sss_data.rgb *= mixed_ss_base_color;
+  sss_radiance *= mixed_ss_base_color;
 #    endif
-  result.sss_data.rgb *= (1.0 - transmission);
-#  endif
+  sss_radiance *= (1.0 - transmission);
+  closure_load_sss_data(sss_scalef,
+                        sss_radiance,
+#    ifdef USE_SSS_ALBEDO
+                        sss_albedo,
+#    endif
+                        int(sss_id),
+                        result);
+#  endif /* USE_SSS */
+
   result.radiance += emission.rgb;
-  result.opacity = alpha;
+  result.radiance *= alpha;
+  result.transmittance = vec3(1.0 - alpha);
 }
 
 void node_bsdf_principled_dielectric(vec4 base_color,
@@ -1434,14 +1405,12 @@ void node_bsdf_principled_dielectric(vec4 base_color,
   eevee_closure_default(
       N, diffuse, f0, vec3(1.0), int(ssr_id), roughness, 1.0, out_diff, out_spec, ssr_spec);
 
-  vec3 vN = mat3(ViewMatrix) * N;
   result = CLOSURE_DEFAULT;
   result.radiance = out_spec + out_diff * (diffuse + out_sheen);
-  result.ssr_data = vec4(ssr_spec, roughness);
-  result.ssr_normal = normal_encode(vN, viewCameraVec);
-  result.ssr_id = int(ssr_id);
+  closure_load_ssr_data(ssr_spec * alpha, roughness, N, viewCameraVec, int(ssr_id), result);
   result.radiance += emission.rgb;
-  result.opacity = alpha;
+  result.radiance *= alpha;
+  result.transmittance = vec3(1.0 - alpha);
 }
 
 void node_bsdf_principled_metallic(vec4 base_color,
@@ -1479,14 +1448,12 @@ void node_bsdf_principled_metallic(vec4 base_color,
 
   eevee_closure_glossy(N, base_color.rgb, f90, int(ssr_id), roughness, 1.0, out_spec, ssr_spec);
 
-  vec3 vN = mat3(ViewMatrix) * N;
   result = CLOSURE_DEFAULT;
   result.radiance = out_spec;
-  result.ssr_data = vec4(ssr_spec, roughness);
-  result.ssr_normal = normal_encode(vN, viewCameraVec);
-  result.ssr_id = int(ssr_id);
+  closure_load_ssr_data(ssr_spec * alpha, roughness, N, viewCameraVec, int(ssr_id), result);
   result.radiance += emission.rgb;
-  result.opacity = alpha;
+  result.radiance *= alpha;
+  result.transmittance = vec3(1.0 - alpha);
 }
 
 void node_bsdf_principled_clearcoat(vec4 base_color,
@@ -1534,14 +1501,12 @@ void node_bsdf_principled_clearcoat(vec4 base_color,
                           out_spec,
                           ssr_spec);
 
-  vec3 vN = mat3(ViewMatrix) * N;
   result = CLOSURE_DEFAULT;
   result.radiance = out_spec;
-  result.ssr_data = vec4(ssr_spec, roughness);
-  result.ssr_normal = normal_encode(vN, viewCameraVec);
-  result.ssr_id = int(ssr_id);
+  closure_load_ssr_data(ssr_spec * alpha, roughness, N, viewCameraVec, int(ssr_id), result);
   result.radiance += emission.rgb;
-  result.opacity = alpha;
+  result.radiance *= alpha;
+  result.transmittance = vec3(1.0 - alpha);
 }
 
 void node_bsdf_principled_subsurface(vec4 base_color,
@@ -1582,7 +1547,7 @@ void node_bsdf_principled_subsurface(vec4 base_color,
 
   subsurface_color = subsurface_color * (1.0 - metallic);
   vec3 mixed_ss_base_color = mix(diffuse, subsurface_color.rgb, subsurface);
-  float sss_scalef = dot(sss_scale, vec3(1.0 / 3.0)) * subsurface;
+  float sss_scalef = avg(sss_scale) * subsurface;
 
   float NV = dot(N, cameraVec);
   vec3 out_sheen = sheen * principled_sheen(NV, ctint, sheen_tint);
@@ -1602,26 +1567,33 @@ void node_bsdf_principled_subsurface(vec4 base_color,
                      out_spec,
                      ssr_spec);
 
-  vec3 vN = mat3(ViewMatrix) * N;
   result = CLOSURE_DEFAULT;
   result.radiance = out_spec;
-  result.ssr_data = vec4(ssr_spec, roughness);
-  result.ssr_normal = normal_encode(vN, viewCameraVec);
-  result.ssr_id = int(ssr_id);
-#  ifdef USE_SSS
-  result.sss_data.a = sss_scalef;
-  result.sss_data.rgb = out_diff + out_trans;
+  closure_load_ssr_data(ssr_spec * alpha, roughness, N, viewCameraVec, int(ssr_id), result);
+
+  vec3 sss_radiance = out_diff + out_trans; /* Alpha is applied once per path below. */
+#  ifndef USE_SSS
+  result.radiance += sss_radiance * mixed_ss_base_color * (1.0 - transmission);
+#  else
 #    ifdef USE_SSS_ALBEDO
-  result.sss_albedo.rgb = mixed_ss_base_color;
+  vec3 sss_albedo = mixed_ss_base_color;
 #    else
-  result.sss_data.rgb *= mixed_ss_base_color;
+  sss_radiance *= mixed_ss_base_color;
 #    endif
-#  else
-  result.radiance += (out_diff + out_trans) * mixed_ss_base_color;
-#  endif
+  sss_radiance *= alpha * (1.0 - transmission);
+  closure_load_sss_data(sss_scalef,
+                        sss_radiance,
+#    ifdef USE_SSS_ALBEDO
+                        sss_albedo,
+#    endif
+                        int(sss_id),
+                        result);
+#  endif /* USE_SSS */
+
   result.radiance += out_diff * out_sheen;
   result.radiance += emission.rgb;
-  result.opacity = alpha;
+  result.radiance *= alpha;
+  result.transmittance = vec3(1.0 - alpha);
 }
 
 void node_bsdf_principled_glass(vec4 base_color,
@@ -1671,14 +1643,12 @@ void node_bsdf_principled_glass(vec4 base_color,
   out_spec *= spec_col;
   ssr_spec *= spec_col * fresnel;
 
-  vec3 vN = mat3(ViewMatrix) * N;
   result = CLOSURE_DEFAULT;
   result.radiance = mix(out_refr, out_spec, fresnel);
-  result.ssr_data = vec4(ssr_spec, roughness);
-  result.ssr_normal = normal_encode(vN, viewCameraVec);
-  result.ssr_id = int(ssr_id);
+  closure_load_ssr_data(ssr_spec * alpha, roughness, N, viewCameraVec, int(ssr_id), result);
   result.radiance += emission.rgb;
-  result.opacity = alpha;
+  result.radiance *= alpha;
+  result.transmittance = vec3(1.0 - alpha);
 }
 
 void node_bsdf_translucent(vec4 color, vec3 N, out Closure result)
@@ -1688,11 +1658,9 @@ void node_bsdf_translucent(vec4 color, vec3 N, out Closure result)
 
 void node_bsdf_transparent(vec4 color, out Closure result)
 {
-  /* this isn't right */
   result = CLOSURE_DEFAULT;
   result.radiance = vec3(0.0);
-  result.opacity = clamp(1.0 - dot(color.rgb, vec3(0.3333334)), 0.0, 1.0);
-  result.ssr_id = TRANSPARENT_CLOSURE_FLAG;
+  result.transmittance = abs(color.rgb); /* Per-channel; abs() keeps it non-negative. */
 }
 
 void node_bsdf_velvet(vec4 color, float sigma, vec3 N, out Closure result)
@@ -1714,19 +1682,25 @@ void node_subsurface_scattering(vec4 color,
   vec3 out_diff, out_trans;
   vec3 vN = mat3(ViewMatrix) * N;
   result = CLOSURE_DEFAULT;
-  result.ssr_data = vec4(0.0);
-  result.ssr_normal = normal_encode(vN, viewCameraVec);
-  result.ssr_id = -1;
-  result.sss_data.a = scale;
+  closure_load_ssr_data(vec3(0.0), 0.0, N, viewCameraVec, -1, result);
+
   eevee_closure_subsurface(N, color.rgb, 1.0, scale, out_diff, out_trans);
-  result.sss_data.rgb = out_diff + out_trans;
+
+  vec3 sss_radiance = out_diff + out_trans;
 #    ifdef USE_SSS_ALBEDO
   /* Not perfect for texture_blur not exactly equal to 0.0 or 1.0. */
-  result.sss_albedo.rgb = mix(color.rgb, vec3(1.0), texture_blur);
-  result.sss_data.rgb *= mix(vec3(1.0), color.rgb, texture_blur);
+  vec3 sss_albedo = mix(color.rgb, vec3(1.0), texture_blur);
+  sss_radiance *= mix(vec3(1.0), color.rgb, texture_blur);
 #    else
-  result.sss_data.rgb *= color.rgb;
+  sss_radiance *= color.rgb;
 #    endif
+  closure_load_sss_data(scale,
+                        sss_radiance,
+#    ifdef USE_SSS_ALBEDO
+                        sss_albedo,
+#    endif
+                        int(sss_id),
+                        result);
 #  else
   node_bsdf_diffuse(color, 0.0, N, result);
 #  endif
@@ -1742,7 +1716,6 @@ void node_bsdf_refraction(vec4 color, float roughness, float ior, vec3 N, out Cl
   result = CLOSURE_DEFAULT;
   result.ssr_normal = normal_encode(vN, viewCameraVec);
   result.radiance = out_refr * color.rgb;
-  result.ssr_id = REFRACT_CLOSURE_FLAG;
 }
 
 void node_ambient_occlusion(
@@ -1843,7 +1816,7 @@ void node_background(vec4 color, float strength, out Closure result)
   color *= strength;
   result = CLOSURE_DEFAULT;
   result.radiance = color.rgb;
-  result.opacity = color.a;
+  result.transmittance = vec3(0.0);
 #else
   result = CLOSURE_DEFAULT;
 #endif
@@ -1951,6 +1924,15 @@ void node_volume_principled(vec4 color,
 #endif
 }
 
+void node_holdout(out Closure result)
+{
+  result = CLOSURE_DEFAULT; /* Volumetric builds return this default as-is. */
+#ifndef VOLUMETRICS
+  result.holdout = 1.0;
+  result.flag = CLOSURE_HOLDOUT_FLAG; /* Assigned, not OR-ed: result was just reset. */
+#endif
+}
+
 /* closures */
 
 void node_mix_shader(float fac, Closure shader1, Closure shader2, out Closure shader)
@@ -2025,7 +2007,7 @@ void node_attribute_volume_density(sampler3D tex, out vec4 outcol, out vec3 outv
 #endif
   outvec = texture(tex, cos).aaa;
   outcol = vec4(outvec, 1.0);
-  outf = dot(vec3(1.0 / 3.0), outvec);
+  outf = avg(outvec);
 }
 
 uniform vec3 volumeColor = vec3(1.0);
@@ -2046,7 +2028,7 @@ void node_attribute_volume_color(sampler3D tex, out vec4 outcol, out vec3 outvec
 
   outvec = value.rgb * volumeColor;
   outcol = vec4(outvec, 1.0);
-  outf = dot(vec3(1.0 / 3.0), outvec);
+  outf = avg(outvec);
 }
 
 void node_attribute_volume_flame(sampler3D tex, out vec4 outcol, out vec3 outvec, out float outf)
@@ -2080,7 +2062,7 @@ void node_attribute(vec3 attr, out vec4 outcol, out vec3 outvec, out float outf)
 {
   outcol = vec4(attr, 1.0);
   outvec = attr;
-  outf = dot(vec3(1.0 / 3.0), attr);
+  outf = avg(attr);
 }
 
 void node_uvmap(vec3 attr_uv, out vec3 outvec)
@@ -3493,7 +3475,7 @@ void node_output_world(Closure surface, Closure volume, out Closure result)
 {
 #ifndef VOLUMETRICS
   result.radiance = surface.radiance * backgroundAlpha;
-  result.opacity = backgroundAlpha;
+  result.transmittance = vec3(1.0 - backgroundAlpha);
 #else
   result = volume;
 #endif /* VOLUMETRICS */
@@ -3540,6 +3522,8 @@ void node_eevee_specular(vec4 diffuse,
                          float ssr_id,
                          out Closure result)
 {
+  normal = normalize(normal);
+
   vec3 out_diff, out_spec, ssr_spec;
   eevee_closure_default_clearcoat(normal,
                                   diffuse.rgb,
@@ -3555,19 +3539,19 @@ void node_eevee_specular(vec4 diffuse,
                                   out_spec,
                                   ssr_spec);
 
-  vec3 vN = normalize(mat3(ViewMatrix) * normal);
+  float alpha = 1.0 - transp;
   result = CLOSURE_DEFAULT;
   result.radiance = out_diff * diffuse.rgb + out_spec + emissive.rgb;
-  result.opacity = 1.0 - transp;
-  result.ssr_data = vec4(ssr_spec, roughness);
-  result.ssr_normal = normal_encode(vN, viewCameraVec);
-  result.ssr_id = int(ssr_id);
+  result.radiance *= alpha;
+  result.transmittance = vec3(transp);
+
+  closure_load_ssr_data(ssr_spec * alpha, roughness, normal, viewCameraVec, int(ssr_id), result);
 }
 
 void node_shader_to_rgba(Closure cl, out vec4 outcol, out float outalpha)
 {
   vec4 spec_accum = vec4(0.0);
-  if (ssrToggle && cl.ssr_id == outputSsrId) {
+  if (ssrToggle && FLAG_TEST(cl.flag, CLOSURE_SSR_FLAG)) {
     vec3 V = cameraVec;
     vec3 vN = normal_decode(cl.ssr_normal, viewCameraVec);
     vec3 N = transform_direction(ViewMatrixInverse, vN);
@@ -3576,7 +3560,7 @@ void node_shader_to_rgba(Closure cl, out vec4 outcol, out float outalpha)
     fallback_cubemap(N, V, worldPosition, viewPosition, roughness, roughnessSquared, spec_accum);
   }
 
-  outalpha = cl.opacity;
+  outalpha = clamp(1.0 - avg(cl.transmittance), 0.0, 1.0); /* Opacity = 1 - transmittance. */
   outcol = vec4((spec_accum.rgb * cl.ssr_data.rgb) + cl.radiance, 1.0);
 
 #  ifdef USE_SSS
diff --git a/source/blender/gpu/shaders/gpu_shader_selection_id_frag.glsl b/source/blender/gpu/shaders/gpu_shader_selection_id_frag.glsl
deleted file mode 100644
index 1f22b9cb0b4..00000000000
--- a/source/blender/gpu/shaders/gpu_shader_selection_id_frag.glsl
+++ /dev/null
@@ -1,13 +0,0 @@
-
-#ifdef UNIFORM_ID
-uniform uint id;
-#else
-flat in uint id;
-#endif
-
-out uint fragColor;
-
-void main()
-{
-  fragColor = id;
-}
author	Pablo Dobarro <pablodp606@gmail.com>	2019-08-20 20:13:25 +0300
committer	Pablo Dobarro <pablodp606@gmail.com>	2019-08-20 20:13:25 +0300
commit	8c509bb69cc9d473236e51b1ba51b74176286223 (patch)
tree	3c5694ae0c89b1814791b5b5dc79958826b918fa /source/blender/gpu
parent	d6d51674a2fee3a1110d241b96d31480ce440cf1 (diff)
parent	a942d97b7971dc0e7add44e3e9ba1c02fb914f7d (diff)