Merge branch 'master' into wm-drag-drop-rewrite

author: Julian Eisel <julian@blender.org> 2020-06-05 14:09:31 +0300
committer: Julian Eisel <julian@blender.org> 2020-06-05 14:09:31 +0300
commit: 920a58d9b6d667894cf166cbbd25e4c2fbd238ea (patch)
tree: 7ca5a9da640753b5e070c439ac3bdd14dfad92cf /source/blender/draw/intern
parent: c94b6209861ca7cc3985b53474feed7d94c0221a (diff)
parent: a1d55bdd530390e58c51abe9707b8d3b0ae3e861 (diff)
26 files changed, 1246 insertions, 896 deletions
diff --git a/source/blender/draw/intern/DRW_render.h b/source/blender/draw/intern/DRW_render.h
index 382e7313f21..e7dd9e449b7 100644
--- a/source/blender/draw/intern/DRW_render.h
+++ b/source/blender/draw/intern/DRW_render.h
@@ -210,10 +210,7 @@ struct GPUShader *DRW_shader_create_with_transform_feedback(const char *vert,
                                                             const eGPUShaderTFBType prim_type,
                                                             const char **varying_names,
                                                             const int varying_count);
-struct GPUShader *DRW_shader_create_2d(const char *frag, const char *defines);
-struct GPUShader *DRW_shader_create_3d(const char *frag, const char *defines);
 struct GPUShader *DRW_shader_create_fullscreen(const char *frag, const char *defines);
-struct GPUShader *DRW_shader_create_3d_depth_only(eGPUShaderConfig slot);
 struct GPUMaterial *DRW_shader_find_from_world(struct World *wo,
                                                const void *engine_type,
                                                const int options,
@@ -224,6 +221,7 @@ struct GPUMaterial *DRW_shader_find_from_material(struct Material *ma,
                                                   bool deferred);
 struct GPUMaterial *DRW_shader_create_from_world(struct Scene *scene,
                                                  struct World *wo,
+                                                 struct bNodeTree *ntree,
                                                  const void *engine_type,
                                                  const int options,
                                                  const bool is_volume_shader,
@@ -234,6 +232,7 @@ struct GPUMaterial *DRW_shader_create_from_world(struct Scene *scene,
                                                  bool deferred);
 struct GPUMaterial *DRW_shader_create_from_material(struct Scene *scene,
                                                     struct Material *ma,
+                                                    struct bNodeTree *ntree,
                                                     const void *engine_type,
                                                     const int options,
                                                     const bool is_volume_shader,
@@ -365,6 +364,8 @@ DRWShadingGroup *DRW_shgroup_transform_feedback_create(struct GPUShader *shader,
                                                        DRWPass *pass,
                                                        struct GPUVertBuf *tf_target);
 
+void DRW_shgroup_add_material_resources(DRWShadingGroup *grp, struct GPUMaterial *material);
+
 /* return final visibility */
 typedef bool(DRWCallVisibilityFn)(bool vis_in, void *user_data);
 
@@ -404,14 +405,14 @@ void DRW_shgroup_call_instances(DRWShadingGroup *shgroup,
                                 Object *ob,
                                 struct GPUBatch *geom,
                                 uint count);
-/* Warning: Only use with Shaders that have INSTANCED_ATTRIB defined. */
-void DRW_shgroup_call_instances_with_attribs(DRWShadingGroup *shgroup,
-                                             Object *ob,
-                                             struct GPUBatch *geom,
-                                             struct GPUBatch *inst_attributes);
+/* Warning: Only use with Shaders that have INSTANCED_ATTR defined. */
+void DRW_shgroup_call_instances_with_attrs(DRWShadingGroup *shgroup,
+                                           Object *ob,
+                                           struct GPUBatch *geom,
+                                           struct GPUBatch *inst_attributes);
 
-void DRW_shgroup_call_sculpt(DRWShadingGroup *sh, Object *ob, bool wire, bool mask, bool vcol);
-void DRW_shgroup_call_sculpt_with_materials(DRWShadingGroup **sh, Object *ob, bool vcol);
+void DRW_shgroup_call_sculpt(DRWShadingGroup *sh, Object *ob, bool wire, bool mask);
+void DRW_shgroup_call_sculpt_with_materials(DRWShadingGroup **sh, int num_sh, Object *ob);
 
 DRWCallBuffer *DRW_shgroup_call_buffer(DRWShadingGroup *shading_group,
                                        struct GPUVertFormat *format,
@@ -460,18 +461,15 @@ void DRW_shgroup_clear_framebuffer(DRWShadingGroup *shgroup,
 void DRW_shgroup_uniform_texture(DRWShadingGroup *shgroup,
                                  const char *name,
                                  const struct GPUTexture *tex);
-void DRW_shgroup_uniform_texture_persistent(DRWShadingGroup *shgroup,
-                                            const char *name,
-                                            const struct GPUTexture *tex);
-void DRW_shgroup_uniform_block(DRWShadingGroup *shgroup,
-                               const char *name,
-                               const struct GPUUniformBuffer *ubo);
-void DRW_shgroup_uniform_block_persistent(DRWShadingGroup *shgroup,
-                                          const char *name,
-                                          const struct GPUUniformBuffer *ubo);
 void DRW_shgroup_uniform_texture_ref(DRWShadingGroup *shgroup,
                                      const char *name,
                                      struct GPUTexture **tex);
+void DRW_shgroup_uniform_block(DRWShadingGroup *shgroup,
+                               const char *name,
+                               const struct GPUUniformBuffer *ubo);
+void DRW_shgroup_uniform_block_ref(DRWShadingGroup *shgroup,
+                                   const char *name,
+                                   struct GPUUniformBuffer **ubo);
 void DRW_shgroup_uniform_float(DRWShadingGroup *shgroup,
                                const char *name,
                                const float *value,
@@ -521,11 +519,17 @@ void DRW_shgroup_uniform_float_copy(DRWShadingGroup *shgroup, const char *name,
 void DRW_shgroup_uniform_vec2_copy(DRWShadingGroup *shgroup, const char *name, const float *value);
 void DRW_shgroup_uniform_vec3_copy(DRWShadingGroup *shgroup, const char *name, const float *value);
 void DRW_shgroup_uniform_vec4_copy(DRWShadingGroup *shgroup, const char *name, const float *value);
+void DRW_shgroup_uniform_vec4_array_copy(DRWShadingGroup *shgroup,
+                                         const char *name,
+                                         const float (*value)[4],
+                                         int arraysize);
 
 bool DRW_shgroup_is_empty(DRWShadingGroup *shgroup);
 
 /* Passes */
 DRWPass *DRW_pass_create(const char *name, DRWState state);
+DRWPass *DRW_pass_create_instance(const char *name, DRWPass *original, DRWState state);
+void DRW_pass_link(DRWPass *first, DRWPass *second);
 /* TODO Replace with passes inheritance. */
 void DRW_pass_state_set(DRWPass *pass, DRWState state);
 void DRW_pass_state_add(DRWPass *pass, DRWState state);
@@ -539,6 +543,8 @@ void DRW_pass_sort_shgroup_reverse(DRWPass *pass);
 bool DRW_pass_is_empty(DRWPass *pass);
 
 #define DRW_PASS_CREATE(pass, state) (pass = DRW_pass_create(#pass, state))
+#define DRW_PASS_INSTANCE_CREATE(pass, original, state) \
+  (pass = DRW_pass_create_instance(#pass, (original), state))
 
 /* Views */
 DRWView *DRW_view_create(const float viewmat[4][4],
@@ -670,6 +676,7 @@ bool DRW_state_do_color_management(void);
 bool DRW_state_is_scene_render(void);
 bool DRW_state_is_opengl_render(void);
 bool DRW_state_is_playback(void);
+bool DRW_state_is_navigating(void);
 bool DRW_state_show_text(void);
 bool DRW_state_draw_support(void);
 bool DRW_state_draw_background(void);
@@ -691,6 +698,8 @@ typedef struct DRWContextState {
 
   struct Depsgraph *depsgraph;
 
+  struct TaskGraph *task_graph;
+
   eObjectMode object_mode;
 
   eGPUShaderConfig sh_cfg;
diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c
index ffd3be9280f..c23ea3d7c82 100644
--- a/source/blender/draw/intern/draw_cache.c
+++ b/source/blender/draw/intern/draw_cache.c
@@ -2898,12 +2898,12 @@ GPUBatch *DRW_cache_curve_edge_overlay_get(Object *ob)
   return DRW_curve_batch_cache_get_edit_edges(cu);
 }
 
-GPUBatch *DRW_cache_curve_vert_overlay_get(Object *ob, bool handles)
+GPUBatch *DRW_cache_curve_vert_overlay_get(Object *ob)
 {
   BLI_assert(ELEM(ob->type, OB_CURVE, OB_SURF));
 
   struct Curve *cu = ob->data;
-  return DRW_curve_batch_cache_get_edit_verts(cu, handles);
+  return DRW_curve_batch_cache_get_edit_verts(cu);
 }
 
 GPUBatch *DRW_cache_curve_surface_get(Object *ob)
@@ -3081,8 +3081,7 @@ GPUBatch *DRW_cache_text_loose_edges_get(Object *ob)
     return DRW_mesh_batch_cache_get_loose_edges(mesh_eval);
   }
   else {
-    /* TODO */
-    return NULL;
+    return DRW_curve_batch_cache_get_wire_edge(cu);
   }
 }
 
@@ -3535,13 +3534,15 @@ void drw_batch_cache_generate_requested(Object *ob)
   struct Mesh *mesh_eval = BKE_object_get_evaluated_mesh(ob);
   switch (ob->type) {
     case OB_MESH:
-      DRW_mesh_batch_cache_create_requested(ob, (Mesh *)ob->data, scene, is_paint_mode, use_hide);
+      DRW_mesh_batch_cache_create_requested(
+          DST.task_graph, ob, (Mesh *)ob->data, scene, is_paint_mode, use_hide);
       break;
     case OB_CURVE:
     case OB_FONT:
     case OB_SURF:
       if (mesh_eval) {
-        DRW_mesh_batch_cache_create_requested(ob, mesh_eval, scene, is_paint_mode, use_hide);
+        DRW_mesh_batch_cache_create_requested(
+            DST.task_graph, ob, mesh_eval, scene, is_paint_mode, use_hide);
       }
       DRW_curve_batch_cache_create_requested(ob);
       break;
diff --git a/source/blender/draw/intern/draw_cache.h b/source/blender/draw/intern/draw_cache.h
index 77c7b6b9307..221fb89612f 100644
--- a/source/blender/draw/intern/draw_cache.h
+++ b/source/blender/draw/intern/draw_cache.h
@@ -150,7 +150,7 @@ struct GPUBatch *DRW_cache_curve_edge_detection_get(struct Object *ob, bool *r_i
 /* edit-mode */
 struct GPUBatch *DRW_cache_curve_edge_normal_get(struct Object *ob);
 struct GPUBatch *DRW_cache_curve_edge_overlay_get(struct Object *ob);
-struct GPUBatch *DRW_cache_curve_vert_overlay_get(struct Object *ob, bool handles);
+struct GPUBatch *DRW_cache_curve_vert_overlay_get(struct Object *ob);
 
 /* Font */
 struct GPUBatch *DRW_cache_text_surface_get(struct Object *ob);
diff --git a/source/blender/draw/intern/draw_cache_extract.h b/source/blender/draw/intern/draw_cache_extract.h
index 0e02b07e95b..f203c2ff1ea 100644
--- a/source/blender/draw/intern/draw_cache_extract.h
+++ b/source/blender/draw/intern/draw_cache_extract.h
@@ -23,6 +23,8 @@
 #ifndef __DRAW_CACHE_EXTRACT_H__
 #define __DRAW_CACHE_EXTRACT_H__
 
+struct TaskGraph;
+
 /* Vertex Group Selection and display options */
 typedef struct DRW_MeshWeightState {
   int defgroup_active;
@@ -249,10 +251,12 @@ typedef struct MeshBatchCache {
   bool no_loose_wire;
 } MeshBatchCache;
 
-void mesh_buffer_cache_create_requested(MeshBatchCache *cache,
+void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph,
+                                        MeshBatchCache *cache,
                                         MeshBufferCache mbc,
                                         Mesh *me,
                                         const bool is_editmode,
+                                        const bool is_paint_mode,
                                         const float obmat[4][4],
                                         const bool do_final,
                                         const bool do_uvedit,
diff --git a/source/blender/draw/intern/draw_cache_extract_mesh.c b/source/blender/draw/intern/draw_cache_extract_mesh.c
index 54e745102f0..06462d5b9c5 100644
--- a/source/blender/draw/intern/draw_cache_extract_mesh.c
+++ b/source/blender/draw/intern/draw_cache_extract_mesh.c
@@ -105,6 +105,14 @@ typedef struct MeshRenderData {
   BMEditMesh *edit_bmesh;
   BMesh *bm;
   EditMeshData *edit_data;
+
+  /* For deformed edit-mesh data. */
+  /* Use for #ME_WRAPPER_TYPE_BMESH. */
+  const float (*bm_vert_coords)[3];
+  const float (*bm_vert_normals)[3];
+  const float (*bm_poly_normals)[3];
+  const float (*bm_poly_centers)[3];
+
   int *v_origindex, *e_origindex, *p_origindex;
   int crease_ofs;
   int bweight_ofs;
@@ -126,14 +134,12 @@ typedef struct MeshRenderData {
   float (*poly_normals)[3];
   int *lverts, *ledges;
 } MeshRenderData;
-
 static MeshRenderData *mesh_render_data_create(Mesh *me,
                                                const bool is_editmode,
+                                               const bool is_paint_mode,
                                                const float obmat[4][4],
                                                const bool do_final,
                                                const bool do_uvedit,
-                                               const eMRIterType iter_type,
-                                               const eMRDataType data_flag,
                                                const DRW_MeshCDMask *UNUSED(cd_used),
                                                const ToolSettings *ts)
 {
@@ -143,16 +149,28 @@ static MeshRenderData *mesh_render_data_create(Mesh *me,
 
   copy_m4_m4(mr->obmat, obmat);
 
-  const bool is_auto_smooth = (me->flag & ME_AUTOSMOOTH) != 0;
-  const float split_angle = is_auto_smooth ? me->smoothresh : (float)M_PI;
-
   if (is_editmode) {
     BLI_assert(me->edit_mesh->mesh_eval_cage && me->edit_mesh->mesh_eval_final);
     mr->bm = me->edit_mesh->bm;
     mr->edit_bmesh = me->edit_mesh;
-    mr->edit_data = me->runtime.edit_data;
     mr->me = (do_final) ? me->edit_mesh->mesh_eval_final : me->edit_mesh->mesh_eval_cage;
-    bool use_mapped = !do_uvedit && mr->me && !mr->me->runtime.is_original;
+    mr->edit_data = mr->me->runtime.edit_data;
+
+    if (mr->edit_data) {
+      EditMeshData *emd = mr->edit_data;
+      if (emd->vertexCos) {
+        BKE_editmesh_cache_ensure_vert_normals(mr->edit_bmesh, emd);
+        BKE_editmesh_cache_ensure_poly_normals(mr->edit_bmesh, emd);
+      }
+
+      mr->bm_vert_coords = mr->edit_data->vertexCos;
+      mr->bm_vert_normals = mr->edit_data->vertexNos;
+      mr->bm_poly_normals = mr->edit_data->polyNos;
+      mr->bm_poly_centers = mr->edit_data->polyCos;
+    }
+
+    bool has_mdata = (mr->me->runtime.wrapper_type == ME_WRAPPER_TYPE_MDATA);
+    bool use_mapped = has_mdata && !do_uvedit && mr->me && !mr->me->runtime.is_original;
 
     int bm_ensure_types = BM_VERT | BM_EDGE | BM_LOOP | BM_FACE;
 
@@ -183,7 +201,7 @@ static MeshRenderData *mesh_render_data_create(Mesh *me,
 
     /* Seems like the mesh_eval_final do not have the right origin indices.
      * Force not mapped in this case. */
-    if (do_final && me->edit_mesh->mesh_eval_final != me->edit_mesh->mesh_eval_cage) {
+    if (has_mdata && do_final && me->edit_mesh->mesh_eval_final != me->edit_mesh->mesh_eval_cage) {
       // mr->edit_bmesh = NULL;
       mr->extract_type = MR_EXTRACT_MESH;
     }
@@ -191,7 +209,17 @@ static MeshRenderData *mesh_render_data_create(Mesh *me,
   else {
     mr->me = me;
     mr->edit_bmesh = NULL;
-    mr->extract_type = MR_EXTRACT_MESH;
+
+    bool use_mapped = is_paint_mode && mr->me && !mr->me->runtime.is_original;
+    if (use_mapped) {
+      mr->v_origindex = CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX);
+      mr->e_origindex = CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX);
+      mr->p_origindex = CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX);
+
+      use_mapped = (mr->v_origindex || mr->e_origindex || mr->p_origindex);
+    }
+
+    mr->extract_type = use_mapped ? MR_EXTRACT_MAPPED : MR_EXTRACT_MESH;
   }
 
   if (mr->extract_type != MR_EXTRACT_BMESH) {
@@ -210,7 +238,32 @@ static MeshRenderData *mesh_render_data_create(Mesh *me,
     mr->v_origindex = CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX);
     mr->e_origindex = CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX);
     mr->p_origindex = CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX);
+  }
+  else {
+    /* BMesh */
+    BMesh *bm = mr->bm;
+
+    mr->vert_len = bm->totvert;
+    mr->edge_len = bm->totedge;
+    mr->loop_len = bm->totloop;
+    mr->poly_len = bm->totface;
+    mr->tri_len = poly_to_tri_count(mr->poly_len, mr->loop_len);
+  }
+
+  return mr;
+}
+
+/* Part of the creation of the MeshRenderData that happens in a thread. */
+static void mesh_render_data_update(MeshRenderData *mr,
+                                    const eMRIterType iter_type,
+                                    const eMRDataType data_flag)
+{
+  Mesh *me = mr->me;
+  const bool is_auto_smooth = (me->flag & ME_AUTOSMOOTH) != 0;
+  const float split_angle = is_auto_smooth ? me->smoothresh : (float)M_PI;
 
+  if (mr->extract_type != MR_EXTRACT_BMESH) {
+    /* Mesh */
     if (data_flag & (MR_DATA_POLY_NOR | MR_DATA_LOOP_NOR | MR_DATA_TAN_LOOP_NOR)) {
       mr->poly_normals = MEM_mallocN(sizeof(*mr->poly_normals) * mr->poly_len, __func__);
       BKE_mesh_calc_normals_poly((MVert *)mr->mvert,
@@ -289,23 +342,27 @@ static MeshRenderData *mesh_render_data_create(Mesh *me,
   else {
     /* BMesh */
     BMesh *bm = mr->bm;
-
-    mr->vert_len = bm->totvert;
-    mr->edge_len = bm->totedge;
-    mr->loop_len = bm->totloop;
-    mr->poly_len = bm->totface;
-    mr->tri_len = poly_to_tri_count(mr->poly_len, mr->loop_len);
-
     if (data_flag & MR_DATA_POLY_NOR) {
       /* Use bmface->no instead. */
     }
     if (((data_flag & MR_DATA_LOOP_NOR) && is_auto_smooth) || (data_flag & MR_DATA_TAN_LOOP_NOR)) {
+
+      const float(*vert_coords)[3] = NULL;
+      const float(*vert_normals)[3] = NULL;
+      const float(*poly_normals)[3] = NULL;
+
+      if (mr->edit_data && mr->edit_data->vertexCos) {
+        vert_coords = mr->bm_vert_coords;
+        vert_normals = mr->bm_vert_normals;
+        poly_normals = mr->bm_poly_normals;
+      }
+
       mr->loop_normals = MEM_mallocN(sizeof(*mr->loop_normals) * mr->loop_len, __func__);
       int clnors_offset = CustomData_get_offset(&mr->bm->ldata, CD_CUSTOMLOOPNORMAL);
       BM_loops_calc_normal_vcos(mr->bm,
-                                NULL,
-                                NULL,
-                                NULL,
+                                vert_coords,
+                                vert_normals,
+                                poly_normals,
                                 is_auto_smooth,
                                 split_angle,
                                 mr->loop_normals,
@@ -349,7 +406,6 @@ static MeshRenderData *mesh_render_data_create(Mesh *me,
       mr->loop_loose_len = mr->vert_loose_len + mr->edge_loose_len * 2;
     }
   }
-  return mr;
 }
 
 static void mesh_render_data_free(MeshRenderData *mr)
@@ -385,6 +441,42 @@ BLI_INLINE BMVert *bm_original_vert_get(const MeshRenderData *mr, int idx)
              NULL;
 }
 
+BLI_INLINE const float *bm_vert_co_get(const MeshRenderData *mr, const BMVert *eve)
+{
+  const float(*vert_coords)[3] = mr->bm_vert_coords;
+  if (vert_coords != NULL) {
+    return vert_coords[BM_elem_index_get(eve)];
+  }
+  else {
+    UNUSED_VARS(mr);
+    return eve->co;
+  }
+}
+
+/* Vertex normal for `eve`: the deformed edit-mesh normal if cached, else the BMesh normal. */
+BLI_INLINE const float *bm_vert_no_get(const MeshRenderData *mr, const BMVert *eve)
+{
+  const float(*vert_normals)[3] = mr->bm_vert_normals;
+  if (vert_normals != NULL) {
+    return vert_normals[BM_elem_index_get(eve)];
+  }
+  else {
+    return eve->no; /* Normal, not `eve->co`: callers pack this value as a normal. */
+  }
+}
+
+BLI_INLINE const float *bm_face_no_get(const MeshRenderData *mr, const BMFace *efa)
+{
+  const float(*poly_normals)[3] = mr->bm_poly_normals;
+  if (poly_normals != NULL) {
+    return poly_normals[BM_elem_index_get(efa)];
+  }
+  else {
+    UNUSED_VARS(mr);
+    return efa->no;
+  }
+}
+
 /** \} */
 
 /* ---------------------------------------------------------------------- */
@@ -596,7 +688,7 @@ static void extract_lines_loop_mesh(const MeshRenderData *mr,
 {
   const MEdge *medge = &mr->medge[mloop->e];
   if (!((mr->use_hide && (medge->flag & ME_HIDE)) ||
-        ((mr->extract_type == MR_EXTRACT_MAPPED) &&
+        ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->e_origindex) &&
          (mr->e_origindex[mloop->e] == ORIGINDEX_NONE)))) {
     int loopend = mpoly->totloop + mpoly->loopstart - 1;
     int other_loop = (l == loopend) ? mpoly->loopstart : (l + 1);
@@ -629,7 +721,7 @@ static void extract_lines_ledge_mesh(const MeshRenderData *mr,
   int ledge_idx = mr->edge_len + e;
   int edge_idx = mr->ledges[e];
   if (!((mr->use_hide && (medge->flag & ME_HIDE)) ||
-        ((mr->extract_type == MR_EXTRACT_MAPPED) &&
+        ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->e_origindex) &&
          (mr->e_origindex[edge_idx] == ORIGINDEX_NONE)))) {
     int l = mr->loop_len + e * 2;
     GPU_indexbuf_set_line_verts(elb, ledge_idx, l, l + 1);
@@ -661,46 +753,15 @@ static const MeshExtract extract_lines = {
     0,
     false,
 };
-
 /** \} */
 
 /* ---------------------------------------------------------------------- */
-/** \name Extract Loose Edges Indices
+/** \name Extract Loose Edges Sub Buffer
  * \{ */
 
-static void *extract_lines_loose_init(const MeshRenderData *UNUSED(mr), void *UNUSED(buf))
-{
-  return NULL;
-}
-
-static void extract_lines_loose_ledge_mesh(const MeshRenderData *UNUSED(mr),
-                                           int UNUSED(e),
-                                           const MEdge *UNUSED(medge),
-                                           void *UNUSED(elb))
-{
-  /* This function is intentionally empty. The existence of this functions ensures that
-   * `iter_type` `MR_ITER_LVERT` is set when initializing the `MeshRenderData` (See
-   * `mesh_extract_iter_type`). This flag ensures that `mr->edge_loose_len` field is filled. This
-   * field we use in the `extract_lines_loose_finish` function to create a subrange from the
-   * `ibo.lines`. */
-}
-
-static void extract_lines_loose_ledge_bmesh(const MeshRenderData *UNUSED(mr),
-                                            int UNUSED(e),
-                                            BMEdge *UNUSED(eed),
-                                            void *UNUSED(elb))
-{
-  /* This function is intentionally empty. The existence of this functions ensures that
-   * `iter_type` `MR_ITER_LVERT` is set when initializing the `MeshRenderData` (See
-   * `mesh_extract_iter_type`). This flag ensures that `mr->edge_loose_len` field is filled. This
-   * field we use in the `extract_lines_loose_finish` function to create a subrange from the
-   * `ibo.lines`. */
-}
-
-static void extract_lines_loose_finish(const MeshRenderData *mr,
-                                       void *UNUSED(ibo),
-                                       void *UNUSED(elb))
+static void extract_lines_loose_subbuffer(const MeshRenderData *mr)
 {
+  BLI_assert(mr->cache->final.ibo.lines);
   /* Multiply by 2 because these are edges indices. */
   const int start = mr->edge_len * 2;
   const int len = mr->edge_loose_len * 2;
@@ -709,17 +770,24 @@ static void extract_lines_loose_finish(const MeshRenderData *mr,
   mr->cache->no_loose_wire = (len == 0);
 }
 
-static const MeshExtract extract_lines_loose = {
-    extract_lines_loose_init,
-    NULL,
-    NULL,
+static void extract_lines_with_lines_loose_finish(const MeshRenderData *mr, void *ibo, void *elb)
+{
+  GPU_indexbuf_build_in_place(elb, ibo);
+  extract_lines_loose_subbuffer(mr);
+  MEM_freeN(elb);
+}
+
+static const MeshExtract extract_lines_with_lines_loose = {
+    extract_lines_init,
     NULL,
     NULL,
-    extract_lines_loose_ledge_bmesh,
-    extract_lines_loose_ledge_mesh,
+    extract_lines_loop_bmesh,
+    extract_lines_loop_mesh,
+    extract_lines_ledge_bmesh,
+    extract_lines_ledge_mesh,
     NULL,
     NULL,
-    extract_lines_loose_finish,
+    extract_lines_with_lines_loose_finish,
     0,
     false,
 };
@@ -755,7 +823,7 @@ BLI_INLINE void vert_set_mesh(GPUIndexBufBuilder *elb,
 {
   const MVert *mvert = &mr->mvert[vert_idx];
   if (!((mr->use_hide && (mvert->flag & ME_HIDE)) ||
-        ((mr->extract_type == MR_EXTRACT_MAPPED) &&
+        ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->v_origindex) &&
          (mr->v_origindex[vert_idx] == ORIGINDEX_NONE)))) {
     GPU_indexbuf_set_point_vert(elb, vert_idx, loop);
   }
@@ -924,10 +992,14 @@ static void extract_lines_paint_mask_loop_mesh(const MeshRenderData *mr,
                                                void *_data)
 {
   MeshExtract_LinePaintMask_Data *data = (MeshExtract_LinePaintMask_Data *)_data;
-  if (!(mr->use_hide && (mpoly->flag & ME_HIDE))) {
+  const int edge_idx = mloop->e;
+  const MEdge *medge = &mr->medge[edge_idx];
+  if (!((mr->use_hide && (medge->flag & ME_HIDE)) ||
+        ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->e_origindex) &&
+         (mr->e_origindex[edge_idx] == ORIGINDEX_NONE)))) {
+
     int loopend = mpoly->totloop + mpoly->loopstart - 1;
     int other_loop = (l == loopend) ? mpoly->loopstart : (l + 1);
-    int edge_idx = mloop->e;
     if (mpoly->flag & ME_FACE_SEL) {
       if (BLI_BITMAP_TEST_AND_SET_ATOMIC(data->select_map, edge_idx)) {
         /* Hide edge as it has more than 2 selected loop. */
@@ -945,6 +1017,9 @@ static void extract_lines_paint_mask_loop_mesh(const MeshRenderData *mr,
       }
     }
   }
+  else {
+    GPU_indexbuf_set_line_restart(&data->elb, edge_idx);
+  }
 }
 static void extract_lines_paint_mask_finish(const MeshRenderData *UNUSED(mr),
                                             void *ibo,
@@ -1316,7 +1391,7 @@ static void extract_edituv_points_loop_mesh(const MeshRenderData *mr,
                                             const MPoly *mpoly,
                                             void *data)
 {
-  const bool real_vert = (mr->extract_type == MR_EXTRACT_MAPPED &&
+  const bool real_vert = (mr->extract_type == MR_EXTRACT_MAPPED && (mr->v_origindex) &&
                           mr->v_origindex[mloop->v] != ORIGINDEX_NONE);
   edituv_point_add(
       data, ((mpoly->flag & ME_HIDE) != 0) || !real_vert, (mpoly->flag & ME_FACE_SEL) != 0, l);
@@ -1390,7 +1465,7 @@ static void extract_edituv_fdots_loop_mesh(const MeshRenderData *mr,
                                            const MPoly *mpoly,
                                            void *data)
 {
-  const bool real_fdot = (mr->extract_type == MR_EXTRACT_MAPPED &&
+  const bool real_fdot = (mr->extract_type == MR_EXTRACT_MAPPED && mr->p_origindex &&
                           mr->p_origindex[p] != ORIGINDEX_NONE);
   const bool subd_fdot = (!mr->use_subsurf_fdots ||
                           (mr->mvert[mloop->v].flag & ME_VERT_FACEDOT) != 0);
@@ -1462,7 +1537,7 @@ static void *extract_pos_nor_init(const MeshRenderData *mr, void *buf)
     BMVert *eve;
     int v;
     BM_ITER_MESH_INDEX (eve, &iter, mr->bm, BM_VERTS_OF_MESH, v) {
-      data->packed_nor[v] = GPU_normal_convert_i10_v3(eve->no);
+      data->packed_nor[v] = GPU_normal_convert_i10_v3(bm_vert_no_get(mr, eve));
     }
   }
   else {
@@ -1474,14 +1549,11 @@ static void *extract_pos_nor_init(const MeshRenderData *mr, void *buf)
   return data;
 }
 
-static void extract_pos_nor_loop_bmesh(const MeshRenderData *UNUSED(mr),
-                                       int l,
-                                       BMLoop *loop,
-                                       void *_data)
+static void extract_pos_nor_loop_bmesh(const MeshRenderData *mr, int l, BMLoop *loop, void *_data)
 {
   MeshExtract_PosNor_Data *data = _data;
   PosNorLoop *vert = data->vbo_data + l;
-  copy_v3_v3(vert->pos, loop->v->co);
+  copy_v3_v3(vert->pos, bm_vert_co_get(mr, loop->v));
   vert->nor = data->packed_nor[BM_elem_index_get(loop->v)];
   BMFace *efa = loop->f;
   vert->nor.w = BM_elem_flag_test(efa, BM_ELEM_HIDDEN) ? -1 : 0;
@@ -1500,7 +1572,9 @@ static void extract_pos_nor_loop_mesh(const MeshRenderData *mr,
   copy_v3_v3(vert->pos, mvert->co);
   vert->nor = data->packed_nor[mloop->v];
   /* Flag for paint mode overlay. */
-  if (mpoly->flag & ME_HIDE || mvert->flag & ME_HIDE) {
+  if (mpoly->flag & ME_HIDE || mvert->flag & ME_HIDE ||
+      ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->v_origindex) &&
+       (mr->v_origindex[mloop->v] == ORIGINDEX_NONE))) {
     vert->nor.w = -1;
   }
   else if (mvert->flag & SELECT) {
@@ -1516,8 +1590,8 @@ static void extract_pos_nor_ledge_bmesh(const MeshRenderData *mr, int e, BMEdge
   int l = mr->loop_len + e * 2;
   MeshExtract_PosNor_Data *data = _data;
   PosNorLoop *vert = data->vbo_data + l;
-  copy_v3_v3(vert[0].pos, eed->v1->co);
-  copy_v3_v3(vert[1].pos, eed->v2->co);
+  copy_v3_v3(vert[0].pos, bm_vert_co_get(mr, eed->v1));
+  copy_v3_v3(vert[1].pos, bm_vert_co_get(mr, eed->v2));
   vert[0].nor = data->packed_nor[BM_elem_index_get(eed->v1)];
   vert[1].nor = data->packed_nor[BM_elem_index_get(eed->v2)];
 }
@@ -1541,7 +1615,7 @@ static void extract_pos_nor_lvert_bmesh(const MeshRenderData *mr, int v, BMVert
   int l = mr->loop_len + mr->edge_loose_len * 2 + v;
   MeshExtract_PosNor_Data *data = _data;
   PosNorLoop *vert = data->vbo_data + l;
-  copy_v3_v3(vert->pos, eve->co);
+  copy_v3_v3(vert->pos, bm_vert_co_get(mr, eve));
   vert->nor = data->packed_nor[BM_elem_index_get(eve)];
 }
 
@@ -1607,34 +1681,40 @@ static void extract_lnor_hq_loop_bmesh(const MeshRenderData *mr, int l, BMLoop *
     normal_float_to_short_v3(&((gpuHQNor *)data)[l].x, mr->loop_normals[l]);
   }
   else if (BM_elem_flag_test(loop->f, BM_ELEM_SMOOTH)) {
-    normal_float_to_short_v3(&((gpuHQNor *)data)[l].x, loop->v->no);
+    normal_float_to_short_v3(&((gpuHQNor *)data)[l].x, bm_vert_no_get(mr, loop->v));
   }
   else {
-    normal_float_to_short_v3(&((gpuHQNor *)data)[l].x, loop->f->no);
+    normal_float_to_short_v3(&((gpuHQNor *)data)[l].x, bm_face_no_get(mr, loop->f));
   }
 }
 
 static void extract_lnor_hq_loop_mesh(
     const MeshRenderData *mr, int l, const MLoop *mloop, int p, const MPoly *mpoly, void *data)
 {
+  gpuHQNor *lnor_data = &((gpuHQNor *)data)[l];
   if (mr->loop_normals) {
-    normal_float_to_short_v3(&((gpuHQNor *)data)[l].x, mr->loop_normals[l]);
+    normal_float_to_short_v3(&lnor_data->x, mr->loop_normals[l]);
   }
   else if (mpoly->flag & ME_SMOOTH) {
-    copy_v3_v3_short(&((gpuHQNor *)data)[l].x, mr->mvert[mloop->v].no);
+    copy_v3_v3_short(&lnor_data->x, mr->mvert[mloop->v].no);
   }
   else {
-    normal_float_to_short_v3(&((gpuHQNor *)data)[l].x, mr->poly_normals[p]);
+    normal_float_to_short_v3(&lnor_data->x, mr->poly_normals[p]);
   }
-  /* Flag for paint mode overlay. */
-  if (mpoly->flag & ME_HIDE) {
-    ((gpuHQNor *)data)[l].w = -1;
+
+  /* Flag for paint mode overlay.
+   * Only use MR_EXTRACT_MAPPED in edit mode where it is used to display the edge-normals. In
+   * paint mode it will use the unmapped data to draw the wireframe. */
+  if (mpoly->flag & ME_HIDE ||
+      (mr->edit_bmesh && mr->extract_type == MR_EXTRACT_MAPPED && (mr->v_origindex) &&
+       mr->v_origindex[mloop->v] == ORIGINDEX_NONE)) {
+    lnor_data->w = -1;
   }
   else if (mpoly->flag & ME_FACE_SEL) {
-    ((gpuHQNor *)data)[l].w = 1;
+    lnor_data->w = 1;
   }
   else {
-    ((gpuHQNor *)data)[l].w = 0;
+    lnor_data->w = 0;
   }
 }
 
@@ -1678,10 +1758,10 @@ static void extract_lnor_loop_bmesh(const MeshRenderData *mr, int l, BMLoop *loo
     ((GPUPackedNormal *)data)[l] = GPU_normal_convert_i10_v3(mr->loop_normals[l]);
   }
   else if (BM_elem_flag_test(loop->f, BM_ELEM_SMOOTH)) {
-    ((GPUPackedNormal *)data)[l] = GPU_normal_convert_i10_v3(loop->v->no);
+    ((GPUPackedNormal *)data)[l] = GPU_normal_convert_i10_v3(bm_vert_no_get(mr, loop->v));
   }
   else {
-    ((GPUPackedNormal *)data)[l] = GPU_normal_convert_i10_v3(loop->f->no);
+    ((GPUPackedNormal *)data)[l] = GPU_normal_convert_i10_v3(bm_face_no_get(mr, loop->f));
   }
   BMFace *efa = loop->f;
   ((GPUPackedNormal *)data)[l].w = BM_elem_flag_test(efa, BM_ELEM_HIDDEN) ? -1 : 0;
@@ -1690,24 +1770,30 @@ static void extract_lnor_loop_bmesh(const MeshRenderData *mr, int l, BMLoop *loo
 static void extract_lnor_loop_mesh(
     const MeshRenderData *mr, int l, const MLoop *mloop, int p, const MPoly *mpoly, void *data)
 {
+  GPUPackedNormal *lnor_data = &((GPUPackedNormal *)data)[l];
   if (mr->loop_normals) {
-    ((GPUPackedNormal *)data)[l] = GPU_normal_convert_i10_v3(mr->loop_normals[l]);
+    *lnor_data = GPU_normal_convert_i10_v3(mr->loop_normals[l]);
   }
   else if (mpoly->flag & ME_SMOOTH) {
-    ((GPUPackedNormal *)data)[l] = GPU_normal_convert_i10_s3(mr->mvert[mloop->v].no);
+    *lnor_data = GPU_normal_convert_i10_s3(mr->mvert[mloop->v].no);
   }
   else {
-    ((GPUPackedNormal *)data)[l] = GPU_normal_convert_i10_v3(mr->poly_normals[p]);
+    *lnor_data = GPU_normal_convert_i10_v3(mr->poly_normals[p]);
   }
-  /* Flag for paint mode overlay. */
-  if (mpoly->flag & ME_HIDE) {
-    ((GPUPackedNormal *)data)[l].w = -1;
+
+  /* Flag for paint mode overlay.
+   * Only use MR_EXTRACT_MAPPED in edit mode where it is used to display the edge-normals. In
+   * paint mode it will use the unmapped data to draw the wireframe. */
+  if (mpoly->flag & ME_HIDE ||
+      (mr->edit_bmesh && mr->extract_type == MR_EXTRACT_MAPPED && (mr->v_origindex) &&
+       mr->v_origindex[mloop->v] == ORIGINDEX_NONE)) {
+    lnor_data->w = -1;
   }
   else if (mpoly->flag & ME_FACE_SEL) {
-    ((GPUPackedNormal *)data)[l].w = 1;
+    lnor_data->w = 1;
   }
   else {
-    ((GPUPackedNormal *)data)[l].w = 0;
+    lnor_data->w = 0;
   }
 }
 
@@ -1750,10 +1836,10 @@ static void *extract_uv_init(const MeshRenderData *mr, void *buf)
 
   for (int i = 0; i < MAX_MTFACE; i++) {
     if (uv_layers & (1 << i)) {
-      char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTRIB_NAME];
+      char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTR_NAME];
       const char *layer_name = CustomData_get_layer_name(cd_ldata, CD_MLOOPUV, i);
 
-      GPU_vertformat_safe_attrib_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTRIB_NAME);
+      GPU_vertformat_safe_attr_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME);
       /* UV layer name. */
       BLI_snprintf(attr_name, sizeof(attr_name), "u%s", attr_safe_name);
       GPU_vertformat_attr_add(&format, attr_name, GPU_COMP_F32, 2, GPU_FETCH_FLOAT);
@@ -1857,9 +1943,9 @@ static void extract_tan_ex(const MeshRenderData *mr, GPUVertBuf *vbo, const bool
 
   for (int i = 0; i < MAX_MTFACE; i++) {
     if (tan_layers & (1 << i)) {
-      char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTRIB_NAME];
+      char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTR_NAME];
       const char *layer_name = CustomData_get_layer_name(cd_ldata, CD_MLOOPUV, i);
-      GPU_vertformat_safe_attrib_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTRIB_NAME);
+      GPU_vertformat_safe_attr_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME);
       /* Tangent layer name. */
       BLI_snprintf(attr_name, sizeof(attr_name), "t%s", attr_safe_name);
       GPU_vertformat_attr_add(&format, attr_name, comp_type, 4, fetch_mode);
@@ -1883,7 +1969,10 @@ static void extract_tan_ex(const MeshRenderData *mr, GPUVertBuf *vbo, const bool
     if (mr->extract_type == MR_EXTRACT_BMESH) {
       BMesh *bm = mr->bm;
       for (int v = 0; v < mr->vert_len; v++) {
-        copy_v3_v3(orco[v], BM_vert_at_index(bm, v)->co);
+        const BMVert *eve = BM_vert_at_index(bm, v);
+        /* Exceptional case where #bm_vert_co_get can be avoided, as we want the original coords,
+         * not the distorted ones. */
+        copy_v3_v3(orco[v], eve->co);
       }
     }
     else {
@@ -1934,9 +2023,9 @@ static void extract_tan_ex(const MeshRenderData *mr, GPUVertBuf *vbo, const bool
   }
 
   if (use_orco_tan) {
-    char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTRIB_NAME];
+    char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTR_NAME];
     const char *layer_name = CustomData_get_layer_name(cd_ldata, CD_TANGENT, 0);
-    GPU_vertformat_safe_attrib_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTRIB_NAME);
+    GPU_vertformat_safe_attr_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME);
     BLI_snprintf(attr_name, sizeof(*attr_name), "t%s", attr_safe_name);
     GPU_vertformat_attr_add(&format, attr_name, comp_type, 4, fetch_mode);
     GPU_vertformat_alias_add(&format, "t");
@@ -2060,14 +2149,14 @@ static void *extract_vcol_init(const MeshRenderData *mr, void *buf)
   GPUVertFormat format = {0};
   GPU_vertformat_deinterleave(&format);
 
-  CustomData *cd_ldata = &mr->me->ldata;
+  CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->ldata : &mr->me->ldata;
   uint32_t vcol_layers = mr->cache->cd_used.vcol;
 
-  for (int i = 0; i < 8; i++) {
+  for (int i = 0; i < MAX_MCOL; i++) {
     if (vcol_layers & (1 << i)) {
-      char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTRIB_NAME];
+      char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTR_NAME];
       const char *layer_name = CustomData_get_layer_name(cd_ldata, CD_MLOOPCOL, i);
-      GPU_vertformat_safe_attrib_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTRIB_NAME);
+      GPU_vertformat_safe_attr_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME);
 
       BLI_snprintf(attr_name, sizeof(attr_name), "c%s", attr_safe_name);
       GPU_vertformat_attr_add(&format, attr_name, GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT);
@@ -2095,14 +2184,32 @@ static void *extract_vcol_init(const MeshRenderData *mr, void *buf)
   } gpuMeshVcol;
 
   gpuMeshVcol *vcol_data = (gpuMeshVcol *)vbo->data;
-  for (int i = 0; i < 8; i++) {
+  for (int i = 0; i < MAX_MCOL; i++) {
     if (vcol_layers & (1 << i)) {
-      MLoopCol *mcol = (MLoopCol *)CustomData_get_layer_n(cd_ldata, CD_MLOOPCOL, i);
-      for (int l = 0; l < mr->loop_len; l++, mcol++, vcol_data++) {
-        vcol_data->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol->r]);
-        vcol_data->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol->g]);
-        vcol_data->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol->b]);
-        vcol_data->a = unit_float_to_ushort_clamp(mcol->a * (1.0f / 255.0f));
+      if (mr->extract_type == MR_EXTRACT_BMESH) {
+        int cd_ofs = CustomData_get_n_offset(cd_ldata, CD_MLOOPCOL, i);
+        BMIter f_iter, l_iter;
+        BMFace *efa;
+        BMLoop *loop;
+        BM_ITER_MESH (efa, &f_iter, mr->bm, BM_FACES_OF_MESH) {
+          BM_ITER_ELEM (loop, &l_iter, efa, BM_LOOPS_OF_FACE) {
+            const MLoopCol *mloopcol = BM_ELEM_CD_GET_VOID_P(loop, cd_ofs);
+            vcol_data->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->r]);
+            vcol_data->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->g]);
+            vcol_data->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->b]);
+            vcol_data->a = unit_float_to_ushort_clamp(mloopcol->a * (1.0f / 255.0f));
+            vcol_data++;
+          }
+        }
+      }
+      else {
+        const MLoopCol *mloopcol = (MLoopCol *)CustomData_get_layer_n(cd_ldata, CD_MLOOPCOL, i);
+        for (int l = 0; l < mr->loop_len; l++, mloopcol++, vcol_data++) {
+          vcol_data->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->r]);
+          vcol_data->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->g]);
+          vcol_data->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->b]);
+          vcol_data->a = unit_float_to_ushort_clamp(mloopcol->a * (1.0f / 255.0f));
+        }
       }
     }
   }
@@ -2139,7 +2246,7 @@ static void *extract_orco_init(const MeshRenderData *mr, void *buf)
   static GPUVertFormat format = {0};
   if (format.attr_len == 0) {
     /* FIXME(fclem): We use the last component as a way to differentiate from generic vertex
-     * attribs. This is a substantial waste of Vram and should be done another way.
+     * attributes. This is a substantial waste of Vram and should be done another way.
      * Unfortunately, at the time of writing, I did not found any other "non disruptive"
      * alternative. */
     GPU_vertformat_attr_add(&format, "orco", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
@@ -2167,7 +2274,7 @@ static void extract_orco_loop_bmesh(const MeshRenderData *UNUSED(mr),
   MeshExtract_Orco_Data *orco_data = (MeshExtract_Orco_Data *)data;
   float *loop_orco = orco_data->vbo_data[l];
   copy_v3_v3(loop_orco, orco_data->orco[BM_elem_index_get(loop->v)]);
-  loop_orco[3] = 0.0; /* Tag as not a generic attrib */
+  loop_orco[3] = 0.0; /* Tag as not a generic attribute. */
 }
 
 static void extract_orco_loop_mesh(const MeshRenderData *UNUSED(mr),
@@ -2180,7 +2287,7 @@ static void extract_orco_loop_mesh(const MeshRenderData *UNUSED(mr),
   MeshExtract_Orco_Data *orco_data = (MeshExtract_Orco_Data *)data;
   float *loop_orco = orco_data->vbo_data[l];
   copy_v3_v3(loop_orco, orco_data->orco[mloop->v]);
-  loop_orco[3] = 0.0; /* Tag as not a generic attrib */
+  loop_orco[3] = 0.0; /* Tag as not a generic attribute. */
 }
 
 static void extract_orco_finish(const MeshRenderData *UNUSED(mr), void *UNUSED(buf), void *data)
@@ -2269,14 +2376,14 @@ static void *extract_edge_fac_init(const MeshRenderData *mr, void *buf)
   return data;
 }
 
-static void extract_edge_fac_loop_bmesh(const MeshRenderData *UNUSED(mr),
-                                        int l,
-                                        BMLoop *loop,
-                                        void *_data)
+static void extract_edge_fac_loop_bmesh(const MeshRenderData *mr, int l, BMLoop *loop, void *_data)
 {
   MeshExtract_EdgeFac_Data *data = (MeshExtract_EdgeFac_Data *)_data;
   if (BM_edge_is_manifold(loop->e)) {
-    float ratio = loop_edge_factor_get(loop->f->no, loop->v->co, loop->v->no, loop->next->v->co);
+    float ratio = loop_edge_factor_get(bm_face_no_get(mr, loop->f),
+                                       bm_vert_co_get(mr, loop->v),
+                                       bm_vert_no_get(mr, loop->v),
+                                       bm_vert_co_get(mr, loop->next->v));
     data->vbo_data[l] = ratio * 253 + 1;
   }
   else {
@@ -3101,7 +3208,7 @@ static void *extract_stretch_angle_init(const MeshRenderData *mr, void *buf)
   return data;
 }
 
-static void extract_stretch_angle_loop_bmesh(const MeshRenderData *UNUSED(mr),
+static void extract_stretch_angle_loop_bmesh(const MeshRenderData *mr,
                                              int l,
                                              BMLoop *loop,
                                              void *_data)
@@ -3118,8 +3225,12 @@ static void extract_stretch_angle_loop_bmesh(const MeshRenderData *UNUSED(mr),
     BMLoop *l_next_tmp = loop;
     luv = BM_ELEM_CD_GET_VOID_P(l_tmp, data->cd_ofs);
     luv_next = BM_ELEM_CD_GET_VOID_P(l_next_tmp, data->cd_ofs);
-    compute_normalize_edge_vectors(
-        auv, av, luv->uv, luv_next->uv, l_tmp->v->co, l_next_tmp->v->co);
+    compute_normalize_edge_vectors(auv,
+                                   av,
+                                   luv->uv,
+                                   luv_next->uv,
+                                   bm_vert_co_get(mr, l_tmp->v),
+                                   bm_vert_co_get(mr, l_next_tmp->v));
     /* Save last edge. */
     copy_v2_v2(last_auv, auv[1]);
     copy_v3_v3(last_av, av[1]);
@@ -3135,7 +3246,12 @@ static void extract_stretch_angle_loop_bmesh(const MeshRenderData *UNUSED(mr),
   else {
     luv = BM_ELEM_CD_GET_VOID_P(loop, data->cd_ofs);
     luv_next = BM_ELEM_CD_GET_VOID_P(l_next, data->cd_ofs);
-    compute_normalize_edge_vectors(auv, av, luv->uv, luv_next->uv, loop->v->co, l_next->v->co);
+    compute_normalize_edge_vectors(auv,
+                                   av,
+                                   luv->uv,
+                                   luv_next->uv,
+                                   bm_vert_co_get(mr, loop->v),
+                                   bm_vert_co_get(mr, l_next->v));
   }
   edituv_get_stretch_angle(auv, av, data->vbo_data + l);
 }
@@ -3275,7 +3391,7 @@ static void statvis_calc_overhang(const MeshRenderData *mr, float *r_overhang)
   if (mr->extract_type == MR_EXTRACT_BMESH) {
     int l = 0;
     BM_ITER_MESH (f, &iter, bm, BM_FACES_OF_MESH) {
-      float fac = angle_normalized_v3v3(f->no, dir) / (float)M_PI;
+      float fac = angle_normalized_v3v3(bm_face_no_get(mr, f), dir) / (float)M_PI;
       fac = overhang_remap(fac, min, max, minmax_irange);
       for (int i = 0; i < f->len; i++, l++) {
         r_overhang[l] = fac;
@@ -3353,7 +3469,11 @@ static void statvis_calc_thickness(const MeshRenderData *mr, float *r_thickness)
     for (int i = 0; i < mr->tri_len; i++) {
       BMLoop **ltri = looptris[i];
       const int index = BM_elem_index_get(ltri[0]->f);
-      const float *cos[3] = {ltri[0]->v->co, ltri[1]->v->co, ltri[2]->v->co};
+      const float *cos[3] = {
+          bm_vert_co_get(mr, ltri[0]->v),
+          bm_vert_co_get(mr, ltri[1]->v),
+          bm_vert_co_get(mr, ltri[2]->v),
+      };
       float ray_co[3];
       float ray_no[3];
 
@@ -3366,7 +3486,8 @@ static void statvis_calc_thickness(const MeshRenderData *mr, float *r_thickness)
 
         BMFace *f_hit = BKE_bmbvh_ray_cast(bmtree, ray_co, ray_no, 0.0f, &dist, NULL, NULL);
         if (f_hit && dist < face_dists[index]) {
-          float angle_fac = fabsf(dot_v3v3(ltri[0]->f->no, f_hit->no));
+          float angle_fac = fabsf(
+              dot_v3v3(bm_face_no_get(mr, ltri[0]->f), bm_face_no_get(mr, f_hit)));
           angle_fac = 1.0f - angle_fac;
           angle_fac = angle_fac * angle_fac * angle_fac;
           angle_fac = 1.0f - angle_fac;
@@ -3571,8 +3692,17 @@ static void statvis_calc_distort(const MeshRenderData *mr, float *r_distort)
     BMesh *bm = em->bm;
     BMFace *f;
 
+    if (mr->bm_vert_coords != NULL) {
+      BKE_editmesh_cache_ensure_poly_normals(em, mr->edit_data);
+
+      /* Most likely this is already valid, ensure just in case.
+       * Needed for #BM_loop_calc_face_normal_safe_vcos. */
+      BM_mesh_elem_index_ensure(em->bm, BM_VERT);
+    }
+
     int l = 0;
-    BM_ITER_MESH (f, &iter, bm, BM_FACES_OF_MESH) {
+    int p = 0;
+    BM_ITER_MESH_INDEX (f, &iter, bm, BM_FACES_OF_MESH, p) {
       float fac = -1.0f;
 
       if (f->len > 3) {
@@ -3581,13 +3711,23 @@ static void statvis_calc_distort(const MeshRenderData *mr, float *r_distort)
         fac = 0.0f;
         l_iter = l_first = BM_FACE_FIRST_LOOP(f);
         do {
+          const float *no_face;
           float no_corner[3];
-          BM_loop_calc_face_normal_safe(l_iter, no_corner);
+          if (mr->bm_vert_coords != NULL) {
+            no_face = mr->bm_poly_normals[p];
+            BM_loop_calc_face_normal_safe_vcos(l_iter, no_face, mr->bm_vert_coords, no_corner);
+          }
+          else {
+            no_face = f->no;
+            BM_loop_calc_face_normal_safe(l_iter, no_corner);
+          }
+
           /* simple way to detect (what is most likely) concave */
-          if (dot_v3v3(f->no, no_corner) < 0.0f) {
+          if (dot_v3v3(no_face, no_corner) < 0.0f) {
             negate_v3(no_corner);
           }
-          fac = max_ff(fac, angle_normalized_v3v3(f->no, no_corner));
+          fac = max_ff(fac, angle_normalized_v3v3(no_face, no_corner));
+
         } while ((l_iter = l_iter->next) != l_first);
         fac *= 2.0f;
       }
@@ -3810,14 +3950,14 @@ static void *extract_fdots_pos_init(const MeshRenderData *mr, void *buf)
   return vbo->data;
 }
 
-static void extract_fdots_pos_loop_bmesh(const MeshRenderData *UNUSED(mr),
+static void extract_fdots_pos_loop_bmesh(const MeshRenderData *mr,
                                          int UNUSED(l),
                                          BMLoop *loop,
                                          void *data)
 {
   float(*center)[3] = (float(*)[3])data;
   float w = 1.0f / (float)loop->f->len;
-  madd_v3_v3fl(center[BM_elem_index_get(loop->f)], loop->v->co, w);
+  madd_v3_v3fl(center[BM_elem_index_get(loop->f)], bm_vert_co_get(mr, loop->v), w);
 }
 
 static void extract_fdots_pos_loop_mesh(const MeshRenderData *mr,
@@ -3890,12 +4030,13 @@ static void extract_fdots_nor_finish(const MeshRenderData *mr, void *buf, void *
     for (int f = 0; f < mr->poly_len; f++) {
       efa = BM_face_at_index(mr->bm, f);
       const bool is_face_hidden = BM_elem_flag_test(efa, BM_ELEM_HIDDEN);
-      if (is_face_hidden) {
+      if (is_face_hidden || (mr->extract_type == MR_EXTRACT_MAPPED && mr->p_origindex &&
+                             mr->p_origindex[f] == ORIGINDEX_NONE)) {
         nor[f] = GPU_normal_convert_i10_v3(invalid_normal);
         nor[f].w = NOR_AND_FLAG_HIDDEN;
       }
       else {
-        nor[f] = GPU_normal_convert_i10_v3(efa->no);
+        nor[f] = GPU_normal_convert_i10_v3(bm_face_no_get(mr, efa));
         /* Select / Active Flag. */
         nor[f].w = (BM_elem_flag_test(efa, BM_ELEM_SELECT) ?
                         ((efa == mr->efa_act) ? NOR_AND_FLAG_ACTIVE : NOR_AND_FLAG_SELECT) :
@@ -3906,12 +4047,14 @@ static void extract_fdots_nor_finish(const MeshRenderData *mr, void *buf, void *
   else {
     for (int f = 0; f < mr->poly_len; f++) {
       efa = bm_original_face_get(mr, f);
-      if (!efa || BM_elem_flag_test(efa, BM_ELEM_HIDDEN)) {
+      const bool is_face_hidden = efa && BM_elem_flag_test(efa, BM_ELEM_HIDDEN);
+      if (is_face_hidden || (mr->extract_type == MR_EXTRACT_MAPPED && mr->p_origindex &&
+                             mr->p_origindex[f] == ORIGINDEX_NONE)) {
         nor[f] = GPU_normal_convert_i10_v3(invalid_normal);
         nor[f].w = NOR_AND_FLAG_HIDDEN;
       }
       else {
-        nor[f] = GPU_normal_convert_i10_v3(efa->no);
+        nor[f] = GPU_normal_convert_i10_v3(bm_face_no_get(mr, efa));
         /* Select / Active Flag. */
         nor[f].w = (BM_elem_flag_test(efa, BM_ELEM_SELECT) ?
                         ((efa == mr->efa_act) ? NOR_AND_FLAG_ACTIVE : NOR_AND_FLAG_SELECT) :
@@ -4136,7 +4279,7 @@ static void *extract_skin_roots_init(const MeshRenderData *mr, void *buf)
     const MVertSkin *vs = BM_ELEM_CD_GET_VOID_P(eve, cd_ofs);
     if (vs->flag & MVERT_SKIN_ROOT) {
       vbo_data->size = (vs->radius[0] + vs->radius[1]) * 0.5f;
-      copy_v3_v3(vbo_data->local_pos, eve->co);
+      copy_v3_v3(vbo_data->local_pos, bm_vert_co_get(mr, eve));
       vbo_data++;
       root_len++;
     }
@@ -4384,10 +4527,14 @@ static const MeshExtract extract_fdot_idx = {
 /** \} */
 
 /* ---------------------------------------------------------------------- */
-/** \name Extract Loop
+/** \name ExtractTaskData
  * \{ */
+typedef struct ExtractUserData {
+  void *user_data;
+} ExtractUserData;
 
 typedef struct ExtractTaskData {
+  void *next, *prev;
   const MeshRenderData *mr;
   const MeshExtract *extract;
   eMRIterType iter_type;
@@ -4395,9 +4542,16 @@ typedef struct ExtractTaskData {
   /** Decremented each time a task is finished. */
   int32_t *task_counter;
   void *buf;
-  void *user_data;
+  ExtractUserData *user_data;
 } ExtractTaskData;
 
+static void extract_task_data_free(void *data)
+{
+  ExtractTaskData *task_data = data;
+  MEM_SAFE_FREE(task_data->user_data);
+  MEM_freeN(task_data);
+}
+
 BLI_INLINE void mesh_extract_iter(const MeshRenderData *mr,
                                   const eMRIterType iter_type,
                                   int start,
@@ -4474,37 +4628,191 @@ BLI_INLINE void mesh_extract_iter(const MeshRenderData *mr,
   }
 }
 
-static void extract_run(TaskPool *__restrict UNUSED(pool), void *taskdata, int UNUSED(threadid))
+static void extract_init(ExtractTaskData *data)
+{
+  data->user_data->user_data = data->extract->init(data->mr, data->buf);
+}
+
+static void extract_run(void *__restrict taskdata)
 {
-  ExtractTaskData *data = taskdata;
-  mesh_extract_iter(
-      data->mr, data->iter_type, data->start, data->end, data->extract, data->user_data);
+  ExtractTaskData *data = (ExtractTaskData *)taskdata;
+  mesh_extract_iter(data->mr,
+                    data->iter_type,
+                    data->start,
+                    data->end,
+                    data->extract,
+                    data->user_data->user_data);
 
   /* If this is the last task, we do the finish function. */
   int remainin_tasks = atomic_sub_and_fetch_int32(data->task_counter, 1);
   if (remainin_tasks == 0 && data->extract->finish != NULL) {
-    data->extract->finish(data->mr, data->buf, data->user_data);
+    data->extract->finish(data->mr, data->buf, data->user_data->user_data);
+  }
+}
+
+static void extract_init_and_run(void *__restrict taskdata)
+{
+  extract_init((ExtractTaskData *)taskdata);
+  extract_run(taskdata);
+}
+
+/** \} */
+
+/* ---------------------------------------------------------------------- */
+/** \name Task Node - Update Mesh Render Data
+ * \{ */
+typedef struct MeshRenderDataUpdateTaskData {
+  MeshRenderData *mr;
+  eMRIterType iter_type;
+  eMRDataType data_flag;
+} MeshRenderDataUpdateTaskData;
+
+static void mesh_render_data_update_task_data_free(MeshRenderDataUpdateTaskData *taskdata)
+{
+  BLI_assert(taskdata);
+  mesh_render_data_free(taskdata->mr);
+  MEM_freeN(taskdata);
+}
+
+static void mesh_extract_render_data_node_exec(void *__restrict task_data)
+{
+  MeshRenderDataUpdateTaskData *update_task_data = task_data;
+  mesh_render_data_update(
+      update_task_data->mr, update_task_data->iter_type, update_task_data->data_flag);
+}
+
+static struct TaskNode *mesh_extract_render_data_node_create(struct TaskGraph *task_graph,
+                                                             MeshRenderData *mr,
+                                                             const eMRIterType iter_type,
+                                                             const eMRDataType data_flag)
+{
+  MeshRenderDataUpdateTaskData *task_data = MEM_mallocN(sizeof(MeshRenderDataUpdateTaskData),
+                                                        __func__);
+  task_data->mr = mr;
+  task_data->iter_type = iter_type;
+  task_data->data_flag = data_flag;
+
+  struct TaskNode *task_node = BLI_task_graph_node_create(
+      task_graph,
+      mesh_extract_render_data_node_exec,
+      task_data,
+      (TaskGraphNodeFreeFunction)mesh_render_data_update_task_data_free);
+  return task_node;
+}
+
+/** \} */
+
+/* ---------------------------------------------------------------------- */
+/** \name Task Node - Extract Single Threaded
+ * \{ */
+typedef struct ExtractSingleThreadedTaskData {
+  ListBase task_datas;
+} ExtractSingleThreadedTaskData;
+
+static void extract_single_threaded_task_data_free(ExtractSingleThreadedTaskData *taskdata)
+{
+  BLI_assert(taskdata);
+  LISTBASE_FOREACH_MUTABLE (ExtractTaskData *, td, &taskdata->task_datas) {
+    extract_task_data_free(td);
+  }
+  BLI_listbase_clear(&taskdata->task_datas);
+  MEM_freeN(taskdata);
+}
+
+static void extract_single_threaded_task_node_exec(void *__restrict task_data)
+{
+  ExtractSingleThreadedTaskData *extract_task_data = task_data;
+  LISTBASE_FOREACH (ExtractTaskData *, td, &extract_task_data->task_datas) {
+    extract_init_and_run(td);
+  }
+}
+
+static struct TaskNode *extract_single_threaded_task_node_create(
+    struct TaskGraph *task_graph, ExtractSingleThreadedTaskData *task_data)
+{
+  struct TaskNode *task_node = BLI_task_graph_node_create(
+      task_graph,
+      extract_single_threaded_task_node_exec,
+      task_data,
+      (TaskGraphNodeFreeFunction)extract_single_threaded_task_data_free);
+  return task_node;
+}
+
+/** \} */
+
+/* ---------------------------------------------------------------------- */
+/** \name Task Node - UserData Initializer
+ * \{ */
+typedef struct UserDataInitTaskData {
+  ListBase task_datas;
+  int32_t *task_counters;
+
+} UserDataInitTaskData;
+
+static void user_data_init_task_data_free(UserDataInitTaskData *taskdata)
+{
+  BLI_assert(taskdata);
+  LISTBASE_FOREACH_MUTABLE (ExtractTaskData *, td, &taskdata->task_datas) {
+    extract_task_data_free(td);
   }
+  BLI_listbase_clear(&taskdata->task_datas);
+  MEM_SAFE_FREE(taskdata->task_counters);
+  MEM_freeN(taskdata);
+}
+
+static void user_data_init_task_data_exec(void *__restrict task_data)
+{
+  UserDataInitTaskData *extract_task_data = task_data;
+  LISTBASE_FOREACH (ExtractTaskData *, td, &extract_task_data->task_datas) {
+    extract_init(td);
+  }
+}
+
+static struct TaskNode *user_data_init_task_node_create(struct TaskGraph *task_graph,
+                                                        UserDataInitTaskData *task_data)
+{
+  struct TaskNode *task_node = BLI_task_graph_node_create(
+      task_graph,
+      user_data_init_task_data_exec,
+      task_data,
+      (TaskGraphNodeFreeFunction)user_data_init_task_data_free);
+  return task_node;
 }
 
-static void extract_range_task_create(
-    TaskPool *task_pool, ExtractTaskData *taskdata, const eMRIterType type, int start, int length)
+/** \} */
+/* ---------------------------------------------------------------------- */
+/** \name Extract Loop
+ * \{ */
+
+static void extract_range_task_create(struct TaskGraph *task_graph,
+                                      struct TaskNode *task_node_user_data_init,
+                                      ExtractTaskData *taskdata,
+                                      const eMRIterType type,
+                                      int start,
+                                      int length)
 {
   taskdata = MEM_dupallocN(taskdata);
   atomic_add_and_fetch_int32(taskdata->task_counter, 1);
   taskdata->iter_type = type;
   taskdata->start = start;
   taskdata->end = start + length;
-  BLI_task_pool_push(task_pool, extract_run, taskdata, true, TASK_PRIORITY_HIGH);
+  struct TaskNode *task_node = BLI_task_graph_node_create(
+      task_graph, extract_run, taskdata, MEM_freeN);
+  BLI_task_graph_edge_create(task_node_user_data_init, task_node);
 }
 
-static void extract_task_create(TaskPool *task_pool,
+static void extract_task_create(struct TaskGraph *task_graph,
+                                struct TaskNode *task_node_mesh_render_data,
+                                struct TaskNode *task_node_user_data_init,
+                                ListBase *single_threaded_task_datas,
+                                ListBase *user_data_init_task_datas,
                                 const Scene *scene,
                                 const MeshRenderData *mr,
                                 const MeshExtract *extract,
                                 void *buf,
                                 int32_t *task_counter)
 {
+  BLI_assert(scene != NULL);
   const bool do_hq_normals = (scene->r.perf_flag & SCE_PERF_HQ_NORMALS) != 0;
   if (do_hq_normals && (extract == &extract_lnor)) {
     extract = &extract_lnor_hq;
@@ -4515,59 +4823,77 @@ static void extract_task_create(TaskPool *task_pool,
 
   /* Divide extraction of the VBO/IBO into sensible chunks of works. */
   ExtractTaskData *taskdata = MEM_mallocN(sizeof(*taskdata), "ExtractTaskData");
+  taskdata->next = NULL;
+  taskdata->prev = NULL;
   taskdata->mr = mr;
   taskdata->extract = extract;
   taskdata->buf = buf;
-  taskdata->user_data = extract->init(mr, buf);
+
+  /* ExtractUserData is shared between the iterations as it holds counters to detect if the
+   * extraction is finished. To make sure the duplication of the userdata does not create a new
+   * instance of the counters we allocate the userdata in its own container.
+   *
+   * This structure makes sure that when extract_init is called, the user data of all
+   * iterations is updated. */
+  taskdata->user_data = MEM_callocN(sizeof(ExtractUserData), __func__);
   taskdata->iter_type = mesh_extract_iter_type(extract);
   taskdata->task_counter = task_counter;
   taskdata->start = 0;
   taskdata->end = INT_MAX;
 
   /* Simple heuristic. */
-  const bool use_thread = (mr->loop_len + mr->loop_loose_len) > 8192;
+  const int chunk_size = 8192;
+  const bool use_thread = (mr->loop_len + mr->loop_loose_len) > chunk_size;
   if (use_thread && extract->use_threading) {
+
     /* Divide task into sensible chunks. */
-    const int chunk_size = 8192;
     if (taskdata->iter_type & MR_ITER_LOOPTRI) {
       for (int i = 0; i < mr->tri_len; i += chunk_size) {
-        extract_range_task_create(task_pool, taskdata, MR_ITER_LOOPTRI, i, chunk_size);
+        extract_range_task_create(
+            task_graph, task_node_user_data_init, taskdata, MR_ITER_LOOPTRI, i, chunk_size);
       }
     }
     if (taskdata->iter_type & MR_ITER_LOOP) {
       for (int i = 0; i < mr->poly_len; i += chunk_size) {
-        extract_range_task_create(task_pool, taskdata, MR_ITER_LOOP, i, chunk_size);
+        extract_range_task_create(
+            task_graph, task_node_user_data_init, taskdata, MR_ITER_LOOP, i, chunk_size);
       }
     }
     if (taskdata->iter_type & MR_ITER_LEDGE) {
       for (int i = 0; i < mr->edge_loose_len; i += chunk_size) {
-        extract_range_task_create(task_pool, taskdata, MR_ITER_LEDGE, i, chunk_size);
+        extract_range_task_create(
+            task_graph, task_node_user_data_init, taskdata, MR_ITER_LEDGE, i, chunk_size);
       }
     }
     if (taskdata->iter_type & MR_ITER_LVERT) {
       for (int i = 0; i < mr->vert_loose_len; i += chunk_size) {
-        extract_range_task_create(task_pool, taskdata, MR_ITER_LVERT, i, chunk_size);
+        extract_range_task_create(
+            task_graph, task_node_user_data_init, taskdata, MR_ITER_LVERT, i, chunk_size);
       }
     }
-    MEM_freeN(taskdata);
+    BLI_addtail(user_data_init_task_datas, taskdata);
   }
   else if (use_thread) {
     /* One task for the whole VBO. */
     (*task_counter)++;
-    BLI_task_pool_push(task_pool, extract_run, taskdata, true, TASK_PRIORITY_HIGH);
+    struct TaskNode *one_task = BLI_task_graph_node_create(
+        task_graph, extract_init_and_run, taskdata, extract_task_data_free);
+    BLI_task_graph_edge_create(task_node_mesh_render_data, one_task);
   }
   else {
     /* Single threaded extraction. */
     (*task_counter)++;
-    extract_run(NULL, taskdata, -1);
-    MEM_freeN(taskdata);
+    BLI_addtail(single_threaded_task_datas, taskdata);
   }
 }
 
-void mesh_buffer_cache_create_requested(MeshBatchCache *cache,
+void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph,
+                                        MeshBatchCache *cache,
                                         MeshBufferCache mbc,
                                         Mesh *me,
+
                                         const bool is_editmode,
+                                        const bool is_paint_mode,
                                         const float obmat[4][4],
                                         const bool do_final,
                                         const bool do_uvedit,
@@ -4577,9 +4903,41 @@ void mesh_buffer_cache_create_requested(MeshBatchCache *cache,
                                         const ToolSettings *ts,
                                         const bool use_hide)
 {
+  /* For each mesh whose batches need to be updated, a sub-graph is added to the task_graph.
+   * This sub-graph starts with an extract_render_data_node, which fills/converts the required
+   * data from Mesh.
+   *
+   * Small extractions and extractions that can't be multi-threaded are grouped in a single
+   * `extract_single_threaded_task_node`.
+   *
+   * Other extractions will create a node for each loop exceeding 8192 items. These nodes are
+   * linked to the `user_data_init_task_node`. The `user_data_init_task_node` prepares the userdata
+   * needed for the extraction based on the data extracted from the mesh. Counters are used to
+   * check if the finalize of a task has to be called.
+   *
+   *                           Mesh extraction sub graph
+   *
+   *                                                       +----------------------+
+   *                                               +-----> | extract_task1_loop_1 |
+   *                                               |       +----------------------+
+   * +------------------+     +----------------------+     +----------------------+
+   * | mesh_render_data | --> |                      | --> | extract_task1_loop_2 |
+   * +------------------+     |                      |     +----------------------+
+   *   |                      |                      |     +----------------------+
+   *   |                      |    user_data_init    | --> | extract_task2_loop_1 |
+   *   v                      |                      |     +----------------------+
+   * +------------------+     |                      |     +----------------------+
+   * | single_threaded  |     |                      | --> | extract_task2_loop_2 |
+   * +------------------+     +----------------------+     +----------------------+
+   *                                               |       +----------------------+
+   *                                               +-----> | extract_task2_loop_3 |
+   *                                                       +----------------------+
+   */
   eMRIterType iter_flag = 0;
   eMRDataType data_flag = 0;
 
+  const bool do_lines_loose_subbuffer = mbc.ibo.lines_loose != NULL;
+
 #define TEST_ASSIGN(type, type_lowercase, name) \
   do { \
     if (DRW_TEST_ASSIGN_##type(mbc.type_lowercase.name)) { \
@@ -4617,7 +4975,6 @@ void mesh_buffer_cache_create_requested(MeshBatchCache *cache,
   TEST_ASSIGN(IBO, ibo, fdots);
   TEST_ASSIGN(IBO, ibo, lines_paint_mask);
   TEST_ASSIGN(IBO, ibo, lines_adjacency);
-  TEST_ASSIGN(IBO, ibo, lines_loose);
   TEST_ASSIGN(IBO, ibo, edituv_tris);
   TEST_ASSIGN(IBO, ibo, edituv_lines);
   TEST_ASSIGN(IBO, ibo, edituv_points);
@@ -4630,7 +4987,7 @@ void mesh_buffer_cache_create_requested(MeshBatchCache *cache,
 #endif
 
   MeshRenderData *mr = mesh_render_data_create(
-      me, is_editmode, obmat, do_final, do_uvedit, iter_flag, data_flag, cd_layer_used, ts);
+      me, is_editmode, is_paint_mode, obmat, do_final, do_uvedit, cd_layer_used, ts);
   mr->cache = cache; /* HACK */
   mr->use_hide = use_hide;
   mr->use_subsurf_fdots = use_subsurf_fdots;
@@ -4640,20 +4997,32 @@ void mesh_buffer_cache_create_requested(MeshBatchCache *cache,
   double rdata_end = PIL_check_seconds_timer();
 #endif
 
-  TaskScheduler *task_scheduler;
-  TaskPool *task_pool;
-
-  task_scheduler = BLI_task_scheduler_get();
-  task_pool = BLI_task_pool_create_suspended(task_scheduler, NULL);
-
   size_t counters_size = (sizeof(mbc) / sizeof(void *)) * sizeof(int32_t);
   int32_t *task_counters = MEM_callocN(counters_size, __func__);
   int counter_used = 0;
 
+  struct TaskNode *task_node_mesh_render_data = mesh_extract_render_data_node_create(
+      task_graph, mr, iter_flag, data_flag);
+  ExtractSingleThreadedTaskData *single_threaded_task_data = MEM_callocN(
+      sizeof(ExtractSingleThreadedTaskData), __func__);
+  UserDataInitTaskData *user_data_init_task_data = MEM_callocN(sizeof(UserDataInitTaskData),
+                                                               __func__);
+  user_data_init_task_data->task_counters = task_counters;
+  struct TaskNode *task_node_user_data_init = user_data_init_task_node_create(
+      task_graph, user_data_init_task_data);
+
 #define EXTRACT(buf, name) \
   if (mbc.buf.name) { \
-    extract_task_create( \
-        task_pool, scene, mr, &extract_##name, mbc.buf.name, &task_counters[counter_used++]); \
+    extract_task_create(task_graph, \
+                        task_node_mesh_render_data, \
+                        task_node_user_data_init, \
+                        &single_threaded_task_data->task_datas, \
+                        &user_data_init_task_data->task_datas, \
+                        scene, \
+                        mr, \
+                        &extract_##name, \
+                        mbc.buf.name, \
+                        &task_counters[counter_used++]); \
   } \
   ((void)0)
 
@@ -4681,7 +5050,33 @@ void mesh_buffer_cache_create_requested(MeshBatchCache *cache,
   EXTRACT(vbo, skin_roots);
 
   EXTRACT(ibo, tris);
-  EXTRACT(ibo, lines);
+  if (mbc.ibo.lines) {
+    /* When `lines` and `lines_loose` are requested, schedule lines extraction that also creates
+     * the `lines_loose` sub-buffer. */
+    const MeshExtract *lines_extractor = do_lines_loose_subbuffer ?
+                                             &extract_lines_with_lines_loose :
+                                             &extract_lines;
+    extract_task_create(task_graph,
+                        task_node_mesh_render_data,
+                        task_node_user_data_init,
+                        &single_threaded_task_data->task_datas,
+                        &user_data_init_task_data->task_datas,
+                        scene,
+                        mr,
+                        lines_extractor,
+                        mbc.ibo.lines,
+                        &task_counters[counter_used++]);
+  }
+  else {
+    if (do_lines_loose_subbuffer) {
+      /* When `lines_loose` is requested without `lines` we can create the sub-buffer on the fly as
+       * the `lines` buffer should then already be up-to-date
+       * (see `DRW_batch_requested(cache->batch.loose_edges, GPU_PRIM_LINES)` in
+       * `DRW_mesh_batch_cache_create_requested`).
+       * NOTE(review): called while the graph is still being built; confirm `mr` is ready here. */
+      extract_lines_loose_subbuffer(mr);
+    }
+  }
   EXTRACT(ibo, points);
   EXTRACT(ibo, fdots);
   EXTRACT(ibo, lines_paint_mask);
@@ -4691,27 +5086,29 @@ void mesh_buffer_cache_create_requested(MeshBatchCache *cache,
   EXTRACT(ibo, edituv_points);
   EXTRACT(ibo, edituv_fdots);
 
-  /* TODO(fclem) Ideally, we should have one global pool for all
-   * objects and wait for finish only before drawing when buffers
-   * need to be ready. */
-  BLI_task_pool_work_and_wait(task_pool);
-
-  /* The next task(s) rely on the result of the tasks above. */
+  /* Only create the edge when there is user data that needs to be initialized.
+   * The task is still part of the graph so the task_data will be freed when the graph is freed.
+   */
+  if (!BLI_listbase_is_empty(&user_data_init_task_data->task_datas)) {
+    BLI_task_graph_edge_create(task_node_mesh_render_data, task_node_user_data_init);
+  }
 
-  /* The `lines_loose` is a sub buffer from `ibo.lines`.
-   * We schedule it here due to potential synchronization issues.*/
-  EXTRACT(ibo, lines_loose);
+  if (!BLI_listbase_is_empty(&single_threaded_task_data->task_datas)) {
+    struct TaskNode *task_node = extract_single_threaded_task_node_create(
+        task_graph, single_threaded_task_data);
+    BLI_task_graph_edge_create(task_node_mesh_render_data, task_node);
+  }
+  else {
+    extract_single_threaded_task_data_free(single_threaded_task_data);
+  }
 
-  BLI_task_pool_work_and_wait(task_pool);
+  /* Trigger the sub-graph for this mesh. */
+  BLI_task_graph_node_push_work(task_node_mesh_render_data);
 
 #undef EXTRACT
 
-  BLI_task_pool_free(task_pool);
-  MEM_freeN(task_counters);
-
-  mesh_render_data_free(mr);
-
 #ifdef DEBUG_TIME
+  BLI_task_graph_work_and_wait(task_graph);
   double end = PIL_check_seconds_timer();
 
   static double avg = 0;
diff --git a/source/blender/draw/intern/draw_cache_impl.h b/source/blender/draw/intern/draw_cache_impl.h
index 3ce8a7d4e43..80649143537 100644
--- a/source/blender/draw/intern/draw_cache_impl.h
+++ b/source/blender/draw/intern/draw_cache_impl.h
@@ -31,6 +31,7 @@ struct ListBase;
 struct ModifierData;
 struct PTCacheEdit;
 struct ParticleSystem;
+struct TaskGraph;
 
 struct Curve;
 struct Hair;
@@ -93,7 +94,7 @@ struct GPUBatch *DRW_curve_batch_cache_get_wire_edge(struct Curve *cu);
 struct GPUBatch *DRW_curve_batch_cache_get_normal_edge(struct Curve *cu);
 struct GPUBatch *DRW_curve_batch_cache_get_edge_detection(struct Curve *cu, bool *r_is_manifold);
 struct GPUBatch *DRW_curve_batch_cache_get_edit_edges(struct Curve *cu);
-struct GPUBatch *DRW_curve_batch_cache_get_edit_verts(struct Curve *cu, bool handles);
+struct GPUBatch *DRW_curve_batch_cache_get_edit_verts(struct Curve *cu);
 
 struct GPUBatch *DRW_curve_batch_cache_get_triangles_with_normals(struct Curve *cu);
 struct GPUBatch **DRW_curve_batch_cache_get_surface_shaded(struct Curve *cu,
@@ -150,7 +151,8 @@ int DRW_volume_material_count_get(struct Volume *volume);
 struct GPUBatch *DRW_volume_batch_cache_get_wireframes_face(struct Volume *volume);
 
 /* Mesh */
-void DRW_mesh_batch_cache_create_requested(struct Object *ob,
+void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph,
+                                           struct Object *ob,
                                            struct Mesh *me,
                                            const struct Scene *scene,
                                            const bool is_paint_mode,
@@ -208,6 +210,7 @@ enum {
   VFLAG_EDGE_SEAM = 1 << 4,
   VFLAG_EDGE_SHARP = 1 << 5,
   VFLAG_EDGE_FREESTYLE = 1 << 6,
+  VFLAG_HANDLE_SELECTED = 1 << 7,
   /* Beware to not go over 1 << 7 (it's a byte flag)
    * (see gpu_shader_edit_mesh_overlay_geom.glsl) */
 };
diff --git a/source/blender/draw/intern/draw_cache_impl_curve.c b/source/blender/draw/intern/draw_cache_impl_curve.c
index 331f8073ed5..c6112994b65 100644
--- a/source/blender/draw/intern/draw_cache_impl_curve.c
+++ b/source/blender/draw/intern/draw_cache_impl_curve.c
@@ -25,6 +25,7 @@
 
 #include "MEM_guardedalloc.h"
 
+#include "BLI_listbase.h"
 #include "BLI_math_vector.h"
 #include "BLI_utildefines.h"
 
@@ -48,7 +49,9 @@
 
 #define SELECT 1
 #define ACTIVE_NURB 1 << 2
-#define EVEN_U_BIT 1 << 3 /* Alternate this bit for every U vert. */
+#define BEZIER_HANDLE (1 << 3)
+#define EVEN_U_BIT (1 << 4) /* Alternate this bit for every U vert. */
+#define COLOR_SHIFT 5
 
 /* Used as values of `color_id` in `edit_curve_overlay_handle_geom.glsl` */
 enum {
@@ -76,7 +79,7 @@ static void curve_render_overlay_verts_edges_len_get(ListBase *lb,
   BLI_assert(r_vert_len || r_edge_len);
   int vert_len = 0;
   int edge_len = 0;
-  for (Nurb *nu = lb->first; nu; nu = nu->next) {
+  LISTBASE_FOREACH (Nurb *, nu, lb) {
     if (nu->bezt) {
       vert_len += nu->pntsu * 3;
       /* 2x handles per point*/
@@ -106,7 +109,7 @@ static void curve_render_wire_verts_edges_len_get(const CurveCache *ob_curve_cac
   int vert_len = 0;
   int edge_len = 0;
   int curve_len = 0;
-  for (const BevList *bl = ob_curve_cache->bev.first; bl; bl = bl->next) {
+  LISTBASE_FOREACH (const BevList *, bl, &ob_curve_cache->bev) {
     if (bl->nr > 0) {
       const bool is_cyclic = bl->poly != -1;
       edge_len += (is_cyclic) ? bl->nr : bl->nr - 1;
@@ -114,7 +117,7 @@ static void curve_render_wire_verts_edges_len_get(const CurveCache *ob_curve_cac
       curve_len += 1;
     }
   }
-  for (const DispList *dl = ob_curve_cache->disp.first; dl; dl = dl->next) {
+  LISTBASE_FOREACH (const DispList *, dl, &ob_curve_cache->disp) {
     if (ELEM(dl->type, DL_SEGM, DL_POLY)) {
       BLI_assert(dl->parts == 1);
       const bool is_cyclic = dl->type == DL_POLY;
@@ -314,7 +317,7 @@ static void curve_cd_calc_used_gpu_layers(int *cd_layers,
     }
 
     ListBase gpu_attrs = GPU_material_attributes(gpumat);
-    for (GPUMaterialAttribute *gpu_attr = gpu_attrs.first; gpu_attr; gpu_attr = gpu_attr->next) {
+    LISTBASE_FOREACH (GPUMaterialAttribute *, gpu_attr, &gpu_attrs) {
       const char *name = gpu_attr->name;
       int type = gpu_attr->type;
 
@@ -375,7 +378,7 @@ typedef struct CurveBatchCache {
     GPUIndexBuf *curves_lines;
     GPUIndexBuf *edges_adj_lines;
     /* Edit mode */
-    GPUIndexBuf *edit_verts_points; /* Only control points. Not handles. */
+    GPUIndexBuf *edit_verts;
     GPUIndexBuf *edit_lines;
   } ibo;
 
@@ -386,7 +389,6 @@ typedef struct CurveBatchCache {
     /* control handles and vertices */
     GPUBatch *edit_edges;
     GPUBatch *edit_verts;
-    GPUBatch *edit_handles_verts;
     GPUBatch *edit_normals;
     GPUBatch *edge_detection;
   } batch;
@@ -496,7 +498,6 @@ void DRW_curve_batch_cache_dirty_tag(Curve *cu, int mode)
 
       GPU_BATCH_DISCARD_SAFE(cache->batch.edit_edges);
       GPU_BATCH_DISCARD_SAFE(cache->batch.edit_verts);
-      GPU_BATCH_DISCARD_SAFE(cache->batch.edit_handles_verts);
       break;
     default:
       BLI_assert(0);
@@ -565,7 +566,7 @@ static void curve_create_curves_pos(CurveRenderData *rdata, GPUVertBuf *vbo_curv
   GPU_vertbuf_data_alloc(vbo_curves_pos, vert_len);
 
   int v_idx = 0;
-  for (const BevList *bl = rdata->ob_curve_cache->bev.first; bl; bl = bl->next) {
+  LISTBASE_FOREACH (const BevList *, bl, &rdata->ob_curve_cache->bev) {
     if (bl->nr <= 0) {
       continue;
     }
@@ -574,7 +575,7 @@ static void curve_create_curves_pos(CurveRenderData *rdata, GPUVertBuf *vbo_curv
       GPU_vertbuf_attr_set(vbo_curves_pos, attr_id.pos, v_idx, bevp->vec);
     }
   }
-  for (const DispList *dl = rdata->ob_curve_cache->disp.first; dl; dl = dl->next) {
+  LISTBASE_FOREACH (const DispList *, dl, &rdata->ob_curve_cache->disp) {
     if (ELEM(dl->type, DL_SEGM, DL_POLY)) {
       for (int i = 0; i < dl->nr; v_idx++, i++) {
         GPU_vertbuf_attr_set(vbo_curves_pos, attr_id.pos, v_idx, &((float(*)[3])dl->verts)[i]);
@@ -598,7 +599,7 @@ static void curve_create_curves_lines(CurveRenderData *rdata, GPUIndexBuf *ibo_c
   GPU_indexbuf_init_ex(&elb, GPU_PRIM_LINE_STRIP, index_len, vert_len);
 
   int v_idx = 0;
-  for (const BevList *bl = rdata->ob_curve_cache->bev.first; bl; bl = bl->next) {
+  LISTBASE_FOREACH (const BevList *, bl, &rdata->ob_curve_cache->bev) {
     if (bl->nr <= 0) {
       continue;
     }
@@ -612,7 +613,7 @@ static void curve_create_curves_lines(CurveRenderData *rdata, GPUIndexBuf *ibo_c
     GPU_indexbuf_add_primitive_restart(&elb);
     v_idx += bl->nr;
   }
-  for (const DispList *dl = rdata->ob_curve_cache->disp.first; dl; dl = dl->next) {
+  LISTBASE_FOREACH (const DispList *, dl, &rdata->ob_curve_cache->disp) {
     if (ELEM(dl->type, DL_SEGM, DL_POLY)) {
       const bool is_cyclic = dl->type == DL_POLY;
       if (is_cyclic) {
@@ -684,15 +685,22 @@ static void curve_create_edit_curves_nor(CurveRenderData *rdata, GPUVertBuf *vbo
   BLI_assert(vbo_len_used == verts_len_capacity);
 }
 
-static char beztriple_vflag_get(
-    CurveRenderData *rdata, char flag, char col_id, int v_idx, int nu_id)
+static char beztriple_vflag_get(CurveRenderData *rdata,
+                                char flag,
+                                char col_id,
+                                int v_idx,
+                                int nu_id,
+                                const bool handle_point,
+                                const bool handle_selected)
 {
   char vflag = 0;
   SET_FLAG_FROM_TEST(vflag, (flag & SELECT), VFLAG_VERT_SELECTED);
   SET_FLAG_FROM_TEST(vflag, (v_idx == rdata->actvert && nu_id == rdata->actnu), VFLAG_VERT_ACTIVE);
   SET_FLAG_FROM_TEST(vflag, (nu_id == rdata->actnu), ACTIVE_NURB);
+  SET_FLAG_FROM_TEST(vflag, handle_point, BEZIER_HANDLE);
+  SET_FLAG_FROM_TEST(vflag, handle_selected, VFLAG_HANDLE_SELECTED);
   /* handle color id */
-  vflag |= col_id << 4; /* << 4 because of EVEN_U_BIT */
+  vflag |= col_id << COLOR_SHIFT; /* Color id is stored above EVEN_U_BIT. */
   return vflag;
 }
 
@@ -703,7 +711,7 @@ static char bpoint_vflag_get(CurveRenderData *rdata, char flag, int v_idx, int n
   SET_FLAG_FROM_TEST(vflag, (v_idx == rdata->actvert && nu_id == rdata->actnu), VFLAG_VERT_ACTIVE);
   SET_FLAG_FROM_TEST(vflag, (nu_id == rdata->actnu), ACTIVE_NURB);
   SET_FLAG_FROM_TEST(vflag, ((u % 2) == 0), EVEN_U_BIT);
-  vflag |= COLOR_NURB_ULINE_ID << 4; /* << 4 because of EVEN_U_BIT */
+  vflag |= COLOR_NURB_ULINE_ID << COLOR_SHIFT;
   return vflag;
 }
 
@@ -752,14 +760,18 @@ static void curve_create_edit_data_and_handles(CurveRenderData *rdata,
   for (Nurb *nu = rdata->nurbs->first; nu; nu = nu->next, nu_id++) {
     const BezTriple *bezt = nu->bezt;
     const BPoint *bp = nu->bp;
+
     if (bezt) {
       for (int a = 0; a < nu->pntsu; a++, bezt++) {
         if (bezt->hide == true) {
           continue;
         }
+        const bool handle_selected = BEZT_ISSEL_ANY(bezt);
 
         if (elbp_verts) {
+          GPU_indexbuf_add_point_vert(elbp_verts, vbo_len_used + 0);
           GPU_indexbuf_add_point_vert(elbp_verts, vbo_len_used + 1);
+          GPU_indexbuf_add_point_vert(elbp_verts, vbo_len_used + 2);
         }
         if (elbp_lines) {
           GPU_indexbuf_add_line_verts(elbp_lines, vbo_len_used + 1, vbo_len_used + 0);
@@ -767,9 +779,9 @@ static void curve_create_edit_data_and_handles(CurveRenderData *rdata,
         }
         if (vbo_data) {
           const char vflag[3] = {
-              beztriple_vflag_get(rdata, bezt->f1, bezt->h1, a, nu_id),
-              beztriple_vflag_get(rdata, bezt->f2, bezt->h1, a, nu_id),
-              beztriple_vflag_get(rdata, bezt->f3, bezt->h2, a, nu_id),
+              beztriple_vflag_get(rdata, bezt->f1, bezt->h1, a, nu_id, true, handle_selected),
+              beztriple_vflag_get(rdata, bezt->f2, bezt->h1, a, nu_id, false, handle_selected),
+              beztriple_vflag_get(rdata, bezt->f3, bezt->h2, a, nu_id, true, handle_selected),
           };
           for (int j = 0; j < 3; j++) {
             GPU_vertbuf_attr_set(vbo_data, attr_id.data, vbo_len_used + j, &vflag[j]);
@@ -858,15 +870,10 @@ GPUBatch *DRW_curve_batch_cache_get_edit_edges(Curve *cu)
   return DRW_batch_request(&cache->batch.edit_edges);
 }
 
-GPUBatch *DRW_curve_batch_cache_get_edit_verts(Curve *cu, bool handles)
+GPUBatch *DRW_curve_batch_cache_get_edit_verts(Curve *cu)
 {
   CurveBatchCache *cache = curve_batch_cache_get(cu);
-  if (handles) {
-    return DRW_batch_request(&cache->batch.edit_handles_verts);
-  }
-  else {
-    return DRW_batch_request(&cache->batch.edit_verts);
-  }
+  return DRW_batch_request(&cache->batch.edit_verts);
 }
 
 GPUBatch *DRW_curve_batch_cache_get_triangles_with_normals(struct Curve *cu)
@@ -964,14 +971,10 @@ void DRW_curve_batch_cache_create_requested(Object *ob)
     DRW_vbo_request(cache->batch.edit_edges, &cache->edit.data);
   }
   if (DRW_batch_requested(cache->batch.edit_verts, GPU_PRIM_POINTS)) {
-    DRW_ibo_request(cache->batch.edit_verts, &cache->ibo.edit_verts_points);
+    DRW_ibo_request(cache->batch.edit_verts, &cache->ibo.edit_verts);
     DRW_vbo_request(cache->batch.edit_verts, &cache->edit.pos);
     DRW_vbo_request(cache->batch.edit_verts, &cache->edit.data);
   }
-  if (DRW_batch_requested(cache->batch.edit_handles_verts, GPU_PRIM_POINTS)) {
-    DRW_vbo_request(cache->batch.edit_handles_verts, &cache->edit.pos);
-    DRW_vbo_request(cache->batch.edit_handles_verts, &cache->edit.data);
-  }
   if (DRW_batch_requested(cache->batch.edit_normals, GPU_PRIM_LINES)) {
     DRW_vbo_request(cache->batch.edit_normals, &cache->edit.curves_nor);
   }
@@ -1011,7 +1014,7 @@ void DRW_curve_batch_cache_create_requested(Object *ob)
   DRW_ADD_FLAG_FROM_VBO_REQUEST(mr_flag, cache->edit.data, CU_DATATYPE_OVERLAY);
   DRW_ADD_FLAG_FROM_VBO_REQUEST(mr_flag, cache->edit.curves_nor, CU_DATATYPE_NORMAL);
   DRW_ADD_FLAG_FROM_VBO_REQUEST(mr_flag, cache->edit.curves_weight, CU_DATATYPE_OVERLAY);
-  DRW_ADD_FLAG_FROM_IBO_REQUEST(mr_flag, cache->ibo.edit_verts_points, CU_DATATYPE_OVERLAY);
+  DRW_ADD_FLAG_FROM_IBO_REQUEST(mr_flag, cache->ibo.edit_verts, CU_DATATYPE_OVERLAY);
   DRW_ADD_FLAG_FROM_IBO_REQUEST(mr_flag, cache->ibo.edit_lines, CU_DATATYPE_OVERLAY);
 
   for (int i = 0; i < cache->mat_len; i++) {
@@ -1039,7 +1042,7 @@ void DRW_curve_batch_cache_create_requested(Object *ob)
   }
 
   if (DRW_vbo_requested(cache->ordered.loop_pos_nor) ||
-      DRW_vbo_requested(cache->ordered.loop_uv)) {
+      DRW_vbo_requested(cache->ordered.loop_uv) || DRW_vbo_requested(cache->ordered.loop_tan)) {
     DRW_displist_vertbuf_create_loop_pos_and_nor_and_uv_and_tan(
         lb, cache->ordered.loop_pos_nor, cache->ordered.loop_uv, cache->ordered.loop_tan);
   }
@@ -1064,13 +1067,9 @@ void DRW_curve_batch_cache_create_requested(Object *ob)
   }
 
   if (DRW_vbo_requested(cache->edit.pos) || DRW_vbo_requested(cache->edit.data) ||
-      DRW_ibo_requested(cache->ibo.edit_verts_points) ||
-      DRW_ibo_requested(cache->ibo.edit_lines)) {
-    curve_create_edit_data_and_handles(rdata,
-                                       cache->edit.pos,
-                                       cache->edit.data,
-                                       cache->ibo.edit_verts_points,
-                                       cache->ibo.edit_lines);
+      DRW_ibo_requested(cache->ibo.edit_verts) || DRW_ibo_requested(cache->ibo.edit_lines)) {
+    curve_create_edit_data_and_handles(
+        rdata, cache->edit.pos, cache->edit.data, cache->ibo.edit_verts, cache->ibo.edit_lines);
   }
   if (DRW_vbo_requested(cache->edit.curves_nor)) {
     curve_create_edit_curves_nor(rdata, cache->edit.curves_nor);
diff --git a/source/blender/draw/intern/draw_cache_impl_displist.c b/source/blender/draw/intern/draw_cache_impl_displist.c
index 04889463447..e09f78aa51f 100644
--- a/source/blender/draw/intern/draw_cache_impl_displist.c
+++ b/source/blender/draw/intern/draw_cache_impl_displist.c
@@ -27,6 +27,7 @@
 
 #include "BLI_alloca.h"
 #include "BLI_edgehash.h"
+#include "BLI_listbase.h"
 #include "BLI_math_vector.h"
 #include "BLI_utildefines.h"
 
@@ -71,7 +72,7 @@ static int dl_tri_len(const DispList *dl)
 static int curve_render_surface_vert_len_get(const ListBase *lb)
 {
   int vert_len = 0;
-  for (const DispList *dl = lb->first; dl; dl = dl->next) {
+  LISTBASE_FOREACH (const DispList *, dl, lb) {
     vert_len += dl_vert_len(dl);
   }
   return vert_len;
@@ -80,7 +81,7 @@ static int curve_render_surface_vert_len_get(const ListBase *lb)
 static int curve_render_surface_tri_len_get(const ListBase *lb)
 {
   int tri_len = 0;
-  for (const DispList *dl = lb->first; dl; dl = dl->next) {
+  LISTBASE_FOREACH (const DispList *, dl, lb) {
     tri_len += dl_tri_len(dl);
   }
   return tri_len;
@@ -192,7 +193,7 @@ void DRW_displist_vertbuf_create_pos_and_nor(ListBase *lb, GPUVertBuf *vbo)
   BKE_displist_normals_add(lb);
 
   int vbo_len_used = 0;
-  for (const DispList *dl = lb->first; dl; dl = dl->next) {
+  LISTBASE_FOREACH (const DispList *, dl, lb) {
     const bool ndata_is_single = dl->type == DL_INDEX3;
     if (ELEM(dl->type, DL_INDEX3, DL_INDEX4, DL_SURF)) {
       const float *fp_co = dl->verts;
@@ -262,7 +263,7 @@ void DRW_displist_indexbuf_create_triangles_in_order(ListBase *lb, GPUIndexBuf *
   GPU_indexbuf_init(&elb, GPU_PRIM_TRIS, tri_len, vert_len);
 
   int ofs = 0;
-  for (const DispList *dl = lb->first; dl; dl = dl->next) {
+  LISTBASE_FOREACH (const DispList *, dl, lb) {
     displist_indexbufbuilder_set((SetTriIndicesFn *)GPU_indexbuf_add_tri_verts,
                                  (SetTriIndicesFn *)GPU_indexbuf_add_tri_verts,
                                  &elb,
@@ -289,7 +290,7 @@ void DRW_displist_indexbuf_create_triangles_loop_split_by_material(ListBase *lb,
 
   /* calc each index buffer builder */
   uint v_idx = 0;
-  for (const DispList *dl = lb->first; dl; dl = dl->next) {
+  LISTBASE_FOREACH (const DispList *, dl, lb) {
     v_idx = displist_indexbufbuilder_tess_set((SetTriIndicesFn *)GPU_indexbuf_add_tri_verts,
                                               (SetTriIndicesFn *)GPU_indexbuf_add_tri_verts,
                                               &elb[dl->col],
@@ -327,7 +328,7 @@ void DRW_displist_indexbuf_create_lines_in_order(ListBase *lb, GPUIndexBuf *ibo)
   GPU_indexbuf_init(&elb, GPU_PRIM_LINES, tri_len * 3, vert_len);
 
   int ofs = 0;
-  for (const DispList *dl = lb->first; dl; dl = dl->next) {
+  LISTBASE_FOREACH (const DispList *, dl, lb) {
     displist_indexbufbuilder_set(
         set_overlay_wires_tri_indices, set_overlay_wires_quad_tri_indices, &elb, dl, ofs);
     ofs += dl_vert_len(dl);
@@ -406,7 +407,7 @@ static void displist_vertbuf_attr_set_tri_pos_nor_uv(GPUVertBufRaw *pos_step,
   }
 }
 
-#define SURFACE_QUAD_ITER_START(dl) \
+#define SURFACE_QUAD_ITER_BEGIN(dl) \
   { \
     uint quad[4]; \
     int quad_index = 0; \
@@ -446,8 +447,7 @@ static void displist_surf_fnors_ensure(const DispList *dl, float (**fnors)[3])
   float(*nor_flat)[3] = MEM_mallocN(sizeof(float) * 3 * u_len * v_len, __func__);
   *fnors = nor_flat;
 
-  SURFACE_QUAD_ITER_START(dl)
-  {
+  SURFACE_QUAD_ITER_BEGIN (dl) {
     normal_quad_v3(*nor_flat, verts[quad[0]], verts[quad[1]], verts[quad[2]], verts[quad[3]]);
     nor_flat++;
   }
@@ -508,7 +508,7 @@ void DRW_displist_vertbuf_create_loop_pos_and_nor_and_uv_and_tan(ListBase *lb,
 
   BKE_displist_normals_add(lb);
 
-  for (const DispList *dl = lb->first; dl; dl = dl->next) {
+  LISTBASE_FOREACH (const DispList *, dl, lb) {
     const bool is_smooth = (dl->rt & CU_SMOOTH) != 0;
     if (ELEM(dl->type, DL_INDEX3, DL_INDEX4, DL_SURF)) {
       const float(*verts)[3] = (float(*)[3])dl->verts;
@@ -570,8 +570,7 @@ void DRW_displist_vertbuf_create_loop_pos_and_nor_and_uv_and_tan(ListBase *lb,
           BKE_displist_tangent_calc(dl, fnors, &tangents);
         }
 
-        SURFACE_QUAD_ITER_START(dl)
-        {
+        SURFACE_QUAD_ITER_BEGIN (dl) {
           if (vbo_uv) {
             surf_uv_quad(dl, quad, uv);
           }
@@ -781,7 +780,7 @@ void DRW_displist_indexbuf_create_edges_adjacency_lines(struct ListBase *lb,
   /* pack values to pass to `set_edges_adjacency_lines_indices` function. */
   void *thunk[3] = {&elb, eh, r_is_manifold};
   int v_idx = 0;
-  for (const DispList *dl = lb->first; dl; dl = dl->next) {
+  LISTBASE_FOREACH (const DispList *, dl, lb) {
     displist_indexbufbuilder_set((SetTriIndicesFn *)set_edges_adjacency_lines_indices,
                                  (SetTriIndicesFn *)set_edges_adjacency_lines_indices,
                                  thunk,
diff --git a/source/blender/draw/intern/draw_cache_impl_gpencil.c b/source/blender/draw/intern/draw_cache_impl_gpencil.c
index 349eb6b00ae..b4974330043 100644
--- a/source/blender/draw/intern/draw_cache_impl_gpencil.c
+++ b/source/blender/draw/intern/draw_cache_impl_gpencil.c
@@ -62,8 +62,6 @@ typedef struct GpencilBatchCache {
 
   /** Cache is dirty */
   bool is_dirty;
-  /** Edit mode flag */
-  bool is_editmode;
   /** Last cache frame */
   int cache_frame;
 } GpencilBatchCache;
@@ -71,21 +69,17 @@ typedef struct GpencilBatchCache {
 static bool gpencil_batch_cache_valid(GpencilBatchCache *cache, bGPdata *gpd, int cfra)
 {
   bool valid = true;
+
   if (cache == NULL) {
     return false;
   }
 
-  cache->is_editmode = GPENCIL_ANY_EDIT_MODE(gpd);
   if (cfra != cache->cache_frame) {
     valid = false;
   }
   else if (gpd->flag & GP_DATA_CACHE_IS_DIRTY) {
     valid = false;
   }
-  else if (gpd->flag & GP_DATA_PYTHON_UPDATED) {
-    gpd->flag &= ~GP_DATA_PYTHON_UPDATED;
-    valid = false;
-  }
   else if (cache->is_dirty) {
     valid = false;
   }
@@ -106,9 +100,9 @@ static GpencilBatchCache *gpencil_batch_cache_init(Object *ob, int cfra)
     memset(cache, 0, sizeof(*cache));
   }
 
-  cache->is_editmode = GPENCIL_ANY_EDIT_MODE(gpd);
   cache->is_dirty = true;
   cache->cache_frame = cfra;
+
   return cache;
 }
 
@@ -181,7 +175,8 @@ static GPUVertFormat *gpencil_stroke_format(void)
     GPU_vertformat_attr_add(&format, "ma", GPU_COMP_I32, 4, GPU_FETCH_INT);
     GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
     GPU_vertformat_attr_add(&format, "uv", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
-    /* IMPORTANT: This means having only 4 attributes to fit into GPU module limit of 16 attrib. */
+    /* IMPORTANT: This means having only 4 attributes
+     * to fit into GPU module limit of 16 attributes. */
     GPU_vertformat_multiload_enable(&format, 4);
   }
   return &format;
@@ -215,7 +210,8 @@ static GPUVertFormat *gpencil_color_format(void)
   if (format.attr_len == 0) {
     GPU_vertformat_attr_add(&format, "col", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
     GPU_vertformat_attr_add(&format, "fcol", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
-    /* IMPORTANT: This means having only 4 attributes to fit into GPU module limit of 16 attrib. */
+    /* IMPORTANT: This means having only 4 attributes
+     * to fit into GPU module limit of 16 attributes. */
     GPU_vertformat_multiload_enable(&format, 4);
   }
   return &format;
@@ -296,7 +292,7 @@ static void gpencil_buffer_add_point(gpStrokeVert *verts,
   vert->u_stroke = pt->uv_fac;
   vert->stroke_id = gps->runtime.stroke_start;
   vert->point_id = v;
-  vert->thickness = max_ff(0.0f, gps->thickness * pt->pressure) * (round_cap1 ? 1.0 : -1.0);
+  vert->thickness = max_ff(0.0f, gps->thickness * pt->pressure) * (round_cap1 ? 1.0f : -1.0f);
   /* Tag endpoint material to -1 so they get discarded by vertex shader. */
   vert->mat = (is_endpoint) ? -1 : (gps->mat_nr % GP_MATERIAL_BUFFER_LEN);
 
@@ -390,7 +386,8 @@ static void gpencil_batches_ensure(Object *ob, GpencilBatchCache *cache, int cfr
         .vert_len = 1, /* Start at 1 for the gl_InstanceID trick to work (see vert shader). */
         .tri_len = 0,
     };
-    BKE_gpencil_visible_stroke_iter(ob, NULL, gp_object_verts_count_cb, &iter, do_onion, cfra);
+    BKE_gpencil_visible_stroke_iter(
+        NULL, ob, NULL, gp_object_verts_count_cb, &iter, do_onion, cfra);
 
     /* Create VBOs. */
     GPUVertFormat *format = gpencil_stroke_format();
@@ -406,7 +403,7 @@ static void gpencil_batches_ensure(Object *ob, GpencilBatchCache *cache, int cfr
     GPU_indexbuf_init(&iter.ibo, GPU_PRIM_TRIS, iter.tri_len, iter.vert_len);
 
     /* Fill buffers with data. */
-    BKE_gpencil_visible_stroke_iter(ob, NULL, gpencil_stroke_iter_cb, &iter, do_onion, cfra);
+    BKE_gpencil_visible_stroke_iter(NULL, ob, NULL, gpencil_stroke_iter_cb, &iter, do_onion, cfra);
 
     /* Mark last 2 verts as invalid. */
     for (int i = 0; i < 2; i++) {
@@ -480,7 +477,7 @@ GPUBatch *DRW_cache_gpencil_face_wireframe_get(Object *ob)
 
     /* IMPORTANT: Keep in sync with gpencil_edit_batches_ensure() */
     bool do_onion = true;
-    BKE_gpencil_visible_stroke_iter(ob, NULL, gp_lines_indices_cb, &iter, do_onion, cfra);
+    BKE_gpencil_visible_stroke_iter(NULL, ob, NULL, gp_lines_indices_cb, &iter, do_onion, cfra);
 
     GPUIndexBuf *ibo = GPU_indexbuf_build(&iter.ibo);
 
@@ -542,15 +539,14 @@ static void gpencil_sbuffer_stroke_ensure(bGPdata *gpd, bool do_stroke, bool do_
 
     /* Get origin to reproject points. */
     float origin[3];
-    bGPDlayer *gpl = BKE_gpencil_layer_active_get(gpd);
     ToolSettings *ts = scene->toolsettings;
-    ED_gpencil_drawing_reference_get(scene, ob, gpl, ts->gpencil_v3d_align, origin);
+    ED_gpencil_drawing_reference_get(scene, ob, ts->gpencil_v3d_align, origin);
 
     for (int i = 0; i < vert_len; i++) {
       ED_gpencil_tpoint_to_point(region, origin, &tpoints[i], &gps->points[i]);
       mul_m4_v3(ob->imat, &gps->points[i].x);
       bGPDspoint *pt = &gps->points[i];
-      copy_v4_v4(pt->vert_color, gpd->runtime.vert_color);
+      copy_v4_v4(pt->vert_color, tpoints[i].vert_color);
     }
     /* Calc uv data along the stroke. */
     BKE_gpencil_stroke_uv_update(gps);
@@ -730,7 +726,8 @@ static void gpencil_edit_batches_ensure(Object *ob, GpencilBatchCache *cache, in
     iter.verts = (gpEditVert *)cache->edit_vbo->data;
 
     /* Fill buffers with data. */
-    BKE_gpencil_visible_stroke_iter(ob, NULL, gpencil_edit_stroke_iter_cb, &iter, do_onion, cfra);
+    BKE_gpencil_visible_stroke_iter(
+        NULL, ob, NULL, gpencil_edit_stroke_iter_cb, &iter, do_onion, cfra);
 
     /* Create the batches */
     cache->edit_points_batch = GPU_batch_create(GPU_PRIM_POINTS, cache->vbo, NULL);
diff --git a/source/blender/draw/intern/draw_cache_impl_mesh.c b/source/blender/draw/intern/draw_cache_impl_mesh.c
index aedc86c2eae..99e285a18f1 100644
--- a/source/blender/draw/intern/draw_cache_impl_mesh.c
+++ b/source/blender/draw/intern/draw_cache_impl_mesh.c
@@ -29,9 +29,11 @@
 #include "BLI_bitmap.h"
 #include "BLI_buffer.h"
 #include "BLI_edgehash.h"
+#include "BLI_listbase.h"
 #include "BLI_math_bits.h"
 #include "BLI_math_vector.h"
 #include "BLI_string.h"
+#include "BLI_task.h"
 #include "BLI_utildefines.h"
 
 #include "DNA_mesh_types.h"
@@ -95,11 +97,25 @@ static void mesh_cd_calc_edit_uv_layer(const Mesh *UNUSED(me), DRW_MeshCDMask *c
   cd_used->edit_uv = 1;
 }
 
+BLI_INLINE const CustomData *mesh_cd_ldata_get_from_mesh(const Mesh *me)
+{
+  switch ((eMeshWrapperType)me->runtime.wrapper_type) {
+    case ME_WRAPPER_TYPE_MDATA:
+      return &me->ldata;
+      break;
+    case ME_WRAPPER_TYPE_BMESH:
+      return &me->edit_mesh->bm->ldata;
+      break;
+  }
+
+  BLI_assert(0);
+  return &me->ldata;
+}
+
 static void mesh_cd_calc_active_uv_layer(const Mesh *me, DRW_MeshCDMask *cd_used)
 {
   const Mesh *me_final = (me->edit_mesh) ? me->edit_mesh->mesh_eval_final : me;
-  const CustomData *cd_ldata = &me_final->ldata;
-
+  const CustomData *cd_ldata = mesh_cd_ldata_get_from_mesh(me_final);
   int layer = CustomData_get_active_layer(cd_ldata, CD_MLOOPUV);
   if (layer != -1) {
     cd_used->uv |= (1 << layer);
@@ -109,8 +125,7 @@ static void mesh_cd_calc_active_uv_layer(const Mesh *me, DRW_MeshCDMask *cd_used
 static void mesh_cd_calc_active_mask_uv_layer(const Mesh *me, DRW_MeshCDMask *cd_used)
 {
   const Mesh *me_final = (me->edit_mesh) ? me->edit_mesh->mesh_eval_final : me;
-  const CustomData *cd_ldata = &me_final->ldata;
-
+  const CustomData *cd_ldata = mesh_cd_ldata_get_from_mesh(me_final);
   int layer = CustomData_get_stencil_layer(cd_ldata, CD_MLOOPUV);
   if (layer != -1) {
     cd_used->uv |= (1 << layer);
@@ -120,8 +135,7 @@ static void mesh_cd_calc_active_mask_uv_layer(const Mesh *me, DRW_MeshCDMask *cd
 static void mesh_cd_calc_active_vcol_layer(const Mesh *me, DRW_MeshCDMask *cd_used)
 {
   const Mesh *me_final = (me->edit_mesh) ? me->edit_mesh->mesh_eval_final : me;
-  const CustomData *cd_ldata = &me_final->ldata;
-
+  const CustomData *cd_ldata = mesh_cd_ldata_get_from_mesh(me_final);
   int layer = CustomData_get_active_layer(cd_ldata, CD_MLOOPCOL);
   if (layer != -1) {
     cd_used->vcol |= (1 << layer);
@@ -133,7 +147,7 @@ static DRW_MeshCDMask mesh_cd_calc_used_gpu_layers(const Mesh *me,
                                                    int gpumat_array_len)
 {
   const Mesh *me_final = (me->edit_mesh) ? me->edit_mesh->mesh_eval_final : me;
-  const CustomData *cd_ldata = &me_final->ldata;
+  const CustomData *cd_ldata = mesh_cd_ldata_get_from_mesh(me_final);
 
   /* See: DM_vertex_attributes_from_gpu for similar logic */
   DRW_MeshCDMask cd_used;
@@ -143,7 +157,7 @@ static DRW_MeshCDMask mesh_cd_calc_used_gpu_layers(const Mesh *me,
     GPUMaterial *gpumat = gpumat_array[i];
     if (gpumat) {
       ListBase gpu_attrs = GPU_material_attributes(gpumat);
-      for (GPUMaterialAttribute *gpu_attr = gpu_attrs.first; gpu_attr; gpu_attr = gpu_attr->next) {
+      LISTBASE_FOREACH (GPUMaterialAttribute *, gpu_attr, &gpu_attrs) {
         const char *name = gpu_attr->name;
         int type = gpu_attr->type;
         int layer = -1;
@@ -301,7 +315,7 @@ static void drw_mesh_weight_state_extract(Object *ob,
   wstate->alert_mode = ts->weightuser;
 
   if (paint_mode && ts->multipaint) {
-    /* Multipaint needs to know all selected bones, not just the active group.
+    /* Multi-paint needs to know all selected bones, not just the active group.
      * This is actually a relatively expensive operation, but caching would be difficult. */
     wstate->defgroup_sel = BKE_object_defgroup_selected_get(
         ob, wstate->defgroup_len, &wstate->defgroup_sel_count);
@@ -436,8 +450,7 @@ static void mesh_batch_cache_check_vertex_group(MeshBatchCache *cache,
                                                 const struct DRW_MeshWeightState *wstate)
 {
   if (!drw_mesh_weight_state_compare(&cache->weight_state, wstate)) {
-    FOREACH_MESH_BUFFER_CACHE(cache, mbufcache)
-    {
+    FOREACH_MESH_BUFFER_CACHE (cache, mbufcache) {
       GPU_VERTBUF_DISCARD_SAFE(mbufcache->vbo.weights);
     }
     GPU_BATCH_CLEAR_SAFE(cache->batch.surface_weights);
@@ -460,8 +473,7 @@ static void mesh_batch_cache_discard_shaded_batches(MeshBatchCache *cache)
 
 static void mesh_batch_cache_discard_shaded_tri(MeshBatchCache *cache)
 {
-  FOREACH_MESH_BUFFER_CACHE(cache, mbufcache)
-  {
+  FOREACH_MESH_BUFFER_CACHE (cache, mbufcache) {
     GPU_VERTBUF_DISCARD_SAFE(mbufcache->vbo.pos_nor);
     GPU_VERTBUF_DISCARD_SAFE(mbufcache->vbo.uv);
     GPU_VERTBUF_DISCARD_SAFE(mbufcache->vbo.tan);
@@ -478,8 +490,7 @@ static void mesh_batch_cache_discard_shaded_tri(MeshBatchCache *cache)
 
 static void mesh_batch_cache_discard_uvedit(MeshBatchCache *cache)
 {
-  FOREACH_MESH_BUFFER_CACHE(cache, mbufcache)
-  {
+  FOREACH_MESH_BUFFER_CACHE (cache, mbufcache) {
     GPU_VERTBUF_DISCARD_SAFE(mbufcache->vbo.stretch_angle);
     GPU_VERTBUF_DISCARD_SAFE(mbufcache->vbo.stretch_area);
     GPU_VERTBUF_DISCARD_SAFE(mbufcache->vbo.uv);
@@ -517,8 +528,7 @@ static void mesh_batch_cache_discard_uvedit(MeshBatchCache *cache)
 
 static void mesh_batch_cache_discard_uvedit_select(MeshBatchCache *cache)
 {
-  FOREACH_MESH_BUFFER_CACHE(cache, mbufcache)
-  {
+  FOREACH_MESH_BUFFER_CACHE (cache, mbufcache) {
     GPU_VERTBUF_DISCARD_SAFE(mbufcache->vbo.edituv_data);
     GPU_VERTBUF_DISCARD_SAFE(mbufcache->vbo.fdots_edituv_data);
     GPU_INDEXBUF_DISCARD_SAFE(mbufcache->ibo.edituv_tris);
@@ -544,8 +554,7 @@ void DRW_mesh_batch_cache_dirty_tag(Mesh *me, int mode)
   }
   switch (mode) {
     case BKE_MESH_BATCH_DIRTY_SELECT:
-      FOREACH_MESH_BUFFER_CACHE(cache, mbufcache)
-      {
+      FOREACH_MESH_BUFFER_CACHE (cache, mbufcache) {
         GPU_VERTBUF_DISCARD_SAFE(mbufcache->vbo.edit_data);
         GPU_VERTBUF_DISCARD_SAFE(mbufcache->vbo.fdots_nor);
       }
@@ -566,10 +575,9 @@ void DRW_mesh_batch_cache_dirty_tag(Mesh *me, int mode)
       mesh_batch_cache_discard_uvedit_select(cache);
       break;
     case BKE_MESH_BATCH_DIRTY_SELECT_PAINT:
-      /* Paint mode selection flag is packed inside the nor attrib.
+      /* Paint mode selection flag is packed inside the nor attribute.
        * Note that it can be slow if auto smooth is enabled. (see T63946) */
-      FOREACH_MESH_BUFFER_CACHE(cache, mbufcache)
-      {
+      FOREACH_MESH_BUFFER_CACHE (cache, mbufcache) {
         GPU_INDEXBUF_DISCARD_SAFE(mbufcache->ibo.lines_paint_mask);
         GPU_VERTBUF_DISCARD_SAFE(mbufcache->vbo.pos_nor);
         GPU_VERTBUF_DISCARD_SAFE(mbufcache->vbo.lnor);
@@ -595,8 +603,7 @@ void DRW_mesh_batch_cache_dirty_tag(Mesh *me, int mode)
       mesh_batch_cache_discard_uvedit(cache);
       break;
     case BKE_MESH_BATCH_DIRTY_UVEDIT_SELECT:
-      FOREACH_MESH_BUFFER_CACHE(cache, mbufcache)
-      {
+      FOREACH_MESH_BUFFER_CACHE (cache, mbufcache) {
         GPU_VERTBUF_DISCARD_SAFE(mbufcache->vbo.edituv_data);
         GPU_VERTBUF_DISCARD_SAFE(mbufcache->vbo.fdots_edituv_data);
       }
@@ -619,8 +626,7 @@ static void mesh_batch_cache_clear(Mesh *me)
   if (!cache) {
     return;
   }
-  FOREACH_MESH_BUFFER_CACHE(cache, mbufcache)
-  {
+  FOREACH_MESH_BUFFER_CACHE (cache, mbufcache) {
     GPUVertBuf **vbos = (GPUVertBuf **)&mbufcache->vbo;
     GPUIndexBuf **ibos = (GPUIndexBuf **)&mbufcache->ibo;
     for (int i = 0; i < sizeof(mbufcache->vbo) / sizeof(void *); i++) {
@@ -1014,9 +1020,14 @@ void DRW_mesh_batch_cache_free_old(Mesh *me, int ctime)
 }
 
 /* Can be called for any surface type. Mesh *me is the final mesh. */
-void DRW_mesh_batch_cache_create_requested(
-    Object *ob, Mesh *me, const Scene *scene, const bool is_paint_mode, const bool use_hide)
-{
+void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph,
+                                           Object *ob,
+                                           Mesh *me,
+                                           const Scene *scene,
+                                           const bool is_paint_mode,
+                                           const bool use_hide)
+{
+  BLI_assert(task_graph);
   GPUIndexBuf **saved_elem_ranges = NULL;
   const ToolSettings *ts = NULL;
   if (scene) {
@@ -1060,8 +1071,7 @@ void DRW_mesh_batch_cache_create_requested(
    * index ranges initialized. So discard ibo.tris in order to recreate it.
    * This needs to happen before saved_elem_ranges is populated. */
   if ((batch_requested & MBC_SURF_PER_MAT) != 0 && (cache->batch_ready & MBC_SURF_PER_MAT) == 0) {
-    FOREACH_MESH_BUFFER_CACHE(cache, mbuffercache)
-    {
+    FOREACH_MESH_BUFFER_CACHE (cache, mbuffercache) {
       GPU_INDEXBUF_DISCARD_SAFE(mbuffercache->ibo.tris);
     }
     /* Clear all batches that reference ibo.tris. */
@@ -1098,8 +1108,7 @@ void DRW_mesh_batch_cache_create_requested(
      * material. */
     bool cd_overlap = mesh_cd_layers_type_overlap(cache->cd_used, cache->cd_needed);
     if (cd_overlap == false) {
-      FOREACH_MESH_BUFFER_CACHE(cache, mbuffercache)
-      {
+      FOREACH_MESH_BUFFER_CACHE (cache, mbuffercache) {
         if ((cache->cd_used.uv & cache->cd_needed.uv) != cache->cd_needed.uv) {
           GPU_VERTBUF_DISCARD_SAFE(mbuffercache->vbo.uv);
           cd_uv_update = true;
@@ -1145,8 +1154,7 @@ void DRW_mesh_batch_cache_create_requested(
     const bool is_uvsyncsel = ts && (ts->uv_flag & UV_SYNC_SELECTION);
     if (cd_uv_update || (cache->is_uvsyncsel != is_uvsyncsel)) {
       cache->is_uvsyncsel = is_uvsyncsel;
-      FOREACH_MESH_BUFFER_CACHE(cache, mbuffercache)
-      {
+      FOREACH_MESH_BUFFER_CACHE (cache, mbuffercache) {
         GPU_VERTBUF_DISCARD_SAFE(mbuffercache->vbo.edituv_data);
         GPU_VERTBUF_DISCARD_SAFE(mbuffercache->vbo.fdots_uv);
         GPU_INDEXBUF_DISCARD_SAFE(mbuffercache->ibo.edituv_tris);
@@ -1184,10 +1192,10 @@ void DRW_mesh_batch_cache_create_requested(
 
   MeshBufferCache *mbufcache = &cache->final;
 
-  /* Init batches and request VBOs & IBOs */
+  /* Initialize batches and request VBO's & IBO's. */
   if (DRW_batch_requested(cache->batch.surface, GPU_PRIM_TRIS)) {
     DRW_ibo_request(cache->batch.surface, &mbufcache->ibo.tris);
-    /* Order matters. First ones override latest vbos' attribs. */
+    /* Order matters. First ones override latest VBO's attributes. */
     DRW_vbo_request(cache->batch.surface, &mbufcache->vbo.lnor);
     DRW_vbo_request(cache->batch.surface, &mbufcache->vbo.pos_nor);
     if (cache->cd_used.uv != 0) {
@@ -1220,7 +1228,7 @@ void DRW_mesh_batch_cache_create_requested(
   }
   if (DRW_batch_requested(cache->batch.wire_loops, GPU_PRIM_LINES)) {
     DRW_ibo_request(cache->batch.wire_loops, &mbufcache->ibo.lines_paint_mask);
-    /* Order matters. First ones override latest vbos' attribs. */
+    /* Order matters. First ones override latest VBO's attributes. */
     DRW_vbo_request(cache->batch.wire_loops, &mbufcache->vbo.lnor);
     DRW_vbo_request(cache->batch.wire_loops, &mbufcache->vbo.pos_nor);
   }
@@ -1252,7 +1260,7 @@ void DRW_mesh_batch_cache_create_requested(
       else {
         DRW_ibo_request(cache->surface_per_mat[i], &mbufcache->ibo.tris);
       }
-      /* Order matters. First ones override latest vbos' attribs. */
+      /* Order matters. First ones override latest VBO's attributes. */
       DRW_vbo_request(cache->surface_per_mat[i], &mbufcache->vbo.lnor);
       DRW_vbo_request(cache->surface_per_mat[i], &mbufcache->vbo.pos_nor);
       if (cache->cd_used.uv != 0) {
@@ -1372,13 +1380,16 @@ void DRW_mesh_batch_cache_create_requested(
   }
 
   /* Meh loose Scene const correctness here. */
-  const bool use_subsurf_fdots = scene ? modifiers_usesSubsurfFacedots((Scene *)scene, ob) : false;
+  const bool use_subsurf_fdots = scene ? BKE_modifiers_uses_subsurf_facedots((Scene *)scene, ob) :
+                                         false;
 
   if (do_uvcage) {
-    mesh_buffer_cache_create_requested(cache,
+    mesh_buffer_cache_create_requested(task_graph,
+                                       cache,
                                        cache->uv_cage,
                                        me,
                                        is_editmode,
+                                       is_paint_mode,
                                        ob->obmat,
                                        false,
                                        true,
@@ -1390,10 +1401,12 @@ void DRW_mesh_batch_cache_create_requested(
   }
 
   if (do_cage) {
-    mesh_buffer_cache_create_requested(cache,
+    mesh_buffer_cache_create_requested(task_graph,
+                                       cache,
                                        cache->cage,
                                        me,
                                        is_editmode,
+                                       is_paint_mode,
                                        ob->obmat,
                                        false,
                                        false,
@@ -1404,10 +1417,12 @@ void DRW_mesh_batch_cache_create_requested(
                                        true);
   }
 
-  mesh_buffer_cache_create_requested(cache,
+  mesh_buffer_cache_create_requested(task_graph,
+                                     cache,
                                      cache->final,
                                      me,
                                      is_editmode,
+                                     is_paint_mode,
                                      ob->obmat,
                                      true,
                                      false,
@@ -1416,10 +1431,12 @@ void DRW_mesh_batch_cache_create_requested(
                                      scene,
                                      ts,
                                      use_hide);
-
 #ifdef DEBUG
 check:
   /* Make sure all requested batches have been setup. */
+  /* TODO(jbakker): we should move this to the draw_manager but that needs refactoring and
+   * additional looping. */
+  BLI_task_graph_work_and_wait(task_graph);
   for (int i = 0; i < sizeof(cache->batch) / sizeof(void *); i++) {
     BLI_assert(!DRW_batch_requested(((GPUBatch **)&cache->batch)[i], 0));
   }
diff --git a/source/blender/draw/intern/draw_cache_impl_particles.c b/source/blender/draw/intern/draw_cache_impl_particles.c
index 42a1dce891d..331a1f80bec 100644
--- a/source/blender/draw/intern/draw_cache_impl_particles.c
+++ b/source/blender/draw/intern/draw_cache_impl_particles.c
@@ -339,7 +339,8 @@ static void particle_calculate_parent_mcol(ParticleSystem *psys,
   if (num != DMCACHE_NOTFOUND && num != DMCACHE_ISCHILD) {
     MFace *mface = &psmd->mesh_final->mface[num];
     for (int j = 0; j < num_col_layers; j++) {
-      psys_interpolate_mcol(mcols[j] + num, mface->v4, particle->fuv, &r_mcol[j]);
+      /* CustomDataLayer CD_MCOL has 4 structs per face. */
+      psys_interpolate_mcol(mcols[j] + num * 4, mface->v4, particle->fuv, &r_mcol[j]);
     }
   }
 }
@@ -388,6 +389,7 @@ static void particle_interpolate_children_mcol(ParticleSystem *psys,
   if (num != DMCACHE_NOTFOUND) {
     MFace *mface = &psmd->mesh_final->mface[num];
     for (int j = 0; j < num_col_layers; j++) {
+      /* CustomDataLayer CD_MCOL has 4 structs per face. */
       psys_interpolate_mcol(mcols[j] + num * 4, mface->v4, particle->fuv, &r_mcol[j]);
     }
   }
@@ -877,9 +879,9 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit
     GPU_vertbuf_data_alloc(cache->proc_uv_buf[i], cache->strands_len);
     GPU_vertbuf_attr_get_raw_data(cache->proc_uv_buf[i], uv_id, &uv_step[i]);
 
-    char attr_safe_name[GPU_MAX_SAFE_ATTRIB_NAME];
+    char attr_safe_name[GPU_MAX_SAFE_ATTR_NAME];
     const char *name = CustomData_get_layer_name(&psmd->mesh_final->ldata, CD_MLOOPUV, i);
-    GPU_vertformat_safe_attrib_name(name, attr_safe_name, GPU_MAX_SAFE_ATTRIB_NAME);
+    GPU_vertformat_safe_attr_name(name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME);
 
     int n = 0;
     BLI_snprintf(cache->uv_layer_names[i][n++], MAX_LAYER_NAME_LEN, "u%s", attr_safe_name);
@@ -898,9 +900,9 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit
     GPU_vertbuf_data_alloc(cache->proc_col_buf[i], cache->strands_len);
     GPU_vertbuf_attr_get_raw_data(cache->proc_col_buf[i], col_id, &col_step[i]);
 
-    char attr_safe_name[GPU_MAX_SAFE_ATTRIB_NAME];
+    char attr_safe_name[GPU_MAX_SAFE_ATTR_NAME];
     const char *name = CustomData_get_layer_name(&psmd->mesh_final->ldata, CD_MLOOPCOL, i);
-    GPU_vertformat_safe_attrib_name(name, attr_safe_name, GPU_MAX_SAFE_ATTRIB_NAME);
+    GPU_vertformat_safe_attr_name(name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME);
 
     int n = 0;
     BLI_snprintf(cache->col_layer_names[i][n++], MAX_LAYER_NAME_LEN, "c%s", attr_safe_name);
@@ -1164,9 +1166,9 @@ static void particle_batch_cache_ensure_pos_and_seg(PTCacheEdit *edit,
 
     for (int i = 0; i < num_uv_layers; i++) {
 
-      char uuid[32], attr_safe_name[GPU_MAX_SAFE_ATTRIB_NAME];
+      char uuid[32], attr_safe_name[GPU_MAX_SAFE_ATTR_NAME];
       const char *name = CustomData_get_layer_name(&psmd->mesh_final->ldata, CD_MLOOPUV, i);
-      GPU_vertformat_safe_attrib_name(name, attr_safe_name, GPU_MAX_SAFE_ATTRIB_NAME);
+      GPU_vertformat_safe_attr_name(name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME);
 
       BLI_snprintf(uuid, sizeof(uuid), "u%s", attr_safe_name);
       uv_id[i] = GPU_vertformat_attr_add(&format, uuid, GPU_COMP_F32, 2, GPU_FETCH_FLOAT);
@@ -1177,9 +1179,9 @@ static void particle_batch_cache_ensure_pos_and_seg(PTCacheEdit *edit,
     }
 
     for (int i = 0; i < num_col_layers; i++) {
-      char uuid[32], attr_safe_name[GPU_MAX_SAFE_ATTRIB_NAME];
+      char uuid[32], attr_safe_name[GPU_MAX_SAFE_ATTR_NAME];
       const char *name = CustomData_get_layer_name(&psmd->mesh_final->ldata, CD_MLOOPCOL, i);
-      GPU_vertformat_safe_attrib_name(name, attr_safe_name, GPU_MAX_SAFE_ATTRIB_NAME);
+      GPU_vertformat_safe_attr_name(name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME);
 
       BLI_snprintf(uuid, sizeof(uuid), "c%s", attr_safe_name);
       col_id[i] = GPU_vertformat_attr_add(&format, uuid, GPU_COMP_U16, 4, GPU_FETCH_FLOAT);
diff --git a/source/blender/draw/intern/draw_cache_impl_pointcloud.c b/source/blender/draw/intern/draw_cache_impl_pointcloud.c
index 83757cb714a..53939b35285 100644
--- a/source/blender/draw/intern/draw_cache_impl_pointcloud.c
+++ b/source/blender/draw/intern/draw_cache_impl_pointcloud.c
@@ -134,7 +134,7 @@ static void pointcloud_batch_cache_ensure_pos(Object *ob, PointCloudBatchCache *
     /* initialize vertex format */
     pos_id = GPU_vertformat_attr_add(&format, "pointcloud_pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
     radius_id = GPU_vertformat_attr_add(
-        &format, "pointcloud_radius", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
+        &format, "pointcloud_radius", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
   }
 
   GPU_VERTBUF_DISCARD_SAFE(cache->pos);
diff --git a/source/blender/draw/intern/draw_cache_impl_volume.c b/source/blender/draw/intern/draw_cache_impl_volume.c
index cdac8b33fba..9c2c075ab4f 100644
--- a/source/blender/draw/intern/draw_cache_impl_volume.c
+++ b/source/blender/draw/intern/draw_cache_impl_volume.c
@@ -27,6 +27,7 @@
 
 #include "MEM_guardedalloc.h"
 
+#include "BLI_listbase.h"
 #include "BLI_math_base.h"
 #include "BLI_math_vector.h"
 #include "BLI_utildefines.h"
@@ -123,7 +124,7 @@ static void volume_batch_cache_clear(Volume *volume)
     return;
   }
 
-  for (DRWVolumeGrid *grid = cache->grids.first; grid; grid = grid->next) {
+  LISTBASE_FOREACH (DRWVolumeGrid *, grid, &cache->grids) {
     MEM_SAFE_FREE(grid->name);
     DRW_TEXTURE_FREE_SAFE(grid->texture);
   }
@@ -266,6 +267,7 @@ static DRWVolumeGrid *volume_grid_cache_get(Volume *volume,
 
     GPU_texture_bind(cache_grid->texture, 0);
     GPU_texture_swizzle_channel_auto(cache_grid->texture, channels);
+    GPU_texture_wrap_mode(cache_grid->texture, false, false);
     GPU_texture_unbind(cache_grid->texture);
 
     MEM_freeN(voxels);
diff --git a/source/blender/draw/intern/draw_common.h b/source/blender/draw/intern/draw_common.h
index f14cdc0dbde..656d72b2808 100644
--- a/source/blender/draw/intern/draw_common.h
+++ b/source/blender/draw/intern/draw_common.h
@@ -172,23 +172,11 @@ bool DRW_object_axis_orthogonal_to_view(Object *ob, int axis);
 
 /* This creates a shading group with display hairs.
  * The draw call is already added by this function, just add additional uniforms. */
-struct DRWShadingGroup *DRW_shgroup_hair_create(struct Object *object,
-                                                struct ParticleSystem *psys,
-                                                struct ModifierData *md,
-                                                struct DRWPass *hair_pass,
-                                                struct GPUShader *shader);
-
 struct DRWShadingGroup *DRW_shgroup_hair_create_sub(struct Object *object,
                                                     struct ParticleSystem *psys,
                                                     struct ModifierData *md,
                                                     struct DRWShadingGroup *shgrp);
 
-struct DRWShadingGroup *DRW_shgroup_material_hair_create(struct Object *object,
-                                                         struct ParticleSystem *psys,
-                                                         struct ModifierData *md,
-                                                         struct DRWPass *hair_pass,
-                                                         struct GPUMaterial *material);
-
 void DRW_hair_init(void);
 void DRW_hair_update(void);
 void DRW_hair_free(void);
diff --git a/source/blender/draw/intern/draw_hair.c b/source/blender/draw/intern/draw_hair.c
index 048adccc4e6..2fdaf0d5345 100644
--- a/source/blender/draw/intern/draw_hair.c
+++ b/source/blender/draw/intern/draw_hair.c
@@ -32,7 +32,7 @@
 #include "DNA_modifier_types.h"
 #include "DNA_particle_types.h"
 
-#include "BKE_anim.h"
+#include "BKE_duplilist.h"
 
 #include "GPU_batch.h"
 #include "GPU_shader.h"
@@ -89,6 +89,7 @@ static GPUShader *hair_refine_shader_get(ParticleRefineShader sh)
   g_refine_shaders[sh] = DRW_shader_create(vert_with_lib,
                                            NULL,
                                            datatoc_gpu_shader_3D_smooth_color_frag_glsl,
+                                           "#define blender_srgb_to_framebuffer_space(a) a\n"
                                            "#define HAIR_PHASE_SUBDIV\n"
                                            "#define TF_WORKAROUND\n");
 #endif
@@ -123,13 +124,10 @@ void DRW_hair_init(void)
   }
 }
 
-static DRWShadingGroup *drw_shgroup_create_hair_procedural_ex(Object *object,
-                                                              ParticleSystem *psys,
-                                                              ModifierData *md,
-                                                              DRWPass *hair_pass,
-                                                              DRWShadingGroup *shgrp_parent,
-                                                              struct GPUMaterial *gpu_mat,
-                                                              GPUShader *gpu_shader)
+DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object,
+                                             ParticleSystem *psys,
+                                             ModifierData *md,
+                                             DRWShadingGroup *shgrp_parent)
 {
   /* TODO(fclem): Pass the scene as parameter */
   const DRWContextState *draw_ctx = DRW_context_state_get();
@@ -153,24 +151,7 @@ static DRWShadingGroup *drw_shgroup_create_hair_procedural_ex(Object *object,
     need_ft_update = hair_ensure_procedural_data(object, &hair_cache, subdiv, thickness_res);
   }
 
-  DRWShadingGroup *shgrp;
-  if (shgrp_parent) {
-    shgrp = DRW_shgroup_create_sub(shgrp_parent);
-  }
-  else if (gpu_mat) {
-    shgrp = DRW_shgroup_material_create(gpu_mat, hair_pass);
-  }
-  else if (gpu_shader) {
-    shgrp = DRW_shgroup_create(gpu_shader, hair_pass);
-  }
-  else {
-    shgrp = NULL;
-    BLI_assert(0);
-  }
-
-  if (shgrp == NULL) {
-    return NULL;
-  }
+  DRWShadingGroup *shgrp = DRW_shgroup_create_sub(shgrp_parent);
 
   /* TODO optimize this. Only bind the ones GPUMaterial needs. */
   for (int i = 0; i < hair_cache->num_uv_layers; i++) {
@@ -239,10 +220,7 @@ static DRWShadingGroup *drw_shgroup_create_hair_procedural_ex(Object *object,
   DRW_shgroup_uniform_int(shgrp, "hairStrandsRes", &hair_cache->final[subdiv].strands_res, 1);
   DRW_shgroup_uniform_int_copy(shgrp, "hairThicknessRes", thickness_res);
   DRW_shgroup_uniform_float_copy(shgrp, "hairRadShape", hair_rad_shape);
-  DRW_shgroup_uniform_vec4_copy(shgrp, "hairDupliMatrix[0]", dupli_mat[0]);
-  DRW_shgroup_uniform_vec4_copy(shgrp, "hairDupliMatrix[1]", dupli_mat[1]);
-  DRW_shgroup_uniform_vec4_copy(shgrp, "hairDupliMatrix[2]", dupli_mat[2]);
-  DRW_shgroup_uniform_vec4_copy(shgrp, "hairDupliMatrix[3]", dupli_mat[3]);
+  DRW_shgroup_uniform_vec4_array_copy(shgrp, "hairDupliMatrix", dupli_mat, 4);
   DRW_shgroup_uniform_float_copy(shgrp, "hairRadRoot", hair_rad_root);
   DRW_shgroup_uniform_float_copy(shgrp, "hairRadTip", hair_rad_tip);
   DRW_shgroup_uniform_bool_copy(shgrp, "hairCloseTip", hair_close_tip);
@@ -286,29 +264,6 @@ static DRWShadingGroup *drw_shgroup_create_hair_procedural_ex(Object *object,
   return shgrp;
 }
 
-DRWShadingGroup *DRW_shgroup_hair_create(
-    Object *object, ParticleSystem *psys, ModifierData *md, DRWPass *hair_pass, GPUShader *shader)
-{
-  return drw_shgroup_create_hair_procedural_ex(object, psys, md, hair_pass, NULL, NULL, shader);
-}
-
-DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object,
-                                             ParticleSystem *psys,
-                                             ModifierData *md,
-                                             DRWShadingGroup *shgrp)
-{
-  return drw_shgroup_create_hair_procedural_ex(object, psys, md, NULL, shgrp, NULL, NULL);
-}
-
-DRWShadingGroup *DRW_shgroup_material_hair_create(Object *object,
-                                                  ParticleSystem *psys,
-                                                  ModifierData *md,
-                                                  DRWPass *hair_pass,
-                                                  struct GPUMaterial *material)
-{
-  return drw_shgroup_create_hair_procedural_ex(object, psys, md, hair_pass, NULL, material, NULL);
-}
-
 void DRW_hair_update(void)
 {
 #ifndef USE_TRANSFORM_FEEDBACK
diff --git a/source/blender/draw/intern/draw_hair_private.h b/source/blender/draw/intern/draw_hair_private.h
index 4d9eaf88a7d..b599ad389c1 100644
--- a/source/blender/draw/intern/draw_hair_private.h
+++ b/source/blender/draw/intern/draw_hair_private.h
@@ -25,7 +25,7 @@
 #define __DRAW_HAIR_PRIVATE_H__
 
 #define MAX_LAYER_NAME_CT 4 /* u0123456789, u, au, a0123456789 */
-#define MAX_LAYER_NAME_LEN GPU_MAX_SAFE_ATTRIB_NAME + 2
+#define MAX_LAYER_NAME_LEN GPU_MAX_SAFE_ATTR_NAME + 2
 #define MAX_THICKRES 2    /* see eHairType */
 #define MAX_HAIR_SUBDIV 4 /* see hair_subdiv rna */
 
diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c
index cc618c76ccd..e7dff422105 100644
--- a/source/blender/draw/intern/draw_manager.c
+++ b/source/blender/draw/intern/draw_manager.c
@@ -27,14 +27,15 @@
 #include "BLI_memblock.h"
 #include "BLI_rect.h"
 #include "BLI_string.h"
+#include "BLI_task.h"
 #include "BLI_threads.h"
 
 #include "BLF_api.h"
 
-#include "BKE_anim.h"
 #include "BKE_colortools.h"
 #include "BKE_context.h"
 #include "BKE_curve.h"
+#include "BKE_duplilist.h"
 #include "BKE_editmesh.h"
 #include "BKE_global.h"
 #include "BKE_gpencil.h"
@@ -63,11 +64,11 @@
 #include "ED_space_api.h"
 #include "ED_view3d.h"
 
-#include "GPU_draw.h"
 #include "GPU_extensions.h"
 #include "GPU_framebuffer.h"
 #include "GPU_immediate.h"
 #include "GPU_matrix.h"
+#include "GPU_state.h"
 #include "GPU_uniformbuffer.h"
 #include "GPU_viewport.h"
 
@@ -111,17 +112,6 @@ static ListBase DRW_engines = {NULL, NULL};
 static void drw_state_prepare_clean_for_draw(DRWManager *dst)
 {
   memset(dst, 0x0, offsetof(DRWManager, gl_context));
-
-  /* Maybe not the best place for this. */
-  if (!DST.uniform_names.buffer) {
-    DST.uniform_names.buffer = MEM_callocN(DRW_UNIFORM_BUFFER_NAME, "Name Buffer");
-    DST.uniform_names.buffer_len = DRW_UNIFORM_BUFFER_NAME;
-  }
-  else if (DST.uniform_names.buffer_len > DRW_UNIFORM_BUFFER_NAME) {
-    DST.uniform_names.buffer = MEM_reallocN(DST.uniform_names.buffer, DRW_UNIFORM_BUFFER_NAME);
-    DST.uniform_names.buffer_len = DRW_UNIFORM_BUFFER_NAME;
-  }
-  DST.uniform_names.buffer_ofs = 0;
 }
 
 /* This function is used to reset draw manager to a state
@@ -136,6 +126,23 @@ static void drw_state_ensure_not_reused(DRWManager *dst)
 #endif
 
 /* -------------------------------------------------------------------- */
+/** \name Threading
+ * \{ */
+static void drw_task_graph_init(void)
+{
+  BLI_assert(DST.task_graph == NULL);
+  DST.task_graph = BLI_task_graph_create();
+}
+
+static void drw_task_graph_deinit(void)
+{
+  BLI_task_graph_work_and_wait(DST.task_graph);
+  BLI_task_graph_free(DST.task_graph);
+  DST.task_graph = NULL;
+}
+/** \} */
+
+/* -------------------------------------------------------------------- */
 /** \name Settings
  * \{ */
 
@@ -145,11 +152,8 @@ bool DRW_object_is_renderable(const Object *ob)
 
   if (ob->type == OB_MESH) {
     if ((ob == DST.draw_ctx.object_edit) || DRW_object_is_in_edit_mode(ob)) {
-
       View3D *v3d = DST.draw_ctx.v3d;
-      const int mask = (V3D_OVERLAY_EDIT_OCCLUDE_WIRE | V3D_OVERLAY_EDIT_WEIGHT);
-
-      if (v3d && v3d->overlay.edit_flag & mask) {
+      if (v3d && v3d->overlay.edit_flag & V3D_OVERLAY_EDIT_OCCLUDE_WIRE) {
         return false;
       }
     }
@@ -587,9 +591,6 @@ static void drw_viewport_var_init(void)
     ED_view3d_init_mats_rv3d(DST.draw_ctx.object_edit, rv3d);
   }
 
-  /* Alloc array of texture reference. */
-  memset(&DST.RST, 0x0, sizeof(DST.RST));
-
   if (G_draw.view_ubo == NULL) {
     G_draw.view_ubo = DRW_uniformbuffer_create(sizeof(DRWViewUboStorage), NULL);
   }
@@ -693,8 +694,7 @@ void **DRW_duplidata_get(void *vedata)
 
 void *DRW_view_layer_engine_data_get(DrawEngineType *engine_type)
 {
-  for (ViewLayerEngineData *sled = DST.draw_ctx.view_layer->drawdata.first; sled;
-       sled = sled->next) {
+  LISTBASE_FOREACH (ViewLayerEngineData *, sled, &DST.draw_ctx.view_layer->drawdata) {
     if (sled->engine_type == engine_type) {
       return sled->storage;
     }
@@ -925,7 +925,7 @@ void DRW_cache_free_old_batches(Main *bmain)
 
 static void drw_engines_init(void)
 {
-  for (LinkData *link = DST.enabled_engines.first; link; link = link->next) {
+  LISTBASE_FOREACH (LinkData *, link, &DST.enabled_engines) {
     DrawEngineType *engine = link->data;
     ViewportEngineData *data = drw_viewport_engine_data_ensure(engine);
     PROFILE_START(stime);
@@ -969,7 +969,7 @@ static void drw_engines_world_update(Scene *scene)
     return;
   }
 
-  for (LinkData *link = DST.enabled_engines.first; link; link = link->next) {
+  LISTBASE_FOREACH (LinkData *, link, &DST.enabled_engines) {
     DrawEngineType *engine = link->data;
     ViewportEngineData *data = drw_viewport_engine_data_ensure(engine);
 
@@ -1035,7 +1035,7 @@ static void drw_engines_cache_finish(void)
 
 static void drw_engines_draw_scene(void)
 {
-  for (LinkData *link = DST.enabled_engines.first; link; link = link->next) {
+  LISTBASE_FOREACH (LinkData *, link, &DST.enabled_engines) {
     DrawEngineType *engine = link->data;
     ViewportEngineData *data = drw_viewport_engine_data_ensure(engine);
     PROFILE_START(stime);
@@ -1058,7 +1058,7 @@ static void drw_engines_draw_scene(void)
 
 static void drw_engines_draw_text(void)
 {
-  for (LinkData *link = DST.enabled_engines.first; link; link = link->next) {
+  LISTBASE_FOREACH (LinkData *, link, &DST.enabled_engines) {
     DrawEngineType *engine = link->data;
     ViewportEngineData *data = drw_viewport_engine_data_ensure(engine);
     PROFILE_START(stime);
@@ -1072,9 +1072,9 @@ static void drw_engines_draw_text(void)
 }
 
 /* Draw render engine info. */
-void DRW_draw_region_engine_info(int xoffset, int yoffset)
+void DRW_draw_region_engine_info(int xoffset, int *yoffset, int line_height)
 {
-  for (LinkData *link = DST.enabled_engines.first; link; link = link->next) {
+  LISTBASE_FOREACH (LinkData *, link, &DST.enabled_engines) {
     DrawEngineType *engine = link->data;
     ViewportEngineData *data = drw_viewport_engine_data_ensure(engine);
 
@@ -1095,8 +1095,8 @@ void DRW_draw_region_engine_info(int xoffset, int yoffset)
         if (*chr_current == '\n') {
           char info[GPU_INFO_SIZE];
           BLI_strncpy(info, chr_start, line_len + 1);
-          yoffset -= U.widget_unit;
-          BLF_draw_default(xoffset, yoffset, 0.0f, info, sizeof(info));
+          *yoffset -= line_height;
+          BLF_draw_default(xoffset, *yoffset, 0.0f, info, sizeof(info));
 
           /* Re-start counting. */
           chr_start = chr_current + 1;
@@ -1106,8 +1106,8 @@ void DRW_draw_region_engine_info(int xoffset, int yoffset)
 
       char info[GPU_INFO_SIZE];
       BLI_strncpy(info, chr_start, line_len + 1);
-      yoffset -= U.widget_unit;
-      BLF_draw_default(xoffset, yoffset, 0.0f, info, sizeof(info));
+      *yoffset -= line_height;
+      BLF_draw_default(xoffset, *yoffset, 0.0f, info, sizeof(info));
 
       BLF_disable(font_id, BLF_SHADOW);
     }
@@ -1181,7 +1181,7 @@ static void drw_engines_data_validate(void)
   void **engine_handle_array = BLI_array_alloca(engine_handle_array, enabled_engines + 1);
   int i = 0;
 
-  for (LinkData *link = DST.enabled_engines.first; link; link = link->next) {
+  LISTBASE_FOREACH (LinkData *, link, &DST.enabled_engines) {
     DrawEngineType *engine = link->data;
     engine_handle_array[i++] = engine;
   }
@@ -1248,7 +1248,7 @@ void DRW_notify_view_update(const DRWUpdateContext *update_ctx)
     drw_engines_enable(view_layer, engine_type, gpencil_engine_needed);
     drw_engines_data_validate();
 
-    for (LinkData *link = DST.enabled_engines.first; link; link = link->next) {
+    LISTBASE_FOREACH (LinkData *, link, &DST.enabled_engines) {
       DrawEngineType *draw_engine = link->data;
       ViewportEngineData *data = drw_viewport_engine_data_ensure(draw_engine);
 
@@ -1300,9 +1300,6 @@ void DRW_draw_callbacks_post_scene(void)
     DRW_state_reset();
 
     GPU_framebuffer_bind(dfbl->overlay_fb);
-    /* Disable sRGB encoding from the fixed function pipeline since all the drawing in this
-     * function is done with sRGB color. Avoid double transform. */
-    glDisable(GL_FRAMEBUFFER_SRGB);
 
     GPU_matrix_projection_set(rv3d->winmat);
     GPU_matrix_set(rv3d->viewmat);
@@ -1431,6 +1428,7 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph,
       /* reuse if caller sets */
       .evil_C = DST.draw_ctx.evil_C,
   };
+  drw_task_graph_init();
   drw_context_state_init();
 
   drw_viewport_var_init();
@@ -1495,6 +1493,7 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph,
 #endif
   }
 
+  drw_task_graph_deinit();
   DRW_stats_begin();
 
   GPU_framebuffer_bind(DST.default_framebuffer);
@@ -1514,6 +1513,8 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph,
   /* Fix 3D view being "laggy" on macos and win+nvidia. (See T56996, T61474) */
   GPU_flush();
 
+  DRW_stats_reset();
+
   DRW_draw_callbacks_post_scene();
 
   if (WM_draw_region_get_bound_viewport(region)) {
@@ -1773,7 +1774,6 @@ void DRW_render_to_image(RenderEngine *engine, struct Depsgraph *depsgraph)
   DST.options.is_image_render = true;
   DST.options.is_scene_render = true;
   DST.options.draw_background = scene->r.alphamode == R_ADDSKY;
-
   DST.draw_ctx = (DRWContextState){
       .scene = scene,
       .view_layer = view_layer,
@@ -1858,9 +1858,9 @@ void DRW_render_object_iter(
     void (*callback)(void *vedata, Object *ob, RenderEngine *engine, struct Depsgraph *depsgraph))
 {
   const DRWContextState *draw_ctx = DRW_context_state_get();
-
   DRW_hair_init();
 
+  drw_task_graph_init();
   const int object_type_exclude_viewport = draw_ctx->v3d ?
                                                draw_ctx->v3d->object_type_exclude_viewport :
                                                0;
@@ -1883,6 +1883,7 @@ void DRW_render_object_iter(
   DEG_OBJECT_ITER_FOR_RENDER_ENGINE_END;
 
   drw_duplidata_free();
+  drw_task_graph_deinit();
 }
 
 /* Assume a valid gl context is bound (and that the gl_context_mutex has been acquired).
@@ -2032,7 +2033,7 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph,
         Object *obweight = OBWEIGHTPAINT_FROM_OBACT(obact);
         if (obweight) {
           /* Only use Armature pose selection, when connected armature is in pose mode. */
-          Object *ob_armature = modifiers_isDeformedByArmature(obweight);
+          Object *ob_armature = BKE_modifiers_is_deformed_by_armature(obweight);
           if (ob_armature && ob_armature->mode == OB_MODE_POSE) {
             obpose = ob_armature;
           }
@@ -2054,14 +2055,16 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph,
 
   DST.viewport = viewport;
   DST.options.is_select = true;
-
+  drw_task_graph_init();
   /* Get list of enabled engines */
   if (use_obedit) {
     drw_engines_enable_overlays();
   }
   else if (!draw_surface) {
     /* grease pencil selection */
-    use_drw_engine(&draw_engine_gpencil_type);
+    if (drw_gpencil_engine_needed(depsgraph, v3d)) {
+      use_drw_engine(&draw_engine_gpencil_type);
+    }
 
     drw_engines_enable_overlays();
   }
@@ -2069,7 +2072,9 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph,
     /* Draw surface for occlusion. */
     drw_engines_enable_basic();
     /* grease pencil selection */
-    use_drw_engine(&draw_engine_gpencil_type);
+    if (drw_gpencil_engine_needed(depsgraph, v3d)) {
+      use_drw_engine(&draw_engine_gpencil_type);
+    }
 
     drw_engines_enable_overlays();
   }
@@ -2160,6 +2165,7 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph,
     }
 
     drw_duplidata_free();
+    drw_task_graph_deinit();
     drw_engines_cache_finish();
 
     DRW_render_instance_buffer_finish();
@@ -2240,7 +2246,7 @@ static void drw_draw_depth_loop_imp(struct Depsgraph *depsgraph,
       .engine_type = engine_type,
       .depsgraph = depsgraph,
   };
-
+  drw_task_graph_init();
   drw_engines_data_validate();
 
   /* Setup framebuffer */
@@ -2284,6 +2290,7 @@ static void drw_draw_depth_loop_imp(struct Depsgraph *depsgraph,
 
     DRW_render_instance_buffer_finish();
   }
+  drw_task_graph_deinit();
 
   /* Start Drawing */
   DRW_state_reset();
@@ -2373,7 +2380,7 @@ void DRW_draw_select_id(Depsgraph *depsgraph, ARegion *region, View3D *v3d, cons
       .obact = OBACT(view_layer),
       .depsgraph = depsgraph,
   };
-
+  drw_task_graph_init();
   drw_context_state_init();
 
   /* Setup viewport */
@@ -2408,6 +2415,7 @@ void DRW_draw_select_id(Depsgraph *depsgraph, ARegion *region, View3D *v3d, cons
     drw_resource_buffer_finish(DST.vmempool);
 #endif
   }
+  drw_task_graph_deinit();
 
   /* Start Drawing */
   DRW_state_reset();
@@ -2433,7 +2441,8 @@ static void draw_world_clip_planes_from_rv3d(GPUBatch *batch, const float world_
 /**
  * Clears the Depth Buffer and draws only the specified object.
  */
-void DRW_draw_depth_object(ARegion *region, View3D *v3d, GPUViewport *viewport, Object *object)
+void DRW_draw_depth_object(
+    Scene *scene, ARegion *region, View3D *v3d, GPUViewport *viewport, Object *object)
 {
   RegionView3D *rv3d = region->regiondata;
 
@@ -2470,8 +2479,10 @@ void DRW_draw_depth_object(ARegion *region, View3D *v3d, GPUViewport *viewport,
       else {
         batch = DRW_mesh_batch_cache_get_surface(me);
       }
-
-      DRW_mesh_batch_cache_create_requested(object, me, NULL, false, true);
+      struct TaskGraph *task_graph = BLI_task_graph_create();
+      DRW_mesh_batch_cache_create_requested(task_graph, object, me, scene, false, true);
+      BLI_task_graph_work_and_wait(task_graph);
+      BLI_task_graph_free(task_graph);
 
       const eGPUShaderConfig sh_cfg = world_clip_planes ? GPU_SHADER_CFG_CLIPPED :
                                                           GPU_SHADER_CFG_DEFAULT;
@@ -2575,6 +2586,15 @@ bool DRW_state_is_playback(void)
 }
 
 /**
+ * Is the user navigating or painting in the region.
+ */
+bool DRW_state_is_navigating(void)
+{
+  const RegionView3D *rv3d = DST.draw_ctx.rv3d;
+  return (rv3d) && (rv3d->rflag & (RV3D_NAVIGATING | RV3D_PAINTING));
+}
+
+/**
  * Should text draw in this mode?
  */
 bool DRW_state_show_text(void)
@@ -2705,8 +2725,6 @@ void DRW_engines_free(void)
   DRW_TEXTURE_FREE_SAFE(G_draw.ramp);
   DRW_TEXTURE_FREE_SAFE(G_draw.weight_ramp);
 
-  MEM_SAFE_FREE(DST.uniform_names.buffer);
-
   if (DST.draw_list) {
     GPU_draw_list_discard(DST.draw_list);
   }
diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h
index df7f0597017..6cae2a4f9f6 100644
--- a/source/blender/draw/intern/draw_manager.h
+++ b/source/blender/draw/intern/draw_manager.h
@@ -31,6 +31,7 @@
 #include "BLI_assert.h"
 #include "BLI_linklist.h"
 #include "BLI_memblock.h"
+#include "BLI_task.h"
 #include "BLI_threads.h"
 
 #include "GPU_batch.h"
@@ -210,7 +211,7 @@ typedef struct DRWCommandDrawInstance {
   GPUBatch *batch;
   DRWResourceHandle handle;
   uint inst_count;
-  uint use_attribs; /* bool */
+  uint use_attrs; /* bool */
 } DRWCommandDrawInstance;
 
 typedef struct DRWCommandDrawInstanceRange {
@@ -276,10 +277,9 @@ typedef enum {
   DRW_UNIFORM_FLOAT,
   DRW_UNIFORM_FLOAT_COPY,
   DRW_UNIFORM_TEXTURE,
-  DRW_UNIFORM_TEXTURE_PERSIST,
   DRW_UNIFORM_TEXTURE_REF,
   DRW_UNIFORM_BLOCK,
-  DRW_UNIFORM_BLOCK_PERSIST,
+  DRW_UNIFORM_BLOCK_REF,
   DRW_UNIFORM_TFEEDBACK_TARGET,
   /** Per drawcall uniforms/UBO */
   DRW_UNIFORM_BLOCK_OBMATS,
@@ -290,7 +290,6 @@ typedef enum {
   DRW_UNIFORM_BASE_INSTANCE,
   DRW_UNIFORM_MODEL_MATRIX,
   DRW_UNIFORM_MODEL_MATRIX_INVERSE,
-  DRW_UNIFORM_MODELVIEWPROJECTION_MATRIX,
   /* WARNING: set DRWUniform->type
    * bit length accordingly. */
 } DRWUniformType;
@@ -299,15 +298,28 @@ struct DRWUniform {
   union {
     /* For reference or array/vector types. */
     const void *pvalue;
-    /* Single values. */
+    /* DRW_UNIFORM_TEXTURE */
+    struct {
+      union {
+        GPUTexture *texture;
+        GPUTexture **texture_ref;
+      };
+      eGPUSamplerState sampler_state;
+    };
+    /* DRW_UNIFORM_BLOCK */
+    union {
+      GPUUniformBuffer *block;
+      GPUUniformBuffer **block_ref;
+    };
+    /* DRW_UNIFORM_FLOAT_COPY */
     float fvalue[4];
+    /* DRW_UNIFORM_INT_COPY */
     int ivalue[4];
   };
-  int location;
-  uint32_t type : 5;      /* DRWUniformType */
-  uint32_t length : 5;    /* cannot be more than 16 */
-  uint32_t arraysize : 5; /* cannot be more than 16 too */
-  uint32_t name_ofs : 17; /* name offset in name buffer. */
+  int location;      /* Uniform location or binding point for textures and ubos. */
+  uint8_t type;      /* DRWUniformType */
+  uint8_t length;    /* Length of vector types. */
+  uint8_t arraysize; /* Array size of scalar/vector types. */
 };
 
 struct DRWShadingGroup {
@@ -322,10 +334,13 @@ struct DRWShadingGroup {
   } cmd;
 
   union {
+    /* This struct is used during cache populate. */
     struct {
       int objectinfo;                /* Equal to 1 if the shader needs obinfos. */
       DRWResourceHandle pass_handle; /* Memblock key to parent pass. */
     };
+    /* This struct is used after cache populate if using the Z sorting.
+     * It will not conflict with the above struct. */
     struct {
       float distance;      /* Distance from camera. */
       uint original_index; /* Original position inside the shgroup list. */
@@ -342,6 +357,13 @@ struct DRWPass {
     DRWShadingGroup *last;
   } shgroups;
 
+  /* Draw the shgroups of this pass instead.
+   * This avoids duplicating drawcalls/shgroups
+   * for similar passes. */
+  DRWPass *original;
+  /* Link list of additional passes to render. */
+  DRWPass *next;
+
   DRWResourceHandle handle;
   DRWState state;
   char name[MAX_PASS_NAME];
@@ -525,6 +547,8 @@ typedef struct DRWManager {
   uint select_id;
 #endif
 
+  struct TaskGraph *task_graph;
+
   /* ---------- Nothing after this point is cleared after use ----------- */
 
   /* gl_context serves as the offset for clearing only
@@ -537,31 +561,11 @@ typedef struct DRWManager {
 
   GPUDrawList *draw_list;
 
-  /** GPU Resource State: Memory storage between drawing. */
-  struct {
-    /* High end GPUs supports up to 32 binds per shader stage.
-     * We only use textures during the vertex and fragment stage,
-     * so 2 * 32 slots is a nice limit. */
-    GPUTexture *bound_texs[DST_MAX_SLOTS];
-    uint64_t bound_tex_slots;
-    uint64_t bound_tex_slots_persist;
-
-    GPUUniformBuffer *bound_ubos[DST_MAX_SLOTS];
-    uint64_t bound_ubo_slots;
-    uint64_t bound_ubo_slots_persist;
-  } RST;
-
   struct {
     /* TODO(fclem) optimize: use chunks. */
     DRWDebugLine *lines;
     DRWDebugSphere *spheres;
   } debug;
-
-  struct {
-    char *buffer;
-    uint buffer_len;
-    uint buffer_ofs;
-  } uniform_names;
 } DRWManager;
 
 extern DRWManager DST; /* TODO: get rid of this and allow multi-threaded rendering. */
diff --git a/source/blender/draw/intern/draw_manager_data.c b/source/blender/draw/intern/draw_manager_data.c
index 94a3e9e8343..ea67dd87772 100644
--- a/source/blender/draw/intern/draw_manager_data.c
+++ b/source/blender/draw/intern/draw_manager_data.c
@@ -22,8 +22,8 @@
 
 #include "draw_manager.h"
 
-#include "BKE_anim.h"
 #include "BKE_curve.h"
+#include "BKE_duplilist.h"
 #include "BKE_global.h"
 #include "BKE_image.h"
 #include "BKE_mesh.h"
@@ -38,6 +38,7 @@
 #include "BLI_alloca.h"
 #include "BLI_hash.h"
 #include "BLI_link_utils.h"
+#include "BLI_listbase.h"
 #include "BLI_memblock.h"
 #include "BLI_mempool.h"
 
@@ -169,13 +170,20 @@ void drw_resource_buffer_finish(ViewportMemoryPool *vmempool)
 /** \name Uniforms (DRW_shgroup_uniform)
  * \{ */
 
-static DRWUniform *drw_shgroup_uniform_create_ex(DRWShadingGroup *shgroup,
-                                                 int loc,
-                                                 DRWUniformType type,
-                                                 const void *value,
-                                                 int length,
-                                                 int arraysize)
+static void drw_shgroup_uniform_create_ex(DRWShadingGroup *shgroup,
+                                          int loc,
+                                          DRWUniformType type,
+                                          const void *value,
+                                          eGPUSamplerState sampler_state,
+                                          int length,
+                                          int arraysize)
 {
+  if (loc == -1) {
+    /* Nice to enable eventually, for now eevee uses uniforms that might not exist. */
+    // BLI_assert(0);
+    return;
+  }
+
   DRWUniformChunk *unichunk = shgroup->uniforms;
   /* Happens on first uniform or if chunk is full. */
   if (!unichunk || unichunk->uniform_used == unichunk->uniform_len) {
@@ -201,22 +209,24 @@ static DRWUniform *drw_shgroup_uniform_create_ex(DRWShadingGroup *shgroup,
       BLI_assert(length <= 4);
       memcpy(uni->fvalue, value, sizeof(float) * length);
       break;
+    case DRW_UNIFORM_BLOCK:
+      uni->block = (GPUUniformBuffer *)value;
+      break;
+    case DRW_UNIFORM_BLOCK_REF:
+      uni->block_ref = (GPUUniformBuffer **)value;
+      break;
+    case DRW_UNIFORM_TEXTURE:
+      uni->texture = (GPUTexture *)value;
+      uni->sampler_state = sampler_state;
+      break;
+    case DRW_UNIFORM_TEXTURE_REF:
+      uni->texture_ref = (GPUTexture **)value;
+      uni->sampler_state = sampler_state;
+      break;
     default:
       uni->pvalue = (const float *)value;
       break;
   }
-
-  return uni;
-}
-
-static void drw_shgroup_builtin_uniform(
-    DRWShadingGroup *shgroup, int builtin, const void *value, int length, int arraysize)
-{
-  int loc = GPU_shader_get_builtin_uniform(shgroup->shader, builtin);
-
-  if (loc != -1) {
-    drw_shgroup_uniform_create_ex(shgroup, loc, DRW_UNIFORM_FLOAT, value, length, arraysize);
-  }
 }
 
 static void drw_shgroup_uniform(DRWShadingGroup *shgroup,
@@ -226,61 +236,29 @@ static void drw_shgroup_uniform(DRWShadingGroup *shgroup,
                                 int length,
                                 int arraysize)
 {
-  int location;
-  if (ELEM(type, DRW_UNIFORM_BLOCK, DRW_UNIFORM_BLOCK_PERSIST)) {
-    location = GPU_shader_get_uniform_block(shgroup->shader, name);
-  }
-  else {
-    location = GPU_shader_get_uniform(shgroup->shader, name);
-  }
-
-  if (location == -1) {
-    /* Nice to enable eventually, for now eevee uses uniforms that might not exist. */
-    // BLI_assert(0);
-    return;
-  }
-
   BLI_assert(arraysize > 0 && arraysize <= 16);
   BLI_assert(length >= 0 && length <= 16);
-
-  DRWUniform *uni = drw_shgroup_uniform_create_ex(
-      shgroup, location, type, value, length, arraysize);
-
-  /* If location is -2, the uniform has not yet been queried.
-   * We save the name for query just before drawing. */
-  if (location == -2 || DRW_DEBUG_USE_UNIFORM_NAME) {
-    int ofs = DST.uniform_names.buffer_ofs;
-    int max_len = DST.uniform_names.buffer_len - ofs;
-    size_t len = strlen(name) + 1;
-
-    if (len >= max_len) {
-      DST.uniform_names.buffer_len += MAX2(DST.uniform_names.buffer_len, len);
-      DST.uniform_names.buffer = MEM_reallocN(DST.uniform_names.buffer,
-                                              DST.uniform_names.buffer_len);
-    }
-
-    char *dst = DST.uniform_names.buffer + ofs;
-    memcpy(dst, name, len); /* Copies NULL terminator. */
-
-    DST.uniform_names.buffer_ofs += len;
-    uni->name_ofs = ofs;
-  }
+  BLI_assert(!ELEM(type,
+                   DRW_UNIFORM_BLOCK,
+                   DRW_UNIFORM_BLOCK_REF,
+                   DRW_UNIFORM_TEXTURE,
+                   DRW_UNIFORM_TEXTURE_REF));
+  int location = GPU_shader_get_uniform(shgroup->shader, name);
+  drw_shgroup_uniform_create_ex(shgroup, location, type, value, 0, length, arraysize);
 }
 
 void DRW_shgroup_uniform_texture(DRWShadingGroup *shgroup, const char *name, const GPUTexture *tex)
 {
   BLI_assert(tex != NULL);
-  drw_shgroup_uniform(shgroup, name, DRW_UNIFORM_TEXTURE, tex, 0, 1);
+  int loc = GPU_shader_get_texture_binding(shgroup->shader, name);
+  drw_shgroup_uniform_create_ex(shgroup, loc, DRW_UNIFORM_TEXTURE, tex, GPU_SAMPLER_MAX, 0, 1);
 }
 
-/* Same as DRW_shgroup_uniform_texture but is guaranteed to be bound if shader does not change
- * between shgrp. */
-void DRW_shgroup_uniform_texture_persistent(DRWShadingGroup *shgroup,
-                                            const char *name,
-                                            const GPUTexture *tex)
+void DRW_shgroup_uniform_texture_ref(DRWShadingGroup *shgroup, const char *name, GPUTexture **tex)
 {
   BLI_assert(tex != NULL);
-  drw_shgroup_uniform(shgroup, name, DRW_UNIFORM_TEXTURE_PERSIST, tex, 0, 1);
+  int loc = GPU_shader_get_texture_binding(shgroup->shader, name);
+  drw_shgroup_uniform_create_ex(shgroup, loc, DRW_UNIFORM_TEXTURE_REF, tex, GPU_SAMPLER_MAX, 0, 1);
 }
 
 void DRW_shgroup_uniform_block(DRWShadingGroup *shgroup,
@@ -288,22 +266,17 @@ void DRW_shgroup_uniform_block(DRWShadingGroup *shgroup,
                                const GPUUniformBuffer *ubo)
 {
   BLI_assert(ubo != NULL);
-  drw_shgroup_uniform(shgroup, name, DRW_UNIFORM_BLOCK, ubo, 0, 1);
+  int loc = GPU_shader_get_uniform_block_binding(shgroup->shader, name);
+  drw_shgroup_uniform_create_ex(shgroup, loc, DRW_UNIFORM_BLOCK, ubo, 0, 0, 1);
 }
 
-/* Same as DRW_shgroup_uniform_block but is guaranteed to be bound if shader does not change
- * between shgrp. */
-void DRW_shgroup_uniform_block_persistent(DRWShadingGroup *shgroup,
-                                          const char *name,
-                                          const GPUUniformBuffer *ubo)
+void DRW_shgroup_uniform_block_ref(DRWShadingGroup *shgroup,
+                                   const char *name,
+                                   GPUUniformBuffer **ubo)
 {
   BLI_assert(ubo != NULL);
-  drw_shgroup_uniform(shgroup, name, DRW_UNIFORM_BLOCK_PERSIST, ubo, 0, 1);
-}
-
-void DRW_shgroup_uniform_texture_ref(DRWShadingGroup *shgroup, const char *name, GPUTexture **tex)
-{
-  drw_shgroup_uniform(shgroup, name, DRW_UNIFORM_TEXTURE_REF, tex, 0, 1);
+  int loc = GPU_shader_get_uniform_block_binding(shgroup->shader, name);
+  drw_shgroup_uniform_create_ex(shgroup, loc, DRW_UNIFORM_BLOCK_REF, ubo, 0, 0, 1);
 }
 
 void DRW_shgroup_uniform_bool(DRWShadingGroup *shgroup,
@@ -435,6 +408,25 @@ void DRW_shgroup_uniform_vec4_copy(DRWShadingGroup *shgroup, const char *name, c
   drw_shgroup_uniform(shgroup, name, DRW_UNIFORM_FLOAT_COPY, value, 4, 1);
 }
 
+void DRW_shgroup_uniform_vec4_array_copy(DRWShadingGroup *shgroup,
+                                         const char *name,
+                                         const float (*value)[4],
+                                         int arraysize)
+{
+  int location = GPU_shader_get_uniform(shgroup->shader, name);
+
+  if (location == -1) {
+    /* Nice to enable eventually, for now eevee uses uniforms that might not exist. */
+    // BLI_assert(0);
+    return;
+  }
+
+  for (int i = 0; i < arraysize; i++) {
+    drw_shgroup_uniform_create_ex(
+        shgroup, location + i, DRW_UNIFORM_FLOAT_COPY, &value[i], 0, 4, 1);
+  }
+}
+
 /** \} */
 
 /* -------------------------------------------------------------------- */
@@ -657,17 +649,14 @@ static void drw_command_draw_range(
   cmd->vert_count = count;
 }
 
-static void drw_command_draw_instance(DRWShadingGroup *shgroup,
-                                      GPUBatch *batch,
-                                      DRWResourceHandle handle,
-                                      uint count,
-                                      bool use_attrib)
+static void drw_command_draw_instance(
+    DRWShadingGroup *shgroup, GPUBatch *batch, DRWResourceHandle handle, uint count, bool use_attr)
 {
   DRWCommandDrawInstance *cmd = drw_command_create(shgroup, DRW_CMD_DRAW_INSTANCE);
   cmd->batch = batch;
   cmd->handle = handle;
   cmd->inst_count = count;
-  cmd->use_attribs = use_attrib;
+  cmd->use_attrs = use_attr;
 }
 
 static void drw_command_draw_intance_range(
@@ -841,10 +830,10 @@ void DRW_shgroup_call_instances(DRWShadingGroup *shgroup,
   drw_command_draw_instance(shgroup, geom, handle, count, false);
 }
 
-void DRW_shgroup_call_instances_with_attribs(DRWShadingGroup *shgroup,
-                                             Object *ob,
-                                             struct GPUBatch *geom,
-                                             struct GPUBatch *inst_attributes)
+void DRW_shgroup_call_instances_with_attrs(DRWShadingGroup *shgroup,
+                                           Object *ob,
+                                           struct GPUBatch *geom,
+                                           struct GPUBatch *inst_attributes)
 {
   BLI_assert(geom != NULL);
   BLI_assert(inst_attributes != NULL);
@@ -860,6 +849,7 @@ void DRW_shgroup_call_instances_with_attribs(DRWShadingGroup *shgroup,
 typedef struct DRWSculptCallbackData {
   Object *ob;
   DRWShadingGroup **shading_groups;
+  int num_shading_groups;
   bool use_wire;
   bool use_mats;
   bool use_mask;
@@ -884,7 +874,11 @@ static float sculpt_debug_colors[9][4] = {
 
 static void sculpt_draw_cb(DRWSculptCallbackData *scd, GPU_PBVH_Buffers *buffers)
 {
+  if (!buffers) {
+    return;
+  }
 
+  /* Meh... use_mask is a bit misleading here. */
   if (scd->use_mask && !GPU_pbvh_buffers_has_overlays(buffers)) {
     return;
   }
@@ -894,6 +888,9 @@ static void sculpt_draw_cb(DRWSculptCallbackData *scd, GPU_PBVH_Buffers *buffers
 
   if (scd->use_mats) {
     index = GPU_pbvh_buffers_material_index_get(buffers);
+    if (index >= scd->num_shading_groups) {
+      index = 0;
+    }
   }
 
   DRWShadingGroup *shgrp = scd->shading_groups[index];
@@ -948,7 +945,7 @@ static void drw_sculpt_get_frustum_planes(Object *ob, float planes[6][4])
   }
 }
 
-static void drw_sculpt_generate_calls(DRWSculptCallbackData *scd, bool use_vcol)
+static void drw_sculpt_generate_calls(DRWSculptCallbackData *scd)
 {
   /* PBVH should always exist for non-empty meshes, created by depsgrah eval. */
   PBVH *pbvh = (scd->ob->sculpt) ? scd->ob->sculpt->pbvh : NULL;
@@ -958,32 +955,60 @@ static void drw_sculpt_generate_calls(DRWSculptCallbackData *scd, bool use_vcol)
 
   const DRWContextState *drwctx = DRW_context_state_get();
   RegionView3D *rv3d = drwctx->rv3d;
+  const bool navigating = rv3d && (rv3d->rflag & RV3D_NAVIGATING);
+
+  Paint *p = NULL;
+  if (drwctx->evil_C != NULL) {
+    p = BKE_paint_get_active_from_context(drwctx->evil_C);
+  }
 
   /* Frustum planes to show only visible PBVH nodes. */
-  float planes[6][4];
-  drw_sculpt_get_frustum_planes(scd->ob, planes);
-  PBVHFrustumPlanes frustum = {.planes = planes, .num_planes = 6};
+  float update_planes[6][4];
+  float draw_planes[6][4];
+  PBVHFrustumPlanes update_frustum;
+  PBVHFrustumPlanes draw_frustum;
+
+  if (p && (p->flags & PAINT_SCULPT_DELAY_UPDATES)) {
+    update_frustum.planes = update_planes;
+    update_frustum.num_planes = 6;
+    BKE_pbvh_get_frustum_planes(pbvh, &update_frustum);
+    if (!navigating) {
+      drw_sculpt_get_frustum_planes(scd->ob, update_planes);
+      update_frustum.planes = update_planes;
+      update_frustum.num_planes = 6;
+      BKE_pbvh_set_frustum_planes(pbvh, &update_frustum);
+    }
+  }
+  else {
+    drw_sculpt_get_frustum_planes(scd->ob, update_planes);
+    update_frustum.planes = update_planes;
+    update_frustum.num_planes = 6;
+  }
+
+  drw_sculpt_get_frustum_planes(scd->ob, draw_planes);
+  draw_frustum.planes = draw_planes;
+  draw_frustum.num_planes = 6;
 
   /* Fast mode to show low poly multires while navigating. */
   scd->fast_mode = false;
-  if (drwctx->evil_C != NULL) {
-    Paint *p = BKE_paint_get_active_from_context(drwctx->evil_C);
-    if (p && (p->flags & PAINT_FAST_NAVIGATE)) {
-      scd->fast_mode = rv3d && (rv3d->rflag & RV3D_NAVIGATING);
-    }
+  if (p && (p->flags & PAINT_FAST_NAVIGATE)) {
+    scd->fast_mode = rv3d && (rv3d->rflag & RV3D_NAVIGATING);
   }
 
   /* Update draw buffers only for visible nodes while painting.
    * But do update them otherwise so navigating stays smooth. */
-  const bool update_only_visible = rv3d && (rv3d->rflag & RV3D_PAINTING);
+  bool update_only_visible = rv3d && !(rv3d->rflag & RV3D_PAINTING);
+  if (p && (p->flags & PAINT_SCULPT_DELAY_UPDATES)) {
+    update_only_visible = true;
+  }
 
   Mesh *mesh = scd->ob->data;
   BKE_pbvh_update_normals(pbvh, mesh->runtime.subdiv_ccg);
 
   BKE_pbvh_draw_cb(pbvh,
-                   use_vcol,
                    update_only_visible,
-                   &frustum,
+                   &update_frustum,
+                   &draw_frustum,
                    (void (*)(void *, GPU_PBVH_Buffers *))sculpt_draw_cb,
                    scd);
 
@@ -998,29 +1023,32 @@ static void drw_sculpt_generate_calls(DRWSculptCallbackData *scd, bool use_vcol)
   }
 }
 
-void DRW_shgroup_call_sculpt(
-    DRWShadingGroup *shgroup, Object *ob, bool use_wire, bool use_mask, bool use_vcol)
+void DRW_shgroup_call_sculpt(DRWShadingGroup *shgroup, Object *ob, bool use_wire, bool use_mask)
 {
   DRWSculptCallbackData scd = {
       .ob = ob,
       .shading_groups = &shgroup,
+      .num_shading_groups = 1,
       .use_wire = use_wire,
       .use_mats = false,
       .use_mask = use_mask,
   };
-  drw_sculpt_generate_calls(&scd, use_vcol);
+  drw_sculpt_generate_calls(&scd);
 }
 
-void DRW_shgroup_call_sculpt_with_materials(DRWShadingGroup **shgroups, Object *ob, bool use_vcol)
+void DRW_shgroup_call_sculpt_with_materials(DRWShadingGroup **shgroups,
+                                            int num_shgroups,
+                                            Object *ob)
 {
   DRWSculptCallbackData scd = {
       .ob = ob,
       .shading_groups = shgroups,
+      .num_shading_groups = num_shgroups,
       .use_wire = false,
       .use_mats = true,
       .use_mask = false,
   };
-  drw_sculpt_generate_calls(&scd, use_vcol);
+  drw_sculpt_generate_calls(&scd);
 }
 
 static GPUVertFormat inst_select_format = {0};
@@ -1140,53 +1168,49 @@ static void drw_shgroup_init(DRWShadingGroup *shgroup, GPUShader *shader)
 {
   shgroup->uniforms = NULL;
 
-  /* TODO(fclem) make them builtin. */
-  int view_ubo_location = GPU_shader_get_uniform_block(shader, "viewBlock");
-  int model_ubo_location = GPU_shader_get_uniform_block(shader, "modelBlock");
-  int info_ubo_location = GPU_shader_get_uniform_block(shader, "infoBlock");
+  int view_ubo_location = GPU_shader_get_builtin_block(shader, GPU_UNIFORM_BLOCK_VIEW);
+  int model_ubo_location = GPU_shader_get_builtin_block(shader, GPU_UNIFORM_BLOCK_MODEL);
+  int info_ubo_location = GPU_shader_get_builtin_block(shader, GPU_UNIFORM_BLOCK_INFO);
   int baseinst_location = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_BASE_INSTANCE);
   int chunkid_location = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_RESOURCE_CHUNK);
   int resourceid_location = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_RESOURCE_ID);
 
   if (chunkid_location != -1) {
     drw_shgroup_uniform_create_ex(
-        shgroup, chunkid_location, DRW_UNIFORM_RESOURCE_CHUNK, NULL, 0, 1);
+        shgroup, chunkid_location, DRW_UNIFORM_RESOURCE_CHUNK, NULL, 0, 0, 1);
   }
 
   if (resourceid_location != -1) {
     drw_shgroup_uniform_create_ex(
-        shgroup, resourceid_location, DRW_UNIFORM_RESOURCE_ID, NULL, 0, 1);
+        shgroup, resourceid_location, DRW_UNIFORM_RESOURCE_ID, NULL, 0, 0, 1);
   }
 
   if (baseinst_location != -1) {
     drw_shgroup_uniform_create_ex(
-        shgroup, baseinst_location, DRW_UNIFORM_BASE_INSTANCE, NULL, 0, 1);
+        shgroup, baseinst_location, DRW_UNIFORM_BASE_INSTANCE, NULL, 0, 0, 1);
   }
 
   if (model_ubo_location != -1) {
     drw_shgroup_uniform_create_ex(
-        shgroup, model_ubo_location, DRW_UNIFORM_BLOCK_OBMATS, NULL, 0, 1);
+        shgroup, model_ubo_location, DRW_UNIFORM_BLOCK_OBMATS, NULL, 0, 0, 1);
   }
   else {
+    /* Note: This is only here to support old hardware fallback where uniform buffer is still
+     * too slow or buggy. */
     int model = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODEL);
     int modelinverse = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODEL_INV);
-    int modelviewprojection = GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MVP);
     if (model != -1) {
-      drw_shgroup_uniform_create_ex(shgroup, model, DRW_UNIFORM_MODEL_MATRIX, NULL, 0, 1);
+      drw_shgroup_uniform_create_ex(shgroup, model, DRW_UNIFORM_MODEL_MATRIX, NULL, 0, 0, 1);
     }
     if (modelinverse != -1) {
       drw_shgroup_uniform_create_ex(
-          shgroup, modelinverse, DRW_UNIFORM_MODEL_MATRIX_INVERSE, NULL, 0, 1);
-    }
-    if (modelviewprojection != -1) {
-      drw_shgroup_uniform_create_ex(
-          shgroup, modelviewprojection, DRW_UNIFORM_MODELVIEWPROJECTION_MATRIX, NULL, 0, 1);
+          shgroup, modelinverse, DRW_UNIFORM_MODEL_MATRIX_INVERSE, NULL, 0, 0, 1);
     }
   }
 
   if (info_ubo_location != -1) {
     drw_shgroup_uniform_create_ex(
-        shgroup, info_ubo_location, DRW_UNIFORM_BLOCK_OBINFOS, NULL, 0, 1);
+        shgroup, info_ubo_location, DRW_UNIFORM_BLOCK_OBINFOS, NULL, 0, 0, 1);
 
     /* Abusing this loc to tell shgroup we need the obinfos. */
     shgroup->objectinfo = 1;
@@ -1197,25 +1221,21 @@ static void drw_shgroup_init(DRWShadingGroup *shgroup, GPUShader *shader)
 
   if (view_ubo_location != -1) {
     drw_shgroup_uniform_create_ex(
-        shgroup, view_ubo_location, DRW_UNIFORM_BLOCK_PERSIST, G_draw.view_ubo, 0, 1);
-  }
-  else {
-    /* Only here to support builtin shaders. This should not be used by engines. */
-    /* TODO remove. */
-    DRWViewUboStorage *storage = &DST.view_storage_cpy;
-    drw_shgroup_builtin_uniform(shgroup, GPU_UNIFORM_VIEW, storage->viewmat, 16, 1);
-    drw_shgroup_builtin_uniform(shgroup, GPU_UNIFORM_VIEW_INV, storage->viewinv, 16, 1);
-    drw_shgroup_builtin_uniform(shgroup, GPU_UNIFORM_VIEWPROJECTION, storage->persmat, 16, 1);
-    drw_shgroup_builtin_uniform(shgroup, GPU_UNIFORM_VIEWPROJECTION_INV, storage->persinv, 16, 1);
-    drw_shgroup_builtin_uniform(shgroup, GPU_UNIFORM_PROJECTION, storage->winmat, 16, 1);
-    drw_shgroup_builtin_uniform(shgroup, GPU_UNIFORM_PROJECTION_INV, storage->wininv, 16, 1);
-    drw_shgroup_builtin_uniform(shgroup, GPU_UNIFORM_CLIPPLANES, storage->clipplanes, 4, 6);
+        shgroup, view_ubo_location, DRW_UNIFORM_BLOCK, G_draw.view_ubo, 0, 0, 1);
   }
 
   /* Not supported. */
   BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODELVIEW_INV) == -1);
   BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODELVIEW) == -1);
   BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_NORMAL) == -1);
+  BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_VIEW) == -1);
+  BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_VIEW_INV) == -1);
+  BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_VIEWPROJECTION) == -1);
+  BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_VIEWPROJECTION_INV) == -1);
+  BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_PROJECTION) == -1);
+  BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_PROJECTION_INV) == -1);
+  BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_CLIPPLANES) == -1);
+  BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MVP) == -1);
 }
 
 static DRWShadingGroup *drw_shgroup_create_ex(struct GPUShader *shader, DRWPass *pass)
@@ -1253,31 +1273,36 @@ static DRWShadingGroup *drw_shgroup_material_create_ex(GPUPass *gpupass, DRWPass
 static void drw_shgroup_material_texture(DRWShadingGroup *grp,
                                          GPUMaterialTexture *tex,
                                          const char *name,
+                                         eGPUSamplerState state,
                                          int textarget)
 {
   GPUTexture *gputex = GPU_texture_from_blender(tex->ima, tex->iuser, NULL, textarget);
-  DRW_shgroup_uniform_texture(grp, name, gputex);
+
+  int loc = GPU_shader_get_texture_binding(grp->shader, name);
+  drw_shgroup_uniform_create_ex(grp, loc, DRW_UNIFORM_TEXTURE, gputex, state, 0, 1);
 
   GPUTexture **gputex_ref = BLI_memblock_alloc(DST.vmempool->images);
   *gputex_ref = gputex;
   GPU_texture_ref(gputex);
 }
 
-static DRWShadingGroup *drw_shgroup_material_inputs(DRWShadingGroup *grp,
-                                                    struct GPUMaterial *material)
+void DRW_shgroup_add_material_resources(DRWShadingGroup *grp, struct GPUMaterial *material)
 {
   ListBase textures = GPU_material_textures(material);
 
   /* Bind all textures needed by the material. */
-  for (GPUMaterialTexture *tex = textures.first; tex; tex = tex->next) {
+  LISTBASE_FOREACH (GPUMaterialTexture *, tex, &textures) {
     if (tex->ima) {
       /* Image */
       if (tex->tiled_mapping_name[0]) {
-        drw_shgroup_material_texture(grp, tex, tex->sampler_name, GL_TEXTURE_2D_ARRAY);
-        drw_shgroup_material_texture(grp, tex, tex->tiled_mapping_name, GL_TEXTURE_1D_ARRAY);
+        drw_shgroup_material_texture(
+            grp, tex, tex->sampler_name, tex->sampler_state, GL_TEXTURE_2D_ARRAY);
+        drw_shgroup_material_texture(
+            grp, tex, tex->tiled_mapping_name, tex->sampler_state, GL_TEXTURE_1D_ARRAY);
       }
       else {
-        drw_shgroup_material_texture(grp, tex, tex->sampler_name, GL_TEXTURE_2D);
+        drw_shgroup_material_texture(
+            grp, tex, tex->sampler_name, tex->sampler_state, GL_TEXTURE_2D);
       }
     }
     else if (tex->colorband) {
@@ -1290,8 +1315,6 @@ static DRWShadingGroup *drw_shgroup_material_inputs(DRWShadingGroup *grp,
   if (ubo != NULL) {
     DRW_shgroup_uniform_block(grp, GPU_UBO_BLOCK_NAME, ubo);
   }
-
-  return grp;
 }
 
 GPUVertFormat *DRW_shgroup_instance_format_array(const DRWInstanceAttrFormat attrs[],
@@ -1316,7 +1339,7 @@ DRWShadingGroup *DRW_shgroup_material_create(struct GPUMaterial *material, DRWPa
 
   if (shgroup) {
     drw_shgroup_init(shgroup, GPU_pass_shader_get(gpupass));
-    drw_shgroup_material_inputs(shgroup, material);
+    DRW_shgroup_add_material_resources(shgroup, material);
   }
   return shgroup;
 }
@@ -1335,7 +1358,7 @@ DRWShadingGroup *DRW_shgroup_transform_feedback_create(struct GPUShader *shader,
   BLI_assert(tf_target != NULL);
   DRWShadingGroup *shgroup = drw_shgroup_create_ex(shader, pass);
   drw_shgroup_init(shgroup, shader);
-  drw_shgroup_uniform_create_ex(shgroup, 0, DRW_UNIFORM_TFEEDBACK_TARGET, tf_target, 0, 1);
+  drw_shgroup_uniform_create_ex(shgroup, 0, DRW_UNIFORM_TFEEDBACK_TARGET, tf_target, 0, 0, 1);
   return shgroup;
 }
 
@@ -1867,12 +1890,31 @@ DRWPass *DRW_pass_create(const char *name, DRWState state)
   pass->handle = DST.pass_handle;
   DRW_handle_increment(&DST.pass_handle);
 
+  pass->original = NULL;
+  pass->next = NULL;
+
+  return pass;
+}
+
+DRWPass *DRW_pass_create_instance(const char *name, DRWPass *original, DRWState state)
+{
+  DRWPass *pass = DRW_pass_create(name, state);
+  pass->original = original;
+
   return pass;
 }
 
+/* Link two passes so that they are both rendered if the first one is being drawn. */
+void DRW_pass_link(DRWPass *first, DRWPass *second)
+{
+  BLI_assert(first != second);
+  BLI_assert(first->next == NULL);
+  first->next = second;
+}
+
 bool DRW_pass_is_empty(DRWPass *pass)
 {
-  for (DRWShadingGroup *shgroup = pass->shgroups.first; shgroup; shgroup = shgroup->next) {
+  LISTBASE_FOREACH (DRWShadingGroup *, shgroup, &pass->shgroups) {
     if (!DRW_shgroup_is_empty(shgroup)) {
       return false;
     }
@@ -1899,7 +1941,7 @@ void DRW_pass_foreach_shgroup(DRWPass *pass,
                               void (*callback)(void *userData, DRWShadingGroup *shgrp),
                               void *userData)
 {
-  for (DRWShadingGroup *shgroup = pass->shgroups.first; shgroup; shgroup = shgroup->next) {
+  LISTBASE_FOREACH (DRWShadingGroup *, shgroup, &pass->shgroups) {
     callback(userData, shgroup);
   }
 }
@@ -1963,7 +2005,10 @@ void DRW_pass_sort_shgroup_z(DRWPass *pass)
       }
     }
     /* To be sorted a shgroup needs to have at least one draw command.  */
-    BLI_assert(handle != 0);
+    /* FIXME(fclem) In some cases, we can still have empty shading groups to sort. However, their
+     * final order is not well defined.
+     * (see T76730 & D7729). */
+    // BLI_assert(handle != 0);
 
     DRWObjectMatrix *obmats = DRW_memblock_elem_from_handle(DST.vmempool->obmats, &handle);
 
diff --git a/source/blender/draw/intern/draw_manager_exec.c b/source/blender/draw/intern/draw_manager_exec.c
index 8e712295b61..59b4e9af14e 100644
--- a/source/blender/draw/intern/draw_manager_exec.c
+++ b/source/blender/draw/intern/draw_manager_exec.c
@@ -22,6 +22,7 @@
 
 #include "draw_manager.h"
 
+#include "BLI_alloca.h"
 #include "BLI_math.h"
 #include "BLI_math_bits.h"
 #include "BLI_memblock.h"
@@ -65,7 +66,6 @@ typedef struct DRWCommandsState {
   /* Legacy matrix support. */
   int obmat_loc;
   int obinv_loc;
-  int mvp_loc;
   /* Selection ID state. */
   GPUVertBuf *select_buf;
   uint select_id;
@@ -454,6 +454,8 @@ void DRW_state_reset(void)
 {
   DRW_state_reset_ex(DRW_STATE_DEFAULT);
 
+  GPU_texture_unbind_all();
+
   /* Should stay constant during the whole rendering. */
   GPU_point_size(5);
   GPU_line_smooth(false);
@@ -655,8 +657,7 @@ static void draw_compute_culling(DRWView *view)
 BLI_INLINE void draw_legacy_matrix_update(DRWShadingGroup *shgroup,
                                           DRWResourceHandle *handle,
                                           float obmat_loc,
-                                          float obinv_loc,
-                                          float mvp_loc)
+                                          float obinv_loc)
 {
   /* Still supported for compatibility with gpu_shader_* but should be forbidden. */
   DRWObjectMatrix *ob_mats = DRW_memblock_elem_from_handle(DST.vmempool->obmats, handle);
@@ -666,13 +667,6 @@ BLI_INLINE void draw_legacy_matrix_update(DRWShadingGroup *shgroup,
   if (obinv_loc != -1) {
     GPU_shader_uniform_vector(shgroup->shader, obinv_loc, 16, 1, (float *)ob_mats->modelinverse);
   }
-  /* Still supported for compatibility with gpu_shader_* but should be forbidden
-   * and is slow (since it does not cache the result). */
-  if (mvp_loc != -1) {
-    float mvp[4][4];
-    mul_m4_m4m4(mvp, DST.view_active->storage.persmat, ob_mats->model);
-    GPU_shader_uniform_vector(shgroup->shader, mvp_loc, 16, 1, (float *)mvp);
-  }
 }
 
 BLI_INLINE void draw_geometry_bind(DRWShadingGroup *shgroup, GPUBatch *geom)
@@ -744,107 +738,6 @@ BLI_INLINE void draw_indirect_call(DRWShadingGroup *shgroup, DRWCommandsState *s
   }
 }
 
-enum {
-  BIND_NONE = 0,
-  BIND_TEMP = 1,    /* Release slot after this shading group. */
-  BIND_PERSIST = 2, /* Release slot only after the next shader change. */
-};
-
-static void set_bound_flags(uint64_t *slots, uint64_t *persist_slots, int slot_idx, char bind_type)
-{
-  uint64_t slot = 1llu << (unsigned long)slot_idx;
-  *slots |= slot;
-  if (bind_type == BIND_PERSIST) {
-    *persist_slots |= slot;
-  }
-}
-
-static int get_empty_slot_index(uint64_t slots)
-{
-  uint64_t empty_slots = ~slots;
-  /* Find first empty slot using bitscan. */
-  if (empty_slots != 0) {
-    if ((empty_slots & 0xFFFFFFFFlu) != 0) {
-      return (int)bitscan_forward_uint(empty_slots);
-    }
-    else {
-      return (int)bitscan_forward_uint(empty_slots >> 32) + 32;
-    }
-  }
-  else {
-    /* Greater than GPU_max_textures() */
-    return 99999;
-  }
-}
-
-static void bind_texture(GPUTexture *tex, char bind_type)
-{
-  int idx = GPU_texture_bound_number(tex);
-  if (idx == -1) {
-    /* Texture isn't bound yet. Find an empty slot and bind it. */
-    idx = get_empty_slot_index(DST.RST.bound_tex_slots);
-
-    if (idx < GPU_max_textures()) {
-      GPUTexture **gpu_tex_slot = &DST.RST.bound_texs[idx];
-      /* Unbind any previous texture. */
-      if (*gpu_tex_slot != NULL) {
-        GPU_texture_unbind(*gpu_tex_slot);
-      }
-      GPU_texture_bind(tex, idx);
-      *gpu_tex_slot = tex;
-    }
-    else {
-      printf("Not enough texture slots! Reduce number of textures used by your shader.\n");
-      return;
-    }
-  }
-  else {
-    /* This texture slot was released but the tex
-     * is still bound. Just flag the slot again. */
-    BLI_assert(DST.RST.bound_texs[idx] == tex);
-  }
-  set_bound_flags(&DST.RST.bound_tex_slots, &DST.RST.bound_tex_slots_persist, idx, bind_type);
-}
-
-static void bind_ubo(GPUUniformBuffer *ubo, char bind_type)
-{
-  int idx = GPU_uniformbuffer_bindpoint(ubo);
-  if (idx == -1) {
-    /* UBO isn't bound yet. Find an empty slot and bind it. */
-    idx = get_empty_slot_index(DST.RST.bound_ubo_slots);
-
-    /* [0..1] are reserved ubo slots. */
-    idx += 2;
-
-    if (idx < GPU_max_ubo_binds()) {
-      GPUUniformBuffer **gpu_ubo_slot = &DST.RST.bound_ubos[idx];
-      /* Unbind any previous UBO. */
-      if (*gpu_ubo_slot != NULL) {
-        GPU_uniformbuffer_unbind(*gpu_ubo_slot);
-      }
-      GPU_uniformbuffer_bind(ubo, idx);
-      *gpu_ubo_slot = ubo;
-    }
-    else {
-      /* printf so user can report bad behavior */
-      printf("Not enough ubo slots! This should not happen!\n");
-      /* This is not depending on user input.
-       * It is our responsibility to make sure there is enough slots. */
-      BLI_assert(0);
-      return;
-    }
-  }
-  else {
-    BLI_assert(idx < 64);
-    /* This UBO slot was released but the UBO is
-     * still bound here. Just flag the slot again. */
-    BLI_assert(DST.RST.bound_ubos[idx] == ubo);
-  }
-  /* Remove offset for flag bitfield. */
-  idx -= 2;
-  set_bound_flags(&DST.RST.bound_ubo_slots, &DST.RST.bound_ubo_slots_persist, idx, bind_type);
-}
-
 #ifndef NDEBUG
 /**
  * Opengl specification is strict on buffer binding.
@@ -900,28 +793,6 @@ static bool ubo_bindings_validate(DRWShadingGroup *shgroup)
 }
 #endif
 
-static void release_texture_slots(bool with_persist)
-{
-  if (with_persist) {
-    DST.RST.bound_tex_slots = 0;
-    DST.RST.bound_tex_slots_persist = 0;
-  }
-  else {
-    DST.RST.bound_tex_slots &= DST.RST.bound_tex_slots_persist;
-  }
-}
-
-static void release_ubo_slots(bool with_persist)
-{
-  if (with_persist) {
-    DST.RST.bound_ubo_slots = 0;
-    DST.RST.bound_ubo_slots_persist = 0;
-  }
-  else {
-    DST.RST.bound_ubo_slots &= DST.RST.bound_ubo_slots_persist;
-  }
-}
-
 static void draw_update_uniforms(DRWShadingGroup *shgroup,
                                  DRWCommandsState *state,
                                  bool *use_tfeedback)
@@ -929,69 +800,42 @@ static void draw_update_uniforms(DRWShadingGroup *shgroup,
   for (DRWUniformChunk *unichunk = shgroup->uniforms; unichunk; unichunk = unichunk->next) {
     DRWUniform *uni = unichunk->uniforms;
     for (int i = 0; i < unichunk->uniform_used; i++, uni++) {
-      GPUTexture *tex;
-      GPUUniformBuffer *ubo;
-      if (uni->location == -2) {
-        uni->location = GPU_shader_get_uniform_ensure(shgroup->shader,
-                                                      DST.uniform_names.buffer + uni->name_ofs);
-        if (uni->location == -1) {
-          continue;
-        }
-      }
-      const void *data = uni->pvalue;
-      if (ELEM(uni->type, DRW_UNIFORM_INT_COPY, DRW_UNIFORM_FLOAT_COPY)) {
-        data = uni->fvalue;
-      }
       switch (uni->type) {
         case DRW_UNIFORM_INT_COPY:
+          GPU_shader_uniform_vector_int(
+              shgroup->shader, uni->location, uni->length, uni->arraysize, uni->ivalue);
+          break;
         case DRW_UNIFORM_INT:
           GPU_shader_uniform_vector_int(
-              shgroup->shader, uni->location, uni->length, uni->arraysize, data);
+              shgroup->shader, uni->location, uni->length, uni->arraysize, uni->pvalue);
           break;
         case DRW_UNIFORM_FLOAT_COPY:
+          GPU_shader_uniform_vector(
+              shgroup->shader, uni->location, uni->length, uni->arraysize, uni->fvalue);
+          break;
         case DRW_UNIFORM_FLOAT:
           GPU_shader_uniform_vector(
-              shgroup->shader, uni->location, uni->length, uni->arraysize, data);
+              shgroup->shader, uni->location, uni->length, uni->arraysize, uni->pvalue);
           break;
         case DRW_UNIFORM_TEXTURE:
-          tex = (GPUTexture *)uni->pvalue;
-          BLI_assert(tex);
-          bind_texture(tex, BIND_TEMP);
-          GPU_shader_uniform_texture(shgroup->shader, uni->location, tex);
-          break;
-        case DRW_UNIFORM_TEXTURE_PERSIST:
-          tex = (GPUTexture *)uni->pvalue;
-          BLI_assert(tex);
-          bind_texture(tex, BIND_PERSIST);
-          GPU_shader_uniform_texture(shgroup->shader, uni->location, tex);
+          GPU_texture_bind_ex(uni->texture, uni->sampler_state, uni->location, false);
           break;
         case DRW_UNIFORM_TEXTURE_REF:
-          tex = *((GPUTexture **)uni->pvalue);
-          BLI_assert(tex);
-          bind_texture(tex, BIND_TEMP);
-          GPU_shader_uniform_texture(shgroup->shader, uni->location, tex);
+          GPU_texture_bind_ex(*uni->texture_ref, uni->sampler_state, uni->location, false);
           break;
         case DRW_UNIFORM_BLOCK:
-          ubo = (GPUUniformBuffer *)uni->pvalue;
-          bind_ubo(ubo, BIND_TEMP);
-          GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo);
+          GPU_uniformbuffer_bind(uni->block, uni->location);
           break;
-        case DRW_UNIFORM_BLOCK_PERSIST:
-          ubo = (GPUUniformBuffer *)uni->pvalue;
-          bind_ubo(ubo, BIND_PERSIST);
-          GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo);
+        case DRW_UNIFORM_BLOCK_REF:
+          GPU_uniformbuffer_bind(*uni->block_ref, uni->location);
           break;
         case DRW_UNIFORM_BLOCK_OBMATS:
           state->obmats_loc = uni->location;
-          ubo = DST.vmempool->matrices_ubo[0];
-          GPU_uniformbuffer_bind(ubo, 0);
-          GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo);
+          GPU_uniformbuffer_bind(DST.vmempool->matrices_ubo[0], uni->location);
           break;
         case DRW_UNIFORM_BLOCK_OBINFOS:
           state->obinfos_loc = uni->location;
-          ubo = DST.vmempool->obinfos_ubo[0];
-          GPU_uniformbuffer_bind(ubo, 1);
-          GPU_shader_uniform_buffer(shgroup->shader, uni->location, ubo);
+          GPU_uniformbuffer_bind(DST.vmempool->obinfos_ubo[0], uni->location);
           break;
         case DRW_UNIFORM_RESOURCE_CHUNK:
           state->chunkid_loc = uni->location;
@@ -1001,9 +845,9 @@ static void draw_update_uniforms(DRWShadingGroup *shgroup,
           state->resourceid_loc = uni->location;
           break;
         case DRW_UNIFORM_TFEEDBACK_TARGET:
-          BLI_assert(data && (*use_tfeedback == false));
-          *use_tfeedback = GPU_shader_transform_feedback_enable(shgroup->shader,
-                                                                ((GPUVertBuf *)data)->vbo_id);
+          BLI_assert(uni->pvalue && (*use_tfeedback == false));
+          *use_tfeedback = GPU_shader_transform_feedback_enable(
+              shgroup->shader, ((GPUVertBuf *)uni->pvalue)->vbo_id);
           break;
           /* Legacy/Fallback support. */
         case DRW_UNIFORM_BASE_INSTANCE:
@@ -1015,9 +859,6 @@ static void draw_update_uniforms(DRWShadingGroup *shgroup,
         case DRW_UNIFORM_MODEL_MATRIX_INVERSE:
           state->obinv_loc = uni->location;
           break;
-        case DRW_UNIFORM_MODELVIEWPROJECTION_MATRIX:
-          state->mvp_loc = uni->location;
-          break;
       }
     }
   }
@@ -1110,11 +951,11 @@ static void draw_call_resource_bind(DRWCommandsState *state, const DRWResourceHa
     }
     if (state->obmats_loc != -1) {
       GPU_uniformbuffer_unbind(DST.vmempool->matrices_ubo[state->resource_chunk]);
-      GPU_uniformbuffer_bind(DST.vmempool->matrices_ubo[chunk], 0);
+      GPU_uniformbuffer_bind(DST.vmempool->matrices_ubo[chunk], state->obmats_loc);
     }
     if (state->obinfos_loc != -1) {
       GPU_uniformbuffer_unbind(DST.vmempool->obinfos_ubo[state->resource_chunk]);
-      GPU_uniformbuffer_bind(DST.vmempool->obinfos_ubo[chunk], 1);
+      GPU_uniformbuffer_bind(DST.vmempool->obinfos_ubo[chunk], state->obinfos_loc);
     }
     state->resource_chunk = chunk;
   }
@@ -1153,10 +994,8 @@ static void draw_call_single_do(DRWShadingGroup *shgroup,
   draw_call_resource_bind(state, &handle);
 
   /* TODO This is Legacy. Need to be removed. */
-  if (state->obmats_loc == -1 &&
-      (state->obmat_loc != -1 || state->obinv_loc != -1 || state->mvp_loc != -1)) {
-    draw_legacy_matrix_update(
-        shgroup, &handle, state->obmat_loc, state->obinv_loc, state->mvp_loc);
+  if (state->obmats_loc == -1 && (state->obmat_loc != -1 || state->obinv_loc != -1)) {
+    draw_legacy_matrix_update(shgroup, &handle, state->obmat_loc, state->obinv_loc);
   }
 
   if (G.f & G_FLAG_PICKSEL) {
@@ -1262,7 +1101,6 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
       .resourceid_loc = -1,
       .obmat_loc = -1,
       .obinv_loc = -1,
-      .mvp_loc = -1,
       .drw_state_enabled = 0,
       .drw_state_disabled = 0,
   };
@@ -1273,6 +1111,11 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
   if (shader_changed) {
     if (DST.shader) {
       GPU_shader_unbind();
+
+      /* Unbinding can be costly. Skip it under normal conditions. */
+      if (G.debug & G_DEBUG_GPU) {
+        GPU_texture_unbind_all();
+      }
     }
     GPU_shader_bind(shgroup->shader);
     DST.shader = shgroup->shader;
@@ -1283,9 +1126,6 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
     DST.batch = NULL;
   }
 
-  release_ubo_slots(shader_changed);
-  release_texture_slots(shader_changed);
-
   draw_update_uniforms(shgroup, &state, &use_tfeedback);
 
   drw_state_set(pass_state);
@@ -1376,7 +1216,7 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state)
                               0,
                               0,
                               cmd->instance.inst_count,
-                              cmd->instance.use_attribs == 0);
+                              cmd->instance.use_attrs == 0);
           break;
         case DRW_CMD_DRAW_RANGE:
           draw_call_single_do(shgroup,
@@ -1426,6 +1266,11 @@ static void drw_draw_pass_ex(DRWPass *pass,
                              DRWShadingGroup *start_group,
                              DRWShadingGroup *end_group)
 {
+  if (pass->original) {
+    start_group = pass->original->shgroups.first;
+    end_group = pass->original->shgroups.last;
+  }
+
   if (start_group == NULL) {
     return;
   }
@@ -1462,22 +1307,6 @@ static void drw_draw_pass_ex(DRWPass *pass,
     }
   }
 
-  /* Clear Bound textures */
-  for (int i = 0; i < DST_MAX_SLOTS; i++) {
-    if (DST.RST.bound_texs[i] != NULL) {
-      GPU_texture_unbind(DST.RST.bound_texs[i]);
-      DST.RST.bound_texs[i] = NULL;
-    }
-  }
-
-  /* Clear Bound Ubos */
-  for (int i = 0; i < DST_MAX_SLOTS; i++) {
-    if (DST.RST.bound_ubos[i] != NULL) {
-      GPU_uniformbuffer_unbind(DST.RST.bound_ubos[i]);
-      DST.RST.bound_ubos[i] = NULL;
-    }
-  }
-
   if (DST.shader) {
     GPU_shader_unbind();
     DST.shader = NULL;
@@ -1511,7 +1340,9 @@ static void drw_draw_pass_ex(DRWPass *pass,
 
 void DRW_draw_pass(DRWPass *pass)
 {
-  drw_draw_pass_ex(pass, pass->shgroups.first, pass->shgroups.last);
+  for (; pass; pass = pass->next) {
+    drw_draw_pass_ex(pass, pass->shgroups.first, pass->shgroups.last);
+  }
 }
 
 /* Draw only a subset of shgroups. Used in special situations as grease pencil strokes */
diff --git a/source/blender/draw/intern/draw_manager_profiling.c b/source/blender/draw/intern/draw_manager_profiling.c
index 76382132230..57887c11c02 100644
--- a/source/blender/draw/intern/draw_manager_profiling.c
+++ b/source/blender/draw/intern/draw_manager_profiling.c
@@ -20,6 +20,7 @@
  * \ingroup draw
  */
 
+#include "BLI_listbase.h"
 #include "BLI_rect.h"
 #include "BLI_string.h"
 
@@ -250,7 +251,7 @@ void DRW_stats_draw(const rcti *rect)
 
   /* Engines rows */
   char time_to_txt[16];
-  for (LinkData *link = DST.enabled_engines.first; link; link = link->next) {
+  LISTBASE_FOREACH (LinkData *, link, &DST.enabled_engines) {
     u = 0;
     DrawEngineType *engine = link->data;
     ViewportEngineData *data = drw_viewport_engine_data_ensure(engine);
diff --git a/source/blender/draw/intern/draw_manager_shader.c b/source/blender/draw/intern/draw_manager_shader.c
index 89884d58099..6304b707cb9 100644
--- a/source/blender/draw/intern/draw_manager_shader.c
+++ b/source/blender/draw/intern/draw_manager_shader.c
@@ -99,6 +99,10 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
   DRWShaderCompiler *comp = (DRWShaderCompiler *)custom_data;
   void *gl_context = comp->gl_context;
 
+#if TRUST_NO_ONE
+  BLI_assert(gl_context != NULL);
+#endif
+
   WM_opengl_context_activate(gl_context);
 
   while (true) {
@@ -237,6 +241,9 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
   WM_jobs_timer(wm_job, 0.1, NC_MATERIAL | ND_SHADING_DRAW, 0);
   WM_jobs_delay_start(wm_job, 0.1);
   WM_jobs_callbacks(wm_job, drw_deferred_shader_compilation_exec, NULL, NULL, NULL);
+
+  G.is_break = false;
+
   WM_jobs_start(wm, wm_job);
 }
 
@@ -249,7 +256,7 @@ void DRW_deferred_shader_remove(GPUMaterial *mat)
       /* No job running, do not create a new one by calling WM_jobs_get. */
       continue;
     }
-    for (wmWindow *win = wm->windows.first; win; win = win->next) {
+    LISTBASE_FOREACH (wmWindow *, win, &wm->windows) {
       wmJob *wm_job = WM_jobs_get(
           wm, win, scene, "Shaders Compilation", WM_JOB_PROGRESS, WM_JOB_TYPE_SHADER_COMPILATION);
 
@@ -336,27 +343,12 @@ GPUShader *DRW_shader_create_with_transform_feedback(const char *vert,
                               __func__);
 }
 
-GPUShader *DRW_shader_create_2d(const char *frag, const char *defines)
-{
-  return GPU_shader_create(datatoc_gpu_shader_2D_vert_glsl, frag, NULL, NULL, defines, __func__);
-}
-
-GPUShader *DRW_shader_create_3d(const char *frag, const char *defines)
-{
-  return GPU_shader_create(datatoc_gpu_shader_3D_vert_glsl, frag, NULL, NULL, defines, __func__);
-}
-
 GPUShader *DRW_shader_create_fullscreen(const char *frag, const char *defines)
 {
   return GPU_shader_create(
       datatoc_common_fullscreen_vert_glsl, frag, NULL, NULL, defines, __func__);
 }
 
-GPUShader *DRW_shader_create_3d_depth_only(eGPUShaderConfig sh_cfg)
-{
-  return GPU_shader_get_builtin_shader_with_config(GPU_SHADER_3D_DEPTH_ONLY, sh_cfg);
-}
-
 GPUMaterial *DRW_shader_find_from_world(World *wo,
                                         const void *engine_type,
                                         const int options,
@@ -391,6 +383,7 @@ GPUMaterial *DRW_shader_find_from_material(Material *ma,
 
 GPUMaterial *DRW_shader_create_from_world(struct Scene *scene,
                                           World *wo,
+                                          struct bNodeTree *ntree,
                                           const void *engine_type,
                                           const int options,
                                           const bool is_volume_shader,
@@ -401,7 +394,7 @@ GPUMaterial *DRW_shader_create_from_world(struct Scene *scene,
                                           bool deferred)
 {
   GPUMaterial *mat = NULL;
-  if (DRW_state_is_image_render()) {
+  if (DRW_state_is_image_render() || !deferred) {
     mat = GPU_material_from_nodetree_find(&wo->gpumaterial, engine_type, options);
   }
 
@@ -409,7 +402,7 @@ GPUMaterial *DRW_shader_create_from_world(struct Scene *scene,
     scene = (Scene *)DEG_get_original_id(&DST.draw_ctx.scene->id);
     mat = GPU_material_from_nodetree(scene,
                                      NULL,
-                                     wo->nodetree,
+                                     ntree,
                                      &wo->gpumaterial,
                                      engine_type,
                                      options,
@@ -430,6 +423,7 @@ GPUMaterial *DRW_shader_create_from_world(struct Scene *scene,
 
 GPUMaterial *DRW_shader_create_from_material(struct Scene *scene,
                                              Material *ma,
+                                             struct bNodeTree *ntree,
                                              const void *engine_type,
                                              const int options,
                                              const bool is_volume_shader,
@@ -440,7 +434,7 @@ GPUMaterial *DRW_shader_create_from_material(struct Scene *scene,
                                              bool deferred)
 {
   GPUMaterial *mat = NULL;
-  if (DRW_state_is_image_render()) {
+  if (DRW_state_is_image_render() || !deferred) {
     mat = GPU_material_from_nodetree_find(&ma->gpumaterial, engine_type, options);
   }
 
@@ -448,7 +442,7 @@ GPUMaterial *DRW_shader_create_from_material(struct Scene *scene,
     scene = (Scene *)DEG_get_original_id(&DST.draw_ctx.scene->id);
     mat = GPU_material_from_nodetree(scene,
                                      ma,
-                                     ma->nodetree,
+                                     ntree,
                                      &ma->gpumaterial,
                                      engine_type,
                                      options,
diff --git a/source/blender/draw/intern/draw_manager_text.c b/source/blender/draw/intern/draw_manager_text.c
index 2692f7b4795..23956df71e8 100644
--- a/source/blender/draw/intern/draw_manager_text.c
+++ b/source/blender/draw/intern/draw_manager_text.c
@@ -27,6 +27,7 @@
 #include "BLI_string.h"
 
 #include "BKE_editmesh.h"
+#include "BKE_editmesh_cache.h"
 #include "BKE_global.h"
 #include "BKE_unit.h"
 
@@ -48,6 +49,7 @@
 #include "WM_api.h"
 
 #include "draw_manager_text.h"
+#include "intern/bmesh_polygon.h"
 
 typedef struct ViewCachedString {
   float vec[3];
@@ -216,6 +218,8 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
   float clip_planes[4][4];
   /* allow for displaying shape keys and deform mods */
   BMIter iter;
+  const float(*vert_coords)[3] = (me->runtime.edit_data ? me->runtime.edit_data->vertexCos : NULL);
+  const bool use_coords = (vert_coords != NULL);
 
   /* when 2 or more edge-info options are enabled, space apart */
   short edge_tex_count = 0;
@@ -261,6 +265,10 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
 
     UI_GetThemeColor3ubv(TH_DRAWEXTRA_EDGELEN, col);
 
+    if (use_coords) {
+      BM_mesh_elem_index_ensure(em->bm, BM_VERT);
+    }
+
     BM_ITER_MESH (eed, &iter, em->bm, BM_EDGES_OF_MESH) {
       /* draw selected edges, or edges next to selected verts while dragging */
       if (BM_elem_flag_test(eed, BM_ELEM_SELECT) ||
@@ -268,8 +276,14 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
                          BM_elem_flag_test(eed->v2, BM_ELEM_SELECT)))) {
         float v1_clip[3], v2_clip[3];
 
-        copy_v3_v3(v1, eed->v1->co);
-        copy_v3_v3(v2, eed->v2->co);
+        if (vert_coords) {
+          copy_v3_v3(v1, vert_coords[BM_elem_index_get(eed->v1)]);
+          copy_v3_v3(v2, vert_coords[BM_elem_index_get(eed->v2)]);
+        }
+        else {
+          copy_v3_v3(v1, eed->v1->co);
+          copy_v3_v3(v2, eed->v2->co);
+        }
 
         if (clip_segment_v3_plane_n(v1, v2, clip_planes, 4, v1_clip, v2_clip)) {
 
@@ -306,6 +320,13 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
 
     UI_GetThemeColor3ubv(TH_DRAWEXTRA_EDGEANG, col);
 
+    const float(*poly_normals)[3] = NULL;
+    if (use_coords) {
+      BM_mesh_elem_index_ensure(em->bm, BM_VERT | BM_FACE);
+      BKE_editmesh_cache_ensure_poly_normals(em, me->runtime.edit_data);
+      poly_normals = me->runtime.edit_data->polyNos;
+    }
+
     BM_ITER_MESH (eed, &iter, em->bm, BM_EDGES_OF_MESH) {
       BMLoop *l_a, *l_b;
       if (BM_edge_loop_pair(eed, &l_a, &l_b)) {
@@ -321,8 +342,14 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
                            BM_elem_flag_test(l_b->prev->v, BM_ELEM_SELECT)))) {
           float v1_clip[3], v2_clip[3];
 
-          copy_v3_v3(v1, eed->v1->co);
-          copy_v3_v3(v2, eed->v2->co);
+          if (vert_coords) {
+            copy_v3_v3(v1, vert_coords[BM_elem_index_get(eed->v1)]);
+            copy_v3_v3(v2, vert_coords[BM_elem_index_get(eed->v2)]);
+          }
+          else {
+            copy_v3_v3(v1, eed->v1->co);
+            copy_v3_v3(v2, eed->v2->co);
+          }
 
           if (clip_segment_v3_plane_n(v1, v2, clip_planes, 4, v1_clip, v2_clip)) {
             float no_a[3], no_b[3];
@@ -331,8 +358,14 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
             mid_v3_v3v3(vmid, v1_clip, v2_clip);
             mul_m4_v3(ob->obmat, vmid);
 
-            copy_v3_v3(no_a, l_a->f->no);
-            copy_v3_v3(no_b, l_b->f->no);
+            if (use_coords) {
+              copy_v3_v3(no_a, poly_normals[BM_elem_index_get(l_a->f)]);
+              copy_v3_v3(no_b, poly_normals[BM_elem_index_get(l_b->f)]);
+            }
+            else {
+              copy_v3_v3(no_a, l_a->f->no);
+              copy_v3_v3(no_b, l_b->f->no);
+            }
 
             if (do_global) {
               mul_mat3_m4_v3(ob->imat, no_a);
@@ -372,9 +405,17 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
         zero_v3(vmid);
         BMLoop *(*l)[3] = &em->looptris[poly_to_tri_count(i, BM_elem_index_get(f->l_first))];
         for (int j = 0; j < numtri; j++) {
-          copy_v3_v3(v1, l[j][0]->v->co);
-          copy_v3_v3(v2, l[j][1]->v->co);
-          copy_v3_v3(v3, l[j][2]->v->co);
+
+          if (use_coords) {
+            copy_v3_v3(v1, vert_coords[BM_elem_index_get(l[j][0]->v)]);
+            copy_v3_v3(v2, vert_coords[BM_elem_index_get(l[j][1]->v)]);
+            copy_v3_v3(v3, vert_coords[BM_elem_index_get(l[j][2]->v)]);
+          }
+          else {
+            copy_v3_v3(v1, l[j][0]->v->co);
+            copy_v3_v3(v2, l[j][1]->v->co);
+            copy_v3_v3(v3, l[j][2]->v->co);
+          }
 
           add_v3_v3(vmid, v1);
           add_v3_v3(vmid, v2);
@@ -417,6 +458,10 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
 
     UI_GetThemeColor3ubv(TH_DRAWEXTRA_FACEANG, col);
 
+    if (use_coords) {
+      BM_mesh_elem_index_ensure(em->bm, BM_VERT);
+    }
+
     BM_ITER_MESH (efa, &iter, em->bm, BM_FACES_OF_MESH) {
       const bool is_face_sel = BM_elem_flag_test_bool(efa, BM_ELEM_SELECT);
 
@@ -433,12 +478,24 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
 
             /* lazy init center calc */
             if (is_first) {
-              BM_face_calc_center_bounds(efa, vmid);
+              if (use_coords) {
+                BM_face_calc_center_bounds_vcos(em->bm, efa, vmid, vert_coords);
+              }
+              else {
+                BM_face_calc_center_bounds(efa, vmid);
+              }
               is_first = false;
             }
-            copy_v3_v3(v1, loop->prev->v->co);
-            copy_v3_v3(v2, loop->v->co);
-            copy_v3_v3(v3, loop->next->v->co);
+            if (use_coords) {
+              copy_v3_v3(v1, vert_coords[BM_elem_index_get(loop->prev->v)]);
+              copy_v3_v3(v2, vert_coords[BM_elem_index_get(loop->v)]);
+              copy_v3_v3(v3, vert_coords[BM_elem_index_get(loop->next->v)]);
+            }
+            else {
+              copy_v3_v3(v1, loop->prev->v->co);
+              copy_v3_v3(v2, loop->v->co);
+              copy_v3_v3(v3, loop->next->v->co);
+            }
 
             copy_v3_v3(v2_local, v2);
 
@@ -475,29 +532,44 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
     if (em->selectmode & SCE_SELECT_VERTEX) {
       BMVert *v;
 
+      if (use_coords) {
+        BM_mesh_elem_index_ensure(em->bm, BM_VERT);
+      }
       BM_ITER_MESH_INDEX (v, &iter, em->bm, BM_VERTS_OF_MESH, i) {
         if (BM_elem_flag_test(v, BM_ELEM_SELECT)) {
-          float vec[3];
-          mul_v3_m4v3(vec, ob->obmat, v->co);
+          if (use_coords) {
+            copy_v3_v3(v1, vert_coords[BM_elem_index_get(v)]);
+          }
+          else {
+            copy_v3_v3(v1, v->co);
+          }
+
+          mul_m4_v3(ob->obmat, v1);
 
           numstr_len = BLI_snprintf_rlen(numstr, sizeof(numstr), "%d", i);
-          DRW_text_cache_add(dt, vec, numstr, numstr_len, 0, 0, txt_flag, col);
+          DRW_text_cache_add(dt, v1, numstr, numstr_len, 0, 0, txt_flag, col);
         }
       }
     }
 
     if (em->selectmode & SCE_SELECT_EDGE) {
-      BMEdge *e;
+      BMEdge *eed;
 
       const bool use_edge_tex_sep = (edge_tex_count == 2);
       const bool use_edge_tex_len = (v3d->overlay.edit_flag & V3D_OVERLAY_EDIT_EDGE_LEN);
 
-      BM_ITER_MESH_INDEX (e, &iter, em->bm, BM_EDGES_OF_MESH, i) {
-        if (BM_elem_flag_test(e, BM_ELEM_SELECT)) {
+      BM_ITER_MESH_INDEX (eed, &iter, em->bm, BM_EDGES_OF_MESH, i) {
+        if (BM_elem_flag_test(eed, BM_ELEM_SELECT)) {
           float v1_clip[3], v2_clip[3];
 
-          copy_v3_v3(v1, e->v1->co);
-          copy_v3_v3(v2, e->v2->co);
+          if (use_coords) {
+            copy_v3_v3(v1, vert_coords[BM_elem_index_get(eed->v1)]);
+            copy_v3_v3(v2, vert_coords[BM_elem_index_get(eed->v2)]);
+          }
+          else {
+            copy_v3_v3(v1, eed->v1->co);
+            copy_v3_v3(v2, eed->v2->co);
+          }
 
           if (clip_segment_v3_plane_n(v1, v2, clip_planes, 4, v1_clip, v2_clip)) {
             mid_v3_v3v3(vmid, v1_clip, v2_clip);
@@ -521,9 +593,20 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
     if (em->selectmode & SCE_SELECT_FACE) {
       BMFace *f;
 
+      if (use_coords) {
+        BM_mesh_elem_index_ensure(em->bm, BM_VERT);
+      }
+
       BM_ITER_MESH_INDEX (f, &iter, em->bm, BM_FACES_OF_MESH, i) {
         if (BM_elem_flag_test(f, BM_ELEM_SELECT)) {
-          BM_face_calc_center_median(f, v1);
+
+          if (use_coords) {
+            BM_face_calc_center_median_vcos(em->bm, f, v1, vert_coords);
+          }
+          else {
+            BM_face_calc_center_median(f, v1);
+          }
+
           mul_m4_v3(ob->obmat, v1);
 
           numstr_len = BLI_snprintf_rlen(numstr, sizeof(numstr), "%d", i);
diff --git a/source/blender/draw/intern/draw_manager_texture.c b/source/blender/draw/intern/draw_manager_texture.c
index 3f11fe9d11e..77b0462303d 100644
--- a/source/blender/draw/intern/draw_manager_texture.c
+++ b/source/blender/draw/intern/draw_manager_texture.c
@@ -61,17 +61,17 @@ static bool drw_texture_format_supports_framebuffer(eGPUTextureFormat format)
 
 void drw_texture_set_parameters(GPUTexture *tex, DRWTextureFlag flags)
 {
-  GPU_texture_bind(tex, 0);
   if (flags & DRW_TEX_MIPMAP) {
     GPU_texture_mipmap_mode(tex, true, flags & DRW_TEX_FILTER);
+    GPU_texture_bind(tex, 0);
     GPU_texture_generate_mipmap(tex);
+    GPU_texture_unbind(tex);
   }
   else {
     GPU_texture_filter_mode(tex, flags & DRW_TEX_FILTER);
   }
-  GPU_texture_wrap_mode(tex, flags & DRW_TEX_WRAP);
+  GPU_texture_wrap_mode(tex, flags & DRW_TEX_WRAP, true);
   GPU_texture_compare_mode(tex, flags & DRW_TEX_COMPARE);
-  GPU_texture_unbind(tex);
 }
 
 GPUTexture *DRW_texture_create_1d(int w,
diff --git a/source/blender/draw/intern/shaders/common_globals_lib.glsl b/source/blender/draw/intern/shaders/common_globals_lib.glsl
index 9dfd48cc21a..a479a87e14b 100644
--- a/source/blender/draw/intern/shaders/common_globals_lib.glsl
+++ b/source/blender/draw/intern/shaders/common_globals_lib.glsl
@@ -140,3 +140,4 @@ layout(std140) uniform globalsBlock
 #define EDGE_SEAM (1 << 4)
 #define EDGE_SHARP (1 << 5)
 #define EDGE_FREESTYLE (1 << 6)
+#define HANDLE_SELECTED (1 << 7)
diff --git a/source/blender/draw/intern/shaders/common_view_lib.glsl b/source/blender/draw/intern/shaders/common_view_lib.glsl
index 3faefd485bf..1054f4d11c9 100644
--- a/source/blender/draw/intern/shaders/common_view_lib.glsl
+++ b/source/blender/draw/intern/shaders/common_view_lib.glsl
@@ -77,12 +77,12 @@ uniform int resourceChunk;
 uniform int baseInstance;
 #  endif
 
-#  if defined(IN_PLACE_INSTANCES) || defined(INSTANCED_ATTRIB)
+#  if defined(IN_PLACE_INSTANCES) || defined(INSTANCED_ATTR)
 /* When drawing instances of an object at the same position. */
 #    define instanceId 0
 #  elif defined(GPU_DEPRECATED_AMD_DRIVER)
 /* A driver bug makes it so that when using an attribute with GL_INT_2_10_10_10_REV as format,
- * the gl_InstanceID is incremented by the 2 bit component of the attrib.
+ * the gl_InstanceID is incremented by the 2 bit component of the attribute.
  * Ignore gl_InstanceID then. */
 #    define instanceId 0
 #  else
@@ -124,7 +124,7 @@ flat in int resourceIDFrag;
 
 /* Breaking this across multiple lines causes issues for some older GLSL compilers. */
 /* clang-format off */
-#if !defined(GPU_INTEL) && !defined(GPU_DEPRECATED_AMD_DRIVER) && !defined(OS_MAC) && !defined(INSTANCED_ATTRIB)
+#if !defined(GPU_INTEL) && !defined(GPU_DEPRECATED_AMD_DRIVER) && !defined(OS_MAC) && !defined(INSTANCED_ATTR)
 /* clang-format on */
 struct ObjectMatrices {
   mat4 drw_modelMatrix;
author	Julian Eisel <julian@blender.org>	2020-06-05 14:09:31 +0300
committer	Julian Eisel <julian@blender.org>	2020-06-05 14:09:31 +0300
commit	920a58d9b6d667894cf166cbbd25e4c2fbd238ea (patch)
tree	7ca5a9da640753b5e070c439ac3bdd14dfad92cf /source/blender/draw/intern
parent	c94b6209861ca7cc3985b53474feed7d94c0221a (diff)
parent	a1d55bdd530390e58c51abe9707b8d3b0ae3e861 (diff)