27 files changed, 858 insertions, 618 deletions
diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c
index 8c6a51442e1..00ac2563c43 100644
--- a/source/blender/draw/intern/draw_cache.c
+++ b/source/blender/draw/intern/draw_cache.c
@@ -899,8 +899,6 @@ GPUBatch *DRW_cache_object_surface_get(Object *ob)
   switch (ob->type) {
     case OB_MESH:
       return DRW_cache_mesh_surface_get(ob);
-    case OB_POINTCLOUD:
-      return DRW_cache_pointcloud_surface_get(ob);
     default:
       return NULL;
   }
@@ -959,8 +957,6 @@ GPUBatch **DRW_cache_object_surface_material_get(struct Object *ob,
   switch (ob->type) {
     case OB_MESH:
       return DRW_cache_mesh_surface_shaded_get(ob, gpumat_array, gpumat_array_len);
-    case OB_POINTCLOUD:
-      return DRW_cache_pointcloud_surface_shaded_get(ob, gpumat_array, gpumat_array_len);
     default:
       return NULL;
   }
@@ -3006,24 +3002,6 @@ GPUBatch *DRW_cache_lattice_vert_overlay_get(Object *ob)
 /** \name PointCloud
  * \{ */
 
-GPUBatch *DRW_cache_pointcloud_get_dots(Object *object)
-{
-  BLI_assert(object->type == OB_POINTCLOUD);
-  return DRW_pointcloud_batch_cache_get_dots(object);
-}
-
-GPUBatch *DRW_cache_pointcloud_surface_get(Object *object)
-{
-  BLI_assert(object->type == OB_POINTCLOUD);
-  return DRW_pointcloud_batch_cache_get_surface(object);
-}
-
-GPUBatch *DRW_cache_pointcloud_surface_viewer_attribute_get(Object *object)
-{
-  BLI_assert(object->type == OB_POINTCLOUD);
-  return DRW_pointcloud_batch_cache_get_surface_viewer_attribute(object);
-}
-
 /** \} */
 
 /* -------------------------------------------------------------------- */
@@ -3308,6 +3286,9 @@ void drw_batch_cache_generate_requested(Object *ob)
     case OB_CURVES:
       DRW_curves_batch_cache_create_requested(ob);
       break;
+    case OB_POINTCLOUD:
+      DRW_pointcloud_batch_cache_create_requested(ob);
+      break;
     /* TODO: all cases. */
     default:
       break;
@@ -3358,7 +3339,9 @@ void DRW_batch_cache_free_old(Object *ob, int ctime)
     case OB_CURVES:
       DRW_curves_batch_cache_free_old((Curves *)ob->data, ctime);
       break;
-    /* TODO: all cases. */
+    case OB_POINTCLOUD:
+      DRW_pointcloud_batch_cache_free_old((PointCloud *)ob->data, ctime);
+      break;
     default:
       break;
   }
diff --git a/source/blender/draw/intern/draw_cache.h b/source/blender/draw/intern/draw_cache.h
index 058f28f094d..772cf3b12a5 100644
--- a/source/blender/draw/intern/draw_cache.h
+++ b/source/blender/draw/intern/draw_cache.h
@@ -13,6 +13,7 @@ extern "C" {
 
 struct GPUBatch;
 struct GPUMaterial;
+struct GPUVertBuf;
 struct ModifierData;
 struct Object;
 struct PTCacheEdit;
@@ -224,12 +225,6 @@ struct GPUBatch **DRW_cache_curves_surface_shaded_get(struct Object *ob,
 struct GPUBatch *DRW_cache_curves_face_wireframe_get(struct Object *ob);
 struct GPUBatch *DRW_cache_curves_edge_detection_get(struct Object *ob, bool *r_is_manifold);
 
-/* PointCloud */
-
-struct GPUBatch *DRW_cache_pointcloud_get_dots(struct Object *obj);
-struct GPUBatch *DRW_cache_pointcloud_surface_get(struct Object *obj);
-struct GPUBatch *DRW_cache_pointcloud_surface_viewer_attribute_get(struct Object *obj);
-
 /* Volume */
 
 typedef struct DRWVolumeGrid {
@@ -257,14 +252,17 @@ struct GPUBatch *DRW_cache_volume_selection_surface_get(struct Object *ob);
 
 /* GPencil */
 
-struct GPUBatch *DRW_cache_gpencil_strokes_get(struct Object *ob, int cfra);
-struct GPUBatch *DRW_cache_gpencil_fills_get(struct Object *ob, int cfra);
+struct GPUBatch *DRW_cache_gpencil_get(struct Object *ob, int cfra);
+struct GPUVertBuf *DRW_cache_gpencil_position_buffer_get(struct Object *ob, int cfra);
+struct GPUVertBuf *DRW_cache_gpencil_color_buffer_get(struct Object *ob, int cfra);
 struct GPUBatch *DRW_cache_gpencil_edit_lines_get(struct Object *ob, int cfra);
 struct GPUBatch *DRW_cache_gpencil_edit_points_get(struct Object *ob, int cfra);
 struct GPUBatch *DRW_cache_gpencil_edit_curve_handles_get(struct Object *ob, int cfra);
 struct GPUBatch *DRW_cache_gpencil_edit_curve_points_get(struct Object *ob, int cfra);
-struct GPUBatch *DRW_cache_gpencil_sbuffer_stroke_get(struct Object *ob);
-struct GPUBatch *DRW_cache_gpencil_sbuffer_fill_get(struct Object *ob);
+struct GPUBatch *DRW_cache_gpencil_sbuffer_get(struct Object *ob, bool show_fill);
+struct GPUVertBuf *DRW_cache_gpencil_sbuffer_position_buffer_get(struct Object *ob,
+                                                                 bool show_fill);
+struct GPUVertBuf *DRW_cache_gpencil_sbuffer_color_buffer_get(struct Object *ob, bool show_fill);
 int DRW_gpencil_material_count_get(struct bGPdata *gpd);
 
 struct GPUBatch *DRW_cache_gpencil_face_wireframe_get(struct Object *ob);
diff --git a/source/blender/draw/intern/draw_cache_impl.h b/source/blender/draw/intern/draw_cache_impl.h
index c6e8539121f..5aa2203ca68 100644
--- a/source/blender/draw/intern/draw_cache_impl.h
+++ b/source/blender/draw/intern/draw_cache_impl.h
@@ -80,6 +80,7 @@ void DRW_batch_cache_free_old(struct Object *ob, int ctime);
  */
 void DRW_mesh_batch_cache_free_old(struct Mesh *me, int ctime);
 void DRW_curves_batch_cache_free_old(struct Curves *curves, int ctime);
+void DRW_pointcloud_batch_cache_free_old(struct PointCloud *pointcloud, int ctime);
 
 /** \} */
 
@@ -131,7 +132,7 @@ int DRW_curves_material_count_get(struct Curves *curves);
  * \return A pointer to location where the texture will be
  * stored, which will be filled by #DRW_shgroup_curves_create_sub.
  */
-struct GPUTexture **DRW_curves_texture_for_evaluated_attribute(struct Curves *curves,
+struct GPUVertBuf **DRW_curves_texture_for_evaluated_attribute(struct Curves *curves,
                                                                const char *name,
                                                                bool *r_is_point_domain);
 
@@ -147,12 +148,11 @@ void DRW_curves_batch_cache_create_requested(struct Object *ob);
 
 int DRW_pointcloud_material_count_get(struct PointCloud *pointcloud);
 
+struct GPUVertBuf **DRW_pointcloud_evaluated_attribute(struct PointCloud *pointcloud,
+                                                       const char *name);
 struct GPUBatch *DRW_pointcloud_batch_cache_get_dots(struct Object *ob);
-struct GPUBatch *DRW_pointcloud_batch_cache_get_surface(struct Object *ob);
-struct GPUBatch *DRW_pointcloud_batch_cache_get_surface_viewer_attribute(struct Object *ob);
-struct GPUBatch **DRW_cache_pointcloud_surface_shaded_get(struct Object *ob,
-                                                          struct GPUMaterial **gpumat_array,
-                                                          uint gpumat_array_len);
+
+void DRW_pointcloud_batch_cache_create_requested(struct Object *ob);
 
 /** \} */
 
diff --git a/source/blender/draw/intern/draw_cache_impl_curves.cc b/source/blender/draw/intern/draw_cache_impl_curves.cc
index 85dd9ca8695..c36b90ec32e 100644
--- a/source/blender/draw/intern/draw_cache_impl_curves.cc
+++ b/source/blender/draw/intern/draw_cache_impl_curves.cc
@@ -88,13 +88,11 @@ static void curves_discard_attributes(CurvesEvalCache &curves_cache)
 {
   for (const int i : IndexRange(GPU_MAX_ATTR)) {
     GPU_VERTBUF_DISCARD_SAFE(curves_cache.proc_attributes_buf[i]);
-    DRW_TEXTURE_FREE_SAFE(curves_cache.proc_attributes_tex[i]);
   }
 
   for (const int i : IndexRange(MAX_HAIR_SUBDIV)) {
     for (const int j : IndexRange(GPU_MAX_ATTR)) {
       GPU_VERTBUF_DISCARD_SAFE(curves_cache.final[i].attributes_buf[j]);
-      DRW_TEXTURE_FREE_SAFE(curves_cache.final[i].attributes_tex[j]);
     }
 
     drw_attributes_clear(&curves_cache.final[i].attr_used);
@@ -107,17 +105,12 @@ static void curves_batch_cache_clear_data(CurvesEvalCache &curves_cache)
   GPU_VERTBUF_DISCARD_SAFE(curves_cache.proc_point_buf);
   GPU_VERTBUF_DISCARD_SAFE(curves_cache.proc_length_buf);
   GPU_VERTBUF_DISCARD_SAFE(curves_cache.data_edit_points);
-  DRW_TEXTURE_FREE_SAFE(curves_cache.point_tex);
-  DRW_TEXTURE_FREE_SAFE(curves_cache.length_tex);
 
   GPU_VERTBUF_DISCARD_SAFE(curves_cache.proc_strand_buf);
   GPU_VERTBUF_DISCARD_SAFE(curves_cache.proc_strand_seg_buf);
-  DRW_TEXTURE_FREE_SAFE(curves_cache.strand_tex);
-  DRW_TEXTURE_FREE_SAFE(curves_cache.strand_seg_tex);
 
   for (const int i : IndexRange(MAX_HAIR_SUBDIV)) {
     GPU_VERTBUF_DISCARD_SAFE(curves_cache.final[i].proc_buf);
-    DRW_TEXTURE_FREE_SAFE(curves_cache.final[i].proc_tex);
     for (const int j : IndexRange(MAX_THICKRES)) {
       GPU_BATCH_DISCARD_SAFE(curves_cache.final[i].proc_hairs[j]);
     }
@@ -286,21 +279,6 @@ static void curves_batch_cache_ensure_procedural_pos(const Curves &curves,
                                 cache.strands_len};
 
     curves_batch_cache_fill_segments_proc_pos(curves, posTime_data, hairLength_data);
-
-    /* Create vbo immediately to bind to texture buffer. */
-    GPU_vertbuf_use(cache.proc_point_buf);
-    cache.point_tex = GPU_texture_create_from_vertbuf("hair_point", cache.proc_point_buf);
-  }
-
-  if (gpu_material && cache.proc_length_buf != nullptr && cache.length_tex) {
-    ListBase gpu_attrs = GPU_material_attributes(gpu_material);
-    LISTBASE_FOREACH (GPUMaterialAttribute *, attr, &gpu_attrs) {
-      if (attr->type == CD_HAIRLENGTH) {
-        GPU_vertbuf_use(cache.proc_length_buf);
-        cache.length_tex = GPU_texture_create_from_vertbuf("hair_length", cache.proc_length_buf);
-        break;
-      }
-    }
   }
 }
 
@@ -363,12 +341,6 @@ static void curves_batch_cache_ensure_procedural_final_attr(CurvesEvalCache &cac
   /* Those are points! not line segments. */
   GPU_vertbuf_data_alloc(final_cache.attributes_buf[index],
                          final_cache.strands_res * cache.strands_len);
-
-  /* Create vbo immediately to bind to texture buffer. */
-  GPU_vertbuf_use(final_cache.attributes_buf[index]);
-
-  final_cache.attributes_tex[index] = GPU_texture_create_from_vertbuf(
-      name, final_cache.attributes_buf[index]);
 }
 
 static void curves_batch_ensure_attribute(const Curves &curves,
@@ -379,7 +351,6 @@ static void curves_batch_ensure_attribute(const Curves &curves,
 {
   using namespace blender;
   GPU_VERTBUF_DISCARD_SAFE(cache.proc_attributes_buf[index]);
-  DRW_TEXTURE_FREE_SAFE(cache.proc_attributes_tex[index]);
 
   char sampler_name[32];
   drw_curves_get_attribute_sampler_name(request.attribute_name, sampler_name);
@@ -414,13 +385,9 @@ static void curves_batch_ensure_attribute(const Curves &curves,
 
   attribute.materialize(vbo_span);
 
-  GPU_vertbuf_use(attr_vbo);
-  cache.proc_attributes_tex[index] = GPU_texture_create_from_vertbuf(sampler_name, attr_vbo);
-
   /* Existing final data may have been for a different attribute (with a different name or domain),
    * free the data. */
   GPU_VERTBUF_DISCARD_SAFE(cache.final[subdiv].attributes_buf[index]);
-  DRW_TEXTURE_FREE_SAFE(cache.final[subdiv].attributes_tex[index]);
 
   /* Ensure final data for points. */
   if (request.domain == ATTR_DOMAIN_POINT) {
@@ -466,19 +433,11 @@ static void curves_batch_cache_ensure_procedural_strand_data(Curves &curves,
   GPU_vertbuf_attr_get_raw_data(cache.proc_strand_seg_buf, seg_id, &seg_step);
 
   curves_batch_cache_fill_strands_data(curves, data_step, seg_step);
-
-  /* Create vbo immediately to bind to texture buffer. */
-  GPU_vertbuf_use(cache.proc_strand_buf);
-  cache.strand_tex = GPU_texture_create_from_vertbuf("curves_strand", cache.proc_strand_buf);
-
-  GPU_vertbuf_use(cache.proc_strand_seg_buf);
-  cache.strand_seg_tex = GPU_texture_create_from_vertbuf("curves_strand_seg",
-                                                         cache.proc_strand_seg_buf);
 }
 
 static void curves_batch_cache_ensure_procedural_final_points(CurvesEvalCache &cache, int subdiv)
 {
-  /* Same format as point_tex. */
+  /* Same format as proc_point_buf. */
   GPUVertFormat format = {0};
   GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
 
@@ -489,12 +448,6 @@ static void curves_batch_cache_ensure_procedural_final_points(CurvesEvalCache &c
   /* Those are points! not line segments. */
   GPU_vertbuf_data_alloc(cache.final[subdiv].proc_buf,
                          cache.final[subdiv].strands_res * cache.strands_len);
-
-  /* Create vbo immediately to bind to texture buffer. */
-  GPU_vertbuf_use(cache.final[subdiv].proc_buf);
-
-  cache.final[subdiv].proc_tex = GPU_texture_create_from_vertbuf("hair_proc",
-                                                                 cache.final[subdiv].proc_buf);
 }
 
 static void curves_batch_cache_fill_segments_indices(const Curves &curves,
@@ -583,7 +536,6 @@ static bool curves_ensure_attributes(const Curves &curves,
       /* Some new attributes have been added, free all and start over. */
       for (const int i : IndexRange(GPU_MAX_ATTR)) {
         GPU_VERTBUF_DISCARD_SAFE(cache.curves_cache.proc_attributes_buf[i]);
-        DRW_TEXTURE_FREE_SAFE(cache.curves_cache.proc_attributes_tex[i]);
       }
       drw_attributes_merge(&final_cache.attr_used, &attrs_needed, cache.render_mutex);
     }
@@ -631,7 +583,7 @@ bool curves_ensure_procedural_data(Curves *curves,
   }
 
   /* Refreshed if active layer or custom data changes. */
-  if ((*r_hair_cache)->strand_tex == nullptr) {
+  if ((*r_hair_cache)->proc_strand_buf == nullptr) {
     curves_batch_cache_ensure_procedural_strand_data(*curves, cache.curves_cache);
   }
 
@@ -689,7 +641,7 @@ static void request_attribute(Curves &curves, const char *name)
   drw_attributes_merge(&final_cache.attr_used, &attributes, cache.render_mutex);
 }
 
-GPUTexture **DRW_curves_texture_for_evaluated_attribute(Curves *curves,
+GPUVertBuf **DRW_curves_texture_for_evaluated_attribute(Curves *curves,
                                                         const char *name,
                                                         bool *r_is_point_domain)
 {
@@ -715,10 +667,10 @@ GPUTexture **DRW_curves_texture_for_evaluated_attribute(Curves *curves,
   switch (final_cache.attr_used.requests[request_i].domain) {
     case ATTR_DOMAIN_POINT:
       *r_is_point_domain = true;
-      return &final_cache.attributes_tex[request_i];
+      return &final_cache.attributes_buf[request_i];
     case ATTR_DOMAIN_CURVE:
       *r_is_point_domain = false;
-      return &cache.curves_cache.proc_attributes_tex[request_i];
+      return &cache.curves_cache.proc_attributes_buf[request_i];
     default:
       BLI_assert_unreachable();
       return nullptr;
diff --git a/source/blender/draw/intern/draw_cache_impl_gpencil.c b/source/blender/draw/intern/draw_cache_impl_gpencil.cc
index 7a43c7ee2e6..3d6d2631186 100644
--- a/source/blender/draw/intern/draw_cache_impl_gpencil.c
+++ b/source/blender/draw/intern/draw_cache_impl_gpencil.cc
@@ -23,12 +23,14 @@
 #include "DEG_depsgraph_query.h"
 
 #include "BLI_hash.h"
+#include "BLI_math_vec_types.hh"
 #include "BLI_polyfill_2d.h"
 
 #include "draw_cache.h"
 #include "draw_cache_impl.h"
 
 #include "../engines/gpencil/gpencil_defines.h"
+#include "../engines/gpencil/gpencil_shader_shared.h"
 
 #define BEZIER_HANDLE (1 << 3)
 #define COLOR_SHIFT 5
@@ -41,11 +43,13 @@ typedef struct GpencilBatchCache {
   /** Instancing Data */
   GPUVertBuf *vbo;
   GPUVertBuf *vbo_col;
-  /** Fill Topology */
+  /** Indices in material order, then stroke order with fill first.
+   * Strokes can be individually rendered using `gps->runtime.stroke_start` and
+   * `gps->runtime.fill_start`. */
   GPUIndexBuf *ibo;
-  /** Instancing Batches */
-  GPUBatch *stroke_batch;
-  GPUBatch *fill_batch;
+  /** Batches */
+  GPUBatch *geom_batch;
+  /** Stroke lines only */
   GPUBatch *lines_batch;
 
   /** Edit Mode */
@@ -97,7 +101,8 @@ static GpencilBatchCache *gpencil_batch_cache_init(Object *ob, int cfra)
   GpencilBatchCache *cache = gpd->runtime.gpencil_cache;
 
   if (!cache) {
-    cache = gpd->runtime.gpencil_cache = MEM_callocN(sizeof(*cache), __func__);
+    cache = gpd->runtime.gpencil_cache = (GpencilBatchCache *)MEM_callocN(sizeof(*cache),
+                                                                          __func__);
   }
   else {
     memset(cache, 0, sizeof(*cache));
@@ -116,8 +121,7 @@ static void gpencil_batch_cache_clear(GpencilBatchCache *cache)
   }
 
   GPU_BATCH_DISCARD_SAFE(cache->lines_batch);
-  GPU_BATCH_DISCARD_SAFE(cache->fill_batch);
-  GPU_BATCH_DISCARD_SAFE(cache->stroke_batch);
+  GPU_BATCH_DISCARD_SAFE(cache->geom_batch);
   GPU_VERTBUF_DISCARD_SAFE(cache->vbo);
   GPU_VERTBUF_DISCARD_SAFE(cache->vbo_col);
   GPU_INDEXBUF_DISCARD_SAFE(cache->ibo);
@@ -172,9 +176,10 @@ void DRW_gpencil_batch_cache_free(bGPdata *gpd)
 
 /* MUST match the format below. */
 typedef struct gpStrokeVert {
-  int32_t mat, stroke_id, point_id, packed_asp_hard_rot;
   /** Position and thickness packed in the same attribute. */
   float pos[3], thickness;
+  /** Material Index, Stroke Index, Point Index, Packed aspect + hardness + rotation. */
+  int32_t mat, stroke_id, point_id, packed_asp_hard_rot;
   /** UV and strength packed in the same attribute. */
   float uv_fill[2], u_stroke, strength;
 } gpStrokeVert;
@@ -183,12 +188,9 @@ static GPUVertFormat *gpencil_stroke_format(void)
 {
   static GPUVertFormat format = {0};
   if (format.attr_len == 0) {
-    GPU_vertformat_attr_add(&format, "ma", GPU_COMP_I32, 4, GPU_FETCH_INT);
     GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+    GPU_vertformat_attr_add(&format, "ma", GPU_COMP_I32, 4, GPU_FETCH_INT);
     GPU_vertformat_attr_add(&format, "uv", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
-    /* IMPORTANT: This means having only 4 attributes
-     * to fit into GPU module limit of 16 attributes. */
-    GPU_vertformat_multiload_enable(&format, 4);
   }
   return &format;
 }
@@ -238,9 +240,6 @@ static GPUVertFormat *gpencil_color_format(void)
   if (format.attr_len == 0) {
     GPU_vertformat_attr_add(&format, "col", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
     GPU_vertformat_attr_add(&format, "fcol", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
-    /* IMPORTANT: This means having only 4 attributes
-     * to fit into GPU module limit of 16 attributes. */
-    GPU_vertformat_multiload_enable(&format, 4);
   }
   return &format;
 }
@@ -295,7 +294,8 @@ BLI_INLINE int32_t pack_rotation_aspect_hardness(float rot, float asp, float har
   return packed;
 }
 
-static void gpencil_buffer_add_point(gpStrokeVert *verts,
+static void gpencil_buffer_add_point(GPUIndexBufBuilder *ibo,
+                                     gpStrokeVert *verts,
                                      gpColorVert *cols,
                                      const bGPDstroke *gps,
                                      const bGPDspoint *pt,
@@ -319,7 +319,7 @@ static void gpencil_buffer_add_point(gpStrokeVert *verts,
 
   vert->strength = (round_cap0) ? pt->strength : -pt->strength;
   vert->u_stroke = pt->uv_fac;
-  vert->stroke_id = gps->runtime.stroke_start;
+  vert->stroke_id = gps->runtime.vertex_start;
   vert->point_id = v;
   vert->thickness = max_ff(0.0f, gps->thickness * pt->pressure) * (round_cap1 ? 1.0f : -1.0f);
   /* Tag endpoint material to -1 so they get discarded by vertex shader. */
@@ -329,27 +329,36 @@ static void gpencil_buffer_add_point(gpStrokeVert *verts,
 
   vert->packed_asp_hard_rot = pack_rotation_aspect_hardness(
       pt->uv_rot, aspect_ratio, gps->hardeness);
+
+  if (!is_endpoint) {
+    /* Issue a Quad per point. */
+    /* The attribute loading uses a different shader and will undo this bit packing. */
+    int v_mat = (v << GP_VERTEX_ID_SHIFT) | GP_IS_STROKE_VERTEX_BIT;
+    GPU_indexbuf_add_tri_verts(ibo, v_mat + 0, v_mat + 1, v_mat + 2);
+    GPU_indexbuf_add_tri_verts(ibo, v_mat + 2, v_mat + 1, v_mat + 3);
+  }
 }
 
-static void gpencil_buffer_add_stroke(gpStrokeVert *verts,
+static void gpencil_buffer_add_stroke(GPUIndexBufBuilder *ibo,
+                                      gpStrokeVert *verts,
                                       gpColorVert *cols,
                                       const bGPDstroke *gps)
 {
   const bGPDspoint *pts = gps->points;
   int pts_len = gps->totpoints;
   bool is_cyclic = gpencil_stroke_is_cyclic(gps);
-  int v = gps->runtime.stroke_start;
+  int v = gps->runtime.vertex_start;
 
   /* First point for adjacency (not drawn). */
   int adj_idx = (is_cyclic) ? (pts_len - 1) : min_ii(pts_len - 1, 1);
-  gpencil_buffer_add_point(verts, cols, gps, &pts[adj_idx], v++, true);
+  gpencil_buffer_add_point(ibo, verts, cols, gps, &pts[adj_idx], v++, true);
 
   for (int i = 0; i < pts_len; i++) {
-    gpencil_buffer_add_point(verts, cols, gps, &pts[i], v++, false);
+    gpencil_buffer_add_point(ibo, verts, cols, gps, &pts[i], v++, false);
   }
   /* Draw line to first point to complete the loop for cyclic strokes. */
   if (is_cyclic) {
-    gpencil_buffer_add_point(verts, cols, gps, &pts[0], v, false);
+    gpencil_buffer_add_point(ibo, verts, cols, gps, &pts[0], v, false);
     /* UV factor needs to be adjusted for the last point to not be equal to the UV factor of the
      * first point. It should be the factor of the last point plus the distance from the last point
      * to the first.
@@ -360,16 +369,20 @@ static void gpencil_buffer_add_stroke(gpStrokeVert *verts,
   }
   /* Last adjacency point (not drawn). */
   adj_idx = (is_cyclic) ? 1 : max_ii(0, pts_len - 2);
-  gpencil_buffer_add_point(verts, cols, gps, &pts[adj_idx], v++, true);
+  gpencil_buffer_add_point(ibo, verts, cols, gps, &pts[adj_idx], v++, true);
 }
 
 static void gpencil_buffer_add_fill(GPUIndexBufBuilder *ibo, const bGPDstroke *gps)
 {
   int tri_len = gps->tot_triangles;
-  int v = gps->runtime.stroke_start;
+  int v = gps->runtime.vertex_start + 1;
   for (int i = 0; i < tri_len; i++) {
     uint *tri = gps->triangles[i].verts;
-    GPU_indexbuf_add_tri_verts(ibo, v + tri[0], v + tri[1], v + tri[2]);
+    /* The attribute loading uses a different shader and will undo this bit packing. */
+    GPU_indexbuf_add_tri_verts(ibo,
+                               (v + tri[0]) << GP_VERTEX_ID_SHIFT,
+                               (v + tri[1]) << GP_VERTEX_ID_SHIFT,
+                               (v + tri[2]) << GP_VERTEX_ID_SHIFT);
   }
 }
 
@@ -379,10 +392,10 @@ static void gpencil_stroke_iter_cb(bGPDlayer *UNUSED(gpl),
                                    void *thunk)
 {
   gpIterData *iter = (gpIterData *)thunk;
-  gpencil_buffer_add_stroke(iter->verts, iter->cols, gps);
   if (gps->tot_triangles > 0) {
     gpencil_buffer_add_fill(&iter->ibo, gps);
   }
+  gpencil_buffer_add_stroke(&iter->ibo, iter->verts, iter->cols, gps);
 }
 
 static void gpencil_object_verts_count_cb(bGPDlayer *UNUSED(gpl),
@@ -391,12 +404,15 @@ static void gpencil_object_verts_count_cb(bGPDlayer *UNUSED(gpl),
                                           void *thunk)
 {
   gpIterData *iter = (gpIterData *)thunk;
-
-  /* Store first index offset */
-  gps->runtime.stroke_start = iter->vert_len;
+  int stroke_vert_len = gps->totpoints + gpencil_stroke_is_cyclic(gps);
+  gps->runtime.vertex_start = iter->vert_len;
+  /* Add additional padding at the start and end. */
+  iter->vert_len += 1 + stroke_vert_len + 1;
+  /* Store first index offset. */
   gps->runtime.fill_start = iter->tri_len;
-  iter->vert_len += gps->totpoints + 2 + gpencil_stroke_is_cyclic(gps);
   iter->tri_len += gps->tot_triangles;
+  gps->runtime.stroke_start = iter->tri_len;
+  iter->tri_len += stroke_vert_len * 2;
 }
 
 static void gpencil_batches_ensure(Object *ob, GpencilBatchCache *cache, int cfra)
@@ -406,7 +422,7 @@ static void gpencil_batches_ensure(Object *ob, GpencilBatchCache *cache, int cfr
   if (cache->vbo == NULL) {
     /* Should be discarded together. */
     BLI_assert(cache->vbo == NULL && cache->ibo == NULL);
-    BLI_assert(cache->fill_batch == NULL && cache->stroke_batch == NULL);
+    BLI_assert(cache->geom_batch == NULL);
     /* TODO/PERF: Could be changed to only do it if needed.
      * For now it's simpler to assume we always need it
      * since multiple viewport could or could not need it.
@@ -415,29 +431,29 @@ static void gpencil_batches_ensure(Object *ob, GpencilBatchCache *cache, int cfr
     bool do_onion = true;
 
     /* First count how many vertices and triangles are needed for the whole object. */
-    gpIterData iter = {
-        .gpd = gpd,
-        .verts = NULL,
-        .ibo = {0},
-        .vert_len = 1, /* Start at 1 for the gl_InstanceID trick to work (see vert shader). */
-        .tri_len = 0,
-        .curve_len = 0,
-    };
+    gpIterData iter = {};
+    iter.gpd = gpd;
+    iter.verts = NULL;
+    iter.ibo = {0};
+    iter.vert_len = 0;
+    iter.tri_len = 0;
+    iter.curve_len = 0;
     BKE_gpencil_visible_stroke_advanced_iter(
         NULL, ob, NULL, gpencil_object_verts_count_cb, &iter, do_onion, cfra);
 
+    GPUUsageType vbo_flag = GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY;
     /* Create VBOs. */
     GPUVertFormat *format = gpencil_stroke_format();
     GPUVertFormat *format_col = gpencil_color_format();
-    cache->vbo = GPU_vertbuf_create_with_format(format);
-    cache->vbo_col = GPU_vertbuf_create_with_format(format_col);
+    cache->vbo = GPU_vertbuf_create_with_format_ex(format, vbo_flag);
+    cache->vbo_col = GPU_vertbuf_create_with_format_ex(format_col, vbo_flag);
     /* Add extra space at the end of the buffer because of quad load. */
     GPU_vertbuf_data_alloc(cache->vbo, iter.vert_len + 2);
     GPU_vertbuf_data_alloc(cache->vbo_col, iter.vert_len + 2);
     iter.verts = (gpStrokeVert *)GPU_vertbuf_get_data(cache->vbo);
     iter.cols = (gpColorVert *)GPU_vertbuf_get_data(cache->vbo_col);
     /* Create IBO. */
-    GPU_indexbuf_init(&iter.ibo, GPU_PRIM_TRIS, iter.tri_len, iter.vert_len);
+    GPU_indexbuf_init(&iter.ibo, GPU_PRIM_TRIS, iter.tri_len, 0xFFFFFFFFu);
 
     /* Fill buffers with data. */
     BKE_gpencil_visible_stroke_advanced_iter(
@@ -452,33 +468,39 @@ static void gpencil_batches_ensure(Object *ob, GpencilBatchCache *cache, int cfr
 
     /* Finish the IBO. */
     cache->ibo = GPU_indexbuf_build(&iter.ibo);
-
     /* Create the batches */
-    cache->fill_batch = GPU_batch_create(GPU_PRIM_TRIS, cache->vbo, cache->ibo);
-    GPU_batch_vertbuf_add(cache->fill_batch, cache->vbo_col);
-    cache->stroke_batch = GPU_batch_create(GPU_PRIM_TRI_STRIP, gpencil_dummy_buffer_get(), NULL);
-    GPU_batch_instbuf_add_ex(cache->stroke_batch, cache->vbo, 0);
-    GPU_batch_instbuf_add_ex(cache->stroke_batch, cache->vbo_col, 0);
+    cache->geom_batch = GPU_batch_create(GPU_PRIM_TRIS, cache->vbo, cache->ibo);
+    /* Allow creation of buffer texture. */
+    GPU_vertbuf_use(cache->vbo);
+    GPU_vertbuf_use(cache->vbo_col);
 
     gpd->flag &= ~GP_DATA_CACHE_IS_DIRTY;
     cache->is_dirty = false;
   }
 }
 
-GPUBatch *DRW_cache_gpencil_strokes_get(Object *ob, int cfra)
+GPUBatch *DRW_cache_gpencil_get(Object *ob, int cfra)
+{
+  GpencilBatchCache *cache = gpencil_batch_cache_get(ob, cfra);
+  gpencil_batches_ensure(ob, cache, cfra);
+
+  return cache->geom_batch;
+}
+
+GPUVertBuf *DRW_cache_gpencil_position_buffer_get(Object *ob, int cfra)
 {
   GpencilBatchCache *cache = gpencil_batch_cache_get(ob, cfra);
   gpencil_batches_ensure(ob, cache, cfra);
 
-  return cache->stroke_batch;
+  return cache->vbo;
 }
 
-GPUBatch *DRW_cache_gpencil_fills_get(Object *ob, int cfra)
+GPUVertBuf *DRW_cache_gpencil_color_buffer_get(Object *ob, int cfra)
 {
   GpencilBatchCache *cache = gpencil_batch_cache_get(ob, cfra);
   gpencil_batches_ensure(ob, cache, cfra);
 
-  return cache->fill_batch;
+  return cache->vbo_col;
 }
 
 static void gpencil_lines_indices_cb(bGPDlayer *UNUSED(gpl),
@@ -489,7 +511,7 @@ static void gpencil_lines_indices_cb(bGPDlayer *UNUSED(gpl),
   gpIterData *iter = (gpIterData *)thunk;
   int pts_len = gps->totpoints + gpencil_stroke_is_cyclic(gps);
 
-  int start = gps->runtime.stroke_start + 1;
+  int start = gps->runtime.vertex_start + 1;
   int end = start + pts_len;
   for (int i = start; i < end; i++) {
     GPU_indexbuf_add_generic_vert(&iter->ibo, i);
@@ -508,10 +530,9 @@ GPUBatch *DRW_cache_gpencil_face_wireframe_get(Object *ob)
   if (cache->lines_batch == NULL) {
     GPUVertBuf *vbo = cache->vbo;
 
-    gpIterData iter = {
-        .gpd = ob->data,
-        .ibo = {0},
-    };
+    gpIterData iter = {};
+    iter.gpd = (bGPdata *)ob->data;
+    iter.ibo = {0};
 
     uint vert_len = GPU_vertbuf_get_vertex_len(vbo);
     GPU_indexbuf_init_ex(&iter.ibo, GPU_PRIM_LINE_STRIP, vert_len, vert_len);
@@ -540,7 +561,7 @@ bGPDstroke *DRW_cache_gpencil_sbuffer_stroke_data_get(Object *ob)
   Brush *brush = gpd->runtime.sbuffer_brush;
   /* Convert the sbuffer to a bGPDstroke. */
   if (gpd->runtime.sbuffer_gps == NULL) {
-    bGPDstroke *gps = MEM_callocN(sizeof(*gps), "bGPDstroke sbuffer");
+    bGPDstroke *gps = (bGPDstroke *)MEM_callocN(sizeof(*gps), "bGPDstroke sbuffer");
     gps->totpoints = gpd->runtime.sbuffer_used;
     gps->mat_nr = max_ii(0, gpd->runtime.matid - 1);
     gps->flag = gpd->runtime.sbuffer_sflag;
@@ -553,7 +574,9 @@ bGPDstroke *DRW_cache_gpencil_sbuffer_stroke_data_get(Object *ob)
 
     gps->tot_triangles = max_ii(0, gpd->runtime.sbuffer_used - 2);
     gps->caps[0] = gps->caps[1] = GP_STROKE_CAP_ROUND;
-    gps->runtime.stroke_start = 1; /* Add one for the adjacency index. */
+    gps->runtime.vertex_start = 0;
+    gps->runtime.fill_start = 0;
+    gps->runtime.stroke_start = 0;
     copy_v4_v4(gps->vert_color_fill, gpd->runtime.vert_color_fill);
     /* Caps. */
     gps->caps[0] = gps->caps[1] = (short)brush->gpencil_settings->caps_type;
@@ -563,17 +586,17 @@ bGPDstroke *DRW_cache_gpencil_sbuffer_stroke_data_get(Object *ob)
   return gpd->runtime.sbuffer_gps;
 }
 
-static void gpencil_sbuffer_stroke_ensure(bGPdata *gpd, bool do_stroke, bool do_fill)
+static void gpencil_sbuffer_stroke_ensure(bGPdata *gpd, bool do_fill)
 {
-  tGPspoint *tpoints = gpd->runtime.sbuffer;
+  tGPspoint *tpoints = (tGPspoint *)gpd->runtime.sbuffer;
   bGPDstroke *gps = gpd->runtime.sbuffer_gps;
   int vert_len = gpd->runtime.sbuffer_used;
 
   /* DRW_cache_gpencil_sbuffer_stroke_data_get need to have been called previously. */
   BLI_assert(gps != NULL);
 
-  if (do_stroke && (gpd->runtime.sbuffer_stroke_batch == NULL)) {
-    gps->points = MEM_mallocN(vert_len * sizeof(*gps->points), __func__);
+  if (gpd->runtime.sbuffer_batch == NULL) {
+    gps->points = (bGPDspoint *)MEM_mallocN(vert_len * sizeof(*gps->points), __func__);
 
     const DRWContextState *draw_ctx = DRW_context_state_get();
     Scene *scene = draw_ctx->scene;
@@ -596,36 +619,25 @@ static void gpencil_sbuffer_stroke_ensure(bGPdata *gpd, bool do_stroke, bool do_
     /* Calc uv data along the stroke. */
     BKE_gpencil_stroke_uv_update(gps);
 
+    int tri_len = gps->tot_triangles + (gps->totpoints + gpencil_stroke_is_cyclic(gps)) * 2;
+    /* Create IBO. */
+    GPUIndexBufBuilder ibo_builder;
+    GPU_indexbuf_init(&ibo_builder, GPU_PRIM_TRIS, tri_len, 0xFFFFFFFFu);
     /* Create VBO. */
+    GPUUsageType vbo_flag = GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY;
     GPUVertFormat *format = gpencil_stroke_format();
     GPUVertFormat *format_color = gpencil_color_format();
-    GPUVertBuf *vbo = GPU_vertbuf_create_with_format(format);
-    GPUVertBuf *vbo_col = GPU_vertbuf_create_with_format(format_color);
-    /* Add extra space at the end (and start) of the buffer because of quad load and cyclic. */
+    GPUVertBuf *vbo = GPU_vertbuf_create_with_format_ex(format, vbo_flag);
+    GPUVertBuf *vbo_col = GPU_vertbuf_create_with_format_ex(format_color, vbo_flag);
+    /* Add extra space at the start and end of the buffer because of quad load and cyclic. */
     GPU_vertbuf_data_alloc(vbo, 1 + vert_len + 1 + 2);
     GPU_vertbuf_data_alloc(vbo_col, 1 + vert_len + 1 + 2);
     gpStrokeVert *verts = (gpStrokeVert *)GPU_vertbuf_get_data(vbo);
     gpColorVert *cols = (gpColorVert *)GPU_vertbuf_get_data(vbo_col);
 
-    /* Fill buffers with data. */
-    gpencil_buffer_add_stroke(verts, cols, gps);
-
-    GPUBatch *batch = GPU_batch_create(GPU_PRIM_TRI_STRIP, gpencil_dummy_buffer_get(), NULL);
-    GPU_batch_instbuf_add_ex(batch, vbo, true);
-    GPU_batch_instbuf_add_ex(batch, vbo_col, true);
-
-    gpd->runtime.sbuffer_stroke_batch = batch;
-
-    MEM_freeN(gps->points);
-  }
-
-  if (do_fill && (gpd->runtime.sbuffer_fill_batch == NULL)) {
-    /* Create IBO. */
-    GPUIndexBufBuilder ibo_builder;
-    GPU_indexbuf_init(&ibo_builder, GPU_PRIM_TRIS, gps->tot_triangles, vert_len);
-
-    if (gps->tot_triangles > 0) {
-      float(*tpoints2d)[2] = MEM_mallocN(sizeof(*tpoints2d) * vert_len, __func__);
+    /* Create fill indices. */
+    if (do_fill && gps->tot_triangles > 0) {
+      float(*tpoints2d)[2] = (float(*)[2])MEM_mallocN(sizeof(*tpoints2d) * vert_len, __func__);
       /* Triangulate in 2D. */
       for (int i = 0; i < vert_len; i++) {
         copy_v2_v2(tpoints2d[i], tpoints[i].m_xy);
@@ -633,51 +645,72 @@ static void gpencil_sbuffer_stroke_ensure(bGPdata *gpd, bool do_stroke, bool do_
       /* Compute directly inside the IBO data buffer. */
       /* OPTI: This is a bottleneck if the stroke is very long. */
       BLI_polyfill_calc(tpoints2d, (uint)vert_len, 0, (uint(*)[3])ibo_builder.data);
-      /* Add stroke start offset. */
+      /* Offset the indices by one and apply the vertex ID shift. */
       for (int i = 0; i < gps->tot_triangles * 3; i++) {
-        ibo_builder.data[i] += gps->runtime.stroke_start;
+        ibo_builder.data[i] = (ibo_builder.data[i] + 1) << GP_VERTEX_ID_SHIFT;
       }
       /* HACK since we didn't use the builder API to avoid another malloc and copy,
        * we need to set the number of indices manually. */
       ibo_builder.index_len = gps->tot_triangles * 3;
+      ibo_builder.index_min = 0;
+      /* For this case, do not allow index compaction to avoid yet another preprocessing step. */
+      ibo_builder.index_max = 0xFFFFFFFFu - 1u;
+
+      gps->runtime.stroke_start = gps->tot_triangles;
 
       MEM_freeN(tpoints2d);
     }
 
-    GPUIndexBuf *ibo = GPU_indexbuf_build(&ibo_builder);
-    GPUVertBuf *vbo = gpd->runtime.sbuffer_stroke_batch->inst[0];
-    GPUVertBuf *vbo_col = gpd->runtime.sbuffer_stroke_batch->inst[1];
+    /* Fill buffers with data. */
+    gpencil_buffer_add_stroke(&ibo_builder, verts, cols, gps);
 
-    GPUBatch *batch = GPU_batch_create_ex(GPU_PRIM_TRIS, vbo, ibo, GPU_BATCH_OWNS_INDEX);
-    GPU_batch_vertbuf_add(batch, vbo_col);
+    GPUBatch *batch = GPU_batch_create_ex(GPU_PRIM_TRIS,
+                                          gpencil_dummy_buffer_get(),
+                                          GPU_indexbuf_build(&ibo_builder),
+                                          GPU_BATCH_OWNS_INDEX);
 
-    gpd->runtime.sbuffer_fill_batch = batch;
+    gpd->runtime.sbuffer_position_buf = vbo;
+    gpd->runtime.sbuffer_color_buf = vbo_col;
+    gpd->runtime.sbuffer_batch = batch;
+
+    MEM_freeN(gps->points);
   }
 }
 
-GPUBatch *DRW_cache_gpencil_sbuffer_stroke_get(Object *ob)
+GPUBatch *DRW_cache_gpencil_sbuffer_get(Object *ob, bool show_fill)
 {
   bGPdata *gpd = (bGPdata *)ob->data;
-  gpencil_sbuffer_stroke_ensure(gpd, true, false);
+  /* Create the stroke buffer batch and its vertex buffers if they do not exist yet. */
+  gpencil_sbuffer_stroke_ensure(gpd, show_fill);
 
-  return gpd->runtime.sbuffer_stroke_batch;
+  return gpd->runtime.sbuffer_batch;
 }
 
-GPUBatch *DRW_cache_gpencil_sbuffer_fill_get(Object *ob)
+GPUVertBuf *DRW_cache_gpencil_sbuffer_position_buffer_get(Object *ob, bool show_fill)
 {
   bGPdata *gpd = (bGPdata *)ob->data;
   /* Fill batch also need stroke batch to be created (vbo is shared). */
-  gpencil_sbuffer_stroke_ensure(gpd, true, true);
+  gpencil_sbuffer_stroke_ensure(gpd, show_fill);
 
-  return gpd->runtime.sbuffer_fill_batch;
+  return gpd->runtime.sbuffer_position_buf;
+}
+
+GPUVertBuf *DRW_cache_gpencil_sbuffer_color_buffer_get(Object *ob, bool show_fill)
+{
+  bGPdata *gpd = (bGPdata *)ob->data;
+  /* Create the stroke buffer batch and its vertex buffers if they do not exist yet. */
+  gpencil_sbuffer_stroke_ensure(gpd, show_fill);
+
+  return gpd->runtime.sbuffer_color_buf;
 }
 
 void DRW_cache_gpencil_sbuffer_clear(Object *ob)
 {
   bGPdata *gpd = (bGPdata *)ob->data;
   MEM_SAFE_FREE(gpd->runtime.sbuffer_gps);
-  GPU_BATCH_DISCARD_SAFE(gpd->runtime.sbuffer_fill_batch);
-  GPU_BATCH_DISCARD_SAFE(gpd->runtime.sbuffer_stroke_batch);
+  GPU_BATCH_DISCARD_SAFE(gpd->runtime.sbuffer_batch);
+  GPU_VERTBUF_DISCARD_SAFE(gpd->runtime.sbuffer_position_buf);
+  GPU_VERTBUF_DISCARD_SAFE(gpd->runtime.sbuffer_color_buf);
 }
 
 /** \} */
@@ -728,7 +761,7 @@ static void gpencil_edit_stroke_iter_cb(bGPDlayer *gpl,
 {
   gpEditIterData *iter = (gpEditIterData *)thunk;
   const int v_len = gps->totpoints;
-  const int v = gps->runtime.stroke_start + 1;
+  const int v = gps->runtime.vertex_start + 1;
   MDeformVert *dvert = ((iter->vgindex > -1) && gps->dvert) ? gps->dvert : NULL;
   gpEditVert *vert_ptr = iter->verts + v;
 
@@ -743,9 +776,12 @@ static void gpencil_edit_stroke_iter_cb(bGPDlayer *gpl,
     vert_ptr->weight = gpencil_point_edit_weight(dvert, i, iter->vgindex);
     vert_ptr++;
   }
-  /* Draw line to first point to complete the loop for cyclic strokes. */
-  vert_ptr->vflag = sflag | gpencil_point_edit_flag(layer_lock, &gps->points[0], 0, v_len);
-  vert_ptr->weight = gpencil_point_edit_weight(dvert, 0, iter->vgindex);
+
+  if (gpencil_stroke_is_cyclic(gps)) {
+    /* Draw line to first point to complete the loop for cyclic strokes. */
+    vert_ptr->vflag = sflag | gpencil_point_edit_flag(layer_lock, &gps->points[0], 0, v_len);
+    vert_ptr->weight = gpencil_point_edit_weight(dvert, 0, iter->vgindex);
+  }
 }
 
 static void gpencil_edit_curve_stroke_count_cb(bGPDlayer *gpl,
@@ -876,14 +912,13 @@ static void gpencil_edit_batches_ensure(Object *ob, GpencilBatchCache *cache, in
 
   /* Curve Handles and Points for Editing. */
   if (cache->edit_curve_vbo == NULL) {
-    gpIterData iterdata = {
-        .gpd = gpd,
-        .verts = NULL,
-        .ibo = {0},
-        .vert_len = 0,
-        .tri_len = 0,
-        .curve_len = 0,
-    };
+    gpIterData iterdata = {};
+    iterdata.gpd = gpd;
+    iterdata.verts = NULL;
+    iterdata.ibo = {0};
+    iterdata.vert_len = 0;
+    iterdata.tri_len = 0;
+    iterdata.curve_len = 0;
 
     /* Create VBO. */
     GPUVertFormat *format = gpencil_edit_curve_format();
diff --git a/source/blender/draw/intern/draw_cache_impl_mesh.cc b/source/blender/draw/intern/draw_cache_impl_mesh.cc
index d3e071c14e7..031de3e4ef2 100644
--- a/source/blender/draw/intern/draw_cache_impl_mesh.cc
+++ b/source/blender/draw/intern/draw_cache_impl_mesh.cc
@@ -560,16 +560,7 @@ static bool mesh_batch_cache_valid(Object *object, Mesh *me)
     return false;
   }
 
-  if (object->sculpt && object->sculpt->pbvh) {
-    if (cache->pbvh_is_drawing != BKE_pbvh_is_drawing(object->sculpt->pbvh)) {
-      return false;
-    }
-
-    if (BKE_pbvh_is_drawing(object->sculpt->pbvh) &&
-        BKE_pbvh_draw_cache_invalid(object->sculpt->pbvh)) {
-      return false;
-    }
-  }
+  /* Note: PBVH draw data should not be checked here. */
 
   if (cache->is_editmode != (me->edit_mesh != nullptr)) {
     return false;
@@ -1443,7 +1434,6 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph,
       }
     }
 
-
     /* Verify that all surface batches have needed attribute layers.
      */
     /* TODO(fclem): We could be a bit smarter here and only do it per
@@ -1486,7 +1476,8 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph,
     mesh_cd_layers_type_merge(&cache->cd_used_over_time, cache->cd_needed);
     mesh_cd_layers_type_clear(&cache->cd_needed);
 
-    drw_attributes_merge(&cache->attr_used_over_time, &cache->attr_needed, me->runtime->render_mutex);
+    drw_attributes_merge(
+        &cache->attr_used_over_time, &cache->attr_needed, me->runtime->render_mutex);
     drw_attributes_clear(&cache->attr_needed);
   }
 
@@ -1898,7 +1889,7 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph,
                                                       is_editmode,
                                                       is_paint_mode,
                                                       is_mode_active,
-                                                      ob->obmat,
+                                                      ob->object_to_world,
                                                       false,
                                                       true,
                                                       scene,
@@ -1915,7 +1906,7 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph,
                                                       is_editmode,
                                                       is_paint_mode,
                                                       is_mode_active,
-                                                      ob->obmat,
+                                                      ob->object_to_world,
                                                       false,
                                                       false,
                                                       scene,
@@ -1931,7 +1922,7 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph,
                            is_editmode,
                            is_paint_mode,
                            is_mode_active,
-                           ob->obmat,
+                           ob->object_to_world,
                            true,
                            false,
                            do_cage,
@@ -1952,7 +1943,7 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph,
                                                     is_editmode,
                                                     is_paint_mode,
                                                     is_mode_active,
-                                                    ob->obmat,
+                                                    ob->object_to_world,
                                                     true,
                                                     false,
                                                     scene,
diff --git a/source/blender/draw/intern/draw_cache_impl_particles.c b/source/blender/draw/intern/draw_cache_impl_particles.c
index 9c1784b1de2..8facea4333f 100644
--- a/source/blender/draw/intern/draw_cache_impl_particles.c
+++ b/source/blender/draw/intern/draw_cache_impl_particles.c
@@ -173,13 +173,9 @@ static void particle_batch_cache_clear_hair(ParticleHairCache *hair_cache)
   /* TODO: more granular update tagging. */
   GPU_VERTBUF_DISCARD_SAFE(hair_cache->proc_point_buf);
   GPU_VERTBUF_DISCARD_SAFE(hair_cache->proc_length_buf);
-  DRW_TEXTURE_FREE_SAFE(hair_cache->point_tex);
-  DRW_TEXTURE_FREE_SAFE(hair_cache->length_tex);
 
   GPU_VERTBUF_DISCARD_SAFE(hair_cache->proc_strand_buf);
   GPU_VERTBUF_DISCARD_SAFE(hair_cache->proc_strand_seg_buf);
-  DRW_TEXTURE_FREE_SAFE(hair_cache->strand_tex);
-  DRW_TEXTURE_FREE_SAFE(hair_cache->strand_seg_tex);
 
   for (int i = 0; i < MAX_MTFACE; i++) {
     GPU_VERTBUF_DISCARD_SAFE(hair_cache->proc_uv_buf[i]);
@@ -192,7 +188,6 @@ static void particle_batch_cache_clear_hair(ParticleHairCache *hair_cache)
 
   for (int i = 0; i < MAX_HAIR_SUBDIV; i++) {
     GPU_VERTBUF_DISCARD_SAFE(hair_cache->final[i].proc_buf);
-    DRW_TEXTURE_FREE_SAFE(hair_cache->final[i].proc_tex);
     for (int j = 0; j < MAX_THICKRES; j++) {
       GPU_BATCH_DISCARD_SAFE(hair_cache->final[i].proc_hairs[j]);
     }
@@ -810,7 +805,7 @@ static int particle_batch_cache_fill_strands_data(ParticleSystem *psys,
 static void particle_batch_cache_ensure_procedural_final_points(ParticleHairCache *cache,
                                                                 int subdiv)
 {
-  /* Same format as point_tex. */
+  /* Same format as proc_point_buf. */
   GPUVertFormat format = {0};
   GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
 
@@ -823,12 +818,6 @@ static void particle_batch_cache_ensure_procedural_final_points(ParticleHairCach
   /* Those are points! not line segments. */
   GPU_vertbuf_data_alloc(cache->final[subdiv].proc_buf,
                          cache->final[subdiv].strands_res * cache->strands_len);
-
-  /* Create vbo immediately to bind to texture buffer. */
-  GPU_vertbuf_use(cache->final[subdiv].proc_buf);
-
-  cache->final[subdiv].proc_tex = GPU_texture_create_from_vertbuf("part_proc",
-                                                                  cache->final[subdiv].proc_buf);
 }
 
 static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit,
@@ -1034,14 +1023,6 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit
     MEM_freeN(parent_mcol);
   }
 
-  /* Create vbo immediately to bind to texture buffer. */
-  GPU_vertbuf_use(cache->proc_strand_buf);
-  cache->strand_tex = GPU_texture_create_from_vertbuf("part_strand", cache->proc_strand_buf);
-
-  GPU_vertbuf_use(cache->proc_strand_seg_buf);
-  cache->strand_seg_tex = GPU_texture_create_from_vertbuf("part_strand_seg",
-                                                          cache->proc_strand_seg_buf);
-
   for (int i = 0; i < cache->num_uv_layers; i++) {
     GPU_vertbuf_use(cache->proc_uv_buf[i]);
     cache->uv_tex[i] = GPU_texture_create_from_vertbuf("part_uv", cache->proc_uv_buf[i]);
@@ -1107,7 +1088,7 @@ static void particle_batch_cache_ensure_procedural_indices(PTCacheEdit *edit,
 static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit,
                                                        ParticleSystem *psys,
                                                        ParticleHairCache *cache,
-                                                       GPUMaterial *gpu_material)
+                                                       GPUMaterial *UNUSED(gpu_material))
 {
   if (cache->proc_point_buf == NULL) {
     /* initialize vertex format */
@@ -1149,22 +1130,6 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit,
             psys->childcache, child_count, &pos_step, &length_step);
       }
     }
-
-    /* Create vbo immediately to bind to texture buffer. */
-    GPU_vertbuf_use(cache->proc_point_buf);
-    cache->point_tex = GPU_texture_create_from_vertbuf("part_point", cache->proc_point_buf);
-  }
-
-  /* Checking hair length separately, only allocating gpu memory when needed. */
-  if (gpu_material && cache->proc_length_buf != NULL && cache->length_tex == NULL) {
-    ListBase gpu_attrs = GPU_material_attributes(gpu_material);
-    LISTBASE_FOREACH (GPUMaterialAttribute *, attr, &gpu_attrs) {
-      if (attr->type == CD_HAIRLENGTH) {
-        GPU_vertbuf_use(cache->proc_length_buf);
-        cache->length_tex = GPU_texture_create_from_vertbuf("hair_length", cache->proc_length_buf);
-        break;
-      }
-    }
   }
 }
 
@@ -1722,7 +1687,7 @@ bool particles_ensure_procedural_data(Object *object,
 
   /* Refreshed on combing and simulation. */
   if ((*r_hair_cache)->proc_point_buf == NULL ||
-      (gpu_material && (*r_hair_cache)->length_tex == NULL)) {
+      (gpu_material && (*r_hair_cache)->proc_length_buf == NULL)) {
     ensure_seg_pt_count(source.edit, source.psys, &cache->hair);
     particle_batch_cache_ensure_procedural_pos(
         source.edit, source.psys, &cache->hair, gpu_material);
@@ -1730,7 +1695,7 @@ bool particles_ensure_procedural_data(Object *object,
   }
 
   /* Refreshed if active layer or custom data changes. */
-  if ((*r_hair_cache)->strand_tex == NULL) {
+  if ((*r_hair_cache)->proc_strand_buf == NULL) {
     particle_batch_cache_ensure_procedural_strand_data(
         source.edit, source.psys, source.md, &cache->hair);
   }
diff --git a/source/blender/draw/intern/draw_cache_impl_pointcloud.cc b/source/blender/draw/intern/draw_cache_impl_pointcloud.cc
index 98531637e57..d64fc581942 100644
--- a/source/blender/draw/intern/draw_cache_impl_pointcloud.cc
+++ b/source/blender/draw/intern/draw_cache_impl_pointcloud.cc
@@ -23,30 +23,64 @@
 #include "BKE_pointcloud.h"
 
 #include "GPU_batch.h"
+#include "GPU_material.h"
 
-#include "draw_cache_impl.h" /* own include */
+#include "draw_attributes.h"
+#include "draw_cache_impl.h"
+#include "draw_cache_inline.h"
+#include "draw_pointcloud_private.hh" /* own include */
 
-/* ---------------------------------------------------------------------- */
-/* PointCloud GPUBatch Cache */
+using namespace blender;
 
-struct PointCloudBatchCache {
-  GPUVertBuf *pos;  /* Position and radius. */
-  GPUVertBuf *geom; /* Instanced geometry for each point in the cloud (small sphere). */
-  GPUVertBuf *attr_viewer;
-  GPUIndexBuf *geom_indices;
+/** \} */
 
+/* -------------------------------------------------------------------- */
+/** \name GPUBatch cache management
+ * \{ */
+
+struct PointCloudBatchCache {
+  /* Dot primitive types. */
   GPUBatch *dots;
+  /* Triangle primitive types. */
   GPUBatch *surface;
   GPUBatch **surface_per_mat;
-  GPUBatch *surface_viewer_attribute;
 
+  /* Triangle indices to draw the points. */
+  GPUIndexBuf *geom_indices;
+
+  /* Position and radius. */
+  GPUVertBuf *pos_rad;
+  /* Active attribute in 3D view. */
+  GPUVertBuf *attr_viewer;
+  /* Requested attributes */
+  GPUVertBuf *attributes_buf[GPU_MAX_ATTR];
+
+  /** Attributes currently being drawn or about to be drawn. */
+  DRW_Attributes attr_used;
+  /**
+   * Attributes that were used at some point. This is used for garbage collection, to remove
+   * attributes that are not used in shaders anymore due to user edits.
+   */
+  DRW_Attributes attr_used_over_time;
+
+  /**
+   * The last time in seconds that the `attr_used` and `attr_used_over_time` were exactly the same.
+   * If the delta between this time and the current scene time is greater than the timeout set in
+   * user preferences (`U.vbotimeout`) then garbage collection is performed.
+   */
+  int last_attr_matching_time;
   /* settings to determine if cache is invalid */
   bool is_dirty;
 
   int mat_len;
-};
 
-/* GPUBatch cache management. */
+  /**
+   * The draw cache extraction is currently not multi-threaded for multiple objects, but if it was,
+   * some locking would be necessary because multiple objects can use the same object data with
+   * different materials, etc. This is a placeholder to make multi-threading easier in the future.
+   */
+  std::mutex render_mutex;
+};
 
 static PointCloudBatchCache *pointcloud_batch_cache_get(PointCloud &pointcloud)
 {
@@ -71,7 +105,7 @@ static void pointcloud_batch_cache_init(PointCloud &pointcloud)
   PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud);
 
   if (!cache) {
-    cache = MEM_cnew<PointCloudBatchCache>(__func__);
+    cache = MEM_new<PointCloudBatchCache>(__func__);
     pointcloud.batch_cache = cache;
   }
   else {
@@ -100,6 +134,15 @@ void DRW_pointcloud_batch_cache_dirty_tag(PointCloud *pointcloud, int mode)
   }
 }
 
+static void pointcloud_discard_attributes(PointCloudBatchCache &cache)
+{
+  for (const int j : IndexRange(GPU_MAX_ATTR)) {
+    GPU_VERTBUF_DISCARD_SAFE(cache.attributes_buf[j]);
+  }
+
+  drw_attributes_clear(&cache.attr_used);
+}
+
 static void pointcloud_batch_cache_clear(PointCloud &pointcloud)
 {
   PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud);
@@ -109,8 +152,7 @@ static void pointcloud_batch_cache_clear(PointCloud &pointcloud)
 
   GPU_BATCH_DISCARD_SAFE(cache->dots);
   GPU_BATCH_DISCARD_SAFE(cache->surface);
-  GPU_VERTBUF_DISCARD_SAFE(cache->pos);
-  GPU_VERTBUF_DISCARD_SAFE(cache->geom);
+  GPU_VERTBUF_DISCARD_SAFE(cache->pos_rad);
   GPU_VERTBUF_DISCARD_SAFE(cache->attr_viewer);
   GPU_INDEXBUF_DISCARD_SAFE(cache->geom_indices);
 
@@ -119,8 +161,9 @@ static void pointcloud_batch_cache_clear(PointCloud &pointcloud)
       GPU_BATCH_DISCARD_SAFE(cache->surface_per_mat[i]);
     }
   }
-  GPU_BATCH_DISCARD_SAFE(cache->surface_viewer_attribute);
   MEM_SAFE_FREE(cache->surface_per_mat);
+
+  pointcloud_discard_attributes(*cache);
 }
 
 void DRW_pointcloud_batch_cache_validate(PointCloud *pointcloud)
@@ -137,61 +180,35 @@ void DRW_pointcloud_batch_cache_free(PointCloud *pointcloud)
   MEM_SAFE_FREE(pointcloud->batch_cache);
 }
 
-static void pointcloud_batch_cache_ensure_pos(const PointCloud &pointcloud,
-                                              PointCloudBatchCache &cache)
+void DRW_pointcloud_batch_cache_free_old(PointCloud *pointcloud, int ctime)
 {
-  using namespace blender;
-  if (cache.pos != nullptr) {
+  PointCloudBatchCache *cache = pointcloud_batch_cache_get(*pointcloud);
+  if (!cache) {
     return;
   }
 
-  const bke::AttributeAccessor attributes = pointcloud.attributes();
-  const VArraySpan<float3> positions = attributes.lookup<float3>("position", ATTR_DOMAIN_POINT);
-  const VArray<float> radii = attributes.lookup<float>("radius", ATTR_DOMAIN_POINT);
-  /* From the opengl wiki:
-   * Note that size does not have to exactly match the size used by the vertex shader. If the
-   * vertex shader has fewer components than the attribute provides, then the extras are ignored.
-   * If the vertex shader has more components than the array provides, the extras are given
-   * values from the vector (0, 0, 0, 1) for the missing XYZW components. */
-  if (radii) {
-    static GPUVertFormat format = {0};
-    if (format.attr_len == 0) {
-      GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
-    }
-    cache.pos = GPU_vertbuf_create_with_format(&format);
-    GPU_vertbuf_data_alloc(cache.pos, positions.size());
-    const VArraySpan<float> radii_span(radii);
-    MutableSpan<float4> vbo_data{static_cast<float4 *>(GPU_vertbuf_get_data(cache.pos)),
-                                 pointcloud.totpoint};
-    threading::parallel_for(vbo_data.index_range(), 4096, [&](IndexRange range) {
-      for (const int i : range) {
-        vbo_data[i].x = positions[i].x;
-        vbo_data[i].y = positions[i].y;
-        vbo_data[i].z = positions[i].z;
-        /* TODO(fclem): remove multiplication. Here only for keeping the size correct for now. */
-        vbo_data[i].w = radii_span[i] * 100.0f;
-      }
-    });
+  bool do_discard = false;
+
+  if (drw_attributes_overlap(&cache->attr_used_over_time, &cache->attr_used)) {
+    cache->last_attr_matching_time = ctime;
   }
-  else {
-    static GPUVertFormat format = {0};
-    static uint pos;
-    if (format.attr_len == 0) {
-      pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
-    }
-    cache.pos = GPU_vertbuf_create_with_format(&format);
-    GPU_vertbuf_data_alloc(cache.pos, positions.size());
-    GPU_vertbuf_attr_fill(cache.pos, pos, positions.data());
+
+  if (ctime - cache->last_attr_matching_time > U.vbotimeout) {
+    do_discard = true;
+  }
+
+  drw_attributes_clear(&cache->attr_used_over_time);
+
+  if (do_discard) {
+    pointcloud_discard_attributes(*cache);
   }
 }
 
-static const float half_octahedron_normals[5][3] = {
-    {0.0f, 0.0f, 1.0f},
-    {1.0f, 0.0f, 0.0f},
-    {0.0f, 1.0f, 0.0f},
-    {-1.0f, 0.0f, 0.0f},
-    {0.0f, -1.0f, 0.0f},
-};
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name PointCloud extraction
+ * \{ */
 
 static const uint half_octahedron_tris[4][3] = {
     {0, 1, 2},
@@ -200,130 +217,236 @@ static const uint half_octahedron_tris[4][3] = {
     {0, 4, 1},
 };
 
-static void pointcloud_batch_cache_ensure_geom(PointCloudBatchCache &cache)
+static void pointcloud_extract_indices(const PointCloud &pointcloud, PointCloudBatchCache &cache)
 {
-  if (cache.geom != nullptr) {
-    return;
+  /** \note: Avoid modulo by non-power-of-two in shader. */
+  uint32_t vertid_max = pointcloud.totpoint * 32;
+  uint32_t index_len = pointcloud.totpoint * ARRAY_SIZE(half_octahedron_tris);
+
+  GPUIndexBufBuilder builder;
+  GPU_indexbuf_init(&builder, GPU_PRIM_TRIS, index_len, vertid_max);
+
+  for (int p = 0; p < pointcloud.totpoint; p++) {
+    for (int i = 0; i < ARRAY_SIZE(half_octahedron_tris); i++) {
+      GPU_indexbuf_add_tri_verts(&builder,
+                                 half_octahedron_tris[i][0] + p * 32,
+                                 half_octahedron_tris[i][1] + p * 32,
+                                 half_octahedron_tris[i][2] + p * 32);
+    }
   }
 
+  GPU_indexbuf_build_in_place(&builder, cache.geom_indices);
+}
+
+static void pointcloud_extract_position_and_radius(const PointCloud &pointcloud,
+                                                   PointCloudBatchCache &cache)
+{
+  using namespace blender;
+
+  const bke::AttributeAccessor attributes = pointcloud.attributes();
+  const VArraySpan<float3> positions = attributes.lookup<float3>("position", ATTR_DOMAIN_POINT);
+  const VArray<float> radii = attributes.lookup<float>("radius", ATTR_DOMAIN_POINT);
   static GPUVertFormat format = {0};
-  static uint pos;
   if (format.attr_len == 0) {
-    pos = GPU_vertformat_attr_add(&format, "pos_inst", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
-    GPU_vertformat_alias_add(&format, "nor");
+    GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
   }
 
-  cache.geom = GPU_vertbuf_create_with_format(&format);
-  GPU_vertbuf_data_alloc(cache.geom, ARRAY_SIZE(half_octahedron_normals));
-
-  GPU_vertbuf_attr_fill(cache.geom, pos, half_octahedron_normals);
-
-  GPUIndexBufBuilder builder;
-  GPU_indexbuf_init(&builder,
-                    GPU_PRIM_TRIS,
-                    ARRAY_SIZE(half_octahedron_tris),
-                    ARRAY_SIZE(half_octahedron_normals));
+  GPUUsageType usage_flag = GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY;
+  GPU_vertbuf_init_with_format_ex(cache.pos_rad, &format, usage_flag);
 
-  for (int i = 0; i < ARRAY_SIZE(half_octahedron_tris); i++) {
-    GPU_indexbuf_add_tri_verts(&builder, UNPACK3(half_octahedron_tris[i]));
+  GPU_vertbuf_data_alloc(cache.pos_rad, positions.size());
+  MutableSpan<float4> vbo_data{static_cast<float4 *>(GPU_vertbuf_get_data(cache.pos_rad)),
+                               pointcloud.totpoint};
+  if (radii) {
+    const VArraySpan<float> radii_span(radii);
+    threading::parallel_for(vbo_data.index_range(), 4096, [&](IndexRange range) {
+      for (const int i : range) {
+        vbo_data[i].x = positions[i].x;
+        vbo_data[i].y = positions[i].y;
+        vbo_data[i].z = positions[i].z;
+        /* TODO(fclem): remove multiplication. Here only for keeping the size correct for now. */
+        vbo_data[i].w = radii_span[i] * 100.0f;
+      }
+    });
+  }
+  else {
+    threading::parallel_for(vbo_data.index_range(), 4096, [&](IndexRange range) {
+      for (const int i : range) {
+        vbo_data[i].x = positions[i].x;
+        vbo_data[i].y = positions[i].y;
+        vbo_data[i].z = positions[i].z;
+        vbo_data[i].w = 1.0f;
+      }
+    });
   }
-
-  cache.geom_indices = GPU_indexbuf_build(&builder);
 }
 
-static void pointcloud_batch_cache_ensure_attribute_overlay(const PointCloud &pointcloud,
-                                                            PointCloudBatchCache &cache)
+static void pointcloud_extract_attribute(const PointCloud &pointcloud,
+                                         PointCloudBatchCache &cache,
+                                         const DRW_AttributeRequest &request,
+                                         int index)
 {
   using namespace blender;
-  if (cache.attr_viewer != nullptr) {
-    return;
-  }
+
+  GPUVertBuf *&attr_buf = cache.attributes_buf[index];
 
   const bke::AttributeAccessor attributes = pointcloud.attributes();
-  const VArray<ColorGeometry4f> colors = attributes.lookup_or_default<ColorGeometry4f>(
-      ".viewer", ATTR_DOMAIN_POINT, {1.0f, 0.0f, 1.0f, 1.0f});
+
+  /* TODO(@kevindietrich): float4 is used for scalar attributes as the implicit conversion done
+   * by OpenGL to vec4 for a scalar `s` will produce a `vec4(s, 0, 0, 1)`. However, following
+   * the Blender convention, it should be `vec4(s, s, s, 1)`. This could be resolved using a
+   * similar texture state swizzle to map the attribute correctly as for volume attributes, so we
+   * can control the conversion ourselves. */
+  VArray<ColorGeometry4f> attribute = attributes.lookup_or_default<ColorGeometry4f>(
+      request.attribute_name, request.domain, {0.0f, 0.0f, 0.0f, 1.0f});
 
   static GPUVertFormat format = {0};
   if (format.attr_len == 0) {
-    GPU_vertformat_attr_add(&format, "attribute_value", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+    GPU_vertformat_attr_add(&format, "attr", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
   }
-  cache.attr_viewer = GPU_vertbuf_create_with_format(&format);
-  GPU_vertbuf_data_alloc(cache.attr_viewer, pointcloud.totpoint);
+  GPUUsageType usage_flag = GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY;
+  GPU_vertbuf_init_with_format_ex(attr_buf, &format, usage_flag);
+  GPU_vertbuf_data_alloc(attr_buf, pointcloud.totpoint);
+
   MutableSpan<ColorGeometry4f> vbo_data{
-      static_cast<ColorGeometry4f *>(GPU_vertbuf_get_data(cache.attr_viewer)),
-      pointcloud.totpoint};
-  colors.materialize(vbo_data);
+      static_cast<ColorGeometry4f *>(GPU_vertbuf_get_data(attr_buf)), pointcloud.totpoint};
+  attribute.materialize(vbo_data);
 }
 
-GPUBatch *DRW_pointcloud_batch_cache_get_dots(Object *ob)
-{
-  PointCloud &pointcloud = *static_cast<PointCloud *>(ob->data);
-  PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud);
+/** \} */
 
-  if (cache->dots == nullptr) {
-    pointcloud_batch_cache_ensure_pos(pointcloud, *cache);
-    cache->dots = GPU_batch_create(GPU_PRIM_POINTS, cache->pos, nullptr);
-  }
+/* -------------------------------------------------------------------- */
+/** \name Private API
+ * \{ */
 
-  return cache->dots;
+GPUVertBuf *pointcloud_position_and_radius_get(PointCloud *pointcloud)
+{
+  PointCloudBatchCache *cache = pointcloud_batch_cache_get(*pointcloud);
+  DRW_vbo_request(nullptr, &cache->pos_rad);
+  return cache->pos_rad;
 }
 
-GPUBatch *DRW_pointcloud_batch_cache_get_surface(Object *ob)
+GPUBatch **pointcloud_surface_shaded_get(PointCloud *pointcloud,
+                                         GPUMaterial **gpu_materials,
+                                         int mat_len)
 {
-  PointCloud &pointcloud = *static_cast<PointCloud *>(ob->data);
-  PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud);
+  PointCloudBatchCache *cache = pointcloud_batch_cache_get(*pointcloud);
+  DRW_Attributes attrs_needed;
+  drw_attributes_clear(&attrs_needed);
+
+  for (GPUMaterial *gpu_material : Span<GPUMaterial *>(gpu_materials, mat_len)) {
+    ListBase gpu_attrs = GPU_material_attributes(gpu_material);
+    LISTBASE_FOREACH (GPUMaterialAttribute *, gpu_attr, &gpu_attrs) {
+      const char *name = gpu_attr->name;
+
+      int layer_index;
+      eCustomDataType type;
+      eAttrDomain domain = ATTR_DOMAIN_POINT;
+      if (!drw_custom_data_match_attribute(&pointcloud->pdata, name, &layer_index, &type)) {
+        continue;
+      }
 
-  if (cache->surface == nullptr) {
-    pointcloud_batch_cache_ensure_pos(pointcloud, *cache);
-    pointcloud_batch_cache_ensure_geom(*cache);
+      drw_attributes_add_request(&attrs_needed, name, type, layer_index, domain);
+    }
+  }
 
-    cache->surface = GPU_batch_create(GPU_PRIM_TRIS, cache->geom, cache->geom_indices);
-    GPU_batch_instbuf_add_ex(cache->surface, cache->pos, false);
+  if (!drw_attributes_overlap(&cache->attr_used, &attrs_needed)) {
+    /* Some new attributes have been added, free all and start over. */
+    for (const int i : IndexRange(GPU_MAX_ATTR)) {
+      GPU_VERTBUF_DISCARD_SAFE(cache->attributes_buf[i]);
+    }
+    drw_attributes_merge(&cache->attr_used, &attrs_needed, cache->render_mutex);
   }
+  drw_attributes_merge(&cache->attr_used_over_time, &attrs_needed, cache->render_mutex);
+
+  DRW_batch_request(&cache->surface_per_mat[0]);
+  return cache->surface_per_mat;
+}
 
-  return cache->surface;
+GPUBatch *pointcloud_surface_get(PointCloud *pointcloud)
+{
+  PointCloudBatchCache *cache = pointcloud_batch_cache_get(*pointcloud);
+  return DRW_batch_request(&cache->surface);
 }
 
-GPUBatch *DRW_pointcloud_batch_cache_get_surface_viewer_attribute(Object *ob)
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name API
+ * \{ */
+
+GPUBatch *DRW_pointcloud_batch_cache_get_dots(Object *ob)
 {
   PointCloud &pointcloud = *static_cast<PointCloud *>(ob->data);
   PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud);
+  return DRW_batch_request(&cache->dots);
+}
 
-  if (cache->surface_viewer_attribute == nullptr) {
-    pointcloud_batch_cache_ensure_pos(pointcloud, *cache);
-    pointcloud_batch_cache_ensure_geom(*cache);
-    pointcloud_batch_cache_ensure_attribute_overlay(pointcloud, *cache);
+GPUVertBuf **DRW_pointcloud_evaluated_attribute(PointCloud *pointcloud, const char *name)
+{
+  PointCloudBatchCache &cache = *pointcloud_batch_cache_get(*pointcloud);
+
+  int layer_index;
+  eCustomDataType type;
+  eAttrDomain domain = ATTR_DOMAIN_POINT;
+  if (drw_custom_data_match_attribute(&pointcloud->pdata, name, &layer_index, &type)) {
+    DRW_Attributes attributes{};
+    drw_attributes_add_request(&attributes, name, type, layer_index, domain);
+    drw_attributes_merge(&cache.attr_used, &attributes, cache.render_mutex);
+  }
 
-    cache->surface_viewer_attribute = GPU_batch_create(
-        GPU_PRIM_TRIS, cache->geom, cache->geom_indices);
-    GPU_batch_instbuf_add_ex(cache->surface_viewer_attribute, cache->attr_viewer, false);
-    GPU_batch_instbuf_add_ex(cache->surface_viewer_attribute, cache->pos, false);
+  int request_i = -1;
+  for (const int i : IndexRange(cache.attr_used.num_requests)) {
+    if (STREQ(cache.attr_used.requests[i].attribute_name, name)) {
+      request_i = i;
+      break;
+    }
+  }
+  if (request_i == -1) {
+    return nullptr;
   }
+  return &cache.attributes_buf[request_i];
+}
 
-  return cache->surface_viewer_attribute;
+int DRW_pointcloud_material_count_get(PointCloud *pointcloud)
+{
+  return max_ii(1, pointcloud->totcol);
 }
 
-GPUBatch **DRW_cache_pointcloud_surface_shaded_get(Object *ob,
-                                                   struct GPUMaterial ** /*gpumat_array*/,
-                                                   uint gpumat_array_len)
+void DRW_pointcloud_batch_cache_create_requested(Object *ob)
 {
-  PointCloud &pointcloud = *static_cast<PointCloud *>(ob->data);
-  PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud);
-  BLI_assert(cache->mat_len == gpumat_array_len);
-  UNUSED_VARS(gpumat_array_len);
+  PointCloud *pointcloud = static_cast<PointCloud *>(ob->data);
+  PointCloudBatchCache &cache = *pointcloud_batch_cache_get(*pointcloud);
 
-  if (cache->surface_per_mat[0] == nullptr) {
-    pointcloud_batch_cache_ensure_pos(pointcloud, *cache);
-    pointcloud_batch_cache_ensure_geom(*cache);
+  if (DRW_batch_requested(cache.dots, GPU_PRIM_POINTS)) {
+    DRW_vbo_request(cache.dots, &cache.pos_rad);
+  }
 
-    cache->surface_per_mat[0] = GPU_batch_create(GPU_PRIM_TRIS, cache->geom, cache->geom_indices);
-    GPU_batch_instbuf_add_ex(cache->surface_per_mat[0], cache->pos, false);
+  if (DRW_batch_requested(cache.surface, GPU_PRIM_TRIS)) {
+    DRW_ibo_request(cache.surface, &cache.geom_indices);
+    DRW_vbo_request(cache.surface, &cache.pos_rad);
+  }
+  for (int i = 0; i < cache.mat_len; i++) {
+    if (DRW_batch_requested(cache.surface_per_mat[i], GPU_PRIM_TRIS)) {
+      /* TODO(fclem): Per material ranges. */
+      DRW_ibo_request(cache.surface_per_mat[i], &cache.geom_indices);
+    }
   }
+  for (int j = 0; j < cache.attr_used.num_requests; j++) {
+    DRW_vbo_request(nullptr, &cache.attributes_buf[j]);
 
-  return cache->surface_per_mat;
-}
+    if (DRW_vbo_requested(cache.attributes_buf[j])) {
+      pointcloud_extract_attribute(*pointcloud, cache, cache.attr_used.requests[j], j);
+    }
+  }
 
-int DRW_pointcloud_material_count_get(PointCloud *pointcloud)
-{
-  return max_ii(1, pointcloud->totcol);
+  if (DRW_ibo_requested(cache.geom_indices)) {
+    pointcloud_extract_indices(*pointcloud, cache);
+  }
+
+  if (DRW_vbo_requested(cache.pos_rad)) {
+    pointcloud_extract_position_and_radius(*pointcloud, cache);
+  }
 }
+
+/** \} */
diff --git a/source/blender/draw/intern/draw_command.cc b/source/blender/draw/intern/draw_command.cc
index 882eda9b31d..10dd63e05dc 100644
--- a/source/blender/draw/intern/draw_command.cc
+++ b/source/blender/draw/intern/draw_command.cc
@@ -30,6 +30,11 @@ void ShaderBind::execute(RecordingState &state) const
   }
 }
 
+void FramebufferBind::execute() const
+{
+  GPU_framebuffer_bind(framebuffer);
+}
+
 void ResourceBind::execute() const
 {
   if (slot == -1) {
@@ -229,6 +234,11 @@ std::string ShaderBind::serialize() const
   return std::string(".shader_bind(") + GPU_shader_get_name(shader) + ")";
 }
 
+std::string FramebufferBind::serialize() const
+{
+  return std::string(".framebuffer_bind(") + GPU_framebuffer_get_name(framebuffer) + ")";
+}
+
 std::string ResourceBind::serialize() const
 {
   switch (type) {
diff --git a/source/blender/draw/intern/draw_command.hh b/source/blender/draw/intern/draw_command.hh
index 5307a242e39..ab180cc60b1 100644
--- a/source/blender/draw/intern/draw_command.hh
+++ b/source/blender/draw/intern/draw_command.hh
@@ -88,6 +88,7 @@ enum class Type : uint8_t {
   DispatchIndirect,
   Draw,
   DrawIndirect,
+  FramebufferBind,
   PushConstant,
   ResourceBind,
   ShaderBind,
@@ -118,6 +119,13 @@ struct ShaderBind {
   std::string serialize() const;
 };
 
+struct FramebufferBind {
+  GPUFrameBuffer *framebuffer;
+
+  void execute() const;
+  std::string serialize() const;
+};
+
 struct ResourceBind {
   eGPUSamplerState sampler;
   int slot;
@@ -473,10 +481,8 @@ class DrawMultiBuf {
                    uint vertex_first,
                    ResourceHandle handle)
   {
-    /* Unsupported for now. Use PassSimple. */
-    BLI_assert(vertex_first == 0 || vertex_first == -1);
-    BLI_assert(vertex_len == -1);
-    UNUSED_VARS_NDEBUG(vertex_len, vertex_first);
+    /* Draws with an explicit vertex range cannot be batched and produce one group per draw. */
+    const bool custom_group = ((vertex_first != 0 && vertex_first != -1) || vertex_len != -1);
 
     instance_len = instance_len != -1 ? instance_len : 1;
 
@@ -493,8 +499,14 @@ class DrawMultiBuf {
 
     bool inverted = handle.has_inverted_handedness();
 
-    if (group_id == uint(-1)) {
+    DrawPrototype &draw = prototype_buf_.get_or_resize(prototype_count_++);
+    draw.resource_handle = handle.raw;
+    draw.instance_len = instance_len;
+    draw.group_id = group_id;
+
+    if (group_id == uint(-1) || custom_group) {
       uint new_group_id = group_count_++;
+      draw.group_id = new_group_id;
 
       DrawGroup &group = group_buf_.get_or_resize(new_group_id);
       group.next = cmd.group_first;
@@ -503,11 +515,16 @@ class DrawMultiBuf {
       group.gpu_batch = batch;
       group.front_proto_len = 0;
       group.back_proto_len = 0;
+      group.vertex_len = vertex_len;
+      group.vertex_first = vertex_first;
+      /* Custom groups are not registered in `group_ids_`. */
+      if (!custom_group) {
+        group_id = new_group_id;
+      }
       /* For serialization only. */
       (inverted ? group.back_proto_len : group.front_proto_len)++;
       /* Append to list. */
       cmd.group_first = new_group_id;
-      group_id = new_group_id;
     }
     else {
       DrawGroup &group = group_buf_[group_id];
@@ -516,11 +533,6 @@ class DrawMultiBuf {
       /* For serialization only. */
       (inverted ? group.back_proto_len : group.front_proto_len)++;
     }
-
-    DrawPrototype &draw = prototype_buf_.get_or_resize(prototype_count_++);
-    draw.group_id = group_id;
-    draw.resource_handle = handle.raw;
-    draw.instance_len = instance_len;
   }
 
   void bind(RecordingState &state,
diff --git a/source/blender/draw/intern/draw_common.h b/source/blender/draw/intern/draw_common.h
index b5e9b3ee8bd..07d245e7dfe 100644
--- a/source/blender/draw/intern/draw_common.h
+++ b/source/blender/draw/intern/draw_common.h
@@ -88,6 +88,14 @@ void DRW_curves_ubos_pool_free(struct CurvesUniformBufPool *pool);
 void DRW_curves_update(void);
 void DRW_curves_free(void);
 
+/* draw_pointcloud.cc */
+
+struct DRWShadingGroup *DRW_shgroup_pointcloud_create_sub(struct Object *object,
+                                                          struct DRWShadingGroup *shgrp_parent,
+                                                          struct GPUMaterial *gpu_material);
+void DRW_pointcloud_init(void);
+void DRW_pointcloud_free(void);
+
 /* draw_volume.cc */
 
 /**
diff --git a/source/blender/draw/intern/draw_curves.cc b/source/blender/draw/intern/draw_curves.cc
index a61769e7a63..ee9ed4666e0 100644
--- a/source/blender/draw/intern/draw_curves.cc
+++ b/source/blender/draw/intern/draw_curves.cc
@@ -129,25 +129,25 @@ void DRW_curves_ubos_pool_free(CurvesUniformBufPool *pool)
 
 static void drw_curves_cache_shgrp_attach_resources(DRWShadingGroup *shgrp,
                                                     CurvesEvalCache *cache,
-                                                    GPUTexture *tex,
+                                                    GPUVertBuf *point_buf,
                                                     const int subdiv)
 {
-  DRW_shgroup_uniform_texture(shgrp, "hairPointBuffer", tex);
-  DRW_shgroup_uniform_texture(shgrp, "hairStrandBuffer", cache->strand_tex);
-  DRW_shgroup_uniform_texture(shgrp, "hairStrandSegBuffer", cache->strand_seg_tex);
+  DRW_shgroup_buffer_texture(shgrp, "hairPointBuffer", point_buf);
+  DRW_shgroup_buffer_texture(shgrp, "hairStrandBuffer", cache->proc_strand_buf);
+  DRW_shgroup_buffer_texture(shgrp, "hairStrandSegBuffer", cache->proc_strand_seg_buf);
   DRW_shgroup_uniform_int(shgrp, "hairStrandsRes", &cache->final[subdiv].strands_res, 1);
 }
 
 static void drw_curves_cache_update_compute(CurvesEvalCache *cache,
                                             const int subdiv,
                                             const int strands_len,
-                                            GPUVertBuf *buffer,
-                                            GPUTexture *tex)
+                                            GPUVertBuf *output_buf,
+                                            GPUVertBuf *input_buf)
 {
   GPUShader *shader = curves_eval_shader_get(CURVES_EVAL_CATMULL_ROM);
   DRWShadingGroup *shgrp = DRW_shgroup_create(shader, g_tf_pass);
-  drw_curves_cache_shgrp_attach_resources(shgrp, cache, tex, subdiv);
-  DRW_shgroup_vertex_buffer(shgrp, "posTime", buffer);
+  drw_curves_cache_shgrp_attach_resources(shgrp, cache, input_buf, subdiv);
+  DRW_shgroup_vertex_buffer(shgrp, "posTime", output_buf);
 
   const int max_strands_per_call = GPU_max_work_group_count(0);
   int strands_start = 0;
@@ -169,7 +169,7 @@ static void drw_curves_cache_update_compute(CurvesEvalCache *cache, const int su
   }
 
   drw_curves_cache_update_compute(
-      cache, subdiv, strands_len, cache->final[subdiv].proc_buf, cache->point_tex);
+      cache, subdiv, strands_len, cache->final[subdiv].proc_buf, cache->proc_point_buf);
 
   const DRW_Attributes &attrs = cache->final[subdiv].attr_used;
   for (int i = 0; i < attrs.num_requests; i++) {
@@ -182,13 +182,13 @@ static void drw_curves_cache_update_compute(CurvesEvalCache *cache, const int su
                                     subdiv,
                                     strands_len,
                                     cache->final[subdiv].attributes_buf[i],
-                                    cache->proc_attributes_tex[i]);
+                                    cache->proc_attributes_buf[i]);
   }
 }
 
 static void drw_curves_cache_update_transform_feedback(CurvesEvalCache *cache,
-                                                       GPUVertBuf *vbo,
-                                                       GPUTexture *tex,
+                                                       GPUVertBuf *output_buf,
+                                                       GPUVertBuf *input_buf,
                                                        const int subdiv,
                                                        const int final_points_len)
 {
@@ -196,14 +196,14 @@ static void drw_curves_cache_update_transform_feedback(CurvesEvalCache *cache,
 
   DRWShadingGroup *tf_shgrp = nullptr;
   if (GPU_transform_feedback_support()) {
-    tf_shgrp = DRW_shgroup_transform_feedback_create(tf_shader, g_tf_pass, vbo);
+    tf_shgrp = DRW_shgroup_transform_feedback_create(tf_shader, g_tf_pass, output_buf);
   }
   else {
     tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass);
 
     CurvesEvalCall *pr_call = MEM_new<CurvesEvalCall>(__func__);
     pr_call->next = g_tf_calls;
-    pr_call->vbo = vbo;
+    pr_call->vbo = output_buf;
     pr_call->shgrp = tf_shgrp;
     pr_call->vert_len = final_points_len;
     g_tf_calls = pr_call;
@@ -213,7 +213,7 @@ static void drw_curves_cache_update_transform_feedback(CurvesEvalCache *cache,
   }
   BLI_assert(tf_shgrp != nullptr);
 
-  drw_curves_cache_shgrp_attach_resources(tf_shgrp, cache, tex, subdiv);
+  drw_curves_cache_shgrp_attach_resources(tf_shgrp, cache, input_buf, subdiv);
   DRW_shgroup_call_procedural_points(tf_shgrp, nullptr, final_points_len);
 }
 
@@ -225,7 +225,7 @@ static void drw_curves_cache_update_transform_feedback(CurvesEvalCache *cache, c
   }
 
   drw_curves_cache_update_transform_feedback(
-      cache, cache->final[subdiv].proc_buf, cache->point_tex, subdiv, final_points_len);
+      cache, cache->final[subdiv].proc_buf, cache->proc_point_buf, subdiv, final_points_len);
 
   const DRW_Attributes &attrs = cache->final[subdiv].attr_used;
   for (int i = 0; i < attrs.num_requests; i++) {
@@ -236,7 +236,7 @@ static void drw_curves_cache_update_transform_feedback(CurvesEvalCache *cache, c
 
     drw_curves_cache_update_transform_feedback(cache,
                                                cache->final[subdiv].attributes_buf[i],
-                                               cache->proc_attributes_tex[i],
+                                               cache->proc_attributes_buf[i],
                                                subdiv,
                                                final_points_len);
   }
@@ -346,9 +346,9 @@ DRWShadingGroup *DRW_shgroup_curves_create_sub(Object *object,
         1.0f);
   }
 
-  DRW_shgroup_uniform_texture(shgrp, "hairPointBuffer", curves_cache->final[subdiv].proc_tex);
-  if (curves_cache->length_tex) {
-    DRW_shgroup_uniform_texture(shgrp, "hairLen", curves_cache->length_tex);
+  DRW_shgroup_buffer_texture(shgrp, "hairPointBuffer", curves_cache->final[subdiv].proc_buf);
+  if (curves_cache->proc_length_buf) {
+    DRW_shgroup_buffer_texture(shgrp, "hairLen", curves_cache->proc_length_buf);
   }
 
   const DRW_Attributes &attrs = curves_cache->final[subdiv].attr_used;
@@ -359,18 +359,18 @@ DRWShadingGroup *DRW_shgroup_curves_create_sub(Object *object,
     drw_curves_get_attribute_sampler_name(request.attribute_name, sampler_name);
 
     if (request.domain == ATTR_DOMAIN_CURVE) {
-      if (!curves_cache->proc_attributes_tex[i]) {
+      if (!curves_cache->proc_attributes_buf[i]) {
         continue;
       }
 
-      DRW_shgroup_uniform_texture(shgrp, sampler_name, curves_cache->proc_attributes_tex[i]);
+      DRW_shgroup_buffer_texture(shgrp, sampler_name, curves_cache->proc_attributes_buf[i]);
     }
     else {
-      if (!curves_cache->final[subdiv].attributes_tex[i]) {
+      if (!curves_cache->final[subdiv].attributes_buf[i]) {
         continue;
       }
-      DRW_shgroup_uniform_texture(
-          shgrp, sampler_name, curves_cache->final[subdiv].attributes_tex[i]);
+      DRW_shgroup_buffer_texture(
+          shgrp, sampler_name, curves_cache->final[subdiv].attributes_buf[i]);
     }
 
     /* Some attributes may not be used in the shader anymore and were not garbage collected yet, so
@@ -390,10 +390,15 @@ DRWShadingGroup *DRW_shgroup_curves_create_sub(Object *object,
   DRW_shgroup_uniform_int(shgrp, "hairStrandsRes", &curves_cache->final[subdiv].strands_res, 1);
   DRW_shgroup_uniform_int_copy(shgrp, "hairThicknessRes", thickness_res);
   DRW_shgroup_uniform_float_copy(shgrp, "hairRadShape", hair_rad_shape);
-  DRW_shgroup_uniform_mat4_copy(shgrp, "hairDupliMatrix", object->obmat);
+  DRW_shgroup_uniform_mat4_copy(shgrp, "hairDupliMatrix", object->object_to_world);
   DRW_shgroup_uniform_float_copy(shgrp, "hairRadRoot", hair_rad_root);
   DRW_shgroup_uniform_float_copy(shgrp, "hairRadTip", hair_rad_tip);
   DRW_shgroup_uniform_bool_copy(shgrp, "hairCloseTip", hair_close_tip);
+  if (gpu_material) {
+    /* \note: This needs to happen before the draw-call to allow correct attribute extraction.
+     * (see T101896) */
+    DRW_shgroup_add_material_resources(shgrp, gpu_material);
+  }
   /* TODO(fclem): Until we have a better way to cull the curves and render with orco, bypass
    * culling test. */
   GPUBatch *geom = curves_cache->final[subdiv].proc_hairs[thickness_res - 1];
diff --git a/source/blender/draw/intern/draw_curves_private.h b/source/blender/draw/intern/draw_curves_private.h
index a74878ec674..715706fd7a6 100644
--- a/source/blender/draw/intern/draw_curves_private.h
+++ b/source/blender/draw/intern/draw_curves_private.h
@@ -34,7 +34,6 @@ typedef enum CurvesEvalShader {
 typedef struct CurvesEvalFinalCache {
   /* Output of the subdivision stage: vertex buffer sized to subdiv level. */
   GPUVertBuf *proc_buf;
-  GPUTexture *proc_tex;
 
   /** Just contains a huge index buffer used to draw the final curves. */
   GPUBatch *proc_hairs[MAX_THICKRES];
@@ -61,35 +60,29 @@ typedef struct CurvesEvalFinalCache {
   /* Output of the subdivision stage: vertex buffers sized to subdiv level. This is only attributes
    * on point domain. */
   GPUVertBuf *attributes_buf[GPU_MAX_ATTR];
-  GPUTexture *attributes_tex[GPU_MAX_ATTR];
 } CurvesEvalFinalCache;
 
 /* Curves procedural display: Evaluation is done on the GPU. */
 typedef struct CurvesEvalCache {
   /* Input control point positions combined with parameter data. */
   GPUVertBuf *proc_point_buf;
-  GPUTexture *point_tex;
 
   /* Editmode data (such as selection flags) used by overlay_edit_curve_point.glsl */
   GPUVertBuf *data_edit_points;
 
   /** Info of control points strands (segment count and base index) */
   GPUVertBuf *proc_strand_buf;
-  GPUTexture *strand_tex;
 
   /* Curve length data. */
   GPUVertBuf *proc_length_buf;
-  GPUTexture *length_tex;
 
   GPUVertBuf *proc_strand_seg_buf;
-  GPUTexture *strand_seg_tex;
 
   CurvesEvalFinalCache final[MAX_HAIR_SUBDIV];
 
   /* For point attributes, which need subdivision, these buffers contain the input data.
    * For curve domain attributes, which do not need subdivision, these are the final data. */
   GPUVertBuf *proc_attributes_buf[GPU_MAX_ATTR];
-  GPUTexture *proc_attributes_tex[GPU_MAX_ATTR];
 
   int strands_len;
   int elems_len;
diff --git a/source/blender/draw/intern/draw_hair.cc b/source/blender/draw/intern/draw_hair.cc
index 4e44967e5e9..c5261f26f76 100644
--- a/source/blender/draw/intern/draw_hair.cc
+++ b/source/blender/draw/intern/draw_hair.cc
@@ -105,9 +105,9 @@ static void drw_hair_particle_cache_shgrp_attach_resources(DRWShadingGroup *shgr
                                                            ParticleHairCache *cache,
                                                            const int subdiv)
 {
-  DRW_shgroup_uniform_texture(shgrp, "hairPointBuffer", cache->point_tex);
-  DRW_shgroup_uniform_texture(shgrp, "hairStrandBuffer", cache->strand_tex);
-  DRW_shgroup_uniform_texture(shgrp, "hairStrandSegBuffer", cache->strand_seg_tex);
+  DRW_shgroup_buffer_texture(shgrp, "hairPointBuffer", cache->proc_point_buf);
+  DRW_shgroup_buffer_texture(shgrp, "hairStrandBuffer", cache->proc_strand_buf);
+  DRW_shgroup_buffer_texture(shgrp, "hairStrandSegBuffer", cache->proc_strand_seg_buf);
   DRW_shgroup_uniform_int(shgrp, "hairStrandsRes", &cache->final[subdiv].strands_res, 1);
 }
 
@@ -216,12 +216,12 @@ void DRW_hair_duplimat_get(Object *object,
       if (collection != nullptr) {
         sub_v3_v3(dupli_mat[3], collection->instance_offset);
       }
-      mul_m4_m4m4(dupli_mat, dupli_parent->obmat, dupli_mat);
+      mul_m4_m4m4(dupli_mat, dupli_parent->object_to_world, dupli_mat);
     }
     else {
-      copy_m4_m4(dupli_mat, dupli_object->ob->obmat);
+      copy_m4_m4(dupli_mat, dupli_object->ob->object_to_world);
       invert_m4(dupli_mat);
-      mul_m4_m4m4(dupli_mat, object->obmat, dupli_mat);
+      mul_m4_m4m4(dupli_mat, object->object_to_world, dupli_mat);
     }
   }
   else {
@@ -280,9 +280,9 @@ DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object,
   float hair_rad_tip = part->rad_tip * part->rad_scale * 0.5f;
   bool hair_close_tip = (part->shape_flag & PART_SHAPE_CLOSE_TIP) != 0;
 
-  DRW_shgroup_uniform_texture(shgrp, "hairPointBuffer", hair_cache->final[subdiv].proc_tex);
-  if (hair_cache->length_tex) {
-    DRW_shgroup_uniform_texture(shgrp, "l", hair_cache->length_tex);
+  DRW_shgroup_buffer_texture(shgrp, "hairPointBuffer", hair_cache->final[subdiv].proc_buf);
+  if (hair_cache->proc_length_buf) {
+    DRW_shgroup_buffer_texture(shgrp, "l", hair_cache->proc_length_buf);
   }
 
   DRW_shgroup_uniform_block(shgrp, "drw_curves", *g_dummy_curves_info);
@@ -293,6 +293,11 @@ DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object,
   DRW_shgroup_uniform_float_copy(shgrp, "hairRadRoot", hair_rad_root);
   DRW_shgroup_uniform_float_copy(shgrp, "hairRadTip", hair_rad_tip);
   DRW_shgroup_uniform_bool_copy(shgrp, "hairCloseTip", hair_close_tip);
+  if (gpu_material) {
+    /* \note: This needs to happen before the draw-call to allow correct attribute extraction.
+     * (see T101896) */
+    DRW_shgroup_add_material_resources(shgrp, gpu_material);
+  }
   /* TODO(fclem): Until we have a better way to cull the hair and render with orco, bypass
    * culling test. */
   GPUBatch *geom = hair_cache->final[subdiv].proc_hairs[thickness_res - 1];
diff --git a/source/blender/draw/intern/draw_hair_private.h b/source/blender/draw/intern/draw_hair_private.h
index c7e9e1e22de..a3019ab5aa5 100644
--- a/source/blender/draw/intern/draw_hair_private.h
+++ b/source/blender/draw/intern/draw_hair_private.h
@@ -29,7 +29,6 @@ struct ParticleSystem;
 typedef struct ParticleHairFinalCache {
   /* Output of the subdivision stage: vertex buff sized to subdiv level. */
   GPUVertBuf *proc_buf;
-  GPUTexture *proc_tex;
 
   /* Just contains a huge index buffer used to draw the final hair. */
   GPUBatch *proc_hairs[MAX_THICKRES];
@@ -44,18 +43,14 @@ typedef struct ParticleHairCache {
 
   /* Hair Procedural display: Interpolation is done on the GPU. */
   GPUVertBuf *proc_point_buf; /* Input control points */
-  GPUTexture *point_tex;
 
   /** Infos of control points strands (segment count and base index) */
   GPUVertBuf *proc_strand_buf;
-  GPUTexture *strand_tex;
 
   /* Hair Length */
   GPUVertBuf *proc_length_buf;
-  GPUTexture *length_tex;
 
   GPUVertBuf *proc_strand_seg_buf;
-  GPUTexture *strand_seg_tex;
 
   GPUVertBuf *proc_uv_buf[MAX_MTFACE];
   GPUTexture *uv_tex[MAX_MTFACE];
diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c
index da77845feb4..4fcfec833eb 100644
--- a/source/blender/draw/intern/draw_manager.c
+++ b/source/blender/draw/intern/draw_manager.c
@@ -1690,6 +1690,7 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph,
   DRW_globals_update();
 
   drw_debug_init();
+  DRW_pointcloud_init();
   DRW_curves_init(DST.vmempool);
   DRW_volume_init(DST.vmempool);
   DRW_smoke_init(DST.vmempool);
@@ -1959,20 +1960,6 @@ void DRW_render_gpencil(struct RenderEngine *engine, struct Depsgraph *depsgraph
   DST.buffer_finish_called = false;
 }
 
-/* Callback function for RE_engine_update_render_passes to ensure all
- * render passes are registered. */
-static void draw_render_result_ensure_pass_cb(void *user_data,
-                                              struct Scene *UNUSED(scene),
-                                              struct ViewLayer *view_layer,
-                                              const char *name,
-                                              int channels,
-                                              const char *chanid,
-                                              eNodeSocketDatatype UNUSED(type))
-{
-  RenderEngine *engine = user_data;
-  RE_engine_add_pass(engine, name, channels, chanid, view_layer->name);
-}
-
 void DRW_render_to_image(RenderEngine *engine, struct Depsgraph *depsgraph)
 {
   Scene *scene = DEG_get_evaluated_scene(depsgraph);
@@ -2023,10 +2010,6 @@ void DRW_render_to_image(RenderEngine *engine, struct Depsgraph *depsgraph)
   /* set default viewport */
   GPU_viewport(0, 0, size[0], size[1]);
 
-  /* Update the render passes. This needs to be done before acquiring the render result. */
-  RE_engine_update_render_passes(
-      engine, scene, view_layer, draw_render_result_ensure_pass_cb, engine);
-
   /* Init render result. */
   RenderResult *render_result = RE_engine_begin_result(engine,
                                                        0,
@@ -2071,6 +2054,7 @@ void DRW_render_object_iter(
     void (*callback)(void *vedata, Object *ob, RenderEngine *engine, struct Depsgraph *depsgraph))
 {
   const DRWContextState *draw_ctx = DRW_context_state_get();
+  DRW_pointcloud_init();
   DRW_curves_init(DST.vmempool);
   DRW_volume_init(DST.vmempool);
   DRW_smoke_init(DST.vmempool);
@@ -2131,6 +2115,7 @@ void DRW_custom_pipeline(DrawEngineType *draw_engine_type,
 
   drw_manager_init(&DST, NULL, NULL);
 
+  DRW_pointcloud_init();
   DRW_curves_init(DST.vmempool);
   DRW_volume_init(DST.vmempool);
   DRW_smoke_init(DST.vmempool);
@@ -2166,6 +2151,7 @@ void DRW_cache_restart(void)
 
   DST.buffer_finish_called = false;
 
+  DRW_pointcloud_init();
   DRW_curves_init(DST.vmempool);
   DRW_volume_init(DST.vmempool);
   DRW_smoke_init(DST.vmempool);
@@ -2491,6 +2477,7 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph,
 
   /* Init engines */
   drw_engines_init();
+  DRW_pointcloud_init();
   DRW_curves_init(DST.vmempool);
   DRW_volume_init(DST.vmempool);
   DRW_smoke_init(DST.vmempool);
@@ -2672,6 +2659,7 @@ void DRW_draw_depth_loop(struct Depsgraph *depsgraph,
 
   /* Init engines */
   drw_engines_init();
+  DRW_pointcloud_init();
   DRW_curves_init(DST.vmempool);
   DRW_volume_init(DST.vmempool);
   DRW_smoke_init(DST.vmempool);
@@ -2813,7 +2801,7 @@ void DRW_draw_depth_object(
 
   GPU_matrix_projection_set(rv3d->winmat);
   GPU_matrix_set(rv3d->viewmat);
-  GPU_matrix_mul(object->obmat);
+  GPU_matrix_mul(object->object_to_world);
 
   /* Setup frame-buffer. */
   GPUTexture *depth_tx = GPU_viewport_depth_texture(viewport);
@@ -2833,11 +2821,11 @@ void DRW_draw_depth_object(
   const bool use_clipping_planes = RV3D_CLIPPING_ENABLED(v3d, rv3d);
   if (use_clipping_planes) {
     GPU_clip_distances(6);
-    ED_view3d_clipping_local(rv3d, object->obmat);
+    ED_view3d_clipping_local(rv3d, object->object_to_world);
     for (int i = 0; i < 6; i++) {
       copy_v4_v4(planes.world[i], rv3d->clip_local[i]);
     }
-    copy_m4_m4(planes.ModelMatrix, object->obmat);
+    copy_m4_m4(planes.ModelMatrix, object->object_to_world);
   }
 
   drw_batch_cache_validate(object);
@@ -3086,6 +3074,7 @@ void DRW_engines_free(void)
   GPU_FRAMEBUFFER_FREE_SAFE(g_select_buffer.framebuffer_depth_only);
 
   DRW_shaders_free();
+  DRW_pointcloud_free();
   DRW_curves_free();
   DRW_volume_free();
   DRW_shape_cache_free();
diff --git a/source/blender/draw/intern/draw_manager_data.cc b/source/blender/draw/intern/draw_manager_data.cc
index 9768f1ce9e7..4aa27e2288c 100644
--- a/source/blender/draw/intern/draw_manager_data.cc
+++ b/source/blender/draw/intern/draw_manager_data.cc
@@ -589,7 +589,7 @@ void DRW_shgroup_buffer_texture(DRWShadingGroup *shgroup,
                                 const char *name,
                                 GPUVertBuf *vertex_buffer)
 {
-  int location = GPU_shader_get_ssbo(shgroup->shader, name);
+  int location = GPU_shader_get_texture_binding(shgroup->shader, name);
   if (location == -1) {
     return;
   }
@@ -606,7 +606,7 @@ void DRW_shgroup_buffer_texture_ref(DRWShadingGroup *shgroup,
                                     const char *name,
                                     GPUVertBuf **vertex_buffer)
 {
-  int location = GPU_shader_get_ssbo(shgroup->shader, name);
+  int location = GPU_shader_get_texture_binding(shgroup->shader, name);
   if (location == -1) {
     return;
   }
@@ -724,8 +724,8 @@ static void drw_call_culling_init(DRWCullingState *cull, Object *ob)
     float corner[3];
     /* Get BoundSphere center and radius from the BoundBox. */
     mid_v3_v3v3(cull->bsphere.center, bbox->vec[0], bbox->vec[6]);
-    mul_v3_m4v3(corner, ob->obmat, bbox->vec[0]);
-    mul_m4_v3(ob->obmat, cull->bsphere.center);
+    mul_v3_m4v3(corner, ob->object_to_world, bbox->vec[0]);
+    mul_m4_v3(ob->object_to_world, cull->bsphere.center);
     cull->bsphere.radius = len_v3v3(cull->bsphere.center, corner);
 
     /* Bypass test for very large objects (see T67319). */
@@ -1017,7 +1017,7 @@ void DRW_shgroup_call_ex(DRWShadingGroup *shgroup,
   if (G.f & G_FLAG_PICKSEL) {
     drw_command_set_select_id(shgroup, nullptr, DST.select_id);
   }
-  DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->obmat : obmat, ob);
+  DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->object_to_world : obmat, ob);
   drw_command_draw(shgroup, geom, handle);
 
   /* Culling data. */
@@ -1042,7 +1042,7 @@ void DRW_shgroup_call_range(
   if (G.f & G_FLAG_PICKSEL) {
     drw_command_set_select_id(shgroup, nullptr, DST.select_id);
   }
-  DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->obmat : nullptr, ob);
+  DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->object_to_world : nullptr, ob);
   drw_command_draw_range(shgroup, geom, handle, v_sta, v_num);
 }
 
@@ -1053,7 +1053,7 @@ void DRW_shgroup_call_instance_range(
   if (G.f & G_FLAG_PICKSEL) {
     drw_command_set_select_id(shgroup, nullptr, DST.select_id);
   }
-  DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->obmat : nullptr, ob);
+  DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->object_to_world : nullptr, ob);
   drw_command_draw_intance_range(shgroup, geom, handle, i_sta, i_num);
 }
 
@@ -1099,7 +1099,7 @@ static void drw_shgroup_call_procedural_add_ex(DRWShadingGroup *shgroup,
   if (G.f & G_FLAG_PICKSEL) {
     drw_command_set_select_id(shgroup, nullptr, DST.select_id);
   }
-  DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->obmat : nullptr, ob);
+  DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->object_to_world : nullptr, ob);
   drw_command_draw_procedural(shgroup, geom, handle, vert_count);
 }
 
@@ -1149,7 +1149,7 @@ void DRW_shgroup_call_procedural_indirect(DRWShadingGroup *shgroup,
   if (G.f & G_FLAG_PICKSEL) {
     drw_command_set_select_id(shgroup, nullptr, DST.select_id);
   }
-  DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->obmat : nullptr, ob);
+  DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->object_to_world : nullptr, ob);
   drw_command_draw_indirect(shgroup, geom, handle, indirect_buf);
 }
 
@@ -1159,7 +1159,7 @@ void DRW_shgroup_call_instances(DRWShadingGroup *shgroup, Object *ob, GPUBatch *
   if (G.f & G_FLAG_PICKSEL) {
     drw_command_set_select_id(shgroup, nullptr, DST.select_id);
   }
-  DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->obmat : nullptr, ob);
+  DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->object_to_world : nullptr, ob);
   drw_command_draw_instance(shgroup, geom, handle, count, false);
 }
 
@@ -1173,7 +1173,7 @@ void DRW_shgroup_call_instances_with_attrs(DRWShadingGroup *shgroup,
   if (G.f & G_FLAG_PICKSEL) {
     drw_command_set_select_id(shgroup, nullptr, DST.select_id);
   }
-  DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->obmat : nullptr, ob);
+  DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->object_to_world : nullptr, ob);
   GPUBatch *batch = DRW_temp_batch_instance_request(
       DST.vmempool->idatalist, nullptr, inst_attributes, geom);
   drw_command_draw_instance(shgroup, batch, handle, 0, true);
@@ -1283,7 +1283,7 @@ static void drw_sculpt_get_frustum_planes(Object *ob, float planes[6][4])
    * 4x4 matrix is done by multiplying with the transpose inverse.
    * The inverse cancels out here since we transform by inverse(obmat). */
   float tmat[4][4];
-  transpose_m4_m4(tmat, ob->obmat);
+  transpose_m4_m4(tmat, ob->object_to_world);
   for (int i = 0; i < 6; i++) {
     mul_m4_v4(tmat, planes[i]);
   }
@@ -1361,7 +1361,7 @@ static void drw_sculpt_generate_calls(DRWSculptCallbackData *scd)
 
   if (SCULPT_DEBUG_BUFFERS) {
     int debug_node_nr = 0;
-    DRW_debug_modelmat(scd->ob->obmat);
+    DRW_debug_modelmat(scd->ob->object_to_world);
     BKE_pbvh_draw_debug_cb(
         pbvh,
         (void (*)(PBVHNode * n, void *d, const float min[3], const float max[3], PBVHNodeFlags f))
@@ -1714,23 +1714,32 @@ static void drw_shgroup_init(DRWShadingGroup *shgroup, GPUShader *shader)
   }
 
 #ifdef DEBUG
-  int debug_print_location = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_PRINT);
-  if (debug_print_location != -1) {
-    GPUStorageBuf *buf = drw_debug_gpu_print_buf_get();
-    drw_shgroup_uniform_create_ex(
-        shgroup, debug_print_location, DRW_UNIFORM_STORAGE_BLOCK, buf, GPU_SAMPLER_DEFAULT, 0, 1);
+  /* TODO(Metal): Support Shader debug print.
+   * This is not currently supported by Metal Backend. */
+  if (GPU_backend_get_type() != GPU_BACKEND_METAL) {
+    int debug_print_location = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_PRINT);
+    if (debug_print_location != -1) {
+      GPUStorageBuf *buf = drw_debug_gpu_print_buf_get();
+      drw_shgroup_uniform_create_ex(shgroup,
+                                    debug_print_location,
+                                    DRW_UNIFORM_STORAGE_BLOCK,
+                                    buf,
+                                    GPU_SAMPLER_DEFAULT,
+                                    0,
+                                    1);
 #  ifndef DISABLE_DEBUG_SHADER_PRINT_BARRIER
-    /* Add a barrier to allow multiple shader writing to the same buffer. */
-    DRW_shgroup_barrier(shgroup, GPU_BARRIER_SHADER_STORAGE);
+      /* Add a barrier to allow multiple shader writing to the same buffer. */
+      DRW_shgroup_barrier(shgroup, GPU_BARRIER_SHADER_STORAGE);
 #  endif
-  }
+    }
 
-  int debug_draw_location = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_VERTS);
-  if (debug_draw_location != -1) {
-    GPUStorageBuf *buf = drw_debug_gpu_draw_buf_get();
-    drw_shgroup_uniform_create_ex(
-        shgroup, debug_draw_location, DRW_UNIFORM_STORAGE_BLOCK, buf, GPU_SAMPLER_DEFAULT, 0, 1);
-    /* NOTE(fclem): No barrier as ordering is not important. */
+    int debug_draw_location = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_VERTS);
+    if (debug_draw_location != -1) {
+      GPUStorageBuf *buf = drw_debug_gpu_draw_buf_get();
+      drw_shgroup_uniform_create_ex(
+          shgroup, debug_draw_location, DRW_UNIFORM_STORAGE_BLOCK, buf, GPU_SAMPLER_DEFAULT, 0, 1);
+      /* NOTE(fclem): No barrier as ordering is not important. */
+    }
   }
 #endif
 
diff --git a/source/blender/draw/intern/draw_manager_text.cc b/source/blender/draw/intern/draw_manager_text.cc
index 100ef528bc8..239f6fcca73 100644
--- a/source/blender/draw/intern/draw_manager_text.cc
+++ b/source/blender/draw/intern/draw_manager_text.cc
@@ -305,11 +305,11 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
         if (clip_segment_v3_plane_n(v1, v2, clip_planes, 4, v1_clip, v2_clip)) {
 
           mid_v3_v3v3(vmid, v1_clip, v2_clip);
-          mul_m4_v3(ob->obmat, vmid);
+          mul_m4_v3(ob->object_to_world, vmid);
 
           if (do_global) {
-            mul_mat3_m4_v3(ob->obmat, v1);
-            mul_mat3_m4_v3(ob->obmat, v2);
+            mul_mat3_m4_v3(ob->object_to_world, v1);
+            mul_mat3_m4_v3(ob->object_to_world, v2);
           }
 
           if (unit->system) {
@@ -373,7 +373,7 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
             float angle;
 
             mid_v3_v3v3(vmid, v1_clip, v2_clip);
-            mul_m4_v3(ob->obmat, vmid);
+            mul_m4_v3(ob->object_to_world, vmid);
 
             if (use_coords) {
               copy_v3_v3(no_a, poly_normals[BM_elem_index_get(l_a->f)]);
@@ -443,16 +443,16 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
           n += 3;
 
           if (do_global) {
-            mul_mat3_m4_v3(ob->obmat, v1);
-            mul_mat3_m4_v3(ob->obmat, v2);
-            mul_mat3_m4_v3(ob->obmat, v3);
+            mul_mat3_m4_v3(ob->object_to_world, v1);
+            mul_mat3_m4_v3(ob->object_to_world, v2);
+            mul_mat3_m4_v3(ob->object_to_world, v3);
           }
 
           area += area_tri_v3(v1, v2, v3);
         }
 
         mul_v3_fl(vmid, 1.0f / float(n));
-        mul_m4_v3(ob->obmat, vmid);
+        mul_m4_v3(ob->object_to_world, vmid);
 
         if (unit->system) {
           numstr_len = BKE_unit_value_as_string(
@@ -522,9 +522,9 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
             copy_v3_v3(v2_local, v2);
 
             if (do_global) {
-              mul_mat3_m4_v3(ob->obmat, v1);
-              mul_mat3_m4_v3(ob->obmat, v2);
-              mul_mat3_m4_v3(ob->obmat, v3);
+              mul_mat3_m4_v3(ob->object_to_world, v1);
+              mul_mat3_m4_v3(ob->object_to_world, v2);
+              mul_mat3_m4_v3(ob->object_to_world, v3);
             }
 
             float angle = angle_v3v3v3(v1, v2, v3);
@@ -535,7 +535,7 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
                                            (is_rad) ? angle : RAD2DEGF(angle),
                                            (is_rad) ? "r" : "°");
             interp_v3_v3v3(fvec, vmid, v2_local, 0.8f);
-            mul_m4_v3(ob->obmat, fvec);
+            mul_m4_v3(ob->object_to_world, fvec);
             DRW_text_cache_add(dt, fvec, numstr, numstr_len, 0, 0, txt_flag, col);
           }
         }
@@ -566,7 +566,7 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
             copy_v3_v3(v1, v->co);
           }
 
-          mul_m4_v3(ob->obmat, v1);
+          mul_m4_v3(ob->object_to_world, v1);
 
           numstr_len = BLI_snprintf_rlen(numstr, sizeof(numstr), "%d", i);
           DRW_text_cache_add(dt, v1, numstr, numstr_len, 0, 0, txt_flag, col);
@@ -595,7 +595,7 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
 
           if (clip_segment_v3_plane_n(v1, v2, clip_planes, 4, v1_clip, v2_clip)) {
             mid_v3_v3v3(vmid, v1_clip, v2_clip);
-            mul_m4_v3(ob->obmat, vmid);
+            mul_m4_v3(ob->object_to_world, vmid);
 
             numstr_len = BLI_snprintf_rlen(numstr, sizeof(numstr), "%d", i);
             DRW_text_cache_add(
@@ -629,7 +629,7 @@ void DRW_text_edit_mesh_measure_stats(ARegion *region,
             BM_face_calc_center_median(f, v1);
           }
 
-          mul_m4_v3(ob->obmat, v1);
+          mul_m4_v3(ob->object_to_world, v1);
 
           numstr_len = BLI_snprintf_rlen(numstr, sizeof(numstr), "%d", i);
           DRW_text_cache_add(dt, v1, numstr, numstr_len, 0, 0, txt_flag, col);
diff --git a/source/blender/draw/intern/draw_pass.hh b/source/blender/draw/intern/draw_pass.hh
index 24dfdd1b97b..892dfdddfcf 100644
--- a/source/blender/draw/intern/draw_pass.hh
+++ b/source/blender/draw/intern/draw_pass.hh
@@ -14,8 +14,7 @@
  * #Pass. Use many #PassSub along with a main #Pass to reduce the overhead and allow groupings of
  * commands. \note The draw call order inside a batch of multiple draw with the exact same state is
  * not guaranteed and is not even deterministic. Use a #PassSimple or #PassSortable if ordering is
- * needed. \note As of now, it is also quite limited in the type of draw command it can record
- * (no custom vertex count, no custom first vertex).
+ * needed. Custom vertex count and custom first vertex will effectively disable batching.
  *
  * `PassSimple`:
  * Does not have the overhead of #PassMain but does not have the culling and batching optimization.
@@ -192,6 +191,12 @@ class PassBase {
   void shader_set(GPUShader *shader);
 
   /**
+   * Bind a framebuffer. This is equivalent to a deferred GPU_framebuffer_bind() call.
+   * \note Changes the global GPU state (outside of DRW).
+   */
+  void framebuffer_set(GPUFrameBuffer *framebuffer);
+
+  /**
    * Bind a material shader along with its associated resources. Any following bind() or
    * push_constant() call will use its interface.
    * IMPORTANT: Assumes material is compiled and can be used (no compilation error).
@@ -743,6 +748,11 @@ template<class T> inline void PassBase<T>::shader_set(GPUShader *shader)
   create_command(Type::ShaderBind).shader_bind = {shader};
 }
 
+template<class T> inline void PassBase<T>::framebuffer_set(GPUFrameBuffer *framebuffer)
+{
+  create_command(Type::FramebufferBind).framebuffer_bind = {framebuffer};
+}
+
 template<class T> inline void PassBase<T>::material_set(Manager &manager, GPUMaterial *material)
 {
   GPUPass *gpupass = GPU_material_get_pass(material);
diff --git a/source/blender/draw/intern/draw_pbvh.cc b/source/blender/draw/intern/draw_pbvh.cc
index 880a5a445f4..b25bb42a8a5 100644
--- a/source/blender/draw/intern/draw_pbvh.cc
+++ b/source/blender/draw/intern/draw_pbvh.cc
@@ -389,12 +389,19 @@ struct PBVHBatches {
         break;
 
       case CD_PBVH_MASK_TYPE:
-        foreach_grids([&](int /*x*/, int /*y*/, int /*grid_index*/, CCGElem *elems[4], int i) {
-          float *mask = CCG_elem_mask(&args->ccg_key, elems[i]);
+        if (args->ccg_key.has_mask) {
+          foreach_grids([&](int /*x*/, int /*y*/, int /*grid_index*/, CCGElem *elems[4], int i) {
+            float *mask = CCG_elem_mask(&args->ccg_key, elems[i]);
 
-          *static_cast<uchar *>(GPU_vertbuf_raw_step(&access)) = mask ? uchar(*mask * 255.0f) :
-                                                                        255;
-        });
+            *static_cast<uchar *>(GPU_vertbuf_raw_step(&access)) = uchar(*mask * 255.0f);
+          });
+        }
+        else {
+          foreach_grids(
+              [&](int /*x*/, int /*y*/, int /*grid_index*/, CCGElem * /*elems*/[4], int /*i*/) {
+                *static_cast<uchar *>(GPU_vertbuf_raw_step(&access)) = 0;
+              });
+        }
         break;
 
       case CD_PBVH_FSET_TYPE: {
@@ -944,6 +951,14 @@ struct PBVHBatches {
 
   void create_index_faces(PBVH_GPU_Args *args)
   {
+    int *mat_index = static_cast<int *>(
+        CustomData_get_layer_named(args->pdata, CD_PROP_INT32, "material_index"));
+
+    if (mat_index && args->totprim) {
+      int poly_index = args->mlooptri[args->prim_indices[0]].poly;
+      material_index = mat_index[poly_index];
+    }
+
     /* Calculate number of edges*/
     int edge_count = 0;
     for (int i = 0; i < args->totprim; i++) {
@@ -952,6 +967,7 @@ struct PBVHBatches {
       if (args->hide_poly && args->hide_poly[lt->poly]) {
         continue;
       }
+
       int r_edges[3];
       BKE_mesh_looptri_get_real_edges(args->me, lt, r_edges);
 
@@ -1023,6 +1039,14 @@ struct PBVHBatches {
 
   void create_index_grids(PBVH_GPU_Args *args)
   {
+    int *mat_index = static_cast<int *>(
+        CustomData_get_layer_named(args->pdata, CD_PROP_INT32, "material_index"));
+
+    if (mat_index && args->totprim) {
+      int poly_index = BKE_subdiv_ccg_grid_to_face_index(args->subdiv_ccg, args->grid_indices[0]);
+      material_index = mat_index[poly_index];
+    }
+
     needs_tri_index = true;
     int gridsize = args->ccg_key.grid_size;
     int totgrid = args->totprim;
diff --git a/source/blender/draw/intern/draw_pointcloud.cc b/source/blender/draw/intern/draw_pointcloud.cc
new file mode 100644
index 00000000000..582dc690cee
--- /dev/null
+++ b/source/blender/draw/intern/draw_pointcloud.cc
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2017 Blender Foundation. All rights reserved. */
+
+/** \file
+ * \ingroup draw
+ *
+ * \brief Contains GPU point-cloud drawing methods.
+ */
+
+#include "BLI_string_utils.h"
+#include "BLI_utildefines.h"
+
+#include "DNA_customdata_types.h"
+#include "DNA_pointcloud_types.h"
+
+#include "BKE_curves.hh"
+#include "BKE_geometry_set.hh"
+
+#include "GPU_batch.h"
+#include "GPU_capabilities.h"
+#include "GPU_compute.h"
+#include "GPU_material.h"
+#include "GPU_shader.h"
+#include "GPU_texture.h"
+#include "GPU_vertex_buffer.h"
+
+#include "DRW_gpu_wrapper.hh"
+#include "DRW_render.h"
+
+#include "draw_attributes.h"
+#include "draw_cache_impl.h"
+#include "draw_common.h"
+#include "draw_manager.h"
+#include "draw_pointcloud_private.hh"
+
+static GPUVertBuf *g_dummy_vbo = nullptr;
+
+void DRW_pointcloud_init()
+{
+  if (g_dummy_vbo == nullptr) {
+    /* Not created yet: build a one-element, zero-filled "dummy" buffer (4 x F32 attribute). */
+    GPUVertFormat format = {0};
+    uint dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+
+    g_dummy_vbo = GPU_vertbuf_create_with_format_ex(
+        &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
+
+    const float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+    GPU_vertbuf_data_alloc(g_dummy_vbo, 1);
+    GPU_vertbuf_attr_fill(g_dummy_vbo, dummy_id, vert);
+  }
+}
+
+DRWShadingGroup *DRW_shgroup_pointcloud_create_sub(Object *object,
+                                                   DRWShadingGroup *shgrp_parent,
+                                                   GPUMaterial *gpu_material)
+{
+  PointCloud &pointcloud = *static_cast<PointCloud *>(object->data);
+
+  DRWShadingGroup *shgrp = DRW_shgroup_create_sub(shgrp_parent);
+
+  /* Work around certain drivers not drawing anything when no texture is bound to
+   * "ac", "au", "u" or "c": bind the dummy buffer to each of them. */
+  DRW_shgroup_buffer_texture(shgrp, "u", g_dummy_vbo);
+  DRW_shgroup_buffer_texture(shgrp, "au", g_dummy_vbo);
+  DRW_shgroup_buffer_texture(shgrp, "c", g_dummy_vbo);
+  DRW_shgroup_buffer_texture(shgrp, "ac", g_dummy_vbo);
+
+  GPUVertBuf *pos_rad_buf = pointcloud_position_and_radius_get(&pointcloud);
+  DRW_shgroup_buffer_texture(shgrp, "ptcloud_pos_rad_tx", pos_rad_buf);
+
+  if (gpu_material != nullptr) {
+    /* NOTE(review): attribute binding below is disabled; it uses `cache`, not declared here. */
+    // const DRW_Attributes &attrs = cache->attr_used;
+    // for (int i = 0; i < attrs.num_requests; i++) {
+    //   const DRW_AttributeRequest &request = attrs.requests[i];
+
+    //   char sampler_name[32];
+    //   /* \note reusing curve attribute function. */
+    //   drw_curves_get_attribute_sampler_name(request.attribute_name, sampler_name);
+
+    //   GPUTexture *attribute_buf = DRW_pointcloud_evaluated_attribute(&pointcloud);
+    //   if (!cache->attributes_buf[i]) {
+    //     continue;
+    //   }
+    //   DRW_shgroup_buffer_texture_ref(shgrp, sampler_name, attribute_buf);
+    // }
+
+    /* Only single material supported for now. */
+    GPUBatch **geom = pointcloud_surface_shaded_get(&pointcloud, &gpu_material, 1);
+    DRW_shgroup_call(shgrp, geom[0], object);
+  }
+  else {
+    GPUBatch *geom = pointcloud_surface_get(&pointcloud);
+    DRW_shgroup_call(shgrp, geom, object);
+  }
+  return shgrp;
+}
+
+void DRW_pointcloud_free()
+{
+  GPU_VERTBUF_DISCARD_SAFE(g_dummy_vbo);
+}
diff --git a/source/blender/draw/intern/draw_pointcloud_private.hh b/source/blender/draw/intern/draw_pointcloud_private.hh
new file mode 100644
index 00000000000..9422d7fbc99
--- /dev/null
+++ b/source/blender/draw/intern/draw_pointcloud_private.hh
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ * Copyright 2021 Blender Foundation. All rights reserved. */
+
+/** \file
+ * \ingroup draw
+ */
+
+#pragma once
+
+struct PointCloud;
+struct GPUBatch;
+struct GPUVertBuf;
+struct GPUMaterial;
+
+GPUVertBuf *pointcloud_position_and_radius_get(PointCloud *pointcloud);
+GPUBatch **pointcloud_surface_shaded_get(PointCloud *pointcloud,
+                                         GPUMaterial **gpu_materials,
+                                         int mat_len);
+GPUBatch *pointcloud_surface_get(PointCloud *pointcloud);
diff --git a/source/blender/draw/intern/draw_resource.hh b/source/blender/draw/intern/draw_resource.hh
index 2df38e32ed2..b116d7fe183 100644
--- a/source/blender/draw/intern/draw_resource.hh
+++ b/source/blender/draw/intern/draw_resource.hh
@@ -31,7 +31,7 @@
 
 inline void ObjectMatrices::sync(const Object &object)
 {
-  model = object.obmat;
+  model = object.object_to_world;
   model_inverse = object.imat;
 }
 
diff --git a/source/blender/draw/intern/draw_volume.cc b/source/blender/draw/intern/draw_volume.cc
index 2b4b0e3c089..5c1ce7c3111 100644
--- a/source/blender/draw/intern/draw_volume.cc
+++ b/source/blender/draw/intern/draw_volume.cc
@@ -127,7 +127,7 @@ static DRWShadingGroup *drw_volume_object_grids_init(Object *ob,
 
   grp = DRW_shgroup_create_sub(grp);
 
-  volume_infos.density_scale = BKE_volume_density_scale(volume, ob->obmat);
+  volume_infos.density_scale = BKE_volume_density_scale(volume, ob->object_to_world);
   volume_infos.color_mul = float4(1.0f);
   volume_infos.temperature_mul = 1.0f;
   volume_infos.temperature_bias = 0.0f;
diff --git a/source/blender/draw/intern/shaders/common_gpencil_lib.glsl b/source/blender/draw/intern/shaders/common_gpencil_lib.glsl
index 123c493b572..def841b07aa 100644
--- a/source/blender/draw/intern/shaders/common_gpencil_lib.glsl
+++ b/source/blender/draw/intern/shaders/common_gpencil_lib.glsl
@@ -90,10 +90,15 @@ float gpencil_clamp_small_stroke_thickness(float thickness, vec4 ndc_pos)
 
 #ifdef GPU_VERTEX_SHADER
 
-/* Trick to detect if a drawcall is stroke or fill.
- * This does mean that we need to draw an empty stroke segment before starting
- * to draw the real stroke segments. */
-#  define GPENCIL_IS_STROKE_VERTEX (gl_InstanceID != 0)
+int gpencil_stroke_point_id()
+{
+  return (gl_VertexID & ~GP_IS_STROKE_VERTEX_BIT) >> GP_VERTEX_ID_SHIFT;
+}
+
+bool gpencil_is_stroke_vertex()
+{
+  return flag_test(gl_VertexID, GP_IS_STROKE_VERTEX_BIT);
+}
 
 /**
  * Returns value of gl_Position.
@@ -120,20 +125,7 @@ float gpencil_clamp_small_stroke_thickness(float thickness, vec4 ndc_pos)
  * WARNING: Max attribute count is actually 14 because OSX OpenGL implementation
  * considers gl_VertexID and gl_InstanceID as vertex attribute. (see T74536)
  */
-vec4 gpencil_vertex(ivec4 ma,
-                    ivec4 ma1,
-                    ivec4 ma2,
-                    ivec4 ma3,
-                    vec4 pos,
-                    vec4 pos1,
-                    vec4 pos2,
-                    vec4 pos3,
-                    vec4 uv1,
-                    vec4 uv2,
-                    vec4 col1,
-                    vec4 col2,
-                    vec4 fcol1,
-                    vec4 viewport_size,
+vec4 gpencil_vertex(vec4 viewport_size,
                     gpMaterialFlag material_flags,
                     vec2 alignment_rot,
                     /* World Position. */
@@ -155,6 +147,24 @@ vec4 gpencil_vertex(ivec4 ma,
                     /* Stroke hardness. */
                     out float out_hardness)
 {
+  int stroke_point_id = (gl_VertexID & ~GP_IS_STROKE_VERTEX_BIT) >> GP_VERTEX_ID_SHIFT;
+
+  /* Attribute Loading. */
+  vec4 pos = texelFetch(gp_pos_tx, (stroke_point_id - 1) * 3 + 0);
+  vec4 pos1 = texelFetch(gp_pos_tx, (stroke_point_id + 0) * 3 + 0);
+  vec4 pos2 = texelFetch(gp_pos_tx, (stroke_point_id + 1) * 3 + 0);
+  vec4 pos3 = texelFetch(gp_pos_tx, (stroke_point_id + 2) * 3 + 0);
+  ivec4 ma = floatBitsToInt(texelFetch(gp_pos_tx, (stroke_point_id - 1) * 3 + 1));
+  ivec4 ma1 = floatBitsToInt(texelFetch(gp_pos_tx, (stroke_point_id + 0) * 3 + 1));
+  ivec4 ma2 = floatBitsToInt(texelFetch(gp_pos_tx, (stroke_point_id + 1) * 3 + 1));
+  ivec4 ma3 = floatBitsToInt(texelFetch(gp_pos_tx, (stroke_point_id + 2) * 3 + 1));
+  vec4 uv1 = texelFetch(gp_pos_tx, (stroke_point_id + 0) * 3 + 2);
+  vec4 uv2 = texelFetch(gp_pos_tx, (stroke_point_id + 1) * 3 + 2);
+
+  vec4 col1 = texelFetch(gp_col_tx, (stroke_point_id + 0) * 2 + 0);
+  vec4 col2 = texelFetch(gp_col_tx, (stroke_point_id + 1) * 2 + 0);
+  vec4 fcol1 = texelFetch(gp_col_tx, (stroke_point_id + 0) * 2 + 1);
+
 #  define thickness1 pos1.w
 #  define thickness2 pos2.w
 #  define strength1 uv1.w
@@ -167,7 +177,7 @@ vec4 gpencil_vertex(ivec4 ma,
 
   vec4 out_ndc;
 
-  if (GPENCIL_IS_STROKE_VERTEX) {
+  if (gpencil_is_stroke_vertex()) {
     bool is_dot = flag_test(material_flags, GP_STROKE_ALIGNMENT);
     bool is_squares = !flag_test(material_flags, GP_STROKE_DOTS);
 
@@ -177,13 +187,6 @@ vec4 gpencil_vertex(ivec4 ma,
       is_squares = false;
     }
 
-    /* Endpoints, we discard the vertices. */
-    if (ma1.x == -1 || (!is_dot && ma2.x == -1)) {
-      /* We set the vertex at the camera origin to generate 0 fragments. */
-      out_ndc = vec4(0.0, 0.0, -3e36, 0.0);
-      return out_ndc;
-    }
-
     /* Avoid using a vertex attribute for quad positioning. */
     float x = float(gl_VertexID & 1) * 2.0 - 1.0; /* [-1..1] */
     float y = float(gl_VertexID & 2) - 1.0;       /* [-1..1] */
@@ -336,8 +339,7 @@ vec4 gpencil_vertex(ivec4 ma,
     out_N = safe_normalize(N);
 
     /* Decode fill opacity. */
-    out_color = vec4(fcol1.rgb, floor(fcol1.a / 10.0));
-    out_color.a /= 10000.0;
+    out_color = vec4(fcol1.rgb, floor(fcol1.a / 10.0) / 10000.0);
 
     /* We still offset the fills a little to avoid overlaps */
     out_ndc.z += 0.000002;
@@ -355,20 +357,7 @@ vec4 gpencil_vertex(ivec4 ma,
   return out_ndc;
 }
 
-vec4 gpencil_vertex(ivec4 ma,
-                    ivec4 ma1,
-                    ivec4 ma2,
-                    ivec4 ma3,
-                    vec4 pos,
-                    vec4 pos1,
-                    vec4 pos2,
-                    vec4 pos3,
-                    vec4 uv1,
-                    vec4 uv2,
-                    vec4 col1,
-                    vec4 col2,
-                    vec4 fcol1,
-                    vec4 viewport_size,
+vec4 gpencil_vertex(vec4 viewport_size,
                     out vec3 out_P,
                     out vec3 out_N,
                     out vec4 out_color,
@@ -379,20 +368,7 @@ vec4 gpencil_vertex(ivec4 ma,
                     out vec2 out_thickness,
                     out float out_hardness)
 {
-  return gpencil_vertex(ma,
-                        ma1,
-                        ma2,
-                        ma3,
-                        pos,
-                        pos1,
-                        pos2,
-                        pos3,
-                        uv1,
-                        uv2,
-                        col1,
-                        col2,
-                        fcol1,
-                        viewport_size,
+  return gpencil_vertex(viewport_size,
                         0u,
                         vec2(1.0, 0.0),
                         out_P,
diff --git a/source/blender/draw/intern/shaders/common_pointcloud_lib.glsl b/source/blender/draw/intern/shaders/common_pointcloud_lib.glsl
index dd725ad327f..8725e036435 100644
--- a/source/blender/draw/intern/shaders/common_pointcloud_lib.glsl
+++ b/source/blender/draw/intern/shaders/common_pointcloud_lib.glsl
@@ -2,16 +2,10 @@
 /* NOTE: To be used with UNIFORM_RESOURCE_ID and INSTANCED_ATTR as define. */
 #pragma BLENDER_REQUIRE(common_view_lib.glsl)
 
-#ifndef DRW_SHADER_SHARED_H
-
-in vec4 pos; /* Position and radius. */
-
-/* ---- Instanced attribs ---- */
-
-in vec3 pos_inst;
-in vec3 nor;
-
-#endif
+int pointcloud_get_point_id()
+{
+  return gl_VertexID / 32;
+}
 
 mat3 pointcloud_get_facing_matrix(vec3 p)
 {
@@ -25,8 +19,10 @@ mat3 pointcloud_get_facing_matrix(vec3 p)
 /* Returns world center position and radius. */
 void pointcloud_get_pos_and_radius(out vec3 outpos, out float outradius)
 {
-  outpos = point_object_to_world(pos.xyz);
-  outradius = dot(abs(mat3(ModelMatrix) * pos.www), vec3(1.0 / 3.0));
+  int id = pointcloud_get_point_id();
+  vec4 pos_rad = texelFetch(ptcloud_pos_rad_tx, id);
+  outpos = point_object_to_world(pos_rad.xyz);
+  outradius = dot(abs(mat3(ModelMatrix) * pos_rad.www), vec3(1.0 / 3.0));
 }
 
 /* Return world position and normal. */
@@ -38,15 +34,67 @@ void pointcloud_get_pos_and_nor(out vec3 outpos, out vec3 outnor)
 
   mat3 facing_mat = pointcloud_get_facing_matrix(p);
 
+  /** \note: Avoid modulo by non-power-of-two in shader. See Index buffer setup. */
+  int vert_id = gl_VertexID % 32;
+  vec3 pos_inst = vec3(0.0);
+
+  switch (vert_id) {
+    case 0:
+      pos_inst.z = 1.0;
+      break;
+    case 1:
+      pos_inst.x = 1.0;
+      break;
+    case 2:
+      pos_inst.y = 1.0;
+      break;
+    case 3:
+      pos_inst.x = -1.0;
+      break;
+    case 4:
+      pos_inst.y = -1.0;
+      break;
+  }
+
   /* TODO(fclem): remove multiplication here. Here only for keeping the size correct for now. */
   radius *= 0.01;
-  outpos = p + (facing_mat * pos_inst) * radius;
-  outnor = facing_mat * nor;
+  outnor = facing_mat * pos_inst;
+  outpos = p + outnor * radius;
 }
 
-vec3 pointcloud_get_pos(void)
+vec3 pointcloud_get_pos()
 {
   vec3 outpos, outnor;
   pointcloud_get_pos_and_nor(outpos, outnor);
   return outpos;
 }
+
+float pointcloud_get_customdata_float(const samplerBuffer cd_buf)
+{
+  int id = pointcloud_get_point_id();
+  return texelFetch(cd_buf, id).r;
+}
+
+vec2 pointcloud_get_customdata_vec2(const samplerBuffer cd_buf)
+{
+  int id = pointcloud_get_point_id();
+  return texelFetch(cd_buf, id).rg;
+}
+
+vec3 pointcloud_get_customdata_vec3(const samplerBuffer cd_buf)
+{
+  int id = pointcloud_get_point_id();
+  return texelFetch(cd_buf, id).rgb;
+}
+
+vec4 pointcloud_get_customdata_vec4(const samplerBuffer cd_buf)
+{
+  int id = pointcloud_get_point_id();
+  return texelFetch(cd_buf, id).rgba;
+}
+
+vec2 pointcloud_get_barycentric()
+{
+  /* TODO: To be implemented. Returns a zero vector as a placeholder until then. */
+  return vec2(0.0);
+}
diff --git a/source/blender/draw/intern/shaders/draw_view_info.hh b/source/blender/draw/intern/shaders/draw_view_info.hh
index 7b500f66a68..23892a39062 100644
--- a/source/blender/draw/intern/shaders/draw_view_info.hh
+++ b/source/blender/draw/intern/shaders/draw_view_info.hh
@@ -112,9 +112,7 @@ GPU_SHADER_CREATE_INFO(draw_hair)
     .additional_info("draw_modelmat", "draw_resource_id");
 
 GPU_SHADER_CREATE_INFO(draw_pointcloud)
-    .vertex_in(0, Type::VEC4, "pos")
-    .vertex_in(1, Type::VEC3, "pos_inst")
-    .vertex_in(2, Type::VEC3, "nor")
+    .sampler(0, ImageType::FLOAT_BUFFER, "ptcloud_pos_rad_tx", Frequency::BATCH)
     .additional_info("draw_modelmat_instanced_attr", "draw_resource_id_uniform");
 
 GPU_SHADER_CREATE_INFO(draw_volume).additional_info("draw_modelmat", "draw_resource_id_uniform");
@@ -122,26 +120,15 @@ GPU_SHADER_CREATE_INFO(draw_volume).additional_info("draw_modelmat", "draw_resou
 GPU_SHADER_CREATE_INFO(draw_gpencil)
     .typedef_source("gpencil_shader_shared.h")
     .define("DRW_GPENCIL_INFO")
-    .vertex_in(0, Type::IVEC4, "ma")
-    .vertex_in(1, Type::IVEC4, "ma1")
-    .vertex_in(2, Type::IVEC4, "ma2")
-    .vertex_in(3, Type::IVEC4, "ma3")
-    .vertex_in(4, Type::VEC4, "pos")
-    .vertex_in(5, Type::VEC4, "pos1")
-    .vertex_in(6, Type::VEC4, "pos2")
-    .vertex_in(7, Type::VEC4, "pos3")
-    .vertex_in(8, Type::VEC4, "uv1")
-    .vertex_in(9, Type::VEC4, "uv2")
-    .vertex_in(10, Type::VEC4, "col1")
-    .vertex_in(11, Type::VEC4, "col2")
-    .vertex_in(12, Type::VEC4, "fcol1")
+    .sampler(0, ImageType::FLOAT_BUFFER, "gp_pos_tx")
+    .sampler(1, ImageType::FLOAT_BUFFER, "gp_col_tx")
     /* Per Object */
     .push_constant(Type::FLOAT, "gpThicknessScale") /* TODO(fclem): Replace with object info. */
     .push_constant(Type::FLOAT, "gpThicknessWorldScale") /* TODO(fclem): Same as above. */
     .define("gpThicknessIsScreenSpace", "(gpThicknessWorldScale < 0.0)")
     /* Per Layer */
     .push_constant(Type::FLOAT, "gpThicknessOffset")
-    .additional_info("draw_modelmat", "draw_resource_id_uniform", "draw_object_infos");
+    .additional_info("draw_modelmat", "draw_object_infos");
 
 /** \} */