diff options
Diffstat (limited to 'source/blender/draw/intern')
115 files changed, 8993 insertions, 2867 deletions
diff --git a/source/blender/draw/intern/DRW_gpu_wrapper.hh b/source/blender/draw/intern/DRW_gpu_wrapper.hh index 257f01a5562..890cd588527 100644 --- a/source/blender/draw/intern/DRW_gpu_wrapper.hh +++ b/source/blender/draw/intern/DRW_gpu_wrapper.hh @@ -50,13 +50,13 @@ * * `draw::Framebuffer` * Simple wrapper to #GPUFramebuffer that can be moved. - * */ #include "DRW_render.h" #include "MEM_guardedalloc.h" +#include "draw_manager.h" #include "draw_texture_pool.h" #include "BLI_math_vec_types.hh" @@ -182,7 +182,7 @@ class UniformCommon : public DataBuffer<T, len, false>, NonMovable, NonCopyable GPU_uniformbuf_free(ubo_); } - void push_update(void) + void push_update() { GPU_uniformbuf_update(ubo_, this->data_); } @@ -227,12 +227,22 @@ class StorageCommon : public DataBuffer<T, len, false>, NonMovable, NonCopyable GPU_storagebuf_free(ssbo_); } - void push_update(void) + void push_update() { BLI_assert(device_only == false); GPU_storagebuf_update(ssbo_, this->data_); } + void clear_to_zero() + { + GPU_storagebuf_clear_to_zero(ssbo_); + } + + void read() + { + GPU_storagebuf_read(ssbo_, this->data_); + } + operator GPUStorageBuf *() const { return ssbo_; @@ -319,6 +329,7 @@ class StorageArrayBuffer : public detail::StorageCommon<T, len, device_only> { MEM_freeN(this->data_); } + /* Resize to \a new_size elements. 
*/ void resize(int64_t new_size) { BLI_assert(new_size > 0); @@ -392,10 +403,10 @@ class Texture : NonCopyable { int extent, float *data = nullptr, bool cubemap = false, - int mips = 1) + int mip_len = 1) : name_(name) { - tx_ = create(extent, 0, 0, mips, format, data, false, cubemap); + tx_ = create(extent, 0, 0, mip_len, format, data, false, cubemap); } Texture(const char *name, @@ -404,17 +415,20 @@ class Texture : NonCopyable { int layers, float *data = nullptr, bool cubemap = false, - int mips = 1) + int mip_len = 1) : name_(name) { - tx_ = create(extent, layers, 0, mips, format, data, true, cubemap); + tx_ = create(extent, layers, 0, mip_len, format, data, true, cubemap); } - Texture( - const char *name, eGPUTextureFormat format, int2 extent, float *data = nullptr, int mips = 1) + Texture(const char *name, + eGPUTextureFormat format, + int2 extent, + float *data = nullptr, + int mip_len = 1) : name_(name) { - tx_ = create(UNPACK2(extent), 0, mips, format, data, false, false); + tx_ = create(UNPACK2(extent), 0, mip_len, format, data, false, false); } Texture(const char *name, @@ -422,17 +436,20 @@ class Texture : NonCopyable { int2 extent, int layers, float *data = nullptr, - int mips = 1) + int mip_len = 1) : name_(name) { - tx_ = create(UNPACK2(extent), layers, mips, format, data, true, false); + tx_ = create(UNPACK2(extent), layers, mip_len, format, data, true, false); } - Texture( - const char *name, eGPUTextureFormat format, int3 extent, float *data = nullptr, int mips = 1) + Texture(const char *name, + eGPUTextureFormat format, + int3 extent, + float *data = nullptr, + int mip_len = 1) : name_(name) { - tx_ = create(UNPACK3(extent), mips, format, data, false, false); + tx_ = create(UNPACK3(extent), mip_len, format, data, false, false); } ~Texture() @@ -467,9 +484,9 @@ class Texture : NonCopyable { * Ensure the texture has the correct properties. Recreating it if needed. * Return true if a texture has been created. 
*/ - bool ensure_1d(eGPUTextureFormat format, int extent, float *data = nullptr, int mips = 1) + bool ensure_1d(eGPUTextureFormat format, int extent, float *data = nullptr, int mip_len = 1) { - return ensure_impl(extent, 0, 0, mips, format, data, false, false); + return ensure_impl(extent, 0, 0, mip_len, format, data, false, false); } /** @@ -477,18 +494,18 @@ class Texture : NonCopyable { * Return true if a texture has been created. */ bool ensure_1d_array( - eGPUTextureFormat format, int extent, int layers, float *data = nullptr, int mips = 1) + eGPUTextureFormat format, int extent, int layers, float *data = nullptr, int mip_len = 1) { - return ensure_impl(extent, layers, 0, mips, format, data, true, false); + return ensure_impl(extent, layers, 0, mip_len, format, data, true, false); } /** * Ensure the texture has the correct properties. Recreating it if needed. * Return true if a texture has been created. */ - bool ensure_2d(eGPUTextureFormat format, int2 extent, float *data = nullptr, int mips = 1) + bool ensure_2d(eGPUTextureFormat format, int2 extent, float *data = nullptr, int mip_len = 1) { - return ensure_impl(UNPACK2(extent), 0, mips, format, data, false, false); + return ensure_impl(UNPACK2(extent), 0, mip_len, format, data, false, false); } /** @@ -496,27 +513,27 @@ class Texture : NonCopyable { * Return true if a texture has been created. */ bool ensure_2d_array( - eGPUTextureFormat format, int2 extent, int layers, float *data = nullptr, int mips = 1) + eGPUTextureFormat format, int2 extent, int layers, float *data = nullptr, int mip_len = 1) { - return ensure_impl(UNPACK2(extent), layers, mips, format, data, true, false); + return ensure_impl(UNPACK2(extent), layers, mip_len, format, data, true, false); } /** * Ensure the texture has the correct properties. Recreating it if needed. * Return true if a texture has been created. 
*/ - bool ensure_3d(eGPUTextureFormat format, int3 extent, float *data = nullptr, int mips = 1) + bool ensure_3d(eGPUTextureFormat format, int3 extent, float *data = nullptr, int mip_len = 1) { - return ensure_impl(UNPACK3(extent), mips, format, data, false, false); + return ensure_impl(UNPACK3(extent), mip_len, format, data, false, false); } /** * Ensure the texture has the correct properties. Recreating it if needed. * Return true if a texture has been created. */ - bool ensure_cube(eGPUTextureFormat format, int extent, float *data = nullptr, int mips = 1) + bool ensure_cube(eGPUTextureFormat format, int extent, float *data = nullptr, int mip_len = 1) { - return ensure_impl(extent, extent, 0, mips, format, data, false, true); + return ensure_impl(extent, extent, 0, mip_len, format, data, false, true); } /** @@ -524,9 +541,9 @@ class Texture : NonCopyable { * Return true if a texture has been created. */ bool ensure_cube_array( - eGPUTextureFormat format, int extent, int layers, float *data = nullptr, int mips = 1) + eGPUTextureFormat format, int extent, int layers, float *data = nullptr, int mip_len = 1) { - return ensure_impl(extent, extent, layers, mips, format, data, false, true); + return ensure_impl(extent, extent, layers, mip_len, format, data, false, true); } /** @@ -555,9 +572,15 @@ class Texture : NonCopyable { return mip_views_[miplvl]; } + int mip_count() const + { + return GPU_texture_mip_count(tx_); + } + /** * Ensure the availability of mipmap views. * Layer views covers all layers of array textures. + * Returns true if the views were (re)created. */ bool ensure_layer_views(bool cube_as_array = false) { @@ -594,42 +617,47 @@ class Texture : NonCopyable { /** * Returns true if the texture has been allocated or acquired from the pool. 
*/ - bool is_valid(void) const + bool is_valid() const { return tx_ != nullptr; } - int width(void) const + int width() const { return GPU_texture_width(tx_); } - int height(void) const + int height() const { return GPU_texture_height(tx_); } - bool depth(void) const + int pixel_count() const + { + return GPU_texture_width(tx_) * GPU_texture_height(tx_); + } + + bool depth() const { return GPU_texture_depth(tx_); } - bool is_stencil(void) const + bool is_stencil() const { return GPU_texture_stencil(tx_); } - bool is_integer(void) const + bool is_integer() const { return GPU_texture_integer(tx_); } - bool is_cube(void) const + bool is_cube() const { return GPU_texture_cube(tx_); } - bool is_array(void) const + bool is_array() const { return GPU_texture_array(tx_); } @@ -708,7 +736,7 @@ class Texture : NonCopyable { bool ensure_impl(int w, int h = 0, int d = 0, - int mips = 1, + int mip_len = 1, eGPUTextureFormat format = GPU_RGBA8, float *data = nullptr, bool layered = false, @@ -721,11 +749,11 @@ class Texture : NonCopyable { int3 size = this->size(); if (size != int3(w, h, d) || GPU_texture_format(tx_) != format || GPU_texture_cube(tx_) != cubemap || GPU_texture_array(tx_) != layered) { - GPU_TEXTURE_FREE_SAFE(tx_); + free(); } } if (tx_ == nullptr) { - tx_ = create(w, h, d, mips, format, data, layered, cubemap); + tx_ = create(w, h, d, mip_len, format, data, layered, cubemap); return true; } return false; @@ -734,87 +762,82 @@ class Texture : NonCopyable { GPUTexture *create(int w, int h, int d, - int mips, + int mip_len, eGPUTextureFormat format, float *data, bool layered, bool cubemap) { if (h == 0) { - return GPU_texture_create_1d(name_, w, mips, format, data); + return GPU_texture_create_1d(name_, w, mip_len, format, data); } else if (cubemap) { if (layered) { - return GPU_texture_create_cube_array(name_, w, d, mips, format, data); + return GPU_texture_create_cube_array(name_, w, d, mip_len, format, data); } else { - return GPU_texture_create_cube(name_, w, 
mips, format, data); + return GPU_texture_create_cube(name_, w, mip_len, format, data); } } else if (d == 0) { if (layered) { - return GPU_texture_create_1d_array(name_, w, h, mips, format, data); + return GPU_texture_create_1d_array(name_, w, h, mip_len, format, data); } else { - return GPU_texture_create_2d(name_, w, h, mips, format, data); + return GPU_texture_create_2d(name_, w, h, mip_len, format, data); } } else { if (layered) { - return GPU_texture_create_2d_array(name_, w, h, d, mips, format, data); + return GPU_texture_create_2d_array(name_, w, h, d, mip_len, format, data); } else { - return GPU_texture_create_3d(name_, w, h, d, mips, format, GPU_DATA_FLOAT, data); + return GPU_texture_create_3d(name_, w, h, d, mip_len, format, GPU_DATA_FLOAT, data); } } } }; class TextureFromPool : public Texture, NonMovable { - private: - GPUTexture *tx_tmp_saved_ = nullptr; - public: TextureFromPool(const char *name = "gpu::Texture") : Texture(name){}; - /* Always use `release()` after rendering and `sync()` in sync phase. */ - void acquire(int2 extent, eGPUTextureFormat format, void *owner_) + /* Always use `release()` after rendering. */ + void acquire(int2 extent, eGPUTextureFormat format) { BLI_assert(this->tx_ == nullptr); - if (this->tx_ != nullptr) { - return; - } - if (tx_tmp_saved_ != nullptr) { - if (GPU_texture_width(tx_tmp_saved_) != extent.x || - GPU_texture_height(tx_tmp_saved_) != extent.y || - GPU_texture_format(tx_tmp_saved_) != format) { - this->tx_tmp_saved_ = nullptr; - } - else { - this->tx_ = tx_tmp_saved_; - return; - } - } - DrawEngineType *owner = (DrawEngineType *)owner_; - this->tx_ = DRW_texture_pool_query_2d(UNPACK2(extent), format, owner); + + this->tx_ = DRW_texture_pool_texture_acquire( + DST.vmempool->texture_pool, UNPACK2(extent), format); } - void release(void) + void release() { /* Allows multiple release. 
*/ - if (this->tx_ != nullptr) { - tx_tmp_saved_ = this->tx_; - this->tx_ = nullptr; + if (this->tx_ == nullptr) { + return; } + DRW_texture_pool_texture_release(DST.vmempool->texture_pool, this->tx_); + this->tx_ = nullptr; } /** - * Clears any reference. Workaround for pool texture not being able to release on demand. - * Needs to be called at during the sync phase. + * Swap the content of the two textures. + * Also change ownership accordingly if needed. */ - void sync(void) + static void swap(TextureFromPool &a, Texture &b) + { + Texture::swap(a, b); + DRW_texture_pool_give_texture_ownership(DST.vmempool->texture_pool, a); + DRW_texture_pool_take_texture_ownership(DST.vmempool->texture_pool, b); + } + static void swap(Texture &a, TextureFromPool &b) { - tx_tmp_saved_ = nullptr; + swap(b, a); + } + static void swap(TextureFromPool &a, TextureFromPool &b) + { + Texture::swap(a, b); } /** Remove methods that are forbidden with this type of textures. */ @@ -832,6 +855,33 @@ class TextureFromPool : public Texture, NonMovable { GPUTexture *stencil_view() = delete; }; +/** + * Dummy type to bind texture as image. + * It is just a GPUTexture in disguise. 
+ */ +class Image { +}; + +static inline Image *as_image(GPUTexture *tex) +{ + return reinterpret_cast<Image *>(tex); +} + +static inline Image **as_image(GPUTexture **tex) +{ + return reinterpret_cast<Image **>(tex); +} + +static inline GPUTexture *as_texture(Image *img) +{ + return reinterpret_cast<GPUTexture *>(img); +} + +static inline GPUTexture **as_texture(Image **img) +{ + return reinterpret_cast<GPUTexture **>(img); +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -901,45 +951,47 @@ class Framebuffer : NonCopyable { template<typename T, int64_t len> class SwapChain { private: + BLI_STATIC_ASSERT(len > 1, "A swap-chain needs more than 1 unit in length."); std::array<T, len> chain_; - int64_t index_ = 0; public: void swap() { - index_ = (index_ + 1) % len; + for (auto i : IndexRange(len - 1)) { + T::swap(chain_[i], chain_[(i + 1) % len]); + } } T ¤t() { - return chain_[index_]; + return chain_[0]; } T &previous() { /* Avoid modulo operation with negative numbers. */ - return chain_[(index_ + len - 1) % len]; + return chain_[(0 + len - 1) % len]; } T &next() { - return chain_[(index_ + 1) % len]; + return chain_[(0 + 1) % len]; } const T ¤t() const { - return chain_[index_]; + return chain_[0]; } const T &previous() const { /* Avoid modulo operation with negative numbers. 
*/ - return chain_[(index_ + len - 1) % len]; + return chain_[(0 + len - 1) % len]; } const T &next() const { - return chain_[(index_ + 1) % len]; + return chain_[(0 + 1) % len]; } }; diff --git a/source/blender/draw/intern/DRW_render.h b/source/blender/draw/intern/DRW_render.h index fa4a1d93d3e..b49203d85f6 100644 --- a/source/blender/draw/intern/DRW_render.h +++ b/source/blender/draw/intern/DRW_render.h @@ -41,6 +41,7 @@ #include "draw_debug.h" #include "draw_manager_profiling.h" +#include "draw_state.h" #include "draw_view_data.h" #include "MEM_guardedalloc.h" @@ -206,6 +207,10 @@ struct GPUShader *DRW_shader_create_with_lib_ex(const char *vert, const char *lib, const char *defines, const char *name); +struct GPUShader *DRW_shader_create_compute_with_shaderlib(const char *comp, + const DRWShaderLibrary *lib, + const char *defines, + const char *name); struct GPUShader *DRW_shader_create_with_shaderlib_ex(const char *vert, const char *geom, const char *frag, @@ -288,83 +293,6 @@ void DRW_shader_library_free(DRWShaderLibrary *lib); /* Batches */ -/** - * DRWState is a bit-mask that stores the current render state and the desired render state. Based - * on the differences the minimum state changes can be invoked to setup the desired render state. - * - * The Write Stencil, Stencil test, Depth test and Blend state options are mutual exclusive - * therefore they aren't ordered as a bit mask. - */ -typedef enum { - /** To be used for compute passes. */ - DRW_STATE_NO_DRAW = 0, - /** Write mask */ - DRW_STATE_WRITE_DEPTH = (1 << 0), - DRW_STATE_WRITE_COLOR = (1 << 1), - /* Write Stencil. These options are mutual exclusive and packed into 2 bits */ - DRW_STATE_WRITE_STENCIL = (1 << 2), - DRW_STATE_WRITE_STENCIL_SHADOW_PASS = (2 << 2), - DRW_STATE_WRITE_STENCIL_SHADOW_FAIL = (3 << 2), - /** Depth test. 
These options are mutual exclusive and packed into 3 bits */ - DRW_STATE_DEPTH_ALWAYS = (1 << 4), - DRW_STATE_DEPTH_LESS = (2 << 4), - DRW_STATE_DEPTH_LESS_EQUAL = (3 << 4), - DRW_STATE_DEPTH_EQUAL = (4 << 4), - DRW_STATE_DEPTH_GREATER = (5 << 4), - DRW_STATE_DEPTH_GREATER_EQUAL = (6 << 4), - /** Culling test */ - DRW_STATE_CULL_BACK = (1 << 7), - DRW_STATE_CULL_FRONT = (1 << 8), - /** Stencil test. These options are mutually exclusive and packed into 2 bits. */ - DRW_STATE_STENCIL_ALWAYS = (1 << 9), - DRW_STATE_STENCIL_EQUAL = (2 << 9), - DRW_STATE_STENCIL_NEQUAL = (3 << 9), - - /** Blend state. These options are mutual exclusive and packed into 4 bits */ - DRW_STATE_BLEND_ADD = (1 << 11), - /** Same as additive but let alpha accumulate without pre-multiply. */ - DRW_STATE_BLEND_ADD_FULL = (2 << 11), - /** Standard alpha blending. */ - DRW_STATE_BLEND_ALPHA = (3 << 11), - /** Use that if color is already pre-multiply by alpha. */ - DRW_STATE_BLEND_ALPHA_PREMUL = (4 << 11), - DRW_STATE_BLEND_BACKGROUND = (5 << 11), - DRW_STATE_BLEND_OIT = (6 << 11), - DRW_STATE_BLEND_MUL = (7 << 11), - DRW_STATE_BLEND_SUB = (8 << 11), - /** Use dual source blending. WARNING: Only one color buffer allowed. */ - DRW_STATE_BLEND_CUSTOM = (9 << 11), - DRW_STATE_LOGIC_INVERT = (10 << 11), - DRW_STATE_BLEND_ALPHA_UNDER_PREMUL = (11 << 11), - - DRW_STATE_IN_FRONT_SELECT = (1 << 27), - DRW_STATE_SHADOW_OFFSET = (1 << 28), - DRW_STATE_CLIP_PLANES = (1 << 29), - DRW_STATE_FIRST_VERTEX_CONVENTION = (1 << 30), - /** DO NOT USE. Assumed always enabled. Only used internally. 
*/ - DRW_STATE_PROGRAM_POINT_SIZE = (1u << 31), -} DRWState; - -ENUM_OPERATORS(DRWState, DRW_STATE_PROGRAM_POINT_SIZE); - -#define DRW_STATE_DEFAULT \ - (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_LESS_EQUAL) -#define DRW_STATE_BLEND_ENABLED \ - (DRW_STATE_BLEND_ADD | DRW_STATE_BLEND_ADD_FULL | DRW_STATE_BLEND_ALPHA | \ - DRW_STATE_BLEND_ALPHA_PREMUL | DRW_STATE_BLEND_BACKGROUND | DRW_STATE_BLEND_OIT | \ - DRW_STATE_BLEND_MUL | DRW_STATE_BLEND_SUB | DRW_STATE_BLEND_CUSTOM | DRW_STATE_LOGIC_INVERT) -#define DRW_STATE_RASTERIZER_ENABLED \ - (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL | \ - DRW_STATE_WRITE_STENCIL_SHADOW_PASS | DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) -#define DRW_STATE_DEPTH_TEST_ENABLED \ - (DRW_STATE_DEPTH_ALWAYS | DRW_STATE_DEPTH_LESS | DRW_STATE_DEPTH_LESS_EQUAL | \ - DRW_STATE_DEPTH_EQUAL | DRW_STATE_DEPTH_GREATER | DRW_STATE_DEPTH_GREATER_EQUAL) -#define DRW_STATE_STENCIL_TEST_ENABLED \ - (DRW_STATE_STENCIL_ALWAYS | DRW_STATE_STENCIL_EQUAL | DRW_STATE_STENCIL_NEQUAL) -#define DRW_STATE_WRITE_STENCIL_ENABLED \ - (DRW_STATE_WRITE_STENCIL | DRW_STATE_WRITE_STENCIL_SHADOW_PASS | \ - DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) - typedef enum { DRW_ATTR_INT, DRW_ATTR_FLOAT, @@ -409,7 +337,7 @@ void DRW_shgroup_call_ex(DRWShadingGroup *shgroup, void *user_data); /** - * If ob is NULL, unit modelmatrix is assumed and culling is bypassed. + * If ob is NULL, unit model-matrix is assumed and culling is bypassed. */ #define DRW_shgroup_call(shgroup, geom, ob) \ DRW_shgroup_call_ex(shgroup, ob, NULL, geom, false, NULL) @@ -420,8 +348,8 @@ void DRW_shgroup_call_ex(DRWShadingGroup *shgroup, #define DRW_shgroup_call_obmat(shgroup, geom, obmat) \ DRW_shgroup_call_ex(shgroup, NULL, obmat, geom, false, NULL) -/* TODO(fclem): remove this when we have DRWView */ -/* user_data is used by DRWCallVisibilityFn defined in DRWView. 
*/ +/* TODO(fclem): remove this when we have #DRWView */ +/* user_data is used by #DRWCallVisibilityFn defined in #DRWView. */ #define DRW_shgroup_call_with_callback(shgroup, geom, ob, user_data) \ DRW_shgroup_call_ex(shgroup, ob, NULL, geom, false, user_data) @@ -454,6 +382,10 @@ void DRW_shgroup_call_compute_indirect(DRWShadingGroup *shgroup, GPUStorageBuf * void DRW_shgroup_call_procedural_points(DRWShadingGroup *sh, Object *ob, uint point_count); void DRW_shgroup_call_procedural_lines(DRWShadingGroup *sh, Object *ob, uint line_count); void DRW_shgroup_call_procedural_triangles(DRWShadingGroup *sh, Object *ob, uint tri_count); +void DRW_shgroup_call_procedural_indirect(DRWShadingGroup *shgroup, + GPUPrimType primitive_type, + Object *ob, + GPUStorageBuf *indirect_buf); /** * \warning Only use with Shaders that have `IN_PLACE_INSTANCES` defined. * TODO: Should be removed. @@ -639,10 +571,10 @@ void DRW_shgroup_buffer_texture_ref(DRWShadingGroup *shgroup, DRW_shgroup_uniform_block_ex(shgroup, name, ubo, __FILE__, __LINE__) # define DRW_shgroup_uniform_block_ref(shgroup, name, ubo) \ DRW_shgroup_uniform_block_ref_ex(shgroup, name, ubo, __FILE__, __LINE__) -# define DRW_shgroup_storage_block(shgroup, name, ubo) \ - DRW_shgroup_storage_block_ex(shgroup, name, ubo, __FILE__, __LINE__) -# define DRW_shgroup_storage_block_ref(shgroup, name, ubo) \ - DRW_shgroup_storage_block_ref_ex(shgroup, name, ubo, __FILE__, __LINE__) +# define DRW_shgroup_storage_block(shgroup, name, ssbo) \ + DRW_shgroup_storage_block_ex(shgroup, name, ssbo, __FILE__, __LINE__) +# define DRW_shgroup_storage_block_ref(shgroup, name, ssbo) \ + DRW_shgroup_storage_block_ref_ex(shgroup, name, ssbo, __FILE__, __LINE__) #else # define DRW_shgroup_vertex_buffer(shgroup, name, vert) \ DRW_shgroup_vertex_buffer_ex(shgroup, name, vert) @@ -652,10 +584,10 @@ void DRW_shgroup_buffer_texture_ref(DRWShadingGroup *shgroup, DRW_shgroup_uniform_block_ex(shgroup, name, ubo) # define 
DRW_shgroup_uniform_block_ref(shgroup, name, ubo) \ DRW_shgroup_uniform_block_ref_ex(shgroup, name, ubo) -# define DRW_shgroup_storage_block(shgroup, name, ubo) \ - DRW_shgroup_storage_block_ex(shgroup, name, ubo) -# define DRW_shgroup_storage_block_ref(shgroup, name, ubo) \ - DRW_shgroup_storage_block_ref_ex(shgroup, name, ubo) +# define DRW_shgroup_storage_block(shgroup, name, ssbo) \ + DRW_shgroup_storage_block_ex(shgroup, name, ssbo) +# define DRW_shgroup_storage_block_ref(shgroup, name, ssbo) \ + DRW_shgroup_storage_block_ref_ex(shgroup, name, ssbo) #endif bool DRW_shgroup_is_empty(DRWShadingGroup *shgroup); @@ -791,7 +723,7 @@ bool DRW_culling_box_test(const DRWView *view, const BoundBox *bbox); bool DRW_culling_plane_test(const DRWView *view, const float plane[4]); /** * Return True if the given box intersect the current view frustum. - * This function will have to be replaced when world space bb per objects is implemented. + * This function will have to be replaced when world space bounding-box per objects is implemented. */ bool DRW_culling_min_max_test(const DRWView *view, float obmat[4][4], float min[3], float max[3]); @@ -887,7 +819,6 @@ bool DRW_object_is_in_edit_mode(const struct Object *ob); * we are rendering or drawing in the viewport. 
*/ int DRW_object_visibility_in_active_context(const struct Object *ob); -bool DRW_object_is_flat_normal(const struct Object *ob); bool DRW_object_use_hide_faces(const struct Object *ob); bool DRW_object_is_visible_psys_in_active_context(const struct Object *object, @@ -981,7 +912,7 @@ typedef struct DRWContextState { struct ViewLayer *view_layer; /* 'CTX_data_view_layer(C)' */ /* Use 'object_edit' for edit-mode */ - struct Object *obact; /* 'OBACT' */ + struct Object *obact; struct RenderEngineType *engine_type; diff --git a/source/blender/draw/intern/draw_attributes.cc b/source/blender/draw/intern/draw_attributes.cc index 8fb4210901f..011d72e9e8f 100644 --- a/source/blender/draw/intern/draw_attributes.cc +++ b/source/blender/draw/intern/draw_attributes.cc @@ -65,9 +65,10 @@ bool drw_attributes_overlap(const DRW_Attributes *a, const DRW_Attributes *b) } DRW_AttributeRequest *drw_attributes_add_request(DRW_Attributes *attrs, - eCustomDataType type, - int layer, - eAttrDomain domain) + const char *name, + const eCustomDataType type, + const int layer_index, + const eAttrDomain domain) { if (attrs->num_requests >= GPU_MAX_ATTR) { return nullptr; @@ -75,7 +76,8 @@ DRW_AttributeRequest *drw_attributes_add_request(DRW_Attributes *attrs, DRW_AttributeRequest *req = &attrs->requests[attrs->num_requests]; req->cd_type = type; - req->layer_index = layer; + BLI_strncpy(req->attribute_name, name, sizeof(req->attribute_name)); + req->layer_index = layer_index; req->domain = domain; attrs->num_requests += 1; return req; @@ -86,7 +88,7 @@ bool drw_custom_data_match_attribute(const CustomData *custom_data, int *r_layer_index, eCustomDataType *r_type) { - const eCustomDataType possible_attribute_types[7] = { + const eCustomDataType possible_attribute_types[8] = { CD_PROP_BOOL, CD_PROP_INT8, CD_PROP_INT32, @@ -94,6 +96,7 @@ bool drw_custom_data_match_attribute(const CustomData *custom_data, CD_PROP_FLOAT2, CD_PROP_FLOAT3, CD_PROP_COLOR, + CD_PROP_BYTE_COLOR, }; for (int i = 0; i < 
ARRAY_SIZE(possible_attribute_types); i++) { diff --git a/source/blender/draw/intern/draw_attributes.h b/source/blender/draw/intern/draw_attributes.h index 4f82f3b94e9..b577c6c4162 100644 --- a/source/blender/draw/intern/draw_attributes.h +++ b/source/blender/draw/intern/draw_attributes.h @@ -46,8 +46,9 @@ void drw_attributes_merge(DRW_Attributes *dst, bool drw_attributes_overlap(const DRW_Attributes *a, const DRW_Attributes *b); DRW_AttributeRequest *drw_attributes_add_request(DRW_Attributes *attrs, - eCustomDataType type, - int layer, + const char *name, + eCustomDataType data_type, + int layer_index, eAttrDomain domain); bool drw_custom_data_match_attribute(const CustomData *custom_data, diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c index f846251c66b..6537490c06c 100644 --- a/source/blender/draw/intern/draw_cache.c +++ b/source/blender/draw/intern/draw_cache.c @@ -90,6 +90,7 @@ static struct DRWShapeCache { GPUBatch *drw_procedural_verts; GPUBatch *drw_procedural_lines; GPUBatch *drw_procedural_tris; + GPUBatch *drw_procedural_tri_strips; GPUBatch *drw_cursor; GPUBatch *drw_cursor_only_circle; GPUBatch *drw_fullscreen_quad; @@ -208,6 +209,21 @@ GPUBatch *drw_cache_procedural_triangles_get(void) return SHC.drw_procedural_tris; } +GPUBatch *drw_cache_procedural_triangle_strips_get() +{ + if (!SHC.drw_procedural_tri_strips) { + /* TODO(fclem): get rid of this dummy VBO. 
*/ + GPUVertFormat format = {0}; + GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format); + GPU_vertbuf_data_alloc(vbo, 1); + + SHC.drw_procedural_tri_strips = GPU_batch_create_ex( + GPU_PRIM_TRI_STRIP, vbo, NULL, GPU_BATCH_OWNS_VBO); + } + return SHC.drw_procedural_tri_strips; +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -764,6 +780,39 @@ GPUBatch *DRW_cache_normal_arrow_get(void) return SHC.drw_normal_arrow; } +void DRW_vertbuf_create_wiredata(GPUVertBuf *vbo, const int vert_len) +{ + static GPUVertFormat format = {0}; + static struct { + uint wd; + } attr_id; + if (format.attr_len == 0) { + /* initialize vertex format */ + if (!GPU_crappy_amd_driver()) { + /* Some AMD drivers strangely crash with a vbo with this format. */ + attr_id.wd = GPU_vertformat_attr_add( + &format, "wd", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); + } + else { + attr_id.wd = GPU_vertformat_attr_add(&format, "wd", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); + } + } + + GPU_vertbuf_init_with_format(vbo, &format); + GPU_vertbuf_data_alloc(vbo, vert_len); + + if (GPU_vertbuf_get_format(vbo)->stride == 1) { + memset(GPU_vertbuf_get_data(vbo), 0xFF, (size_t)vert_len); + } + else { + GPUVertBufRaw wd_step; + GPU_vertbuf_attr_get_raw_data(vbo, attr_id.wd, &wd_step); + for (int i = 0; i < vert_len; i++) { + *((float *)GPU_vertbuf_raw_step(&wd_step)) = 1.0f; + } + } +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -777,7 +826,8 @@ GPUBatch *DRW_gpencil_dummy_buffer_get(void) { if (SHC.drw_gpencil_dummy_quad == NULL) { GPUVertFormat format = {0}; - GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT); + /* NOTE: Use GPU_COMP_U32 to satisfy minimum 4-byte vertex stride for Metal backend. 
*/ + GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT); GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format); GPU_vertbuf_data_alloc(vbo, 4); @@ -802,7 +852,6 @@ GPUBatch *DRW_cache_object_all_edges_get(Object *ob) switch (ob->type) { case OB_MESH: return DRW_cache_mesh_all_edges_get(ob); - /* TODO: should match #DRW_cache_object_surface_get. */ default: return NULL; @@ -814,20 +863,6 @@ GPUBatch *DRW_cache_object_edge_detection_get(Object *ob, bool *r_is_manifold) switch (ob->type) { case OB_MESH: return DRW_cache_mesh_edge_detection_get(ob, r_is_manifold); - case OB_CURVES_LEGACY: - return NULL; - case OB_SURF: - return NULL; - case OB_FONT: - return NULL; - case OB_MBALL: - return DRW_cache_mball_edge_detection_get(ob, r_is_manifold); - case OB_CURVES: - return NULL; - case OB_POINTCLOUD: - return NULL; - case OB_VOLUME: - return NULL; default: return NULL; } @@ -838,23 +873,12 @@ GPUBatch *DRW_cache_object_face_wireframe_get(Object *ob) switch (ob->type) { case OB_MESH: return DRW_cache_mesh_face_wireframe_get(ob); - case OB_CURVES_LEGACY: - return NULL; - case OB_SURF: - return NULL; - case OB_FONT: - return NULL; - case OB_MBALL: - return DRW_cache_mball_face_wireframe_get(ob); - case OB_CURVES: - return NULL; case OB_POINTCLOUD: return DRW_pointcloud_batch_cache_get_dots(ob); case OB_VOLUME: return DRW_cache_volume_face_wireframe_get(ob); - case OB_GPENCIL: { + case OB_GPENCIL: return DRW_cache_gpencil_face_wireframe_get(ob); - } default: return NULL; } @@ -865,20 +889,6 @@ GPUBatch *DRW_cache_object_loose_edges_get(struct Object *ob) switch (ob->type) { case OB_MESH: return DRW_cache_mesh_loose_edges_get(ob); - case OB_CURVES_LEGACY: - return NULL; - case OB_SURF: - return NULL; - case OB_FONT: - return NULL; - case OB_MBALL: - return NULL; - case OB_CURVES: - return NULL; - case OB_POINTCLOUD: - return NULL; - case OB_VOLUME: - return NULL; default: return NULL; } @@ -889,20 +899,8 @@ GPUBatch 
*DRW_cache_object_surface_get(Object *ob) switch (ob->type) { case OB_MESH: return DRW_cache_mesh_surface_get(ob); - case OB_CURVES_LEGACY: - return NULL; - case OB_SURF: - return NULL; - case OB_FONT: - return NULL; - case OB_MBALL: - return DRW_cache_mball_surface_get(ob); - case OB_CURVES: - return NULL; case OB_POINTCLOUD: return DRW_cache_pointcloud_surface_get(ob); - case OB_VOLUME: - return NULL; default: return NULL; } @@ -916,18 +914,6 @@ GPUVertBuf *DRW_cache_object_pos_vertbuf_get(Object *ob) switch (type) { case OB_MESH: return DRW_mesh_batch_cache_pos_vertbuf_get((me != NULL) ? me : ob->data); - case OB_CURVES_LEGACY: - case OB_SURF: - case OB_FONT: - return NULL; - case OB_MBALL: - return DRW_mball_batch_cache_pos_vertbuf_get(ob); - case OB_CURVES: - return NULL; - case OB_POINTCLOUD: - return NULL; - case OB_VOLUME: - return NULL; default: return NULL; } @@ -952,8 +938,6 @@ int DRW_cache_object_material_count_get(struct Object *ob) case OB_SURF: case OB_FONT: return DRW_curve_material_count_get(ob->data); - case OB_MBALL: - return DRW_metaball_material_count_get(ob->data); case OB_CURVES: return DRW_curves_material_count_get(ob->data); case OB_POINTCLOUD: @@ -975,20 +959,8 @@ GPUBatch **DRW_cache_object_surface_material_get(struct Object *ob, switch (ob->type) { case OB_MESH: return DRW_cache_mesh_surface_shaded_get(ob, gpumat_array, gpumat_array_len); - case OB_CURVES_LEGACY: - return NULL; - case OB_SURF: - return NULL; - case OB_FONT: - return NULL; - case OB_MBALL: - return DRW_cache_mball_surface_shaded_get(ob, gpumat_array, gpumat_array_len); - case OB_CURVES: - return NULL; case OB_POINTCLOUD: return DRW_cache_pointcloud_surface_shaded_get(ob, gpumat_array, gpumat_array_len); - case OB_VOLUME: - return NULL; default: return NULL; } @@ -2956,39 +2928,6 @@ GPUBatch *DRW_cache_curve_vert_overlay_get(Object *ob) /** \} */ /* -------------------------------------------------------------------- */ -/** \name MetaBall - * \{ */ - -GPUBatch 
*DRW_cache_mball_surface_get(Object *ob) -{ - BLI_assert(ob->type == OB_MBALL); - return DRW_metaball_batch_cache_get_triangles_with_normals(ob); -} - -GPUBatch *DRW_cache_mball_edge_detection_get(Object *ob, bool *r_is_manifold) -{ - BLI_assert(ob->type == OB_MBALL); - return DRW_metaball_batch_cache_get_edge_detection(ob, r_is_manifold); -} - -GPUBatch *DRW_cache_mball_face_wireframe_get(Object *ob) -{ - BLI_assert(ob->type == OB_MBALL); - return DRW_metaball_batch_cache_get_wireframes_face(ob); -} - -GPUBatch **DRW_cache_mball_surface_shaded_get(Object *ob, - struct GPUMaterial **gpumat_array, - uint gpumat_array_len) -{ - BLI_assert(ob->type == OB_MBALL); - MetaBall *mb = ob->data; - return DRW_metaball_batch_cache_get_surface_shaded(ob, mb, gpumat_array, gpumat_array_len); -} - -/** \} */ - -/* -------------------------------------------------------------------- */ /** \name Font * \{ */ @@ -3306,9 +3245,6 @@ void drw_batch_cache_validate(Object *ob) case OB_SURF: DRW_curve_batch_cache_validate((Curve *)ob->data); break; - case OB_MBALL: - DRW_mball_batch_cache_validate((MetaBall *)ob->data); - break; case OB_LATTICE: DRW_lattice_batch_cache_validate((Lattice *)ob->data); break; diff --git a/source/blender/draw/intern/draw_cache.h b/source/blender/draw/intern/draw_cache.h index a107eb7c75c..4e8788ada08 100644 --- a/source/blender/draw/intern/draw_cache.h +++ b/source/blender/draw/intern/draw_cache.h @@ -213,15 +213,6 @@ struct GPUBatch *DRW_cache_particles_get_edit_tip_points(struct Object *object, struct PTCacheEdit *edit); struct GPUBatch *DRW_cache_particles_get_prim(int type); -/* Metaball */ - -struct GPUBatch *DRW_cache_mball_surface_get(struct Object *ob); -struct GPUBatch **DRW_cache_mball_surface_shaded_get(struct Object *ob, - struct GPUMaterial **gpumat_array, - uint gpumat_array_len); -struct GPUBatch *DRW_cache_mball_face_wireframe_get(struct Object *ob); -struct GPUBatch *DRW_cache_mball_edge_detection_get(struct Object *ob, bool *r_is_manifold); 
- /* Curves */ struct GPUBatch *DRW_cache_curves_surface_get(struct Object *ob); diff --git a/source/blender/draw/intern/draw_cache_extract.hh b/source/blender/draw/intern/draw_cache_extract.hh index c7127d169e1..203da22406c 100644 --- a/source/blender/draw/intern/draw_cache_extract.hh +++ b/source/blender/draw/intern/draw_cache_extract.hh @@ -55,7 +55,6 @@ enum { struct DRW_MeshCDMask { uint32_t uv : 8; uint32_t tan : 8; - uint32_t vcol : 8; uint32_t orco : 1; uint32_t tan_orco : 1; uint32_t sculpt_overlays : 1; @@ -111,7 +110,6 @@ struct MeshBufferList { GPUVertBuf *weights; /* extend */ GPUVertBuf *uv; GPUVertBuf *tan; - GPUVertBuf *vcol; GPUVertBuf *sculpt_data; GPUVertBuf *orco; /* Only for edit mode. */ diff --git a/source/blender/draw/intern/draw_cache_extract_mesh.cc b/source/blender/draw/intern/draw_cache_extract_mesh.cc index 00005fd7b4c..b1d1631cb6d 100644 --- a/source/blender/draw/intern/draw_cache_extract_mesh.cc +++ b/source/blender/draw/intern/draw_cache_extract_mesh.cc @@ -155,7 +155,7 @@ struct ExtractTaskData { bool use_threading = false; ExtractTaskData(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, ExtractorRunDatas *extractors, MeshBufferList *mbuflist, const bool use_threading) @@ -193,7 +193,7 @@ static void extract_task_data_free(void *data) * \{ */ BLI_INLINE void extract_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, ExtractorRunDatas &extractors, MeshBufferList *mbuflist, void *data_stack) @@ -209,7 +209,7 @@ BLI_INLINE void extract_init(const MeshRenderData *mr, } BLI_INLINE void extract_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, const ExtractorRunDatas &extractors, void *data_stack) { @@ -619,7 +619,6 @@ void mesh_buffer_cache_create_requested(struct TaskGraph *task_graph, EXTRACT_ADD_REQUESTED(vbo, lnor); EXTRACT_ADD_REQUESTED(vbo, uv); EXTRACT_ADD_REQUESTED(vbo, tan); - EXTRACT_ADD_REQUESTED(vbo, vcol); 
EXTRACT_ADD_REQUESTED(vbo, sculpt_data); EXTRACT_ADD_REQUESTED(vbo, orco); EXTRACT_ADD_REQUESTED(vbo, edge_fac); @@ -848,7 +847,6 @@ void mesh_buffer_cache_create_requested_subdiv(MeshBatchCache *cache, EXTRACT_ADD_REQUESTED(vbo, edituv_stretch_angle); EXTRACT_ADD_REQUESTED(ibo, lines_paint_mask); EXTRACT_ADD_REQUESTED(ibo, lines_adjacency); - EXTRACT_ADD_REQUESTED(vbo, vcol); EXTRACT_ADD_REQUESTED(vbo, weights); EXTRACT_ADD_REQUESTED(vbo, sculpt_data); diff --git a/source/blender/draw/intern/draw_cache_extract_mesh_render_data.cc b/source/blender/draw/intern/draw_cache_extract_mesh_render_data.cc index baea0c7b646..eea19cbebf3 100644 --- a/source/blender/draw/intern/draw_cache_extract_mesh_render_data.cc +++ b/source/blender/draw/intern/draw_cache_extract_mesh_render_data.cc @@ -14,6 +14,7 @@ #include "BLI_math.h" #include "BLI_task.h" +#include "BKE_attribute.hh" #include "BKE_editmesh.h" #include "BKE_editmesh_cache.h" #include "BKE_mesh.h" @@ -228,10 +229,10 @@ static void mesh_render_data_polys_sorted_build(MeshRenderData *mr, MeshBufferCa } } else { - const MPoly *mp = &mr->mpoly[0]; - for (int i = 0; i < mr->poly_len; i++, mp++) { - if (!(mr->use_hide && (mp->flag & ME_HIDE))) { - const int mat = min_ii(mp->mat_nr, mat_last); + for (int i = 0; i < mr->poly_len; i++) { + if (!(mr->use_hide && mr->hide_poly && mr->hide_poly[i])) { + const MPoly *mp = &mr->mpoly[i]; + const int mat = min_ii(mr->material_indices ? mr->material_indices[i] : 0, mat_last); tri_first_index[i] = mat_tri_offs[mat]; mat_tri_offs[mat] += mp->totloop - 2; } @@ -269,8 +270,8 @@ static void mesh_render_data_mat_tri_len_mesh_range_fn(void *__restrict userdata int *mat_tri_len = static_cast<int *>(tls->userdata_chunk); const MPoly *mp = &mr->mpoly[iter]; - if (!(mr->use_hide && (mp->flag & ME_HIDE))) { - int mat = min_ii(mp->mat_nr, mr->mat_len - 1); + if (!(mr->use_hide && mr->hide_poly && mr->hide_poly[iter])) { + int mat = min_ii(mr->material_indices ? 
mr->material_indices[iter] : 0, mr->mat_len - 1); mat_tri_len[mat] += mp->totloop - 2; } } @@ -332,15 +333,15 @@ void mesh_render_data_update_looptris(MeshRenderData *mr, if (mr->extract_type != MR_EXTRACT_BMESH) { /* Mesh */ if ((iter_type & MR_ITER_LOOPTRI) || (data_flag & MR_DATA_LOOPTRI)) { - /* NOTE(campbell): It's possible to skip allocating tessellation, + /* NOTE(@campbellbarton): It's possible to skip allocating tessellation, * the tessellation can be calculated as part of the iterator, see: P2188. * The overall advantage is small (around 1%), so keep this as-is. */ mr->mlooptri = static_cast<MLoopTri *>( MEM_mallocN(sizeof(*mr->mlooptri) * mr->tri_len, "MR_DATATYPE_LOOPTRI")); if (mr->poly_normals != nullptr) { - BKE_mesh_recalc_looptri_with_normals(me->mloop, - me->mpoly, - me->mvert, + BKE_mesh_recalc_looptri_with_normals(mr->mloop, + mr->mpoly, + mr->mvert, me->totloop, me->totpoly, mr->mlooptri, @@ -348,7 +349,7 @@ void mesh_render_data_update_looptris(MeshRenderData *mr, } else { BKE_mesh_recalc_looptri( - me->mloop, me->mpoly, me->mvert, me->totloop, me->totpoly, mr->mlooptri); + mr->mloop, mr->mpoly, mr->mvert, me->totloop, me->totpoly, mr->mlooptri); } } } @@ -378,15 +379,15 @@ void mesh_render_data_update_normals(MeshRenderData *mr, const eMRDataType data_ MEM_mallocN(sizeof(*mr->loop_normals) * mr->loop_len, __func__)); short(*clnors)[2] = static_cast<short(*)[2]>( CustomData_get_layer(&mr->me->ldata, CD_CUSTOMLOOPNORMAL)); - BKE_mesh_normals_loop_split(mr->me->mvert, + BKE_mesh_normals_loop_split(mr->mvert, mr->vert_normals, mr->vert_len, - mr->me->medge, + mr->medge, mr->edge_len, - mr->me->mloop, + mr->mloop, mr->loop_normals, mr->loop_len, - mr->me->mpoly, + mr->mpoly, mr->poly_normals, mr->poly_len, is_auto_smooth, @@ -431,6 +432,30 @@ void mesh_render_data_update_normals(MeshRenderData *mr, const eMRDataType data_ } } +static void retrieve_active_attribute_names(MeshRenderData &mr, + const Object &object, + const Mesh &mesh) +{ + const 
Mesh *mesh_final = editmesh_final_or_this(&object, &mesh); + const CustomData *cd_vdata = mesh_cd_vdata_get_from_mesh(mesh_final); + const CustomData *cd_ldata = mesh_cd_ldata_get_from_mesh(mesh_final); + + /* Necessary because which attributes are active/default is stored in #CustomData. */ + Mesh me_query = blender::dna::shallow_zero_initialize(); + BKE_id_attribute_copy_domains_temp( + ID_ME, cd_vdata, nullptr, cd_ldata, nullptr, nullptr, &me_query.id); + + mr.active_color_name = nullptr; + mr.default_color_name = nullptr; + + if (const CustomDataLayer *active = BKE_id_attributes_active_color_get(&me_query.id)) { + mr.active_color_name = active->name; + } + if (const CustomDataLayer *render = BKE_id_attributes_render_color_get(&me_query.id)) { + mr.default_color_name = render->name; + } +} + MeshRenderData *mesh_render_data_create(Object *object, Mesh *me, const bool is_editmode, @@ -470,17 +495,6 @@ MeshRenderData *mesh_render_data_create(Object *object, mr->bm_poly_centers = mr->edit_data->polyCos; } - /* A subdivision wrapper may be created in edit mode when X-ray is turned on to ensure that the - * topology seen by the user matches the one used for the selection routines. This wrapper - * seemingly takes precedence over the MDATA one, however the mesh we use for rendering is not - * the subdivided one, but the one where the MDATA wrapper would have been added. So consider - * the subdivision wrapper as well for the `has_mdata` case. 
*/ - bool has_mdata = is_mode_active && ELEM(mr->me->runtime.wrapper_type, - ME_WRAPPER_TYPE_MDATA, - ME_WRAPPER_TYPE_SUBD); - bool use_mapped = is_mode_active && - (has_mdata && !do_uvedit && mr->me && !mr->me->runtime.is_original); - int bm_ensure_types = BM_VERT | BM_EDGE | BM_LOOP | BM_FACE; BM_mesh_elem_index_ensure(mr->bm, bm_ensure_types); @@ -499,43 +513,51 @@ MeshRenderData *mesh_render_data_create(Object *object, mr->freestyle_face_ofs = CustomData_get_offset(&mr->bm->pdata, CD_FREESTYLE_FACE); #endif - if (use_mapped) { - mr->v_origindex = static_cast<const int *>( - CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX)); - mr->e_origindex = static_cast<const int *>( - CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX)); - mr->p_origindex = static_cast<const int *>( - CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX)); - - use_mapped = (mr->v_origindex || mr->e_origindex || mr->p_origindex); + /* Use bmesh directly when the object is in edit mode unchanged by any modifiers. + * For non-final UVs, always use original bmesh since the UV editor does not support + * using the cage mesh with deformed coordinates. */ + if ((is_mode_active && mr->me->runtime.is_original_bmesh && + mr->me->runtime.wrapper_type == ME_WRAPPER_TYPE_BMESH) || + (do_uvedit && !do_final)) { + mr->extract_type = MR_EXTRACT_BMESH; } - - mr->extract_type = use_mapped ? MR_EXTRACT_MAPPED : MR_EXTRACT_BMESH; - - /* Seems like the mesh_eval_final do not have the right origin indices. - * Force not mapped in this case. */ - if (has_mdata && do_final && editmesh_eval_final != editmesh_eval_cage) { - // mr->edit_bmesh = nullptr; + else { mr->extract_type = MR_EXTRACT_MESH; + + /* Use mapping from final to original mesh when the object is in edit mode. 
*/ + if (is_mode_active && do_final) { + mr->v_origindex = static_cast<const int *>( + CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX)); + mr->e_origindex = static_cast<const int *>( + CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX)); + mr->p_origindex = static_cast<const int *>( + CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX)); + } + else { + mr->v_origindex = nullptr; + mr->e_origindex = nullptr; + mr->p_origindex = nullptr; + } } } else { mr->me = me; mr->edit_bmesh = nullptr; + mr->extract_type = MR_EXTRACT_MESH; - bool use_mapped = is_paint_mode && mr->me && !mr->me->runtime.is_original; - if (use_mapped) { + if (is_paint_mode && mr->me) { mr->v_origindex = static_cast<const int *>( CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX)); mr->e_origindex = static_cast<const int *>( CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX)); mr->p_origindex = static_cast<const int *>( CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX)); - - use_mapped = (mr->v_origindex || mr->e_origindex || mr->p_origindex); } - - mr->extract_type = use_mapped ? 
MR_EXTRACT_MAPPED : MR_EXTRACT_MESH; + else { + mr->v_origindex = nullptr; + mr->e_origindex = nullptr; + mr->p_origindex = nullptr; + } } if (mr->extract_type != MR_EXTRACT_BMESH) { @@ -546,14 +568,24 @@ MeshRenderData *mesh_render_data_create(Object *object, mr->poly_len = mr->me->totpoly; mr->tri_len = poly_to_tri_count(mr->poly_len, mr->loop_len); - mr->mvert = static_cast<MVert *>(CustomData_get_layer(&mr->me->vdata, CD_MVERT)); - mr->medge = static_cast<MEdge *>(CustomData_get_layer(&mr->me->edata, CD_MEDGE)); - mr->mloop = static_cast<MLoop *>(CustomData_get_layer(&mr->me->ldata, CD_MLOOP)); - mr->mpoly = static_cast<MPoly *>(CustomData_get_layer(&mr->me->pdata, CD_MPOLY)); + mr->mvert = BKE_mesh_verts(mr->me); + mr->medge = BKE_mesh_edges(mr->me); + mr->mpoly = BKE_mesh_polys(mr->me); + mr->mloop = BKE_mesh_loops(mr->me); mr->v_origindex = static_cast<const int *>(CustomData_get_layer(&mr->me->vdata, CD_ORIGINDEX)); mr->e_origindex = static_cast<const int *>(CustomData_get_layer(&mr->me->edata, CD_ORIGINDEX)); mr->p_origindex = static_cast<const int *>(CustomData_get_layer(&mr->me->pdata, CD_ORIGINDEX)); + + mr->material_indices = static_cast<const int *>( + CustomData_get_layer_named(&me->pdata, CD_PROP_INT32, "material_index")); + + mr->hide_vert = static_cast<const bool *>( + CustomData_get_layer_named(&me->vdata, CD_PROP_BOOL, ".hide_vert")); + mr->hide_edge = static_cast<const bool *>( + CustomData_get_layer_named(&me->edata, CD_PROP_BOOL, ".hide_edge")); + mr->hide_poly = static_cast<const bool *>( + CustomData_get_layer_named(&me->pdata, CD_PROP_BOOL, ".hide_poly")); } else { /* #BMesh */ @@ -566,6 +598,8 @@ MeshRenderData *mesh_render_data_create(Object *object, mr->tri_len = poly_to_tri_count(mr->poly_len, mr->loop_len); } + retrieve_active_attribute_names(*mr, *object, *me); + return mr; } diff --git a/source/blender/draw/intern/draw_cache_impl.h b/source/blender/draw/intern/draw_cache_impl.h index 4fa5813d476..7f7d0a7613f 100644 --- 
a/source/blender/draw/intern/draw_cache_impl.h +++ b/source/blender/draw/intern/draw_cache_impl.h @@ -36,10 +36,6 @@ extern "C" { /** \name Expose via BKE callbacks * \{ */ -void DRW_mball_batch_cache_dirty_tag(struct MetaBall *mb, int mode); -void DRW_mball_batch_cache_validate(struct MetaBall *mb); -void DRW_mball_batch_cache_free(struct MetaBall *mb); - void DRW_curve_batch_cache_dirty_tag(struct Curve *cu, int mode); void DRW_curve_batch_cache_validate(struct Curve *cu); void DRW_curve_batch_cache_free(struct Curve *cu); @@ -111,39 +107,6 @@ struct GPUBatch *DRW_curve_batch_cache_get_edit_verts(struct Curve *cu); /** \} */ /* -------------------------------------------------------------------- */ -/** \name Metaball - * \{ */ - -int DRW_metaball_material_count_get(struct MetaBall *mb); - -struct GPUBatch *DRW_metaball_batch_cache_get_triangles_with_normals(struct Object *ob); -struct GPUBatch **DRW_metaball_batch_cache_get_surface_shaded(struct Object *ob, - struct MetaBall *mb, - struct GPUMaterial **gpumat_array, - uint gpumat_array_len); -struct GPUBatch *DRW_metaball_batch_cache_get_wireframes_face(struct Object *ob); -struct GPUBatch *DRW_metaball_batch_cache_get_edge_detection(struct Object *ob, - bool *r_is_manifold); - -/** \} */ - -/* -------------------------------------------------------------------- */ -/** \name DispList - * \{ */ - -void DRW_displist_vertbuf_create_pos_and_nor(struct ListBase *lb, - struct GPUVertBuf *vbo, - const struct Scene *scene); -void DRW_displist_vertbuf_create_wiredata(struct ListBase *lb, struct GPUVertBuf *vbo); -void DRW_displist_indexbuf_create_lines_in_order(struct ListBase *lb, struct GPUIndexBuf *ibo); -void DRW_displist_indexbuf_create_triangles_in_order(struct ListBase *lb, struct GPUIndexBuf *ibo); -void DRW_displist_indexbuf_create_edges_adjacency_lines(struct ListBase *lb, - struct GPUIndexBuf *ibo, - bool *r_is_manifold); - -/** \} */ - -/* -------------------------------------------------------------------- 
*/ /** \name Lattice * \{ */ @@ -161,6 +124,16 @@ struct GPUBatch *DRW_lattice_batch_cache_get_edit_verts(struct Lattice *lt); int DRW_curves_material_count_get(struct Curves *curves); +/** + * Provide GPU access to a specific evaluated attribute on curves. + * + * \return A pointer to location where the texture will be + * stored, which will be filled by #DRW_shgroup_curves_create_sub. + */ +struct GPUTexture **DRW_curves_texture_for_evaluated_attribute(struct Curves *curves, + const char *name, + bool *r_is_point_domain); + struct GPUBatch *DRW_curves_batch_cache_get_edit_points(struct Curves *curves); void DRW_curves_batch_cache_create_requested(struct Object *ob); @@ -299,7 +272,6 @@ struct GPUBatch *DRW_mesh_batch_cache_get_edit_mesh_analysis(struct Mesh *me); * \{ */ struct GPUVertBuf *DRW_mesh_batch_cache_pos_vertbuf_get(struct Mesh *me); -struct GPUVertBuf *DRW_mball_batch_cache_pos_vertbuf_get(struct Object *ob); int DRW_mesh_material_count_get(const struct Object *object, const struct Mesh *me); diff --git a/source/blender/draw/intern/draw_cache_impl_curve.cc b/source/blender/draw/intern/draw_cache_impl_curve.cc index ebcdabe4942..695c348d8e2 100644 --- a/source/blender/draw/intern/draw_cache_impl_curve.cc +++ b/source/blender/draw/intern/draw_cache_impl_curve.cc @@ -108,7 +108,7 @@ static void curve_eval_render_wire_verts_edges_len_get(const blender::bke::Curve const blender::VArray<bool> cyclic = curves.cyclic(); for (const int i : curves.curves_range()) { const IndexRange points = curves.evaluated_points_for_curve(i); - *r_edge_len += blender::bke::curves::curve_segment_num(points.size(), cyclic[i]); + *r_edge_len += blender::bke::curves::segments_num(points.size(), cyclic[i]); } } diff --git a/source/blender/draw/intern/draw_cache_impl_curves.cc b/source/blender/draw/intern/draw_cache_impl_curves.cc index 68ca1153c96..3bca17d9c56 100644 --- a/source/blender/draw/intern/draw_cache_impl_curves.cc +++ 
b/source/blender/draw/intern/draw_cache_impl_curves.cc @@ -75,13 +75,14 @@ static void curves_batch_cache_init(Curves &curves) if (!cache) { cache = MEM_cnew<CurvesBatchCache>(__func__); - BLI_mutex_init(&cache->render_mutex); curves.batch_cache = cache; } else { memset(cache, 0, sizeof(*cache)); } + BLI_mutex_init(&cache->render_mutex); + cache->is_dirty = false; } @@ -258,7 +259,7 @@ static void curves_batch_cache_fill_segments_proc_pos( } } -static void curves_batch_cache_ensure_procedural_pos(Curves &curves, +static void curves_batch_cache_ensure_procedural_pos(const Curves &curves, CurvesEvalCache &cache, GPUMaterial *gpu_material) { @@ -268,7 +269,8 @@ static void curves_batch_cache_ensure_procedural_pos(Curves &curves, GPU_vertformat_attr_add(&format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); GPU_vertformat_alias_add(&format, "pos"); - cache.proc_point_buf = GPU_vertbuf_create_with_format(&format); + cache.proc_point_buf = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_point_buf, cache.point_len); MutableSpan posTime_data{ @@ -278,7 +280,8 @@ static void curves_batch_cache_ensure_procedural_pos(Curves &curves, GPUVertFormat length_format = {0}; GPU_vertformat_attr_add(&length_format, "hairLength", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); - cache.proc_length_buf = GPU_vertbuf_create_with_format(&length_format); + cache.proc_length_buf = GPU_vertbuf_create_with_format_ex( + &length_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_length_buf, cache.strands_len); MutableSpan hairLength_data{ @@ -311,12 +314,15 @@ void drw_curves_get_attribute_sampler_name(const char *layer_name, char r_sample BLI_snprintf(r_sampler_name, 32, "a%s", attr_safe_name); } -static void curves_batch_cache_ensure_procedural_final_attr( - CurvesEvalCache &cache, GPUVertFormat *format, int subdiv, int index, const char *name) +static void 
curves_batch_cache_ensure_procedural_final_attr(CurvesEvalCache &cache, + const GPUVertFormat *format, + const int subdiv, + const int index, + const char *name) { CurvesEvalFinalCache &final_cache = cache.final[subdiv]; - final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex(format, - GPU_USAGE_DEVICE_ONLY); + final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex( + format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); /* Create a destination buffer for the transform feedback. Sized appropriately */ /* Those are points! not line segments. */ @@ -333,8 +339,8 @@ static void curves_batch_cache_ensure_procedural_final_attr( static void curves_batch_ensure_attribute(const Curves &curves, CurvesEvalCache &cache, const DRW_AttributeRequest &request, - int subdiv, - int index) + const int subdiv, + const int index) { GPU_VERTBUF_DISCARD_SAFE(cache.proc_attributes_buf[index]); DRW_TEXTURE_FREE_SAFE(cache.proc_attributes_tex[index]); @@ -347,27 +353,28 @@ static void curves_batch_ensure_attribute(const Curves &curves, /* All attributes use vec4, see comment below. */ GPU_vertformat_attr_add(&format, sampler_name, GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format(&format); + cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPUVertBuf *attr_vbo = cache.proc_attributes_buf[index]; GPU_vertbuf_data_alloc(attr_vbo, request.domain == ATTR_DOMAIN_POINT ? 
curves.geometry.point_num : curves.geometry.curve_num); - CurveComponent component; - component.replace(const_cast<Curves *>(&curves), GeometryOwnershipType::ReadOnly); + const blender::bke::AttributeAccessor attributes = + blender::bke::CurvesGeometry::wrap(curves.geometry).attributes(); /* TODO(@kevindietrich): float4 is used for scalar attributes as the implicit conversion done * by OpenGL to vec4 for a scalar `s` will produce a `vec4(s, 0, 0, 1)`. However, following * the Blender convention, it should be `vec4(s, s, s, 1)`. This could be resolved using a * similar texture state swizzle to map the attribute correctly as for volume attributes, so we * can control the conversion ourselves. */ - blender::VArray<ColorGeometry4f> attribute = component.attribute_get_for_read<ColorGeometry4f>( + blender::VArray<ColorGeometry4f> attribute = attributes.lookup_or_default<ColorGeometry4f>( request.attribute_name, request.domain, {0.0f, 0.0f, 0.0f, 1.0f}); MutableSpan<ColorGeometry4f> vbo_span{ static_cast<ColorGeometry4f *>(GPU_vertbuf_get_data(attr_vbo)), - component.attribute_domain_num(request.domain)}; + attributes.domain_size(request.domain)}; attribute.materialize(vbo_span); @@ -393,10 +400,10 @@ static void curves_batch_cache_fill_strands_data(const Curves &curves_id, curves_id.geometry); for (const int i : IndexRange(curves.curves_num())) { - const IndexRange curve_range = curves.points_for_curve(i); + const IndexRange points = curves.points_for_curve(i); - *(uint *)GPU_vertbuf_raw_step(&data_step) = curve_range.start(); - *(ushort *)GPU_vertbuf_raw_step(&seg_step) = curve_range.size() - 1; + *(uint *)GPU_vertbuf_raw_step(&data_step) = points.start(); + *(ushort *)GPU_vertbuf_raw_step(&seg_step) = points.size() - 1; } } @@ -412,11 +419,13 @@ static void curves_batch_cache_ensure_procedural_strand_data(Curves &curves, uint seg_id = GPU_vertformat_attr_add(&format_seg, "data", GPU_COMP_U16, 1, GPU_FETCH_INT); /* Curve Data. 
*/ - cache.proc_strand_buf = GPU_vertbuf_create_with_format(&format_data); + cache.proc_strand_buf = GPU_vertbuf_create_with_format_ex( + &format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_strand_buf, cache.strands_len); GPU_vertbuf_attr_get_raw_data(cache.proc_strand_buf, data_id, &data_step); - cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg); + cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex( + &format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache.proc_strand_seg_buf, cache.strands_len); GPU_vertbuf_attr_get_raw_data(cache.proc_strand_seg_buf, seg_id, &seg_step); @@ -437,7 +446,8 @@ static void curves_batch_cache_ensure_procedural_final_points(CurvesEvalCache &c GPUVertFormat format = {0}; GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(&format, GPU_USAGE_DEVICE_ONLY); + cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); /* Create a destination buffer for the transform feedback. Sized appropriately */ /* Those are points! not line segments. 
*/ @@ -509,58 +519,41 @@ static bool curves_ensure_attributes(const Curves &curves, ThreadMutex *render_mutex = &cache.render_mutex; const CustomData *cd_curve = &curves.geometry.curve_data; const CustomData *cd_point = &curves.geometry.point_data; + CurvesEvalFinalCache &final_cache = cache.curves_cache.final[subdiv]; - DRW_Attributes attrs_needed; - drw_attributes_clear(&attrs_needed); - ListBase gpu_attrs = GPU_material_attributes(gpu_material); - LISTBASE_FOREACH (GPUMaterialAttribute *, gpu_attr, &gpu_attrs) { - const char *name = gpu_attr->name; - - int layer_index; - eCustomDataType type; - eAttrDomain domain; - if (drw_custom_data_match_attribute(cd_curve, name, &layer_index, &type)) { - domain = ATTR_DOMAIN_CURVE; - } - else if (drw_custom_data_match_attribute(cd_point, name, &layer_index, &type)) { - domain = ATTR_DOMAIN_POINT; - } - else { - continue; - } - - switch (type) { - case CD_PROP_BOOL: - case CD_PROP_INT8: - case CD_PROP_INT32: - case CD_PROP_FLOAT: - case CD_PROP_FLOAT2: - case CD_PROP_FLOAT3: - case CD_PROP_COLOR: { - DRW_AttributeRequest *request = drw_attributes_add_request( - &attrs_needed, type, layer_index, domain); - if (request) { - BLI_strncpy(request->attribute_name, name, sizeof(request->attribute_name)); - } - - break; + if (gpu_material) { + DRW_Attributes attrs_needed; + drw_attributes_clear(&attrs_needed); + ListBase gpu_attrs = GPU_material_attributes(gpu_material); + LISTBASE_FOREACH (GPUMaterialAttribute *, gpu_attr, &gpu_attrs) { + const char *name = gpu_attr->name; + + int layer_index; + eCustomDataType type; + eAttrDomain domain; + if (drw_custom_data_match_attribute(cd_curve, name, &layer_index, &type)) { + domain = ATTR_DOMAIN_CURVE; + } + else if (drw_custom_data_match_attribute(cd_point, name, &layer_index, &type)) { + domain = ATTR_DOMAIN_POINT; + } + else { + continue; } - default: - break; - } - } - CurvesEvalFinalCache &final_cache = cache.curves_cache.final[subdiv]; + drw_attributes_add_request(&attrs_needed, 
name, type, layer_index, domain); + } - if (!drw_attributes_overlap(&final_cache.attr_used, &attrs_needed)) { - /* Some new attributes have been added, free all and start over. */ - for (const int i : IndexRange(GPU_MAX_ATTR)) { - GPU_VERTBUF_DISCARD_SAFE(cache.curves_cache.proc_attributes_buf[i]); - DRW_TEXTURE_FREE_SAFE(cache.curves_cache.proc_attributes_tex[i]); + if (!drw_attributes_overlap(&final_cache.attr_used, &attrs_needed)) { + /* Some new attributes have been added, free all and start over. */ + for (const int i : IndexRange(GPU_MAX_ATTR)) { + GPU_VERTBUF_DISCARD_SAFE(cache.curves_cache.proc_attributes_buf[i]); + DRW_TEXTURE_FREE_SAFE(cache.curves_cache.proc_attributes_tex[i]); + } + drw_attributes_merge(&final_cache.attr_used, &attrs_needed, render_mutex); } - drw_attributes_merge(&final_cache.attr_used, &attrs_needed, render_mutex); + drw_attributes_merge(&final_cache.attr_used_over_time, &attrs_needed, render_mutex); } - drw_attributes_merge(&final_cache.attr_used_over_time, &attrs_needed, render_mutex); bool need_tf_update = false; @@ -581,16 +574,15 @@ static bool curves_ensure_attributes(const Curves &curves, return need_tf_update; } -bool curves_ensure_procedural_data(Object *object, +bool curves_ensure_procedural_data(Curves *curves, CurvesEvalCache **r_hair_cache, GPUMaterial *gpu_material, const int subdiv, const int thickness_res) { bool need_ft_update = false; - Curves &curves = *static_cast<Curves *>(object->data); - CurvesBatchCache &cache = curves_batch_cache_get(curves); + CurvesBatchCache &cache = curves_batch_cache_get(*curves); *r_hair_cache = &cache.curves_cache; const int steps = 3; /* TODO: don't hard-code? */ @@ -598,14 +590,14 @@ bool curves_ensure_procedural_data(Object *object, /* Refreshed on combing and simulation. 
*/ if ((*r_hair_cache)->proc_point_buf == nullptr) { - ensure_seg_pt_count(curves, cache.curves_cache); - curves_batch_cache_ensure_procedural_pos(curves, cache.curves_cache, gpu_material); + ensure_seg_pt_count(*curves, cache.curves_cache); + curves_batch_cache_ensure_procedural_pos(*curves, cache.curves_cache, gpu_material); need_ft_update = true; } /* Refreshed if active layer or custom data changes. */ if ((*r_hair_cache)->strand_tex == nullptr) { - curves_batch_cache_ensure_procedural_strand_data(curves, cache.curves_cache); + curves_batch_cache_ensure_procedural_strand_data(*curves, cache.curves_cache); } /* Refreshed only on subdiv count change. */ @@ -615,12 +607,10 @@ bool curves_ensure_procedural_data(Object *object, } if ((*r_hair_cache)->final[subdiv].proc_hairs[thickness_res - 1] == nullptr) { curves_batch_cache_ensure_procedural_indices( - curves, cache.curves_cache, thickness_res, subdiv); + *curves, cache.curves_cache, thickness_res, subdiv); } - if (gpu_material) { - need_ft_update |= curves_ensure_attributes(curves, cache, gpu_material, subdiv); - } + need_ft_update |= curves_ensure_attributes(*curves, cache, gpu_material, subdiv); return need_ft_update; } @@ -636,6 +626,70 @@ GPUBatch *DRW_curves_batch_cache_get_edit_points(Curves *curves) return DRW_batch_request(&cache.edit_points); } +static void request_attribute(Curves &curves, const char *name) +{ + CurvesBatchCache &cache = curves_batch_cache_get(curves); + const DRWContextState *draw_ctx = DRW_context_state_get(); + const Scene *scene = draw_ctx->scene; + const int subdiv = scene->r.hair_subdiv; + CurvesEvalFinalCache &final_cache = cache.curves_cache.final[subdiv]; + + DRW_Attributes attributes{}; + + blender::bke::CurvesGeometry &curves_geometry = blender::bke::CurvesGeometry::wrap( + curves.geometry); + std::optional<blender::bke::AttributeMetaData> meta_data = + curves_geometry.attributes().lookup_meta_data(name); + if (!meta_data) { + return; + } + const eAttrDomain domain = 
meta_data->domain; + const eCustomDataType type = meta_data->data_type; + const CustomData &custom_data = domain == ATTR_DOMAIN_POINT ? curves.geometry.point_data : + curves.geometry.curve_data; + + drw_attributes_add_request( + &attributes, name, type, CustomData_get_named_layer(&custom_data, type, name), domain); + + drw_attributes_merge(&final_cache.attr_used, &attributes, &cache.render_mutex); +} + +GPUTexture **DRW_curves_texture_for_evaluated_attribute(Curves *curves, + const char *name, + bool *r_is_point_domain) +{ + CurvesBatchCache &cache = curves_batch_cache_get(*curves); + const DRWContextState *draw_ctx = DRW_context_state_get(); + const Scene *scene = draw_ctx->scene; + const int subdiv = scene->r.hair_subdiv; + CurvesEvalFinalCache &final_cache = cache.curves_cache.final[subdiv]; + + request_attribute(*curves, name); + + int request_i = -1; + for (const int i : IndexRange(final_cache.attr_used.num_requests)) { + if (STREQ(final_cache.attr_used.requests[i].attribute_name, name)) { + request_i = i; + break; + } + } + if (request_i == -1) { + *r_is_point_domain = false; + return nullptr; + } + switch (final_cache.attr_used.requests[request_i].domain) { + case ATTR_DOMAIN_POINT: + *r_is_point_domain = true; + return &final_cache.attributes_tex[request_i]; + case ATTR_DOMAIN_CURVE: + *r_is_point_domain = false; + return &cache.curves_cache.proc_attributes_tex[request_i]; + default: + BLI_assert_unreachable(); + return nullptr; + } +} + void DRW_curves_batch_cache_create_requested(Object *ob) { Curves *curves = static_cast<Curves *>(ob->data); diff --git a/source/blender/draw/intern/draw_cache_impl_displist.c b/source/blender/draw/intern/draw_cache_impl_displist.c deleted file mode 100644 index 96c088c3ee9..00000000000 --- a/source/blender/draw/intern/draw_cache_impl_displist.c +++ /dev/null @@ -1,354 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2017 Blender Foundation. All rights reserved. 
*/ - -/** \file - * \ingroup draw - * - * \brief DispList API for render engines - * - * \note DispList may be removed soon! This is a utility for object types that use render. - */ - -#include "BLI_edgehash.h" -#include "BLI_listbase.h" -#include "BLI_math_vector.h" -#include "BLI_utildefines.h" - -#include "DNA_curve_types.h" -#include "DNA_scene_types.h" - -#include "BKE_displist.h" - -#include "GPU_batch.h" -#include "GPU_capabilities.h" - -#include "draw_cache_inline.h" - -#include "draw_cache_impl.h" /* own include */ - -static int dl_vert_len(const DispList *dl) -{ - switch (dl->type) { - case DL_INDEX3: - case DL_INDEX4: - return dl->nr; - case DL_SURF: - return dl->parts * dl->nr; - } - return 0; -} - -static int dl_tri_len(const DispList *dl) -{ - switch (dl->type) { - case DL_INDEX3: - return dl->parts; - case DL_INDEX4: - return dl->parts * 2; - case DL_SURF: - return dl->totindex * 2; - } - return 0; -} - -/* see: displist_vert_coords_alloc */ -static int curve_render_surface_vert_len_get(const ListBase *lb) -{ - int vert_len = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - vert_len += dl_vert_len(dl); - } - return vert_len; -} - -static int curve_render_surface_tri_len_get(const ListBase *lb) -{ - int tri_len = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - tri_len += dl_tri_len(dl); - } - return tri_len; -} - -typedef void(SetTriIndicesFn)(void *thunk, uint v1, uint v2, uint v3); - -static void displist_indexbufbuilder_set( - SetTriIndicesFn *set_tri_indices, - SetTriIndicesFn *set_quad_tri_indices, /* meh, find a better solution. 
*/ - void *thunk, - const DispList *dl, - const int ofs) -{ - if (ELEM(dl->type, DL_INDEX3, DL_INDEX4, DL_SURF)) { - const int *idx = dl->index; - if (dl->type == DL_INDEX3) { - const int i_end = dl->parts; - for (int i = 0; i < i_end; i++, idx += 3) { - set_tri_indices(thunk, idx[0] + ofs, idx[2] + ofs, idx[1] + ofs); - } - } - else if (dl->type == DL_SURF) { - const int i_end = dl->totindex; - for (int i = 0; i < i_end; i++, idx += 4) { - set_quad_tri_indices(thunk, idx[0] + ofs, idx[2] + ofs, idx[1] + ofs); - set_quad_tri_indices(thunk, idx[2] + ofs, idx[0] + ofs, idx[3] + ofs); - } - } - else { - BLI_assert(dl->type == DL_INDEX4); - const int i_end = dl->parts; - for (int i = 0; i < i_end; i++, idx += 4) { - if (idx[2] != idx[3]) { - set_quad_tri_indices(thunk, idx[2] + ofs, idx[0] + ofs, idx[1] + ofs); - set_quad_tri_indices(thunk, idx[0] + ofs, idx[2] + ofs, idx[3] + ofs); - } - else { - set_tri_indices(thunk, idx[2] + ofs, idx[0] + ofs, idx[1] + ofs); - } - } - } - } -} - -void DRW_displist_vertbuf_create_pos_and_nor(ListBase *lb, GPUVertBuf *vbo, const Scene *scene) -{ - const bool do_hq_normals = (scene->r.perf_flag & SCE_PERF_HQ_NORMALS) != 0 || - GPU_use_hq_normals_workaround(); - - static GPUVertFormat format = {0}; - static GPUVertFormat format_hq = {0}; - static struct { - uint pos, nor; - uint pos_hq, nor_hq; - } attr_id; - if (format.attr_len == 0) { - /* initialize vertex format */ - attr_id.pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); - attr_id.nor = GPU_vertformat_attr_add( - &format, "nor", GPU_COMP_I10, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); - /* initialize vertex format */ - attr_id.pos_hq = GPU_vertformat_attr_add(&format_hq, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); - attr_id.nor_hq = GPU_vertformat_attr_add( - &format_hq, "nor", GPU_COMP_I16, 3, GPU_FETCH_INT_TO_FLOAT_UNIT); - } - - uint pos_id = do_hq_normals ? attr_id.pos_hq : attr_id.pos; - uint nor_id = do_hq_normals ? 
attr_id.nor_hq : attr_id.nor; - - GPU_vertbuf_init_with_format(vbo, do_hq_normals ? &format_hq : &format); - GPU_vertbuf_data_alloc(vbo, curve_render_surface_vert_len_get(lb)); - - BKE_displist_normals_add(lb); - - int vbo_len_used = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - const bool ndata_is_single = dl->type == DL_INDEX3; - if (ELEM(dl->type, DL_INDEX3, DL_INDEX4, DL_SURF)) { - const float *fp_co = dl->verts; - const float *fp_no = dl->nors; - const int vbo_end = vbo_len_used + dl_vert_len(dl); - while (vbo_len_used < vbo_end) { - GPU_vertbuf_attr_set(vbo, pos_id, vbo_len_used, fp_co); - if (fp_no) { - GPUNormal vnor_pack; - GPU_normal_convert_v3(&vnor_pack, fp_no, do_hq_normals); - GPU_vertbuf_attr_set(vbo, nor_id, vbo_len_used, &vnor_pack); - if (ndata_is_single == false) { - fp_no += 3; - } - } - fp_co += 3; - vbo_len_used += 1; - } - } - } -} - -void DRW_vertbuf_create_wiredata(GPUVertBuf *vbo, const int vert_len) -{ - static GPUVertFormat format = {0}; - static struct { - uint wd; - } attr_id; - if (format.attr_len == 0) { - /* initialize vertex format */ - if (!GPU_crappy_amd_driver()) { - /* Some AMD drivers strangely crash with a vbo with this format. 
*/ - attr_id.wd = GPU_vertformat_attr_add( - &format, "wd", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); - } - else { - attr_id.wd = GPU_vertformat_attr_add(&format, "wd", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); - } - } - - GPU_vertbuf_init_with_format(vbo, &format); - GPU_vertbuf_data_alloc(vbo, vert_len); - - if (GPU_vertbuf_get_format(vbo)->stride == 1) { - memset(GPU_vertbuf_get_data(vbo), 0xFF, (size_t)vert_len); - } - else { - GPUVertBufRaw wd_step; - GPU_vertbuf_attr_get_raw_data(vbo, attr_id.wd, &wd_step); - for (int i = 0; i < vert_len; i++) { - *((float *)GPU_vertbuf_raw_step(&wd_step)) = 1.0f; - } - } -} - -void DRW_displist_vertbuf_create_wiredata(ListBase *lb, GPUVertBuf *vbo) -{ - const int vert_len = curve_render_surface_vert_len_get(lb); - DRW_vertbuf_create_wiredata(vbo, vert_len); -} - -void DRW_displist_indexbuf_create_triangles_in_order(ListBase *lb, GPUIndexBuf *ibo) -{ - const int tri_len = curve_render_surface_tri_len_get(lb); - const int vert_len = curve_render_surface_vert_len_get(lb); - - GPUIndexBufBuilder elb; - GPU_indexbuf_init(&elb, GPU_PRIM_TRIS, tri_len, vert_len); - - int ofs = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - displist_indexbufbuilder_set((SetTriIndicesFn *)GPU_indexbuf_add_tri_verts, - (SetTriIndicesFn *)GPU_indexbuf_add_tri_verts, - &elb, - dl, - ofs); - ofs += dl_vert_len(dl); - } - - GPU_indexbuf_build_in_place(&elb, ibo); -} - -static void set_overlay_wires_tri_indices(void *thunk, uint v1, uint v2, uint v3) -{ - GPUIndexBufBuilder *eld = (GPUIndexBufBuilder *)thunk; - GPU_indexbuf_add_line_verts(eld, v1, v2); - GPU_indexbuf_add_line_verts(eld, v2, v3); - GPU_indexbuf_add_line_verts(eld, v3, v1); -} - -static void set_overlay_wires_quad_tri_indices(void *thunk, uint v1, uint v2, uint v3) -{ - GPUIndexBufBuilder *eld = (GPUIndexBufBuilder *)thunk; - GPU_indexbuf_add_line_verts(eld, v1, v3); - GPU_indexbuf_add_line_verts(eld, v3, v2); -} - -void DRW_displist_indexbuf_create_lines_in_order(ListBase *lb, 
GPUIndexBuf *ibo) -{ - const int tri_len = curve_render_surface_tri_len_get(lb); - const int vert_len = curve_render_surface_vert_len_get(lb); - - GPUIndexBufBuilder elb; - GPU_indexbuf_init(&elb, GPU_PRIM_LINES, tri_len * 3, vert_len); - - int ofs = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - displist_indexbufbuilder_set( - set_overlay_wires_tri_indices, set_overlay_wires_quad_tri_indices, &elb, dl, ofs); - ofs += dl_vert_len(dl); - } - - GPU_indexbuf_build_in_place(&elb, ibo); -} - -/* Edge detection/adjacency. */ -#define NO_EDGE INT_MAX -static void set_edge_adjacency_lines_indices( - EdgeHash *eh, GPUIndexBufBuilder *elb, bool *r_is_manifold, uint v1, uint v2, uint v3) -{ - bool inv_indices = (v2 > v3); - void **pval; - bool value_is_init = BLI_edgehash_ensure_p(eh, v2, v3, &pval); - int v_data = POINTER_AS_INT(*pval); - if (!value_is_init || v_data == NO_EDGE) { - /* Save the winding order inside the sign bit. Because the - * edgehash sort the keys and we need to compare winding later. */ - int value = (int)v1 + 1; /* Int 0 bm_looptricannot be signed */ - *pval = POINTER_FROM_INT((inv_indices) ? -value : value); - } - else { - /* HACK Tag as not used. Prevent overhead of BLI_edgehash_remove. */ - *pval = POINTER_FROM_INT(NO_EDGE); - bool inv_opposite = (v_data < 0); - uint v_opposite = (uint)abs(v_data) - 1; - - if (inv_opposite == inv_indices) { - /* Don't share edge if triangles have non matching winding. 
*/ - GPU_indexbuf_add_line_adj_verts(elb, v1, v2, v3, v1); - GPU_indexbuf_add_line_adj_verts(elb, v_opposite, v2, v3, v_opposite); - *r_is_manifold = false; - } - else { - GPU_indexbuf_add_line_adj_verts(elb, v1, v2, v3, v_opposite); - } - } -} - -static void set_edges_adjacency_lines_indices(void *thunk, uint v1, uint v2, uint v3) -{ - void **packed = (void **)thunk; - GPUIndexBufBuilder *elb = (GPUIndexBufBuilder *)packed[0]; - EdgeHash *eh = (EdgeHash *)packed[1]; - bool *r_is_manifold = (bool *)packed[2]; - - set_edge_adjacency_lines_indices(eh, elb, r_is_manifold, v1, v2, v3); - set_edge_adjacency_lines_indices(eh, elb, r_is_manifold, v2, v3, v1); - set_edge_adjacency_lines_indices(eh, elb, r_is_manifold, v3, v1, v2); -} - -void DRW_displist_indexbuf_create_edges_adjacency_lines(struct ListBase *lb, - struct GPUIndexBuf *ibo, - bool *r_is_manifold) -{ - const int tri_len = curve_render_surface_tri_len_get(lb); - const int vert_len = curve_render_surface_vert_len_get(lb); - - *r_is_manifold = true; - - /* Allocate max but only used indices are sent to GPU. */ - GPUIndexBufBuilder elb; - GPU_indexbuf_init(&elb, GPU_PRIM_LINES_ADJ, tri_len * 3, vert_len); - - EdgeHash *eh = BLI_edgehash_new_ex(__func__, tri_len * 3); - - /* pack values to pass to `set_edges_adjacency_lines_indices` function. */ - void *thunk[3] = {&elb, eh, r_is_manifold}; - int v_idx = 0; - LISTBASE_FOREACH (const DispList *, dl, lb) { - displist_indexbufbuilder_set((SetTriIndicesFn *)set_edges_adjacency_lines_indices, - (SetTriIndicesFn *)set_edges_adjacency_lines_indices, - thunk, - dl, - v_idx); - v_idx += dl_vert_len(dl); - } - - /* Create edges for remaining non manifold edges. 
*/ - EdgeHashIterator *ehi; - for (ehi = BLI_edgehashIterator_new(eh); BLI_edgehashIterator_isDone(ehi) == false; - BLI_edgehashIterator_step(ehi)) { - uint v1, v2; - int v_data = POINTER_AS_INT(BLI_edgehashIterator_getValue(ehi)); - if (v_data == NO_EDGE) { - continue; - } - BLI_edgehashIterator_getKey(ehi, &v1, &v2); - uint v0 = (uint)abs(v_data) - 1; - if (v_data < 0) { /* inv_opposite */ - SWAP(uint, v1, v2); - } - GPU_indexbuf_add_line_adj_verts(&elb, v0, v1, v2, v0); - *r_is_manifold = false; - } - BLI_edgehashIterator_free(ehi); - BLI_edgehash_free(eh, NULL); - - GPU_indexbuf_build_in_place(&elb, ibo); -} -#undef NO_EDGE diff --git a/source/blender/draw/intern/draw_cache_impl_lattice.c b/source/blender/draw/intern/draw_cache_impl_lattice.c index cb621c6ceb9..0f12e78d60e 100644 --- a/source/blender/draw/intern/draw_cache_impl_lattice.c +++ b/source/blender/draw/intern/draw_cache_impl_lattice.c @@ -27,12 +27,6 @@ #define SELECT 1 -/** - * TODO - * - 'DispList' is currently not used - * (we could avoid using since it will be removed) - */ - static void lattice_batch_cache_clear(Lattice *lt); /* ---------------------------------------------------------------------- */ diff --git a/source/blender/draw/intern/draw_cache_impl_mesh.cc b/source/blender/draw/intern/draw_cache_impl_mesh.cc index 7c02ee2c033..c22382b3e09 100644 --- a/source/blender/draw/intern/draw_cache_impl_mesh.cc +++ b/source/blender/draw/intern/draw_cache_impl_mesh.cc @@ -21,6 +21,7 @@ #include "BLI_math_vector.h" #include "BLI_span.hh" #include "BLI_string.h" +#include "BLI_string_ref.hh" #include "BLI_task.h" #include "BLI_utildefines.h" @@ -67,6 +68,7 @@ using blender::IndexRange; using blender::Map; using blender::Span; +using blender::StringRefNull; /* ---------------------------------------------------------------------- */ /** \name Dependencies between buffer and batch @@ -115,8 +117,6 @@ static constexpr DRWBatchFlag batches_that_use_buffer(const int buffer_index) MBC_SURFACE_PER_MAT; case 
BUFFER_INDEX(vbo.tan): return MBC_SURFACE_PER_MAT; - case BUFFER_INDEX(vbo.vcol): - return MBC_SURFACE | MBC_SURFACE_PER_MAT; case BUFFER_INDEX(vbo.sculpt_data): return MBC_SCULPT_OVERLAYS; case BUFFER_INDEX(vbo.orco): @@ -236,87 +236,11 @@ BLI_INLINE void mesh_cd_layers_type_clear(DRW_MeshCDMask *a) *((uint32_t *)a) = 0; } -BLI_INLINE const Mesh *editmesh_final_or_this(const Object *object, const Mesh *me) -{ - if (me->edit_mesh != nullptr) { - Mesh *editmesh_eval_final = BKE_object_get_editmesh_eval_final(object); - if (editmesh_eval_final != nullptr) { - return editmesh_eval_final; - } - } - - return me; -} - static void mesh_cd_calc_edit_uv_layer(const Mesh *UNUSED(me), DRW_MeshCDMask *cd_used) { cd_used->edit_uv = 1; } -BLI_INLINE const CustomData *mesh_cd_ldata_get_from_mesh(const Mesh *me) -{ - switch ((eMeshWrapperType)me->runtime.wrapper_type) { - case ME_WRAPPER_TYPE_SUBD: - case ME_WRAPPER_TYPE_MDATA: - return &me->ldata; - break; - case ME_WRAPPER_TYPE_BMESH: - return &me->edit_mesh->bm->ldata; - break; - } - - BLI_assert(0); - return &me->ldata; -} - -BLI_INLINE const CustomData *mesh_cd_pdata_get_from_mesh(const Mesh *me) -{ - switch ((eMeshWrapperType)me->runtime.wrapper_type) { - case ME_WRAPPER_TYPE_SUBD: - case ME_WRAPPER_TYPE_MDATA: - return &me->pdata; - break; - case ME_WRAPPER_TYPE_BMESH: - return &me->edit_mesh->bm->pdata; - break; - } - - BLI_assert(0); - return &me->pdata; -} - -BLI_INLINE const CustomData *mesh_cd_edata_get_from_mesh(const Mesh *me) -{ - switch ((eMeshWrapperType)me->runtime.wrapper_type) { - case ME_WRAPPER_TYPE_SUBD: - case ME_WRAPPER_TYPE_MDATA: - return &me->edata; - break; - case ME_WRAPPER_TYPE_BMESH: - return &me->edit_mesh->bm->edata; - break; - } - - BLI_assert(0); - return &me->edata; -} - -BLI_INLINE const CustomData *mesh_cd_vdata_get_from_mesh(const Mesh *me) -{ - switch ((eMeshWrapperType)me->runtime.wrapper_type) { - case ME_WRAPPER_TYPE_SUBD: - case ME_WRAPPER_TYPE_MDATA: - return &me->vdata; - break; - 
case ME_WRAPPER_TYPE_BMESH: - return &me->edit_mesh->bm->vdata; - break; - } - - BLI_assert(0); - return &me->vdata; -} - static void mesh_cd_calc_active_uv_layer(const Object *object, const Mesh *me, DRW_MeshCDMask *cd_used) @@ -341,75 +265,6 @@ static void mesh_cd_calc_active_mask_uv_layer(const Object *object, } } -static void mesh_cd_calc_active_mloopcol_layer(const Object *object, - const Mesh *me, - DRW_MeshCDMask *cd_used) -{ - const Mesh *me_final = editmesh_final_or_this(object, me); - Mesh me_query = blender::dna::shallow_zero_initialize(); - - const CustomData *cd_vdata = mesh_cd_vdata_get_from_mesh(me_final); - const CustomData *cd_ldata = mesh_cd_ldata_get_from_mesh(me_final); - - BKE_id_attribute_copy_domains_temp( - ID_ME, cd_vdata, nullptr, cd_ldata, nullptr, nullptr, &me_query.id); - - const CustomDataLayer *layer = BKE_id_attributes_active_color_get(&me_query.id); - int layer_i = BKE_id_attribute_to_index( - &me_query.id, layer, ATTR_DOMAIN_MASK_COLOR, CD_MASK_COLOR_ALL); - - if (layer_i != -1) { - cd_used->vcol |= (1UL << (uint)layer_i); - } -} - -static uint mesh_cd_calc_gpu_layers_vcol_used(const Mesh *me_query, - const CustomData *cd_vdata, - const CustomData *cd_ldata, - const char name[]) -{ - const CustomDataLayer *layer = nullptr; - eAttrDomain domain; - - if (name[0]) { - int layer_i = 0; - - domain = ATTR_DOMAIN_POINT; - layer_i = CustomData_get_named_layer_index(cd_vdata, CD_PROP_COLOR, name); - layer_i = layer_i == -1 ? - CustomData_get_named_layer_index(cd_vdata, CD_PROP_BYTE_COLOR, name) : - layer_i; - - if (layer_i == -1) { - domain = ATTR_DOMAIN_CORNER; - layer_i = layer_i == -1 ? CustomData_get_named_layer_index(cd_ldata, CD_PROP_COLOR, name) : - layer_i; - layer_i = layer_i == -1 ? - CustomData_get_named_layer_index(cd_ldata, CD_PROP_BYTE_COLOR, name) : - layer_i; - } - - /* NOTE: this is not the same as the layer_i below. */ - if (layer_i != -1) { - layer = (domain == ATTR_DOMAIN_POINT ? 
cd_vdata : cd_ldata)->layers + layer_i; - } - } - else { - layer = BKE_id_attributes_render_color_get(&me_query->id); - } - - if (!layer) { - return -1; - } - - /* NOTE: this is the logical index into the color attribute list, - * not the customdata index. */ - int vcol_i = BKE_id_attribute_to_index( - (ID *)me_query, layer, ATTR_DOMAIN_MASK_COLOR, CD_MASK_COLOR_ALL); - - return vcol_i; -} - static DRW_MeshCDMask mesh_cd_calc_used_gpu_layers(const Object *object, const Mesh *me, struct GPUMaterial **gpumat_array, @@ -433,56 +288,33 @@ static DRW_MeshCDMask mesh_cd_calc_used_gpu_layers(const Object *object, DRW_MeshCDMask cd_used; mesh_cd_layers_type_clear(&cd_used); + const CustomDataLayer *default_color = BKE_id_attributes_render_color_get(&me_query.id); + const StringRefNull default_color_name = default_color ? default_color->name : ""; + for (int i = 0; i < gpumat_array_len; i++) { GPUMaterial *gpumat = gpumat_array[i]; - if (gpumat) { - ListBase gpu_attrs = GPU_material_attributes(gpumat); - LISTBASE_FOREACH (GPUMaterialAttribute *, gpu_attr, &gpu_attrs) { - const char *name = gpu_attr->name; - eCustomDataType type = static_cast<eCustomDataType>(gpu_attr->type); - int layer = -1; - std::optional<eAttrDomain> domain; - - if (type == CD_AUTO_FROM_NAME) { - /* We need to deduce what exact layer is used. - * - * We do it based on the specified name. 
- */ - if (name[0] != '\0') { - layer = CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name); - type = CD_MTFACE; - - if (layer == -1) { - layer = CustomData_get_named_layer(cd_vdata, CD_PROP_COLOR, name); - if (layer != -1) { - type = CD_PROP_COLOR; - domain = ATTR_DOMAIN_POINT; - } - } - - if (layer == -1) { - layer = CustomData_get_named_layer(cd_ldata, CD_PROP_COLOR, name); - if (layer != -1) { - type = CD_PROP_COLOR; - domain = ATTR_DOMAIN_CORNER; - } - } - - if (layer == -1) { - layer = CustomData_get_named_layer(cd_vdata, CD_PROP_BYTE_COLOR, name); - if (layer != -1) { - type = CD_PROP_BYTE_COLOR; - domain = ATTR_DOMAIN_POINT; - } - } + if (gpumat == nullptr) { + continue; + } + ListBase gpu_attrs = GPU_material_attributes(gpumat); + LISTBASE_FOREACH (GPUMaterialAttribute *, gpu_attr, &gpu_attrs) { + const char *name = gpu_attr->name; + eCustomDataType type = static_cast<eCustomDataType>(gpu_attr->type); + int layer = -1; + std::optional<eAttrDomain> domain; + + if (gpu_attr->is_default_color) { + name = default_color_name.c_str(); + } - if (layer == -1) { - layer = CustomData_get_named_layer(cd_ldata, CD_PROP_BYTE_COLOR, name); - if (layer != -1) { - type = CD_PROP_BYTE_COLOR; - domain = ATTR_DOMAIN_CORNER; - } - } + if (type == CD_AUTO_FROM_NAME) { + /* We need to deduce what exact layer is used. + * + * We do it based on the specified name. + */ + if (name[0] != '\0') { + layer = CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name); + type = CD_MTFACE; #if 0 /* Tangents are always from UV's - this will never happen. */ if (layer == -1) { @@ -490,108 +322,87 @@ static DRW_MeshCDMask mesh_cd_calc_used_gpu_layers(const Object *object, type = CD_TANGENT; } #endif - if (layer == -1) { - /* Try to match a generic attribute, we use the first attribute domain with a - * matching name. 
*/ - if (drw_custom_data_match_attribute(cd_vdata, name, &layer, &type)) { - domain = ATTR_DOMAIN_POINT; - } - else if (drw_custom_data_match_attribute(cd_ldata, name, &layer, &type)) { - domain = ATTR_DOMAIN_CORNER; - } - else if (drw_custom_data_match_attribute(cd_pdata, name, &layer, &type)) { - domain = ATTR_DOMAIN_FACE; - } - else if (drw_custom_data_match_attribute(cd_edata, name, &layer, &type)) { - domain = ATTR_DOMAIN_EDGE; - } - else { - layer = -1; - } + if (layer == -1) { + /* Try to match a generic attribute, we use the first attribute domain with a + * matching name. */ + if (drw_custom_data_match_attribute(cd_vdata, name, &layer, &type)) { + domain = ATTR_DOMAIN_POINT; } - - if (layer == -1) { - continue; + else if (drw_custom_data_match_attribute(cd_ldata, name, &layer, &type)) { + domain = ATTR_DOMAIN_CORNER; } - } - else { - /* Fall back to the UV layer, which matches old behavior. */ - type = CD_MTFACE; - } - } - - switch (type) { - case CD_MTFACE: { - if (layer == -1) { - layer = (name[0] != '\0') ? CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name) : - CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); + else if (drw_custom_data_match_attribute(cd_pdata, name, &layer, &type)) { + domain = ATTR_DOMAIN_FACE; } - if (layer != -1) { - cd_used.uv |= (1 << layer); - } - break; - } - case CD_TANGENT: { - if (layer == -1) { - layer = (name[0] != '\0') ? 
CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name) : - CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); - - /* Only fallback to orco (below) when we have no UV layers, see: T56545 */ - if (layer == -1 && name[0] != '\0') { - layer = CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); - } - } - if (layer != -1) { - cd_used.tan |= (1 << layer); + else if (drw_custom_data_match_attribute(cd_edata, name, &layer, &type)) { + domain = ATTR_DOMAIN_EDGE; } else { - /* no UV layers at all => requesting orco */ - cd_used.tan_orco = 1; - cd_used.orco = 1; + layer = -1; } - break; } - case CD_ORCO: { - cd_used.orco = 1; - break; + if (layer == -1) { + continue; } + } + else { + /* Fall back to the UV layer, which matches old behavior. */ + type = CD_MTFACE; + } + } - /* NOTE: attr->type will always be CD_PROP_COLOR even for - * CD_PROP_BYTE_COLOR layers, see node_shader_gpu_vertex_color in - * node_shader_vertex_color.cc. - */ - case CD_MCOL: - case CD_PROP_BYTE_COLOR: - case CD_PROP_COLOR: { - /* First check Color attributes, when not found check mesh attributes. Geometry nodes - * can generate those layers. */ - int vcol_bit = mesh_cd_calc_gpu_layers_vcol_used(&me_query, cd_vdata, cd_ldata, name); - - if (vcol_bit != -1) { - cd_used.vcol |= 1UL << (uint)vcol_bit; - break; - } - - if (layer != -1 && domain.has_value()) { - drw_attributes_add_request(attributes, type, layer, *domain); - } - break; + switch (type) { + case CD_MTFACE: { + if (layer == -1) { + layer = (name[0] != '\0') ? CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name) : + CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); + } + if (layer != -1) { + cd_used.uv |= (1 << layer); } - case CD_PROP_FLOAT3: - case CD_PROP_BOOL: - case CD_PROP_INT8: - case CD_PROP_INT32: - case CD_PROP_FLOAT: - case CD_PROP_FLOAT2: { - if (layer != -1 && domain.has_value()) { - drw_attributes_add_request(attributes, type, layer, *domain); + break; + } + case CD_TANGENT: { + if (layer == -1) { + layer = (name[0] != '\0') ? 
CustomData_get_named_layer(cd_ldata, CD_MLOOPUV, name) : + CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); + + /* Only fallback to orco (below) when we have no UV layers, see: T56545 */ + if (layer == -1 && name[0] != '\0') { + layer = CustomData_get_render_layer(cd_ldata, CD_MLOOPUV); } - break; } - default: - break; + if (layer != -1) { + cd_used.tan |= (1 << layer); + } + else { + /* no UV layers at all => requesting orco */ + cd_used.tan_orco = 1; + cd_used.orco = 1; + } + break; } + + case CD_ORCO: { + cd_used.orco = 1; + break; + } + case CD_PROP_BYTE_COLOR: + case CD_PROP_COLOR: + case CD_PROP_FLOAT3: + case CD_PROP_BOOL: + case CD_PROP_INT8: + case CD_PROP_INT32: + case CD_PROP_FLOAT: + case CD_PROP_FLOAT2: { + if (layer != -1 && domain.has_value()) { + drw_attributes_add_request(attributes, name, type, layer, *domain); + } + break; + } + default: + break; } } } @@ -745,8 +556,7 @@ static bool mesh_batch_cache_valid(Object *object, Mesh *me) } if (object->sculpt && object->sculpt->pbvh) { - if (cache->pbvh_is_drawing != BKE_pbvh_is_drawing(object->sculpt->pbvh) || - BKE_pbvh_draw_cache_invalid(object->sculpt->pbvh)) { + if (cache->pbvh_is_drawing != BKE_pbvh_is_drawing(object->sculpt->pbvh)) { return false; } @@ -863,10 +673,9 @@ static void mesh_batch_cache_discard_shaded_tri(MeshBatchCache *cache) FOREACH_MESH_BUFFER_CACHE (cache, mbc) { GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.uv); GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.tan); - GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.vcol); GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.orco); } - DRWBatchFlag batch_map = BATCH_MAP(vbo.uv, vbo.tan, vbo.vcol, vbo.orco); + DRWBatchFlag batch_map = BATCH_MAP(vbo.uv, vbo.tan, vbo.orco); mesh_batch_cache_discard_batch(cache, batch_map); mesh_cd_layers_type_clear(&cache->cd_used); } @@ -1070,42 +879,35 @@ static void texpaint_request_active_uv(MeshBatchCache *cache, Object *object, Me mesh_cd_layers_type_merge(&cache->cd_needed, cd_needed); } -static void 
texpaint_request_active_vcol(MeshBatchCache *cache, Object *object, Mesh *me) +static void request_active_and_default_color_attributes(const Object &object, + const Mesh &mesh, + DRW_Attributes &attributes) { - DRW_MeshCDMask cd_needed; - mesh_cd_layers_type_clear(&cd_needed); - mesh_cd_calc_active_mloopcol_layer(object, me, &cd_needed); - - BLI_assert(cd_needed.vcol != 0 && - "No MLOOPCOL layer available in vertpaint, but batches requested anyway!"); - - mesh_cd_layers_type_merge(&cache->cd_needed, cd_needed); -} - -static void sculpt_request_active_vcol(MeshBatchCache *cache, Object *object, Mesh *me) -{ - const Mesh *me_final = editmesh_final_or_this(object, me); + const Mesh *me_final = editmesh_final_or_this(&object, &mesh); const CustomData *cd_vdata = mesh_cd_vdata_get_from_mesh(me_final); const CustomData *cd_ldata = mesh_cd_ldata_get_from_mesh(me_final); + /* Necessary because which attributes are active/default is stored in #CustomData. */ Mesh me_query = blender::dna::shallow_zero_initialize(); BKE_id_attribute_copy_domains_temp( ID_ME, cd_vdata, nullptr, cd_ldata, nullptr, nullptr, &me_query.id); - const CustomDataLayer *active = BKE_id_attributes_active_color_get(&me_query.id); - const CustomDataLayer *render = BKE_id_attributes_render_color_get(&me_query.id); - - int active_i = BKE_id_attribute_to_index( - &me_query.id, active, ATTR_DOMAIN_MASK_COLOR, CD_MASK_COLOR_ALL); - int render_i = BKE_id_attribute_to_index( - &me_query.id, render, ATTR_DOMAIN_MASK_COLOR, CD_MASK_COLOR_ALL); + auto request_color_attribute = [&](const char *name) { + int layer_index; + eCustomDataType type; + if (drw_custom_data_match_attribute(cd_vdata, name, &layer_index, &type)) { + drw_attributes_add_request(&attributes, name, type, layer_index, ATTR_DOMAIN_POINT); + } + else if (drw_custom_data_match_attribute(cd_ldata, name, &layer_index, &type)) { + drw_attributes_add_request(&attributes, name, type, layer_index, ATTR_DOMAIN_CORNER); + } + }; - if (active_i >= 0) { - 
cache->cd_needed.vcol |= 1UL << (uint)active_i; + if (const CustomDataLayer *active = BKE_id_attributes_active_color_get(&me_query.id)) { + request_color_attribute(active->name); } - - if (render_i >= 0) { - cache->cd_needed.vcol |= 1UL << (uint)render_i; + if (const CustomDataLayer *render = BKE_id_attributes_render_color_get(&me_query.id)) { + request_color_attribute(render->name); } } @@ -1214,7 +1016,13 @@ GPUBatch *DRW_mesh_batch_cache_get_surface_texpaint_single(Object *object, Mesh GPUBatch *DRW_mesh_batch_cache_get_surface_vertpaint(Object *object, Mesh *me) { MeshBatchCache *cache = mesh_batch_cache_get(me); - texpaint_request_active_vcol(cache, object, me); + + DRW_Attributes attrs_needed{}; + request_active_and_default_color_attributes(*object, *me, attrs_needed); + + ThreadMutex *mesh_render_mutex = (ThreadMutex *)me->runtime.render_mutex; + drw_attributes_merge(&cache->attr_needed, &attrs_needed, mesh_render_mutex); + mesh_batch_cache_request_surface_batches(cache); return cache->batch.surface; } @@ -1222,7 +1030,13 @@ GPUBatch *DRW_mesh_batch_cache_get_surface_vertpaint(Object *object, Mesh *me) GPUBatch *DRW_mesh_batch_cache_get_surface_sculpt(Object *object, Mesh *me) { MeshBatchCache *cache = mesh_batch_cache_get(me); - sculpt_request_active_vcol(cache, object, me); + + DRW_Attributes attrs_needed{}; + request_active_and_default_color_attributes(*object, *me, attrs_needed); + + ThreadMutex *mesh_render_mutex = (ThreadMutex *)me->runtime.render_mutex; + drw_attributes_merge(&cache->attr_needed, &attrs_needed, mesh_render_mutex); + mesh_batch_cache_request_surface_batches(cache); return cache->batch.surface; } @@ -1621,9 +1435,6 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, if (cache->cd_used.sculpt_overlays != cache->cd_needed.sculpt_overlays) { GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.sculpt_data); } - if ((cache->cd_used.vcol & cache->cd_needed.vcol) != cache->cd_needed.vcol) { - 
GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.vcol); - } if (!drw_attributes_overlap(&cache->attr_used, &cache->attr_needed)) { for (int i = 0; i < GPU_MAX_ATTR; i++) { GPU_VERTBUF_DISCARD_SAFE(mbc->buff.vbo.attr[i]); @@ -1697,12 +1508,13 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, cache->batch_ready |= batch_requested; bool do_cage = false, do_uvcage = false; - if (is_editmode) { + if (is_editmode && is_mode_active) { Mesh *editmesh_eval_final = BKE_object_get_editmesh_eval_final(ob); Mesh *editmesh_eval_cage = BKE_object_get_editmesh_eval_cage(ob); do_cage = editmesh_eval_final != editmesh_eval_cage; - do_uvcage = !editmesh_eval_final->runtime.is_original; + do_uvcage = !(editmesh_eval_final->runtime.is_original_bmesh && + editmesh_eval_final->runtime.wrapper_type == ME_WRAPPER_TYPE_BMESH); } const bool do_subdivision = BKE_subsurf_modifier_has_gpu_subdiv(me); @@ -1710,15 +1522,26 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, MeshBufferList *mbuflist = &cache->final.buff; /* Initialize batches and request VBO's & IBO's. 
*/ - assert_deps_valid( - MBC_SURFACE, - {BUFFER_INDEX(ibo.tris), BUFFER_INDEX(vbo.lnor), BUFFER_INDEX(vbo.pos_nor), - BUFFER_INDEX(vbo.uv), BUFFER_INDEX(vbo.vcol), BUFFER_INDEX(vbo.attr[0]), - BUFFER_INDEX(vbo.attr[1]), BUFFER_INDEX(vbo.attr[2]), BUFFER_INDEX(vbo.attr[3]), - BUFFER_INDEX(vbo.attr[4]), BUFFER_INDEX(vbo.attr[5]), BUFFER_INDEX(vbo.attr[6]), - BUFFER_INDEX(vbo.attr[7]), BUFFER_INDEX(vbo.attr[8]), BUFFER_INDEX(vbo.attr[9]), - BUFFER_INDEX(vbo.attr[10]), BUFFER_INDEX(vbo.attr[11]), BUFFER_INDEX(vbo.attr[12]), - BUFFER_INDEX(vbo.attr[13]), BUFFER_INDEX(vbo.attr[14])}); + assert_deps_valid(MBC_SURFACE, + {BUFFER_INDEX(ibo.tris), + BUFFER_INDEX(vbo.lnor), + BUFFER_INDEX(vbo.pos_nor), + BUFFER_INDEX(vbo.uv), + BUFFER_INDEX(vbo.attr[0]), + BUFFER_INDEX(vbo.attr[1]), + BUFFER_INDEX(vbo.attr[2]), + BUFFER_INDEX(vbo.attr[3]), + BUFFER_INDEX(vbo.attr[4]), + BUFFER_INDEX(vbo.attr[5]), + BUFFER_INDEX(vbo.attr[6]), + BUFFER_INDEX(vbo.attr[7]), + BUFFER_INDEX(vbo.attr[8]), + BUFFER_INDEX(vbo.attr[9]), + BUFFER_INDEX(vbo.attr[10]), + BUFFER_INDEX(vbo.attr[11]), + BUFFER_INDEX(vbo.attr[12]), + BUFFER_INDEX(vbo.attr[13]), + BUFFER_INDEX(vbo.attr[14])}); if (DRW_batch_requested(cache->batch.surface, GPU_PRIM_TRIS)) { DRW_ibo_request(cache->batch.surface, &mbuflist->ibo.tris); /* Order matters. First ones override latest VBO's attributes. 
*/ @@ -1727,9 +1550,6 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, if (cache->cd_used.uv != 0) { DRW_vbo_request(cache->batch.surface, &mbuflist->vbo.uv); } - if (cache->cd_used.vcol != 0) { - DRW_vbo_request(cache->batch.surface, &mbuflist->vbo.vcol); - } drw_add_attributes_vbo(cache->batch.surface, mbuflist, &cache->attr_used); } assert_deps_valid(MBC_ALL_VERTS, {BUFFER_INDEX(vbo.pos_nor)}); @@ -1807,12 +1627,12 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, assert_deps_valid( MBC_SURFACE_PER_MAT, {BUFFER_INDEX(vbo.lnor), BUFFER_INDEX(vbo.pos_nor), BUFFER_INDEX(vbo.uv), - BUFFER_INDEX(vbo.tan), BUFFER_INDEX(vbo.vcol), BUFFER_INDEX(vbo.orco), - BUFFER_INDEX(vbo.attr[0]), BUFFER_INDEX(vbo.attr[1]), BUFFER_INDEX(vbo.attr[2]), - BUFFER_INDEX(vbo.attr[3]), BUFFER_INDEX(vbo.attr[4]), BUFFER_INDEX(vbo.attr[5]), - BUFFER_INDEX(vbo.attr[6]), BUFFER_INDEX(vbo.attr[7]), BUFFER_INDEX(vbo.attr[8]), - BUFFER_INDEX(vbo.attr[9]), BUFFER_INDEX(vbo.attr[10]), BUFFER_INDEX(vbo.attr[11]), - BUFFER_INDEX(vbo.attr[12]), BUFFER_INDEX(vbo.attr[13]), BUFFER_INDEX(vbo.attr[14])}); + BUFFER_INDEX(vbo.tan), BUFFER_INDEX(vbo.orco), BUFFER_INDEX(vbo.attr[0]), + BUFFER_INDEX(vbo.attr[1]), BUFFER_INDEX(vbo.attr[2]), BUFFER_INDEX(vbo.attr[3]), + BUFFER_INDEX(vbo.attr[4]), BUFFER_INDEX(vbo.attr[5]), BUFFER_INDEX(vbo.attr[6]), + BUFFER_INDEX(vbo.attr[7]), BUFFER_INDEX(vbo.attr[8]), BUFFER_INDEX(vbo.attr[9]), + BUFFER_INDEX(vbo.attr[10]), BUFFER_INDEX(vbo.attr[11]), BUFFER_INDEX(vbo.attr[12]), + BUFFER_INDEX(vbo.attr[13]), BUFFER_INDEX(vbo.attr[14])}); assert_deps_valid(MBC_SURFACE_PER_MAT, {TRIS_PER_MAT_INDEX}); for (int i = 0; i < cache->mat_len; i++) { if (DRW_batch_requested(cache->surface_per_mat[i], GPU_PRIM_TRIS)) { @@ -1826,9 +1646,6 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, if ((cache->cd_used.tan != 0) || (cache->cd_used.tan_orco != 0)) { DRW_vbo_request(cache->surface_per_mat[i], 
&mbuflist->vbo.tan); } - if (cache->cd_used.vcol != 0) { - DRW_vbo_request(cache->surface_per_mat[i], &mbuflist->vbo.vcol); - } if (cache->cd_used.orco != 0) { DRW_vbo_request(cache->surface_per_mat[i], &mbuflist->vbo.orco); } @@ -1994,7 +1811,6 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, assert_final_deps_valid(BUFFER_INDEX(vbo.lnor)); assert_final_deps_valid(BUFFER_INDEX(vbo.pos_nor)); assert_final_deps_valid(BUFFER_INDEX(vbo.uv)); - assert_final_deps_valid(BUFFER_INDEX(vbo.vcol)); assert_final_deps_valid(BUFFER_INDEX(vbo.sculpt_data)); assert_final_deps_valid(BUFFER_INDEX(vbo.weights)); assert_final_deps_valid(BUFFER_INDEX(vbo.edge_fac)); @@ -2078,6 +1894,7 @@ void DRW_mesh_batch_cache_create_requested(struct TaskGraph *task_graph, ob->obmat, true, false, + do_cage, ts, use_hide); } diff --git a/source/blender/draw/intern/draw_cache_impl_metaball.c b/source/blender/draw/intern/draw_cache_impl_metaball.c deleted file mode 100644 index 1408dc91069..00000000000 --- a/source/blender/draw/intern/draw_cache_impl_metaball.c +++ /dev/null @@ -1,294 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2017 Blender Foundation. All rights reserved. 
*/ - -/** \file - * \ingroup draw - * - * \brief MetaBall API for render engines - */ - -#include "MEM_guardedalloc.h" - -#include "BLI_math_base.h" -#include "BLI_utildefines.h" - -#include "DNA_meta_types.h" -#include "DNA_object_types.h" - -#include "BKE_curve.h" -#include "BKE_mball.h" - -#include "GPU_batch.h" - -#include "DRW_render.h" -#include "draw_cache_impl.h" /* own include */ - -static void metaball_batch_cache_clear(MetaBall *mb); - -/* -------------------------------------------------------------------- */ -/** \name MetaBall GPUBatch Cache - * \{ */ - -typedef struct MetaBallBatchCache { - GPUBatch *batch; - GPUBatch **shaded_triangles; - - int mat_len; - - /* Shared */ - GPUVertBuf *pos_nor_in_order; - - /* Wireframe */ - struct { - GPUBatch *batch; - } face_wire; - - /* Edge detection */ - GPUBatch *edge_detection; - GPUIndexBuf *edges_adj_lines; - - /* settings to determine if cache is invalid */ - bool is_dirty; - - /* Valid only if edge_detection is up to date. */ - bool is_manifold; -} MetaBallBatchCache; - -/* GPUBatch cache management. 
*/ - -static bool metaball_batch_cache_valid(MetaBall *mb) -{ - MetaBallBatchCache *cache = mb->batch_cache; - - if (cache == NULL) { - return false; - } - - return cache->is_dirty == false; -} - -static void metaball_batch_cache_init(MetaBall *mb) -{ - MetaBallBatchCache *cache = mb->batch_cache; - - if (!cache) { - cache = mb->batch_cache = MEM_mallocN(sizeof(*cache), __func__); - } - cache->batch = NULL; - cache->mat_len = 0; - cache->shaded_triangles = NULL; - cache->is_dirty = false; - cache->pos_nor_in_order = NULL; - cache->face_wire.batch = NULL; - cache->edge_detection = NULL; - cache->edges_adj_lines = NULL; - cache->is_manifold = false; -} - -void DRW_mball_batch_cache_validate(MetaBall *mb) -{ - if (!metaball_batch_cache_valid(mb)) { - metaball_batch_cache_clear(mb); - metaball_batch_cache_init(mb); - } -} - -static MetaBallBatchCache *metaball_batch_cache_get(MetaBall *mb) -{ - return mb->batch_cache; -} - -void DRW_mball_batch_cache_dirty_tag(MetaBall *mb, int mode) -{ - MetaBallBatchCache *cache = mb->batch_cache; - if (cache == NULL) { - return; - } - switch (mode) { - case BKE_MBALL_BATCH_DIRTY_ALL: - cache->is_dirty = true; - break; - default: - BLI_assert(0); - } -} - -static void metaball_batch_cache_clear(MetaBall *mb) -{ - MetaBallBatchCache *cache = mb->batch_cache; - if (!cache) { - return; - } - - GPU_BATCH_DISCARD_SAFE(cache->face_wire.batch); - GPU_BATCH_DISCARD_SAFE(cache->batch); - GPU_BATCH_DISCARD_SAFE(cache->edge_detection); - GPU_VERTBUF_DISCARD_SAFE(cache->pos_nor_in_order); - GPU_INDEXBUF_DISCARD_SAFE(cache->edges_adj_lines); - /* NOTE: shaded_triangles[0] is already freed by `cache->batch`. 
*/ - MEM_SAFE_FREE(cache->shaded_triangles); - cache->mat_len = 0; - cache->is_manifold = false; -} - -void DRW_mball_batch_cache_free(MetaBall *mb) -{ - metaball_batch_cache_clear(mb); - MEM_SAFE_FREE(mb->batch_cache); -} - -static GPUVertBuf *mball_batch_cache_get_pos_and_normals(Object *ob, - MetaBallBatchCache *cache, - const struct Scene *scene) -{ - if (cache->pos_nor_in_order == NULL) { - ListBase *lb = &ob->runtime.curve_cache->disp; - cache->pos_nor_in_order = GPU_vertbuf_calloc(); - DRW_displist_vertbuf_create_pos_and_nor(lb, cache->pos_nor_in_order, scene); - } - return cache->pos_nor_in_order; -} - -static GPUIndexBuf *mball_batch_cache_get_edges_adj_lines(Object *ob, MetaBallBatchCache *cache) -{ - if (cache->edges_adj_lines == NULL) { - ListBase *lb = &ob->runtime.curve_cache->disp; - cache->edges_adj_lines = GPU_indexbuf_calloc(); - DRW_displist_indexbuf_create_edges_adjacency_lines( - lb, cache->edges_adj_lines, &cache->is_manifold); - } - return cache->edges_adj_lines; -} - -/** \} */ - -/* -------------------------------------------------------------------- */ -/** \name Public Object/MetaBall API - * \{ */ - -GPUBatch *DRW_metaball_batch_cache_get_triangles_with_normals(Object *ob) -{ - if (!BKE_mball_is_basis(ob)) { - return NULL; - } - - MetaBall *mb = ob->data; - MetaBallBatchCache *cache = metaball_batch_cache_get(mb); - const DRWContextState *draw_ctx = DRW_context_state_get(); - const struct Scene *scene = draw_ctx->scene; - - if (cache->batch == NULL) { - ListBase *lb = &ob->runtime.curve_cache->disp; - GPUIndexBuf *ibo = GPU_indexbuf_calloc(); - DRW_displist_indexbuf_create_triangles_in_order(lb, ibo); - cache->batch = GPU_batch_create_ex(GPU_PRIM_TRIS, - mball_batch_cache_get_pos_and_normals(ob, cache, scene), - ibo, - GPU_BATCH_OWNS_INDEX); - } - - return cache->batch; -} - -GPUBatch **DRW_metaball_batch_cache_get_surface_shaded(Object *ob, - MetaBall *mb, - struct GPUMaterial **UNUSED(gpumat_array), - uint gpumat_array_len) -{ - if 
(!BKE_mball_is_basis(ob)) { - return NULL; - } - - BLI_assert(gpumat_array_len == DRW_metaball_material_count_get(mb)); - - MetaBallBatchCache *cache = metaball_batch_cache_get(mb); - if (cache->shaded_triangles == NULL) { - cache->mat_len = gpumat_array_len; - cache->shaded_triangles = MEM_callocN(sizeof(*cache->shaded_triangles) * cache->mat_len, - __func__); - cache->shaded_triangles[0] = DRW_metaball_batch_cache_get_triangles_with_normals(ob); - for (int i = 1; i < cache->mat_len; i++) { - cache->shaded_triangles[i] = NULL; - } - } - return cache->shaded_triangles; -} - -GPUBatch *DRW_metaball_batch_cache_get_wireframes_face(Object *ob) -{ - if (!BKE_mball_is_basis(ob)) { - return NULL; - } - - MetaBall *mb = ob->data; - MetaBallBatchCache *cache = metaball_batch_cache_get(mb); - const DRWContextState *draw_ctx = DRW_context_state_get(); - const struct Scene *scene = draw_ctx->scene; - - if (cache->face_wire.batch == NULL) { - ListBase *lb = &ob->runtime.curve_cache->disp; - - GPUVertBuf *vbo_wiredata = GPU_vertbuf_calloc(); - DRW_displist_vertbuf_create_wiredata(lb, vbo_wiredata); - - GPUIndexBuf *ibo = GPU_indexbuf_calloc(); - DRW_displist_indexbuf_create_lines_in_order(lb, ibo); - - cache->face_wire.batch = GPU_batch_create_ex( - GPU_PRIM_LINES, - mball_batch_cache_get_pos_and_normals(ob, cache, scene), - ibo, - GPU_BATCH_OWNS_INDEX); - - GPU_batch_vertbuf_add_ex(cache->face_wire.batch, vbo_wiredata, true); - } - - return cache->face_wire.batch; -} - -struct GPUBatch *DRW_metaball_batch_cache_get_edge_detection(struct Object *ob, - bool *r_is_manifold) -{ - if (!BKE_mball_is_basis(ob)) { - return NULL; - } - - MetaBall *mb = ob->data; - MetaBallBatchCache *cache = metaball_batch_cache_get(mb); - const DRWContextState *draw_ctx = DRW_context_state_get(); - const struct Scene *scene = draw_ctx->scene; - - if (cache->edge_detection == NULL) { - cache->edge_detection = GPU_batch_create( - GPU_PRIM_LINES_ADJ, - mball_batch_cache_get_pos_and_normals(ob, cache, 
scene), - mball_batch_cache_get_edges_adj_lines(ob, cache)); - } - - if (r_is_manifold) { - *r_is_manifold = cache->is_manifold; - } - - return cache->edge_detection; -} - -struct GPUVertBuf *DRW_mball_batch_cache_pos_vertbuf_get(Object *ob) -{ - if (!BKE_mball_is_basis(ob)) { - return NULL; - } - - MetaBall *mb = ob->data; - MetaBallBatchCache *cache = metaball_batch_cache_get(mb); - const DRWContextState *draw_ctx = DRW_context_state_get(); - const struct Scene *scene = draw_ctx->scene; - - return mball_batch_cache_get_pos_and_normals(ob, cache, scene); -} - -int DRW_metaball_material_count_get(MetaBall *mb) -{ - return max_ii(1, mb->totcol); -} - -/** \} */ diff --git a/source/blender/draw/intern/draw_cache_impl_particles.c b/source/blender/draw/intern/draw_cache_impl_particles.c index c1d609bf648..9c1784b1de2 100644 --- a/source/blender/draw/intern/draw_cache_impl_particles.c +++ b/source/blender/draw/intern/draw_cache_impl_particles.c @@ -11,6 +11,7 @@ #include "MEM_guardedalloc.h" +#include "BLI_alloca.h" #include "BLI_ghash.h" #include "BLI_math_vector.h" #include "BLI_string.h" @@ -24,12 +25,15 @@ #include "BKE_customdata.h" #include "BKE_mesh.h" +#include "BKE_mesh_legacy_convert.h" #include "BKE_particle.h" #include "BKE_pointcache.h" #include "ED_particle.h" #include "GPU_batch.h" +#include "GPU_capabilities.h" +#include "GPU_context.h" #include "GPU_material.h" #include "DEG_depsgraph_query.h" @@ -181,10 +185,11 @@ static void particle_batch_cache_clear_hair(ParticleHairCache *hair_cache) GPU_VERTBUF_DISCARD_SAFE(hair_cache->proc_uv_buf[i]); DRW_TEXTURE_FREE_SAFE(hair_cache->uv_tex[i]); } - for (int i = 0; i < MAX_MCOL; i++) { + for (int i = 0; i < hair_cache->num_col_layers; i++) { GPU_VERTBUF_DISCARD_SAFE(hair_cache->proc_col_buf[i]); DRW_TEXTURE_FREE_SAFE(hair_cache->col_tex[i]); } + for (int i = 0; i < MAX_HAIR_SUBDIV; i++) { GPU_VERTBUF_DISCARD_SAFE(hair_cache->final[i].proc_buf); DRW_TEXTURE_FREE_SAFE(hair_cache->final[i].proc_tex); @@ -217,9 
+222,24 @@ static void particle_batch_cache_clear(ParticleSystem *psys) GPU_VERTBUF_DISCARD_SAFE(cache->edit_tip_pos); } +static void particle_batch_cache_free_hair(ParticleHairCache *hair) +{ + MEM_SAFE_FREE(hair->proc_col_buf); + MEM_SAFE_FREE(hair->col_tex); + MEM_SAFE_FREE(hair->col_layer_names); +} + void DRW_particle_batch_cache_free(ParticleSystem *psys) { particle_batch_cache_clear(psys); + + ParticleBatchCache *cache = psys->batch_cache; + + if (cache) { + particle_batch_cache_free_hair(&cache->hair); + particle_batch_cache_free_hair(&cache->edit_hair); + } + MEM_SAFE_FREE(psys->batch_cache); } @@ -295,7 +315,8 @@ static void particle_calculate_parent_uvs(ParticleSystem *psys, } } if (!ELEM(num, DMCACHE_NOTFOUND, DMCACHE_ISCHILD)) { - MFace *mface = &psmd->mesh_final->mface[num]; + MFace *mfaces = CustomData_get_layer(&psmd->mesh_final->fdata, CD_MFACE); + MFace *mface = &mfaces[num]; for (int j = 0; j < num_uv_layers; j++) { psys_interpolate_uvs(mtfaces[j] + num, mface->v4, particle->fuv, r_uv[j]); } @@ -324,7 +345,8 @@ static void particle_calculate_parent_mcol(ParticleSystem *psys, } } if (!ELEM(num, DMCACHE_NOTFOUND, DMCACHE_ISCHILD)) { - MFace *mface = &psmd->mesh_final->mface[num]; + MFace *mfaces = CustomData_get_layer(&psmd->mesh_final->fdata, CD_MFACE); + MFace *mface = &mfaces[num]; for (int j = 0; j < num_col_layers; j++) { /* CustomDataLayer CD_MCOL has 4 structs per face. 
*/ psys_interpolate_mcol(mcols[j] + num * 4, mface->v4, particle->fuv, &r_mcol[j]); @@ -350,7 +372,8 @@ static void particle_interpolate_children_uvs(ParticleSystem *psys, ChildParticle *particle = &psys->child[child_index]; int num = particle->num; if (num != DMCACHE_NOTFOUND) { - MFace *mface = &psmd->mesh_final->mface[num]; + MFace *mfaces = CustomData_get_layer(&psmd->mesh_final->fdata, CD_MFACE); + MFace *mface = &mfaces[num]; for (int j = 0; j < num_uv_layers; j++) { psys_interpolate_uvs(mtfaces[j] + num, mface->v4, particle->fuv, r_uv[j]); } @@ -374,7 +397,8 @@ static void particle_interpolate_children_mcol(ParticleSystem *psys, ChildParticle *particle = &psys->child[child_index]; int num = particle->num; if (num != DMCACHE_NOTFOUND) { - MFace *mface = &psmd->mesh_final->mface[num]; + MFace *mfaces = CustomData_get_layer(&psmd->mesh_final->fdata, CD_MFACE); + MFace *mface = &mfaces[num]; for (int j = 0; j < num_col_layers; j++) { /* CustomDataLayer CD_MCOL has 4 structs per face. */ psys_interpolate_mcol(mcols[j] + num * 4, mface->v4, particle->fuv, &r_mcol[j]); @@ -790,7 +814,10 @@ static void particle_batch_cache_ensure_procedural_final_points(ParticleHairCach GPUVertFormat format = {0}; GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format(&format); + /* Transform feedback buffer only needs to be resident in device memory. */ + GPUUsageType type = GPU_transform_feedback_support() ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_STATIC; + cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex( + &format, type | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); /* Create a destination buffer for the transform feedback. Sized appropriately */ /* Those are points! not line segments. 
*/ @@ -832,10 +859,10 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit GPUVertBufRaw data_step, seg_step; GPUVertBufRaw uv_step[MAX_MTFACE]; - GPUVertBufRaw col_step[MAX_MCOL]; + GPUVertBufRaw *col_step = BLI_array_alloca(col_step, cache->num_col_layers); const MTFace *mtfaces[MAX_MTFACE] = {NULL}; - const MCol *mcols[MAX_MCOL] = {NULL}; + const MCol **mcols = BLI_array_alloca(mcols, cache->num_col_layers); float(**parent_uvs)[2] = NULL; MCol **parent_mcol = NULL; @@ -853,20 +880,22 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit &format_col, "col", GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); memset(cache->uv_layer_names, 0, sizeof(cache->uv_layer_names)); - memset(cache->col_layer_names, 0, sizeof(cache->col_layer_names)); /* Strand Data */ - cache->proc_strand_buf = GPU_vertbuf_create_with_format(&format_data); + cache->proc_strand_buf = GPU_vertbuf_create_with_format_ex( + &format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_strand_buf, cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_strand_buf, data_id, &data_step); - cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg); + cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex( + &format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_strand_seg_buf, cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_strand_seg_buf, seg_id, &seg_step); /* UV layers */ for (int i = 0; i < cache->num_uv_layers; i++) { - cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format(&format_uv); + cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format_ex( + &format_uv, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_uv_buf[i], cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_uv_buf[i], uv_id, &uv_step[i]); @@ -884,9 +913,20 @@ static void 
particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit BLI_strncpy(cache->uv_layer_names[i][n++], "a", MAX_LAYER_NAME_LEN); } } + + MEM_SAFE_FREE(cache->proc_col_buf); + MEM_SAFE_FREE(cache->col_tex); + MEM_SAFE_FREE(cache->col_layer_names); + + cache->proc_col_buf = MEM_calloc_arrayN(cache->num_col_layers, sizeof(void *), "proc_col_buf"); + cache->col_tex = MEM_calloc_arrayN(cache->num_col_layers, sizeof(void *), "col_tex"); + cache->col_layer_names = MEM_calloc_arrayN( + cache->num_col_layers, sizeof(*cache->col_layer_names), "col_layer_names"); + /* Vertex colors */ for (int i = 0; i < cache->num_col_layers; i++) { - cache->proc_col_buf[i] = GPU_vertbuf_create_with_format(&format_col); + cache->proc_col_buf[i] = GPU_vertbuf_create_with_format_ex( + &format_col, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_col_buf[i], cache->strands_len); GPU_vertbuf_attr_get_raw_data(cache->proc_col_buf[i], col_id, &col_step[i]); @@ -1032,8 +1072,9 @@ static void particle_batch_cache_ensure_procedural_indices(PTCacheEdit *edit, static GPUVertFormat format = {0}; GPU_vertformat_clear(&format); - /* initialize vertex format */ - GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); + /* NOTE: initialize vertex format. Using GPU_COMP_U32 to satisfy Metal's 4-byte minimum + * stride requirement. 
*/ + GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format); GPU_vertbuf_data_alloc(vbo, 1); @@ -1074,7 +1115,8 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit, uint pos_id = GPU_vertformat_attr_add( &pos_format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - cache->proc_point_buf = GPU_vertbuf_create_with_format(&pos_format); + cache->proc_point_buf = GPU_vertbuf_create_with_format_ex( + &pos_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_point_buf, cache->point_len); GPUVertBufRaw pos_step; @@ -1084,7 +1126,8 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit, uint length_id = GPU_vertformat_attr_add( &length_format, "hairLength", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); - cache->proc_length_buf = GPU_vertbuf_create_with_format(&length_format); + cache->proc_length_buf = GPU_vertbuf_create_with_format_ex( + &length_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); GPU_vertbuf_data_alloc(cache->proc_length_buf, cache->strands_len); GPUVertBufRaw length_step; diff --git a/source/blender/draw/intern/draw_cache_impl_pointcloud.c b/source/blender/draw/intern/draw_cache_impl_pointcloud.cc index d715899e291..a43b23c8969 100644 --- a/source/blender/draw/intern/draw_cache_impl_pointcloud.c +++ b/source/blender/draw/intern/draw_cache_impl_pointcloud.cc @@ -13,23 +13,23 @@ #include "BLI_math_base.h" #include "BLI_math_vector.h" +#include "BLI_task.hh" #include "BLI_utildefines.h" #include "DNA_object_types.h" #include "DNA_pointcloud_types.h" +#include "BKE_attribute.hh" #include "BKE_pointcloud.h" #include "GPU_batch.h" #include "draw_cache_impl.h" /* own include */ -static void pointcloud_batch_cache_clear(PointCloud *pointcloud); - /* ---------------------------------------------------------------------- */ /* PointCloud GPUBatch Cache */ -typedef struct 
PointCloudBatchCache { +struct PointCloudBatchCache { GPUVertBuf *pos; /* Position and radius. */ GPUVertBuf *geom; /* Instanced geometry for each point in the cloud (small sphere). */ GPUIndexBuf *geom_indices; @@ -42,58 +42,51 @@ typedef struct PointCloudBatchCache { bool is_dirty; int mat_len; -} PointCloudBatchCache; +}; /* GPUBatch cache management. */ -static bool pointcloud_batch_cache_valid(PointCloud *pointcloud) +static PointCloudBatchCache *pointcloud_batch_cache_get(PointCloud &pointcloud) +{ + return static_cast<PointCloudBatchCache *>(pointcloud.batch_cache); +} + +static bool pointcloud_batch_cache_valid(PointCloud &pointcloud) { - PointCloudBatchCache *cache = pointcloud->batch_cache; + PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); - if (cache == NULL) { + if (cache == nullptr) { return false; } - if (cache->mat_len != DRW_pointcloud_material_count_get(pointcloud)) { + if (cache->mat_len != DRW_pointcloud_material_count_get(&pointcloud)) { return false; } return cache->is_dirty == false; } -static void pointcloud_batch_cache_init(PointCloud *pointcloud) +static void pointcloud_batch_cache_init(PointCloud &pointcloud) { - PointCloudBatchCache *cache = pointcloud->batch_cache; + PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); if (!cache) { - cache = pointcloud->batch_cache = MEM_callocN(sizeof(*cache), __func__); + cache = MEM_cnew<PointCloudBatchCache>(__func__); + pointcloud.batch_cache = cache; } else { memset(cache, 0, sizeof(*cache)); } - cache->mat_len = DRW_pointcloud_material_count_get(pointcloud); - cache->surface_per_mat = MEM_callocN(sizeof(GPUBatch *) * cache->mat_len, - "pointcloud suface_per_mat"); + cache->mat_len = DRW_pointcloud_material_count_get(&pointcloud); + cache->surface_per_mat = static_cast<GPUBatch **>( + MEM_callocN(sizeof(GPUBatch *) * cache->mat_len, __func__)); cache->is_dirty = false; } -void DRW_pointcloud_batch_cache_validate(PointCloud *pointcloud) -{ - if 
(!pointcloud_batch_cache_valid(pointcloud)) { - pointcloud_batch_cache_clear(pointcloud); - pointcloud_batch_cache_init(pointcloud); - } -} - -static PointCloudBatchCache *pointcloud_batch_cache_get(PointCloud *pointcloud) -{ - return pointcloud->batch_cache; -} - void DRW_pointcloud_batch_cache_dirty_tag(PointCloud *pointcloud, int mode) { - PointCloudBatchCache *cache = pointcloud->batch_cache; - if (cache == NULL) { + PointCloudBatchCache *cache = pointcloud_batch_cache_get(*pointcloud); + if (cache == nullptr) { return; } switch (mode) { @@ -105,9 +98,9 @@ void DRW_pointcloud_batch_cache_dirty_tag(PointCloud *pointcloud, int mode) } } -static void pointcloud_batch_cache_clear(PointCloud *pointcloud) +static void pointcloud_batch_cache_clear(PointCloud &pointcloud) { - PointCloudBatchCache *cache = pointcloud->batch_cache; + PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); if (!cache) { return; } @@ -126,50 +119,65 @@ static void pointcloud_batch_cache_clear(PointCloud *pointcloud) MEM_SAFE_FREE(cache->surface_per_mat); } +void DRW_pointcloud_batch_cache_validate(PointCloud *pointcloud) +{ + if (!pointcloud_batch_cache_valid(*pointcloud)) { + pointcloud_batch_cache_clear(*pointcloud); + pointcloud_batch_cache_init(*pointcloud); + } +} + void DRW_pointcloud_batch_cache_free(PointCloud *pointcloud) { - pointcloud_batch_cache_clear(pointcloud); + pointcloud_batch_cache_clear(*pointcloud); MEM_SAFE_FREE(pointcloud->batch_cache); } -static void pointcloud_batch_cache_ensure_pos(Object *ob, PointCloudBatchCache *cache) +static void pointcloud_batch_cache_ensure_pos(const PointCloud &pointcloud, + PointCloudBatchCache &cache) { - if (cache->pos != NULL) { + using namespace blender; + if (cache.pos != nullptr) { return; } - PointCloud *pointcloud = ob->data; - const bool has_radius = pointcloud->radius != NULL; - - static GPUVertFormat format = {0}; - static GPUVertFormat format_no_radius = {0}; - static uint pos; - if (format.attr_len == 0) { - /* 
initialize vertex format */ - /* From the opengl wiki: - * Note that size does not have to exactly match the size used by the vertex shader. If the - * vertex shader has fewer components than the attribute provides, then the extras are ignored. - * If the vertex shader has more components than the array provides, the extras are given - * values from the vector (0, 0, 0, 1) for the missing XYZW components. - */ - pos = GPU_vertformat_attr_add(&format_no_radius, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); - pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - } - - cache->pos = GPU_vertbuf_create_with_format(has_radius ? &format : &format_no_radius); - GPU_vertbuf_data_alloc(cache->pos, pointcloud->totpoint); - - if (has_radius) { - float(*vbo_data)[4] = (float(*)[4])GPU_vertbuf_get_data(cache->pos); - for (int i = 0; i < pointcloud->totpoint; i++) { - copy_v3_v3(vbo_data[i], pointcloud->co[i]); - /* TODO(fclem): remove multiplication here. - * Here only for keeping the size correct for now. */ - vbo_data[i][3] = pointcloud->radius[i] * 100.0f; + const bke::AttributeAccessor attributes = pointcloud.attributes(); + const VArraySpan<float3> positions = attributes.lookup<float3>("position", ATTR_DOMAIN_POINT); + const VArray<float> radii = attributes.lookup<float>("radius", ATTR_DOMAIN_POINT); + /* From the opengl wiki: + * Note that size does not have to exactly match the size used by the vertex shader. If the + * vertex shader has fewer components than the attribute provides, then the extras are ignored. + * If the vertex shader has more components than the array provides, the extras are given + * values from the vector (0, 0, 0, 1) for the missing XYZW components. 
*/ + if (radii) { + static GPUVertFormat format = {0}; + if (format.attr_len == 0) { + GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); } + cache.pos = GPU_vertbuf_create_with_format(&format); + GPU_vertbuf_data_alloc(cache.pos, positions.size()); + const VArraySpan<float> radii_span(radii); + MutableSpan<float4> vbo_data{static_cast<float4 *>(GPU_vertbuf_get_data(cache.pos)), + pointcloud.totpoint}; + threading::parallel_for(vbo_data.index_range(), 4096, [&](IndexRange range) { + for (const int i : range) { + vbo_data[i].x = positions[i].x; + vbo_data[i].y = positions[i].y; + vbo_data[i].z = positions[i].z; + /* TODO(fclem): remove multiplication. Here only for keeping the size correct for now. */ + vbo_data[i].w = radii_span[i] * 100.0f; + } + }); } else { - GPU_vertbuf_attr_fill(cache->pos, pos, pointcloud->co); + static GPUVertFormat format = {0}; + static uint pos; + if (format.attr_len == 0) { + pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); + } + cache.pos = GPU_vertbuf_create_with_format(&format); + GPU_vertbuf_data_alloc(cache.pos, positions.size()); + GPU_vertbuf_attr_fill(cache.pos, pos, positions.data()); } } @@ -188,24 +196,23 @@ static const uint half_octahedron_tris[4][3] = { {0, 4, 1}, }; -static void pointcloud_batch_cache_ensure_geom(Object *UNUSED(ob), PointCloudBatchCache *cache) +static void pointcloud_batch_cache_ensure_geom(PointCloudBatchCache &cache) { - if (cache->geom != NULL) { + if (cache.geom != nullptr) { return; } static GPUVertFormat format = {0}; static uint pos; if (format.attr_len == 0) { - /* initialize vertex format */ pos = GPU_vertformat_attr_add(&format, "pos_inst", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); GPU_vertformat_alias_add(&format, "nor"); } - cache->geom = GPU_vertbuf_create_with_format(&format); - GPU_vertbuf_data_alloc(cache->geom, ARRAY_SIZE(half_octahedron_normals)); + cache.geom = GPU_vertbuf_create_with_format(&format); + GPU_vertbuf_data_alloc(cache.geom, 
ARRAY_SIZE(half_octahedron_normals)); - GPU_vertbuf_attr_fill(cache->geom, pos, half_octahedron_normals); + GPU_vertbuf_attr_fill(cache.geom, pos, half_octahedron_normals); GPUIndexBufBuilder builder; GPU_indexbuf_init(&builder, @@ -217,17 +224,17 @@ static void pointcloud_batch_cache_ensure_geom(Object *UNUSED(ob), PointCloudBat GPU_indexbuf_add_tri_verts(&builder, UNPACK3(half_octahedron_tris[i])); } - cache->geom_indices = GPU_indexbuf_build(&builder); + cache.geom_indices = GPU_indexbuf_build(&builder); } GPUBatch *DRW_pointcloud_batch_cache_get_dots(Object *ob) { - PointCloud *pointcloud = ob->data; + PointCloud &pointcloud = *static_cast<PointCloud *>(ob->data); PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); - if (cache->dots == NULL) { - pointcloud_batch_cache_ensure_pos(ob, cache); - cache->dots = GPU_batch_create(GPU_PRIM_POINTS, cache->pos, NULL); + if (cache->dots == nullptr) { + pointcloud_batch_cache_ensure_pos(pointcloud, *cache); + cache->dots = GPU_batch_create(GPU_PRIM_POINTS, cache->pos, nullptr); } return cache->dots; @@ -235,12 +242,12 @@ GPUBatch *DRW_pointcloud_batch_cache_get_dots(Object *ob) GPUBatch *DRW_pointcloud_batch_cache_get_surface(Object *ob) { - PointCloud *pointcloud = ob->data; + PointCloud &pointcloud = *static_cast<PointCloud *>(ob->data); PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); - if (cache->surface == NULL) { - pointcloud_batch_cache_ensure_pos(ob, cache); - pointcloud_batch_cache_ensure_geom(ob, cache); + if (cache->surface == nullptr) { + pointcloud_batch_cache_ensure_pos(pointcloud, *cache); + pointcloud_batch_cache_ensure_geom(*cache); cache->surface = GPU_batch_create(GPU_PRIM_TRIS, cache->geom, cache->geom_indices); GPU_batch_instbuf_add_ex(cache->surface, cache->pos, false); @@ -253,14 +260,14 @@ GPUBatch **DRW_cache_pointcloud_surface_shaded_get(Object *ob, struct GPUMaterial **UNUSED(gpumat_array), uint gpumat_array_len) { - PointCloud *pointcloud = ob->data; + 
PointCloud &pointcloud = *static_cast<PointCloud *>(ob->data); PointCloudBatchCache *cache = pointcloud_batch_cache_get(pointcloud); BLI_assert(cache->mat_len == gpumat_array_len); UNUSED_VARS(gpumat_array_len); - if (cache->surface_per_mat[0] == NULL) { - pointcloud_batch_cache_ensure_pos(ob, cache); - pointcloud_batch_cache_ensure_geom(ob, cache); + if (cache->surface_per_mat[0] == nullptr) { + pointcloud_batch_cache_ensure_pos(pointcloud, *cache); + pointcloud_batch_cache_ensure_geom(*cache); cache->surface_per_mat[0] = GPU_batch_create(GPU_PRIM_TRIS, cache->geom, cache->geom_indices); GPU_batch_instbuf_add_ex(cache->surface_per_mat[0], cache->pos, false); diff --git a/source/blender/draw/intern/draw_cache_impl_subdivision.cc b/source/blender/draw/intern/draw_cache_impl_subdivision.cc index b37a420b555..ab935809f96 100644 --- a/source/blender/draw/intern/draw_cache_impl_subdivision.cc +++ b/source/blender/draw/intern/draw_cache_impl_subdivision.cc @@ -7,8 +7,10 @@ #include "DNA_object_types.h" #include "DNA_scene_types.h" +#include "BKE_attribute.hh" #include "BKE_editmesh.h" #include "BKE_mesh.h" +#include "BKE_mesh_mapping.h" #include "BKE_modifier.h" #include "BKE_object.h" #include "BKE_scene.h" @@ -19,8 +21,8 @@ #include "BKE_subdiv_modifier.h" #include "BLI_linklist.h" - #include "BLI_string.h" +#include "BLI_virtual_array.hh" #include "PIL_time.h" @@ -44,6 +46,8 @@ #include "draw_cache_inline.h" #include "mesh_extractors/extract_mesh.hh" +using blender::Span; + extern "C" char datatoc_common_subdiv_custom_data_interp_comp_glsl[]; extern "C" char datatoc_common_subdiv_ibo_lines_comp_glsl[]; extern "C" char datatoc_common_subdiv_ibo_tris_comp_glsl[]; @@ -668,20 +672,23 @@ static void draw_subdiv_cache_extra_coarse_face_data_bm(BMesh *bm, } } -static void draw_subdiv_cache_extra_coarse_face_data_mesh(Mesh *mesh, uint32_t *flags_data) +static void draw_subdiv_cache_extra_coarse_face_data_mesh(const MeshRenderData *mr, + Mesh *mesh, + uint32_t *flags_data) { - 
for (int i = 0; i < mesh->totpoly; i++) { + const Span<MPoly> polys = mesh->polys(); + for (const int i : polys.index_range()) { uint32_t flag = 0; - if ((mesh->mpoly[i].flag & ME_SMOOTH) != 0) { + if ((polys[i].flag & ME_SMOOTH) != 0) { flag |= SUBDIV_COARSE_FACE_FLAG_SMOOTH; } - if ((mesh->mpoly[i].flag & ME_FACE_SEL) != 0) { + if ((polys[i].flag & ME_FACE_SEL) != 0) { flag |= SUBDIV_COARSE_FACE_FLAG_SELECT; } - if ((mesh->mpoly[i].flag & ME_HIDE) != 0) { + if (mr->hide_poly && mr->hide_poly[i]) { flag |= SUBDIV_COARSE_FACE_FLAG_HIDDEN; } - flags_data[i] = (uint)(mesh->mpoly[i].loopstart) | (flag << SUBDIV_COARSE_FACE_FLAG_OFFSET); + flags_data[i] = (uint)(polys[i].loopstart) | (flag << SUBDIV_COARSE_FACE_FLAG_OFFSET); } } @@ -691,7 +698,7 @@ static void draw_subdiv_cache_extra_coarse_face_data_mapped(Mesh *mesh, uint32_t *flags_data) { if (bm == nullptr) { - draw_subdiv_cache_extra_coarse_face_data_mesh(mesh, flags_data); + draw_subdiv_cache_extra_coarse_face_data_mesh(mr, mesh, flags_data); return; } @@ -722,11 +729,11 @@ static void draw_subdiv_cache_update_extra_coarse_face_data(DRWSubdivCache *cach if (mr->extract_type == MR_EXTRACT_BMESH) { draw_subdiv_cache_extra_coarse_face_data_bm(cache->bm, mr->efa_act, flags_data); } - else if (mr->extract_type == MR_EXTRACT_MAPPED) { + else if (mr->p_origindex != nullptr) { draw_subdiv_cache_extra_coarse_face_data_mapped(mesh, cache->bm, mr, flags_data); } else { - draw_subdiv_cache_extra_coarse_face_data_mesh(mesh, flags_data); + draw_subdiv_cache_extra_coarse_face_data_mesh(mr, mesh, flags_data); } /* Make sure updated data is re-uploaded. 
*/ @@ -801,15 +808,15 @@ struct DRWCacheBuildingContext { }; static bool draw_subdiv_topology_info_cb(const SubdivForeachContext *foreach_context, - const int num_vertices, + const int num_verts, const int num_edges, const int num_loops, - const int num_polygons, + const int num_polys, const int *subdiv_polygon_offset) { /* num_loops does not take into account meshes with only loose geometry, which might be meshes - * used as custom bone shapes, so let's check the num_vertices also. */ - if (num_vertices == 0 && num_loops == 0) { + * used as custom bone shapes, so let's check the num_verts also. */ + if (num_verts == 0 && num_loops == 0) { return false; } @@ -820,12 +827,12 @@ static bool draw_subdiv_topology_info_cb(const SubdivForeachContext *foreach_con if (num_loops != 0) { cache->num_subdiv_edges = (uint)num_edges; cache->num_subdiv_loops = (uint)num_loops; - cache->num_subdiv_verts = (uint)num_vertices; - cache->num_subdiv_quads = (uint)num_polygons; + cache->num_subdiv_verts = (uint)num_verts; + cache->num_subdiv_quads = (uint)num_polys; cache->subdiv_polygon_offset = static_cast<int *>(MEM_dupallocN(subdiv_polygon_offset)); } - cache->may_have_loose_geom = num_vertices != 0 || num_edges != 0; + cache->may_have_loose_geom = num_verts != 0 || num_edges != 0; /* Initialize cache buffers, prefer dynamic usage so we can reuse memory on the host even after * it was sent to the device, since we may use the data while building other buffers on the CPU @@ -876,7 +883,7 @@ static bool draw_subdiv_topology_info_cb(const SubdivForeachContext *foreach_con if (cache->num_subdiv_verts) { ctx->vert_origindex_map = static_cast<int *>( MEM_mallocN(cache->num_subdiv_verts * sizeof(int), "subdiv_vert_origindex_map")); - for (int i = 0; i < num_vertices; i++) { + for (int i = 0; i < num_verts; i++) { ctx->vert_origindex_map[i] = -1; } } @@ -1089,6 +1096,7 @@ static bool draw_subdiv_build_cache(DRWSubdivCache *cache, } /* Only build polygon related data if we have polygons. 
*/ + const Span<MPoly> polys = mesh_eval->polys(); if (cache->num_subdiv_loops != 0) { /* Build buffers for the PatchMap. */ draw_patch_map_build(&cache->gpu_patch_map, subdiv); @@ -1102,7 +1110,7 @@ static bool draw_subdiv_build_cache(DRWSubdivCache *cache, GPU_vertbuf_get_data(cache->fdots_patch_coords); for (int i = 0; i < mesh_eval->totpoly; i++) { const int ptex_face_index = cache->face_ptex_offset[i]; - if (mesh_eval->mpoly[i].totloop == 4) { + if (polys[i].totloop == 4) { /* For quads, the center coordinate of the coarse face has `u = v = 0.5`. */ blender_fdots_patch_coords[i] = make_patch_coord(ptex_face_index, 0.5f, 0.5f); } @@ -1115,16 +1123,16 @@ static bool draw_subdiv_build_cache(DRWSubdivCache *cache, } cache->subdiv_polygon_offset_buffer = draw_subdiv_build_origindex_buffer( - cache->subdiv_polygon_offset, mesh_eval->totpoly); + cache->subdiv_polygon_offset, polys.size()); cache->face_ptex_offset_buffer = draw_subdiv_build_origindex_buffer(cache->face_ptex_offset, - mesh_eval->totpoly + 1); + polys.size() + 1); build_vertex_face_adjacency_maps(cache); } cache->resolution = to_mesh_settings.resolution; - cache->num_coarse_poly = mesh_eval->totpoly; + cache->num_coarse_poly = polys.size(); /* To avoid floating point precision issues when evaluating patches at patch boundaries, * ensure that all loops sharing a vertex use the same patch coordinate. This could cause @@ -1204,8 +1212,8 @@ struct DRWSubdivUboStorage { * of out of bond accesses as compute dispatch are of fixed size. 
*/ uint total_dispatch_size; - int _pad0; - int _pad2; + int is_edit_mode; + int use_hide; int _pad3; }; @@ -1236,6 +1244,8 @@ static void draw_subdiv_init_ubo_storage(const DRWSubdivCache *cache, ubo->coarse_face_hidden_mask = SUBDIV_COARSE_FACE_FLAG_HIDDEN_MASK; ubo->coarse_face_loopstart_mask = SUBDIV_COARSE_FACE_LOOP_START_MASK; ubo->total_dispatch_size = total_dispatch_size; + ubo->is_edit_mode = cache->is_edit_mode; + ubo->use_hide = cache->use_hide; } static void draw_subdiv_ubo_update_and_bind(const DRWSubdivCache *cache, @@ -1468,6 +1478,11 @@ void draw_subdiv_interp_custom_data(const DRWSubdivCache *cache, { GPUShader *shader = nullptr; + if (!draw_subdiv_cache_need_polygon_data(cache)) { + /* Happens on meshes with only loose geometry. */ + return; + } + if (dimensions == 1) { shader = get_subdiv_shader(SHADER_COMP_CUSTOM_DATA_INTERP_1D, "#define SUBDIV_POLYGON_OFFSET\n" @@ -1953,17 +1968,19 @@ static void draw_subdiv_cache_ensure_mat_offsets(DRWSubdivCache *cache, return; } + const blender::VArraySpan<int> material_indices = mesh_eval->attributes().lookup_or_default<int>( + "material_index", ATTR_DOMAIN_FACE, 0); + /* Count number of subdivided polygons for each material. */ int *mat_start = static_cast<int *>(MEM_callocN(sizeof(int) * mat_len, "subdiv mat_start")); int *subdiv_polygon_offset = cache->subdiv_polygon_offset; /* TODO: parallel_reduce? */ for (int i = 0; i < mesh_eval->totpoly; i++) { - const MPoly *mpoly = &mesh_eval->mpoly[i]; const int next_offset = (i == mesh_eval->totpoly - 1) ? 
number_of_quads : subdiv_polygon_offset[i + 1]; const int quad_count = next_offset - subdiv_polygon_offset[i]; - const int mat_index = mpoly->mat_nr; + const int mat_index = material_indices[i]; mat_start[mat_index] += quad_count; } @@ -1982,8 +1999,7 @@ static void draw_subdiv_cache_ensure_mat_offsets(DRWSubdivCache *cache, MEM_mallocN(sizeof(int) * mesh_eval->totpoly, "per_polygon_mat_offset")); for (int i = 0; i < mesh_eval->totpoly; i++) { - const MPoly *mpoly = &mesh_eval->mpoly[i]; - const int mat_index = mpoly->mat_nr; + const int mat_index = material_indices[i]; const int single_material_index = subdiv_polygon_offset[i]; const int material_offset = mat_end[mat_index]; const int next_offset = (i == mesh_eval->totpoly - 1) ? number_of_quads : @@ -2004,7 +2020,7 @@ static void draw_subdiv_cache_ensure_mat_offsets(DRWSubdivCache *cache, static bool draw_subdiv_create_requested_buffers(Object *ob, Mesh *mesh, - struct MeshBatchCache *batch_cache, + MeshBatchCache *batch_cache, MeshBufferCache *mbc, const bool is_editmode, const bool is_paint_mode, @@ -2012,6 +2028,7 @@ static bool draw_subdiv_create_requested_buffers(Object *ob, const float obmat[4][4], const bool do_final, const bool do_uvedit, + const bool do_cage, const ToolSettings *ts, const bool use_hide, OpenSubdiv_EvaluatorCache *evaluator_cache) @@ -2038,7 +2055,7 @@ static bool draw_subdiv_create_requested_buffers(Object *ob, draw_subdiv_invalidate_evaluator_for_orco(subdiv, mesh_eval); if (!BKE_subdiv_eval_begin_from_mesh( - subdiv, mesh_eval, nullptr, SUBDIV_EVALUATOR_TYPE_GLSL_COMPUTE, evaluator_cache)) { + subdiv, mesh_eval, nullptr, SUBDIV_EVALUATOR_TYPE_GPU, evaluator_cache)) { /* This could happen in two situations: * - OpenSubdiv is disabled. 
* - Something totally bad happened, and OpenSubdiv rejected our @@ -2055,9 +2072,8 @@ static bool draw_subdiv_create_requested_buffers(Object *ob, return false; } - /* Edges which do not come from coarse edges should not be drawn in edit mode, only in object - * mode when optimal display in turned off. */ - const bool optimal_display = runtime_data->use_optimal_display || is_editmode; + /* Edges which do not come from coarse edges should not be drawn in edit cage mode. */ + const bool optimal_display = runtime_data->use_optimal_display || (is_editmode && !do_cage); draw_cache->bm = bm; draw_cache->mesh = mesh_eval; @@ -2083,6 +2099,12 @@ static bool draw_subdiv_create_requested_buffers(Object *ob, MeshRenderData *mr = mesh_render_data_create( ob, mesh, is_editmode, is_paint_mode, is_mode_active, obmat, do_final, do_uvedit, ts); mr->use_hide = use_hide; + draw_cache->use_hide = use_hide; + + /* Used for setting loop normals flags. Mapped extraction is only used during edit mode. + * See comments in #extract_lnor_iter_poly_mesh. + */ + draw_cache->is_edit_mode = mr->edit_bmesh != nullptr; draw_subdiv_cache_update_extra_coarse_face_data(draw_cache, mesh_eval, mr); @@ -2134,9 +2156,20 @@ void DRW_subdivide_loose_geom(DRWSubdivCache *subdiv_cache, MeshBufferCache *cac int subd_vert_offset = 0; /* Subdivide each loose coarse edge. */ + const Span<MVert> coarse_verts = coarse_mesh->verts(); + const Span<MEdge> coarse_edges = coarse_mesh->edges(); + + int *vert_to_edge_buffer; + MeshElemMap *vert_to_edge_map; + BKE_mesh_vert_edge_map_create(&vert_to_edge_map, + &vert_to_edge_buffer, + coarse_edges.data(), + coarse_mesh->totvert, + coarse_edges.size()); + for (int i = 0; i < coarse_loose_edge_len; i++) { const int coarse_edge_index = cache->loose_geom.edges[i]; - const MEdge *coarse_edge = &coarse_mesh->medge[cache->loose_geom.edges[i]]; + const MEdge *coarse_edge = &coarse_edges[cache->loose_geom.edges[i]]; /* Perform interpolation of each vertex. 
*/ for (int i = 0; i < resolution - 1; i++, subd_edge_offset++) { @@ -2147,8 +2180,13 @@ void DRW_subdivide_loose_geom(DRWSubdivCache *subdiv_cache, MeshBufferCache *cac DRWSubdivLooseVertex &subd_v1 = loose_subd_verts[subd_vert_offset]; subd_v1.coarse_vertex_index = (i == 0) ? coarse_edge->v1 : -1u; const float u1 = i * inv_resolution_1; - BKE_subdiv_mesh_interpolate_position_on_edge( - coarse_mesh, coarse_edge, is_simple, u1, subd_v1.co); + BKE_subdiv_mesh_interpolate_position_on_edge(coarse_verts.data(), + coarse_edges.data(), + vert_to_edge_map, + coarse_edge_index, + is_simple, + u1, + subd_v1.co); subd_edge.loose_subdiv_v1_index = subd_vert_offset++; @@ -2156,17 +2194,25 @@ void DRW_subdivide_loose_geom(DRWSubdivCache *subdiv_cache, MeshBufferCache *cac DRWSubdivLooseVertex &subd_v2 = loose_subd_verts[subd_vert_offset]; subd_v2.coarse_vertex_index = ((i + 1) == resolution - 1) ? coarse_edge->v2 : -1u; const float u2 = (i + 1) * inv_resolution_1; - BKE_subdiv_mesh_interpolate_position_on_edge( - coarse_mesh, coarse_edge, is_simple, u2, subd_v2.co); + BKE_subdiv_mesh_interpolate_position_on_edge(coarse_verts.data(), + coarse_edges.data(), + vert_to_edge_map, + coarse_edge_index, + is_simple, + u2, + subd_v2.co); subd_edge.loose_subdiv_v2_index = subd_vert_offset++; } } + MEM_freeN(vert_to_edge_buffer); + MEM_freeN(vert_to_edge_map); + /* Copy the remaining loose_verts. 
*/ for (int i = 0; i < coarse_loose_vert_len; i++) { const int coarse_vertex_index = cache->loose_geom.verts[i]; - const MVert &coarse_vertex = coarse_mesh->mvert[coarse_vertex_index]; + const MVert &coarse_vertex = coarse_verts[coarse_vertex_index]; DRWSubdivLooseVertex &subd_v = loose_subd_verts[subd_vert_offset++]; subd_v.coarse_vertex_index = cache->loose_geom.verts[i]; @@ -2195,7 +2241,7 @@ static OpenSubdiv_EvaluatorCache *g_evaluator_cache = nullptr; void DRW_create_subdivision(Object *ob, Mesh *mesh, - struct MeshBatchCache *batch_cache, + MeshBatchCache *batch_cache, MeshBufferCache *mbc, const bool is_editmode, const bool is_paint_mode, @@ -2203,11 +2249,12 @@ void DRW_create_subdivision(Object *ob, const float obmat[4][4], const bool do_final, const bool do_uvedit, + const bool do_cage, const ToolSettings *ts, const bool use_hide) { if (g_evaluator_cache == nullptr) { - g_evaluator_cache = openSubdiv_createEvaluatorCache(OPENSUBDIV_EVALUATOR_GLSL_COMPUTE); + g_evaluator_cache = openSubdiv_createEvaluatorCache(OPENSUBDIV_EVALUATOR_GPU); } #undef TIME_SUBDIV @@ -2226,6 +2273,7 @@ void DRW_create_subdivision(Object *ob, obmat, do_final, do_uvedit, + do_cage, ts, use_hide, g_evaluator_cache)) { diff --git a/source/blender/draw/intern/draw_color_management.cc b/source/blender/draw/intern/draw_color_management.cc index bb11f1ab3ad..eab86226be5 100644 --- a/source/blender/draw/intern/draw_color_management.cc +++ b/source/blender/draw/intern/draw_color_management.cc @@ -169,7 +169,7 @@ void DRW_transform_none(GPUTexture *tex) /* Draw as texture for final render (without immediate mode). 
*/ GPUBatch *geom = DRW_cache_fullscreen_quad_get(); - GPU_batch_program_set_builtin(geom, GPU_SHADER_2D_IMAGE_COLOR); + GPU_batch_program_set_builtin(geom, GPU_SHADER_3D_IMAGE_COLOR); GPU_batch_uniform_4f(geom, "color", 1.0f, 1.0f, 1.0f, 1.0f); GPU_batch_texture_bind(geom, "image", tex); diff --git a/source/blender/draw/intern/draw_command.cc b/source/blender/draw/intern/draw_command.cc new file mode 100644 index 00000000000..ff69885b3b6 --- /dev/null +++ b/source/blender/draw/intern/draw_command.cc @@ -0,0 +1,600 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#include "GPU_batch.h" +#include "GPU_capabilities.h" +#include "GPU_compute.h" +#include "GPU_debug.h" + +#include "draw_command.hh" +#include "draw_shader.h" +#include "draw_view.hh" + +#include <bitset> +#include <sstream> + +namespace blender::draw::command { + +/* -------------------------------------------------------------------- */ +/** \name Commands Execution + * \{ */ + +void ShaderBind::execute(RecordingState &state) const +{ + if (assign_if_different(state.shader, shader)) { + GPU_shader_bind(shader); + } +} + +void ResourceBind::execute() const +{ + if (slot == -1) { + return; + } + switch (type) { + case ResourceBind::Type::Sampler: + GPU_texture_bind_ex(is_reference ? *texture_ref : texture, sampler, slot, false); + break; + case ResourceBind::Type::Image: + GPU_texture_image_bind(is_reference ? *texture_ref : texture, slot); + break; + case ResourceBind::Type::UniformBuf: + GPU_uniformbuf_bind(is_reference ? *uniform_buf_ref : uniform_buf, slot); + break; + case ResourceBind::Type::StorageBuf: + GPU_storagebuf_bind(is_reference ? 
*storage_buf_ref : storage_buf, slot); + break; + } +} + +void PushConstant::execute(RecordingState &state) const +{ + if (location == -1) { + return; + } + switch (type) { + case PushConstant::Type::IntValue: + GPU_shader_uniform_vector_int(state.shader, location, comp_len, array_len, int4_value); + break; + case PushConstant::Type::IntReference: + GPU_shader_uniform_vector_int(state.shader, location, comp_len, array_len, int_ref); + break; + case PushConstant::Type::FloatValue: + GPU_shader_uniform_vector(state.shader, location, comp_len, array_len, float4_value); + break; + case PushConstant::Type::FloatReference: + GPU_shader_uniform_vector(state.shader, location, comp_len, array_len, float_ref); + break; + } +} + +void Draw::execute(RecordingState &state) const +{ + state.front_facing_set(handle.has_inverted_handedness()); + + if (GPU_shader_draw_parameters_support() == false) { + GPU_batch_resource_id_buf_set(batch, state.resource_id_buf); + } + + GPU_batch_set_shader(batch, state.shader); + GPU_batch_draw_advanced(batch, vertex_first, vertex_len, 0, instance_len); +} + +void DrawMulti::execute(RecordingState &state) const +{ + DrawMultiBuf::DrawCommandBuf &indirect_buf = multi_draw_buf->command_buf_; + DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_; + + uint group_index = this->group_first; + while (group_index != (uint)-1) { + const DrawGroup &group = groups[group_index]; + + if (group.vertex_len > 0) { + if (GPU_shader_draw_parameters_support() == false) { + GPU_batch_resource_id_buf_set(group.gpu_batch, state.resource_id_buf); + } + + GPU_batch_set_shader(group.gpu_batch, state.shader); + + constexpr intptr_t stride = sizeof(DrawCommand); + /* We have 2 indirect command reserved per draw group. */ + intptr_t offset = stride * group_index * 2; + + /* Draw negatively scaled geometry first. 
*/ + if (group.len - group.front_facing_len > 0) { + state.front_facing_set(true); + GPU_batch_draw_indirect(group.gpu_batch, indirect_buf, offset); + } + + if (group.front_facing_len > 0) { + state.front_facing_set(false); + GPU_batch_draw_indirect(group.gpu_batch, indirect_buf, offset + stride); + } + } + + group_index = group.next; + } +} + +void DrawIndirect::execute(RecordingState &state) const +{ + state.front_facing_set(handle.has_inverted_handedness()); + + GPU_batch_draw_indirect(batch, *indirect_buf, 0); +} + +void Dispatch::execute(RecordingState &state) const +{ + if (is_reference) { + GPU_compute_dispatch(state.shader, size_ref->x, size_ref->y, size_ref->z); + } + else { + GPU_compute_dispatch(state.shader, size.x, size.y, size.z); + } +} + +void DispatchIndirect::execute(RecordingState &state) const +{ + GPU_compute_dispatch_indirect(state.shader, *indirect_buf); +} + +void Barrier::execute() const +{ + GPU_memory_barrier(type); +} + +void Clear::execute() const +{ + GPUFrameBuffer *fb = GPU_framebuffer_active_get(); + GPU_framebuffer_clear(fb, (eGPUFrameBufferBits)clear_channels, color, depth, stencil); +} + +void StateSet::execute(RecordingState &recording_state) const +{ + /** + * Does not support locked state for the moment and never should. + * Better implement a less hacky selection! + */ + BLI_assert(DST.state_lock == 0); + + if (!assign_if_different(recording_state.pipeline_state, new_state)) { + return; + } + + /* Keep old API working. Keep the state tracking in sync. */ + /* TODO(fclem): Move at the end of a pass. */ + DST.state = new_state; + + GPU_state_set(to_write_mask(new_state), + to_blend(new_state), + to_face_cull_test(new_state), + to_depth_test(new_state), + to_stencil_test(new_state), + to_stencil_op(new_state), + to_provoking_vertex(new_state)); + + if (new_state & DRW_STATE_SHADOW_OFFSET) { + GPU_shadow_offset(true); + } + else { + GPU_shadow_offset(false); + } + + /* TODO: this should be part of shader state. 
*/ + if (new_state & DRW_STATE_CLIP_PLANES) { + GPU_clip_distances(recording_state.view_clip_plane_count); + } + else { + GPU_clip_distances(0); + } + + if (new_state & DRW_STATE_IN_FRONT_SELECT) { + /* XXX `GPU_depth_range` is not a perfect solution + * since very distant geometries can still be occluded. + * Also the depth test precision of these geometries is impaired. + * However, it solves the selection for the vast majority of cases. */ + GPU_depth_range(0.0f, 0.01f); + } + else { + GPU_depth_range(0.0f, 1.0f); + } + + if (new_state & DRW_STATE_PROGRAM_POINT_SIZE) { + GPU_program_point_size(true); + } + else { + GPU_program_point_size(false); + } +} + +void StencilSet::execute() const +{ + GPU_stencil_write_mask_set(write_mask); + GPU_stencil_compare_mask_set(compare_mask); + GPU_stencil_reference_set(reference); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Commands Serialization for debugging + * \{ */ + +std::string ShaderBind::serialize() const +{ + return std::string(".shader_bind(") + GPU_shader_get_name(shader) + ")"; +} + +std::string ResourceBind::serialize() const +{ + switch (type) { + case Type::Sampler: + return std::string(".bind_texture") + (is_reference ? "_ref" : "") + "(" + + std::to_string(slot) + + (sampler != GPU_SAMPLER_MAX ? ", sampler=" + std::to_string(sampler) : "") + ")"; + case Type::Image: + return std::string(".bind_image") + (is_reference ? "_ref" : "") + "(" + + std::to_string(slot) + ")"; + case Type::UniformBuf: + return std::string(".bind_uniform_buf") + (is_reference ? "_ref" : "") + "(" + + std::to_string(slot) + ")"; + case Type::StorageBuf: + return std::string(".bind_storage_buf") + (is_reference ? 
"_ref" : "") + "(" + + std::to_string(slot) + ")"; + default: + BLI_assert_unreachable(); + return ""; + } +} + +std::string PushConstant::serialize() const +{ + std::stringstream ss; + for (int i = 0; i < array_len; i++) { + switch (comp_len) { + case 1: + switch (type) { + case Type::IntValue: + ss << int1_value; + break; + case Type::IntReference: + ss << int_ref[i]; + break; + case Type::FloatValue: + ss << float1_value; + break; + case Type::FloatReference: + ss << float_ref[i]; + break; + } + break; + case 2: + switch (type) { + case Type::IntValue: + ss << int2_value; + break; + case Type::IntReference: + ss << int2_ref[i]; + break; + case Type::FloatValue: + ss << float2_value; + break; + case Type::FloatReference: + ss << float2_ref[i]; + break; + } + break; + case 3: + switch (type) { + case Type::IntValue: + ss << int3_value; + break; + case Type::IntReference: + ss << int3_ref[i]; + break; + case Type::FloatValue: + ss << float3_value; + break; + case Type::FloatReference: + ss << float3_ref[i]; + break; + } + break; + case 4: + switch (type) { + case Type::IntValue: + ss << int4_value; + break; + case Type::IntReference: + ss << int4_ref[i]; + break; + case Type::FloatValue: + ss << float4_value; + break; + case Type::FloatReference: + ss << float4_ref[i]; + break; + } + break; + case 16: + switch (type) { + case Type::IntValue: + case Type::IntReference: + BLI_assert_unreachable(); + break; + case Type::FloatValue: + ss << *reinterpret_cast<const float4x4 *>(&float4_value); + break; + case Type::FloatReference: + ss << *float4x4_ref; + break; + } + break; + } + if (i < array_len - 1) { + ss << ", "; + } + } + + return std::string(".push_constant(") + std::to_string(location) + ", data=" + ss.str() + ")"; +} + +std::string Draw::serialize() const +{ + std::string inst_len = (instance_len == (uint)-1) ? "from_batch" : std::to_string(instance_len); + std::string vert_len = (vertex_len == (uint)-1) ? 
"from_batch" : std::to_string(vertex_len); + std::string vert_first = (vertex_first == (uint)-1) ? "from_batch" : + std::to_string(vertex_first); + return std::string(".draw(inst_len=") + inst_len + ", vert_len=" + vert_len + + ", vert_first=" + vert_first + ", res_id=" + std::to_string(handle.resource_index()) + + ")"; +} + +std::string DrawMulti::serialize(std::string line_prefix) const +{ + DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_; + + MutableSpan<DrawPrototype> prototypes(multi_draw_buf->prototype_buf_.data(), + multi_draw_buf->prototype_count_); + + /* This emulates the GPU sorting but without the unstable draw order. */ + std::sort( + prototypes.begin(), prototypes.end(), [](const DrawPrototype &a, const DrawPrototype &b) { + return (a.group_id < b.group_id) || + (a.group_id == b.group_id && a.resource_handle > b.resource_handle); + }); + + /* Compute prefix sum to have correct offsets. */ + uint prefix_sum = 0u; + for (DrawGroup &group : groups) { + group.start = prefix_sum; + prefix_sum += group.front_proto_len + group.back_proto_len; + } + + std::stringstream ss; + + uint group_len = 0; + uint group_index = this->group_first; + while (group_index != (uint)-1) { + const DrawGroup &grp = groups[group_index]; + + ss << std::endl << line_prefix << " .group(id=" << group_index << ", len=" << grp.len << ")"; + + intptr_t offset = grp.start; + + if (grp.back_proto_len > 0) { + for (DrawPrototype &proto : prototypes.slice({offset, grp.back_proto_len})) { + BLI_assert(proto.group_id == group_index); + ResourceHandle handle(proto.resource_handle); + BLI_assert(handle.has_inverted_handedness()); + ss << std::endl + << line_prefix << " .proto(instance_len=" << std::to_string(proto.instance_len) + << ", resource_id=" << std::to_string(handle.resource_index()) << ", back_face)"; + } + offset += grp.back_proto_len; + } + + if (grp.front_proto_len > 0) { + for (DrawPrototype &proto : prototypes.slice({offset, grp.front_proto_len})) { + 
BLI_assert(proto.group_id == group_index); + ResourceHandle handle(proto.resource_handle); + BLI_assert(!handle.has_inverted_handedness()); + ss << std::endl + << line_prefix << " .proto(instance_len=" << std::to_string(proto.instance_len) + << ", resource_id=" << std::to_string(handle.resource_index()) << ", front_face)"; + } + } + + group_index = grp.next; + group_len++; + } + + ss << std::endl; + + return line_prefix + ".draw_multi(" + std::to_string(group_len) + ")" + ss.str(); +} + +std::string DrawIndirect::serialize() const +{ + return std::string(".draw_indirect()"); +} + +std::string Dispatch::serialize() const +{ + int3 sz = is_reference ? *size_ref : size; + return std::string(".dispatch") + (is_reference ? "_ref" : "") + "(" + std::to_string(sz.x) + + ", " + std::to_string(sz.y) + ", " + std::to_string(sz.z) + ")"; +} + +std::string DispatchIndirect::serialize() const +{ + return std::string(".dispatch_indirect()"); +} + +std::string Barrier::serialize() const +{ + /* TODO(@fclem): Better serialization... */ + return std::string(".barrier(") + std::to_string(type) + ")"; +} + +std::string Clear::serialize() const +{ + std::stringstream ss; + if (eGPUFrameBufferBits(clear_channels) & GPU_COLOR_BIT) { + ss << "color=" << color; + if (eGPUFrameBufferBits(clear_channels) & (GPU_DEPTH_BIT | GPU_STENCIL_BIT)) { + ss << ", "; + } + } + if (eGPUFrameBufferBits(clear_channels) & GPU_DEPTH_BIT) { + ss << "depth=" << depth; + if (eGPUFrameBufferBits(clear_channels) & GPU_STENCIL_BIT) { + ss << ", "; + } + } + if (eGPUFrameBufferBits(clear_channels) & GPU_STENCIL_BIT) { + ss << "stencil=0b" << std::bitset<8>(stencil) << ")"; + } + return std::string(".clear(") + ss.str() + ")"; +} + +std::string StateSet::serialize() const +{ + /* TODO(@fclem): Better serialization... 
*/ + return std::string(".state_set(") + std::to_string(new_state) + ")"; +} + +std::string StencilSet::serialize() const +{ + std::stringstream ss; + ss << ".stencil_set(write_mask=0b" << std::bitset<8>(write_mask) << ", compare_mask=0b" + << std::bitset<8>(compare_mask) << ", reference=0b" << std::bitset<8>(reference); + return ss.str(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Commands buffers binding / command / resource ID generation + * \{ */ + +void DrawCommandBuf::bind(RecordingState &state, + Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands) +{ + UNUSED_VARS(headers, commands); + + resource_id_count_ = 0; + + for (const Header &header : headers) { + if (header.type != Type::Draw) { + continue; + } + + Draw &cmd = commands[header.index].draw; + + int batch_vert_len, batch_vert_first, batch_base_index, batch_inst_len; + /* Now that GPUBatches are guaranteed to be finished, extract their parameters. */ + GPU_batch_draw_parameter_get( + cmd.batch, &batch_vert_len, &batch_vert_first, &batch_base_index, &batch_inst_len); + /* Instancing attributes are not supported using the new pipeline since we use the base + * instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */ + BLI_assert(batch_inst_len == 1); + + if (cmd.vertex_len == (uint)-1) { + cmd.vertex_len = batch_vert_len; + } + + if (cmd.handle.raw > 0) { + /* Save correct offset to start of resource_id buffer region for this draw. */ + uint instance_first = resource_id_count_; + resource_id_count_ += cmd.instance_len; + /* Ensure the buffer is big enough. */ + resource_id_buf_.get_or_resize(resource_id_count_ - 1); + + /* Copy the resource id for all instances. 
*/ + uint index = cmd.handle.resource_index(); + for (int i = instance_first; i < (instance_first + cmd.instance_len); i++) { + resource_id_buf_[i] = index; + } + } + } + + resource_id_buf_.push_update(); + + if (GPU_shader_draw_parameters_support() == false) { + state.resource_id_buf = resource_id_buf_; + } + else { + GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT); + } +} + +void DrawMultiBuf::bind(RecordingState &state, + Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + VisibilityBuf &visibility_buf) +{ + UNUSED_VARS(headers, commands); + + GPU_debug_group_begin("DrawMultiBuf.bind"); + + resource_id_count_ = 0u; + for (DrawGroup &group : MutableSpan<DrawGroup>(group_buf_.data(), group_count_)) { + /* Compute prefix sum of all instance of previous group. */ + group.start = resource_id_count_; + resource_id_count_ += group.len; + + int batch_inst_len; + /* Now that GPUBatches are guaranteed to be finished, extract their parameters. */ + GPU_batch_draw_parameter_get(group.gpu_batch, + &group.vertex_len, + &group.vertex_first, + &group.base_index, + &batch_inst_len); + + /* Instancing attributes are not supported using the new pipeline since we use the base + * instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */ + BLI_assert(batch_inst_len == 1); + UNUSED_VARS_NDEBUG(batch_inst_len); + + /* Now that we got the batch information, we can set the counters to 0. */ + group.total_counter = group.front_facing_counter = group.back_facing_counter = 0; + } + + group_buf_.push_update(); + prototype_buf_.push_update(); + /* Allocate enough for the expansion pass. */ + resource_id_buf_.get_or_resize(resource_id_count_); + /* Two command per group. 
*/ + command_buf_.get_or_resize(group_count_ * 2); + + if (prototype_count_ > 0) { + GPUShader *shader = DRW_shader_draw_command_generate_get(); + GPU_shader_bind(shader); + GPU_shader_uniform_1i(shader, "prototype_len", prototype_count_); + GPU_storagebuf_bind(group_buf_, GPU_shader_get_ssbo(shader, "group_buf")); + GPU_storagebuf_bind(visibility_buf, GPU_shader_get_ssbo(shader, "visibility_buf")); + GPU_storagebuf_bind(prototype_buf_, GPU_shader_get_ssbo(shader, "prototype_buf")); + GPU_storagebuf_bind(command_buf_, GPU_shader_get_ssbo(shader, "command_buf")); + GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT); + GPU_compute_dispatch(shader, divide_ceil_u(prototype_count_, DRW_COMMAND_GROUP_SIZE), 1, 1); + if (GPU_shader_draw_parameters_support() == false) { + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + state.resource_id_buf = resource_id_buf_; + } + else { + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + } + + GPU_debug_group_end(); +} + +/** \} */ + +}; // namespace blender::draw::command diff --git a/source/blender/draw/intern/draw_command.hh b/source/blender/draw/intern/draw_command.hh new file mode 100644 index 00000000000..46a9199a267 --- /dev/null +++ b/source/blender/draw/intern/draw_command.hh @@ -0,0 +1,534 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * Commands stored inside draw passes. Converted into GPU commands upon pass submission. + * + * Draw calls (primitive rendering commands) are managed by either `DrawCommandBuf` or + * `DrawMultiBuf`. See implementation details at their definition. 
+ */ + +#include "BKE_global.h" +#include "BLI_map.hh" +#include "DRW_gpu_wrapper.hh" + +#include "draw_command_shared.hh" +#include "draw_handle.hh" +#include "draw_state.h" +#include "draw_view.hh" + +namespace blender::draw::command { + +class DrawCommandBuf; +class DrawMultiBuf; + +/* -------------------------------------------------------------------- */ +/** \name Recording State + * \{ */ + +/** + * Command recording state. + * Keep track of several states and avoid redundant state changes. + */ +struct RecordingState { + GPUShader *shader = nullptr; + bool front_facing = true; + bool inverted_view = false; + DRWState pipeline_state = DRW_STATE_NO_DRAW; + int view_clip_plane_count = 0; + /** Used for gl_BaseInstance workaround. */ + GPUStorageBuf *resource_id_buf = nullptr; + + void front_facing_set(bool facing) + { + /* Facing is inverted if view is not in expected handedness. */ + facing = this->inverted_view == facing; + /* Remove redundant changes. */ + if (assign_if_different(this->front_facing, facing)) { + GPU_front_facing(!facing); + } + } + + void cleanup() + { + if (front_facing == false) { + GPU_front_facing(false); + } + + if (G.debug & G_DEBUG_GPU) { + GPU_storagebuf_unbind_all(); + GPU_texture_image_unbind_all(); + GPU_texture_unbind_all(); + GPU_uniformbuf_unbind_all(); + } + } +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Regular Commands + * \{ */ + +enum class Type : uint8_t { + /** + * None Type commands are either uninitialized or are repurposed as data storage. + * They are skipped during submission. + */ + None = 0, + + /** Commands stored as Undetermined in regular command buffer. */ + Barrier, + Clear, + Dispatch, + DispatchIndirect, + Draw, + DrawIndirect, + PushConstant, + ResourceBind, + ShaderBind, + StateSet, + StencilSet, + + /** Special commands stored in separate buffers. 
*/ + SubPass, + DrawMulti, +}; + +/** + * The index of the group is implicit since it is known by the one who want to + * access it. This also allows to have an indexed object to split the command + * stream. + */ +struct Header { + /** Command type. */ + Type type; + /** Command index in command heap of this type. */ + uint index; +}; + +struct ShaderBind { + GPUShader *shader; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct ResourceBind { + eGPUSamplerState sampler; + int slot; + bool is_reference; + + enum class Type : uint8_t { + Sampler = 0, + Image, + UniformBuf, + StorageBuf, + } type; + + union { + /** TODO: Use draw::Texture|StorageBuffer|UniformBuffer as resources as they will give more + * debug info. */ + GPUUniformBuf *uniform_buf; + GPUUniformBuf **uniform_buf_ref; + GPUStorageBuf *storage_buf; + GPUStorageBuf **storage_buf_ref; + /** NOTE: Texture is used for both Sampler and Image binds. */ + GPUTexture *texture; + GPUTexture **texture_ref; + }; + + ResourceBind() = default; + + ResourceBind(int slot_, GPUUniformBuf *res) + : slot(slot_), is_reference(false), type(Type::UniformBuf), uniform_buf(res){}; + ResourceBind(int slot_, GPUUniformBuf **res) + : slot(slot_), is_reference(true), type(Type::UniformBuf), uniform_buf_ref(res){}; + ResourceBind(int slot_, GPUStorageBuf *res) + : slot(slot_), is_reference(false), type(Type::StorageBuf), storage_buf(res){}; + ResourceBind(int slot_, GPUStorageBuf **res) + : slot(slot_), is_reference(true), type(Type::StorageBuf), storage_buf_ref(res){}; + ResourceBind(int slot_, draw::Image *res) + : slot(slot_), is_reference(false), type(Type::Image), texture(draw::as_texture(res)){}; + ResourceBind(int slot_, draw::Image **res) + : slot(slot_), is_reference(true), type(Type::Image), texture_ref(draw::as_texture(res)){}; + ResourceBind(int slot_, GPUTexture *res, eGPUSamplerState state) + : sampler(state), slot(slot_), is_reference(false), type(Type::Sampler), 
texture(res){}; + ResourceBind(int slot_, GPUTexture **res, eGPUSamplerState state) + : sampler(state), slot(slot_), is_reference(true), type(Type::Sampler), texture_ref(res){}; + + void execute() const; + std::string serialize() const; +}; + +struct PushConstant { + int location; + uint8_t array_len; + uint8_t comp_len; + enum class Type : uint8_t { + IntValue = 0, + FloatValue, + IntReference, + FloatReference, + } type; + /** + * IMPORTANT: Data is at the end of the struct as it can span over the next commands. + * These next commands are not real commands but just memory to hold the data and are not + * referenced by any Command::Header. + * This is a hack to support float4x4 copy. + */ + union { + int int1_value; + int2 int2_value; + int3 int3_value; + int4 int4_value; + float float1_value; + float2 float2_value; + float3 float3_value; + float4 float4_value; + const int *int_ref; + const int2 *int2_ref; + const int3 *int3_ref; + const int4 *int4_ref; + const float *float_ref; + const float2 *float2_ref; + const float3 *float3_ref; + const float4 *float4_ref; + const float4x4 *float4x4_ref; + }; + + PushConstant() = default; + + PushConstant(int loc, const float &val) + : location(loc), array_len(1), comp_len(1), type(Type::FloatValue), float1_value(val){}; + PushConstant(int loc, const float2 &val) + : location(loc), array_len(1), comp_len(2), type(Type::FloatValue), float2_value(val){}; + PushConstant(int loc, const float3 &val) + : location(loc), array_len(1), comp_len(3), type(Type::FloatValue), float3_value(val){}; + PushConstant(int loc, const float4 &val) + : location(loc), array_len(1), comp_len(4), type(Type::FloatValue), float4_value(val){}; + + PushConstant(int loc, const int &val) + : location(loc), array_len(1), comp_len(1), type(Type::IntValue), int1_value(val){}; + PushConstant(int loc, const int2 &val) + : location(loc), array_len(1), comp_len(2), type(Type::IntValue), int2_value(val){}; + PushConstant(int loc, const int3 &val) + : 
location(loc), array_len(1), comp_len(3), type(Type::IntValue), int3_value(val){}; + PushConstant(int loc, const int4 &val) + : location(loc), array_len(1), comp_len(4), type(Type::IntValue), int4_value(val){}; + + PushConstant(int loc, const float *val, int arr) + : location(loc), array_len(arr), comp_len(1), type(Type::FloatReference), float_ref(val){}; + PushConstant(int loc, const float2 *val, int arr) + : location(loc), array_len(arr), comp_len(2), type(Type::FloatReference), float2_ref(val){}; + PushConstant(int loc, const float3 *val, int arr) + : location(loc), array_len(arr), comp_len(3), type(Type::FloatReference), float3_ref(val){}; + PushConstant(int loc, const float4 *val, int arr) + : location(loc), array_len(arr), comp_len(4), type(Type::FloatReference), float4_ref(val){}; + PushConstant(int loc, const float4x4 *val) + : location(loc), array_len(1), comp_len(16), type(Type::FloatReference), float4x4_ref(val){}; + + PushConstant(int loc, const int *val, int arr) + : location(loc), array_len(arr), comp_len(1), type(Type::IntReference), int_ref(val){}; + PushConstant(int loc, const int2 *val, int arr) + : location(loc), array_len(arr), comp_len(2), type(Type::IntReference), int2_ref(val){}; + PushConstant(int loc, const int3 *val, int arr) + : location(loc), array_len(arr), comp_len(3), type(Type::IntReference), int3_ref(val){}; + PushConstant(int loc, const int4 *val, int arr) + : location(loc), array_len(arr), comp_len(4), type(Type::IntReference), int4_ref(val){}; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Draw { + GPUBatch *batch; + uint instance_len; + uint vertex_len; + uint vertex_first; + ResourceHandle handle; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct DrawMulti { + GPUBatch *batch; + DrawMultiBuf *multi_draw_buf; + uint group_first; + uint uuid; + + void execute(RecordingState &state) const; + std::string serialize(std::string line_prefix) 
const; +}; + +struct DrawIndirect { + GPUBatch *batch; + GPUStorageBuf **indirect_buf; + ResourceHandle handle; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Dispatch { + bool is_reference; + union { + int3 size; + int3 *size_ref; + }; + + Dispatch() = default; + + Dispatch(int3 group_len) : is_reference(false), size(group_len){}; + Dispatch(int3 *group_len) : is_reference(true), size_ref(group_len){}; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct DispatchIndirect { + GPUStorageBuf **indirect_buf; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Barrier { + eGPUBarrier type; + + void execute() const; + std::string serialize() const; +}; + +struct Clear { + uint8_t clear_channels; /* #eGPUFrameBufferBits. But want to save some bits. */ + uint8_t stencil; + float depth; + float4 color; + + void execute() const; + std::string serialize() const; +}; + +struct StateSet { + DRWState new_state; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct StencilSet { + uint write_mask; + uint compare_mask; + uint reference; + + void execute() const; + std::string serialize() const; +}; + +union Undetermined { + ShaderBind shader_bind; + ResourceBind resource_bind; + PushConstant push_constant; + Draw draw; + DrawMulti draw_multi; + DrawIndirect draw_indirect; + Dispatch dispatch; + DispatchIndirect dispatch_indirect; + Barrier barrier; + Clear clear; + StateSet state_set; + StencilSet stencil_set; +}; + +/** Try to keep the command size as low as possible for performance. */ +BLI_STATIC_ASSERT(sizeof(Undetermined) <= 24, "One of the command type is too large.") + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw Commands + * + * A draw command buffer used to issue single draw commands without instance merging or any + * other optimizations. 
+ * + * It still uses a ResourceIdBuf to keep the same shader interface as multi draw commands. + * + * \{ */ + +class DrawCommandBuf { + friend Manager; + + private: + using ResourceIdBuf = StorageArrayBuffer<uint, 128, false>; + + /** Array of resource id. One per instance. Generated on GPU and send to GPU. */ + ResourceIdBuf resource_id_buf_; + /** Used items in the resource_id_buf_. Not it's allocated length. */ + uint resource_id_count_ = 0; + + public: + void clear(){}; + + void append_draw(Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + GPUBatch *batch, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) + { + vertex_first = vertex_first != -1 ? vertex_first : 0; + instance_len = instance_len != -1 ? instance_len : 1; + + int64_t index = commands.append_and_get_index({}); + headers.append({Type::Draw, static_cast<uint>(index)}); + commands[index].draw = {batch, instance_len, vertex_len, vertex_first, handle}; + } + + void bind(RecordingState &state, Vector<Header, 0> &headers, Vector<Undetermined, 0> &commands); +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Multi Draw Commands + * + * For efficient rendering of large scene we strive to minimize the number of draw call and state + * changes. To this end, we group many rendering commands and sort them per render state using + * `DrawGroup` as a container. This is done automatically for any successive commands with the + * same state. + * + * A `DrawGroup` is the combination of a `GPUBatch` (VBO state) and a `command::DrawMulti` + * (Pipeline State). + * + * Inside each `DrawGroup` all instances of a same `GPUBatch` is merged into a single indirect + * command. + * + * To support this arbitrary reordering, we only need to know the offset of all the commands for a + * specific `DrawGroup`. This is done on CPU by doing a simple prefix sum. 
The result is pushed to + * GPU and used on CPU to issue the right command indirect. + * + * Each draw command is stored in an unsorted array of `DrawPrototype` and sent directly to the + * GPU. + * + * A command generation compute shader then go over each `DrawPrototype`. For each it adds it (or + * not depending on visibility) to the correct draw command using the offset of the `DrawGroup` + * computed on CPU. After that, it also outputs one resource ID for each instance inside a + * `DrawPrototype`. + * + * \{ */ + +class DrawMultiBuf { + friend Manager; + friend DrawMulti; + + private: + using DrawGroupBuf = StorageArrayBuffer<DrawGroup, 16>; + using DrawPrototypeBuf = StorageArrayBuffer<DrawPrototype, 16>; + using DrawCommandBuf = StorageArrayBuffer<DrawCommand, 16, true>; + using ResourceIdBuf = StorageArrayBuffer<uint, 128, true>; + + using DrawGroupKey = std::pair<uint, GPUBatch *>; + using DrawGroupMap = Map<DrawGroupKey, uint>; + /** Maps a DrawMulti command and a gpu batch to their unique DrawGroup command. */ + DrawGroupMap group_ids_; + + /** DrawGroup Command heap. Uploaded to GPU for sorting. */ + DrawGroupBuf group_buf_ = {"DrawGroupBuf"}; + /** Command Prototypes. Unsorted */ + DrawPrototypeBuf prototype_buf_ = {"DrawPrototypeBuf"}; + /** Command list generated by the sorting / compaction steps. Lives on GPU. */ + DrawCommandBuf command_buf_ = {"DrawCommandBuf"}; + /** Array of resource id. One per instance. Lives on GPU. */ + ResourceIdBuf resource_id_buf_ = {"ResourceIdBuf"}; + /** Give unique ID to each header so we can use that as hash key. */ + uint header_id_counter_ = 0; + /** Number of groups inside group_buf_. */ + uint group_count_ = 0; + /** Number of prototype command inside prototype_buf_. */ + uint prototype_count_ = 0; + /** Used items in the resource_id_buf_. Not it's allocated length. 
*/ + uint resource_id_count_ = 0; + + public: + void clear() + { + header_id_counter_ = 0; + group_count_ = 0; + prototype_count_ = 0; + group_ids_.clear(); + } + + void append_draw(Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + GPUBatch *batch, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) + { + /* Unsupported for now. Use PassSimple. */ + BLI_assert(vertex_first == 0 || vertex_first == -1); + BLI_assert(vertex_len == -1); + UNUSED_VARS_NDEBUG(vertex_len, vertex_first); + + instance_len = instance_len != -1 ? instance_len : 1; + + /* If there was some state changes since previous call, we have to create another command. */ + if (headers.is_empty() || headers.last().type != Type::DrawMulti) { + uint index = commands.append_and_get_index({}); + headers.append({Type::DrawMulti, index}); + commands[index].draw_multi = {batch, this, (uint)-1, header_id_counter_++}; + } + + DrawMulti &cmd = commands.last().draw_multi; + + uint &group_id = group_ids_.lookup_or_add(DrawGroupKey(cmd.uuid, batch), (uint)-1); + + bool inverted = handle.has_inverted_handedness(); + + if (group_id == (uint)-1) { + uint new_group_id = group_count_++; + + DrawGroup &group = group_buf_.get_or_resize(new_group_id); + group.next = cmd.group_first; + group.len = instance_len; + group.front_facing_len = inverted ? 0 : instance_len; + group.gpu_batch = batch; + group.front_proto_len = 0; + group.back_proto_len = 0; + /* For serialization only. */ + (inverted ? group.back_proto_len : group.front_proto_len)++; + /* Append to list. */ + cmd.group_first = new_group_id; + group_id = new_group_id; + } + else { + DrawGroup &group = group_buf_[group_id]; + group.len += instance_len; + group.front_facing_len += inverted ? 0 : instance_len; + /* For serialization only. */ + (inverted ? 
group.back_proto_len : group.front_proto_len)++; + } + + DrawPrototype &draw = prototype_buf_.get_or_resize(prototype_count_++); + draw.group_id = group_id; + draw.resource_handle = handle.raw; + draw.instance_len = instance_len; + } + + void bind(RecordingState &state, + Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + VisibilityBuf &visibility_buf); +}; + +/** \} */ + +}; // namespace blender::draw::command diff --git a/source/blender/draw/intern/draw_command_shared.hh b/source/blender/draw/intern/draw_command_shared.hh new file mode 100644 index 00000000000..9fbbe23f0ce --- /dev/null +++ b/source/blender/draw/intern/draw_command_shared.hh @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#ifndef GPU_SHADER +# include "BLI_span.hh" +# include "GPU_shader_shared_utils.h" + +namespace blender::draw::command { + +struct RecordingState; + +#endif + +/* -------------------------------------------------------------------- */ +/** \name Multi Draw + * \{ */ + +/** + * A DrawGroup allow to split the command stream into batch-able chunks of commands with + * the same render state. + */ +struct DrawGroup { + /** Index of next #DrawGroup from the same header. */ + uint next; + + /** Index of the first instances after sorting. */ + uint start; + /** Total number of instances (including inverted facing). Needed to issue the draw call. */ + uint len; + /** Number of non inverted scaling instances in this Group. */ + uint front_facing_len; + + /** #GPUBatch values to be copied to #DrawCommand after sorting (if not overridden). */ + int vertex_len; + int vertex_first; + int base_index; + + /** Atomic counters used during command sorting. */ + uint total_counter; + +#ifndef GPU_SHADER + /* NOTE: Union just to make sure the struct has always the same size on all platform. */ + union { + struct { + /** For debug printing only. 
*/ + uint front_proto_len; + uint back_proto_len; + /** Needed to create the correct draw call. */ + GPUBatch *gpu_batch; + }; + struct { +#endif + uint front_facing_counter; + uint back_facing_counter; + uint _pad0, _pad1; +#ifndef GPU_SHADER + }; + }; +#endif +}; +BLI_STATIC_ASSERT_ALIGN(DrawGroup, 16) + +/** + * Representation of a future draw call inside a DrawGroup. This #DrawPrototype is then + * converted into #DrawCommand on GPU after visibility and compaction. Multiple + * #DrawPrototype might get merged into the same final #DrawCommand. + */ +struct DrawPrototype { + /* Reference to parent DrawGroup to get the GPUBatch vertex / instance count. */ + uint group_id; + /* Resource handle associated with this call. Also reference visibility. */ + uint resource_handle; + /* Number of instances. */ + uint instance_len; + uint _pad0; +}; +BLI_STATIC_ASSERT_ALIGN(DrawPrototype, 16) + +/** \} */ + +#ifndef GPU_SHADER +}; // namespace blender::draw::command +#endif diff --git a/source/blender/draw/intern/draw_common.c b/source/blender/draw/intern/draw_common.c index 0f330dbb519..c1b4c3c1f81 100644 --- a/source/blender/draw/intern/draw_common.c +++ b/source/blender/draw/intern/draw_common.c @@ -280,10 +280,11 @@ int DRW_object_wire_theme_get(Object *ob, ViewLayer *view_layer, float **r_color { const DRWContextState *draw_ctx = DRW_context_state_get(); const bool is_edit = (draw_ctx->object_mode & OB_MODE_EDIT) && (ob->mode & OB_MODE_EDIT); - const bool active = view_layer->basact && - ((ob->base_flag & BASE_FROM_DUPLI) ? - (DRW_object_get_dupli_parent(ob) == view_layer->basact->object) : - (view_layer->basact->object == ob)); + BKE_view_layer_synced_ensure(draw_ctx->scene, view_layer); + const Base *base = BKE_view_layer_active_base_get(view_layer); + const bool active = base && ((ob->base_flag & BASE_FROM_DUPLI) ? 
+ (DRW_object_get_dupli_parent(ob) == base->object) : + (base->object == ob)); /* confusing logic here, there are 2 methods of setting the color * 'colortab[colindex]' and 'theme_id', colindex overrides theme_id. @@ -417,7 +418,6 @@ bool DRW_object_is_flat(Object *ob, int *r_axis) OB_CURVES_LEGACY, OB_SURF, OB_FONT, - OB_MBALL, OB_CURVES, OB_POINTCLOUD, OB_VOLUME)) { diff --git a/source/blender/draw/intern/draw_common_shader_shared.h b/source/blender/draw/intern/draw_common_shader_shared.h index c9819d9da87..57cb7880ce6 100644 --- a/source/blender/draw/intern/draw_common_shader_shared.h +++ b/source/blender/draw/intern/draw_common_shader_shared.h @@ -19,7 +19,7 @@ typedef struct GlobalsUboStorage GlobalsUboStorage; #define UBO_LAST_COLOR color_uv_shadow /* Used as ubo but colors can be directly referenced as well */ -/* NOTE: Also keep all color as vec4 and between #UBO_FIRST_COLOR and #UBO_LAST_COLOR. */ +/* \note Also keep all color as vec4 and between #UBO_FIRST_COLOR and #UBO_LAST_COLOR. 
*/ struct GlobalsUboStorage { /* UBOs data needs to be 16 byte aligned (size of vec4) */ float4 color_wire; diff --git a/source/blender/draw/intern/draw_curves.cc b/source/blender/draw/intern/draw_curves.cc index c40f2275968..a61769e7a63 100644 --- a/source/blender/draw/intern/draw_curves.cc +++ b/source/blender/draw/intern/draw_curves.cc @@ -33,25 +33,17 @@ #include "draw_manager.h" #include "draw_shader.h" -#ifndef __APPLE__ -# define USE_TRANSFORM_FEEDBACK -# define USE_COMPUTE_SHADERS -#endif - BLI_INLINE eParticleRefineShaderType drw_curves_shader_type_get() { -#ifdef USE_COMPUTE_SHADERS if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) { return PART_REFINE_SHADER_COMPUTE; } -#endif -#ifdef USE_TRANSFORM_FEEDBACK - return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; -#endif + if (GPU_transform_feedback_support()) { + return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; + } return PART_REFINE_SHADER_TRANSFORM_FEEDBACK_WORKAROUND; } -#ifndef USE_TRANSFORM_FEEDBACK struct CurvesEvalCall { struct CurvesEvalCall *next; GPUVertBuf *vbo; @@ -63,7 +55,6 @@ static CurvesEvalCall *g_tf_calls = nullptr; static int g_tf_id_offset; static int g_tf_target_width; static int g_tf_target_height; -#endif static GPUVertBuf *g_dummy_vbo = nullptr; static GPUTexture *g_dummy_texture = nullptr; @@ -106,18 +97,20 @@ void DRW_curves_init(DRWData *drw_data) CurvesUniformBufPool *pool = drw_data->curves_ubos; pool->reset(); -#if defined(USE_TRANSFORM_FEEDBACK) || defined(USE_COMPUTE_SHADERS) - g_tf_pass = DRW_pass_create("Update Curves Pass", (DRWState)0); -#else - g_tf_pass = DRW_pass_create("Update Curves Pass", DRW_STATE_WRITE_COLOR); -#endif + if (GPU_transform_feedback_support() || GPU_compute_shader_support()) { + g_tf_pass = DRW_pass_create("Update Curves Pass", (DRWState)0); + } + else { + g_tf_pass = DRW_pass_create("Update Curves Pass", DRW_STATE_WRITE_COLOR); + } if (g_dummy_vbo == nullptr) { /* initialize vertex format */ GPUVertFormat format = {0}; uint 
dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - g_dummy_vbo = GPU_vertbuf_create_with_format(&format); + g_dummy_vbo = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); const float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f}; GPU_vertbuf_data_alloc(g_dummy_vbo, 1); @@ -201,21 +194,24 @@ static void drw_curves_cache_update_transform_feedback(CurvesEvalCache *cache, { GPUShader *tf_shader = curves_eval_shader_get(CURVES_EVAL_CATMULL_ROM); -#ifdef USE_TRANSFORM_FEEDBACK - DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create(tf_shader, g_tf_pass, vbo); -#else - DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); - - CurvesEvalCall *pr_call = MEM_new<CurvesEvalCall>(__func__); - pr_call->next = g_tf_calls; - pr_call->vbo = vbo; - pr_call->shgrp = tf_shgrp; - pr_call->vert_len = final_points_len; - g_tf_calls = pr_call; - DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); - DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); - DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); -#endif + DRWShadingGroup *tf_shgrp = nullptr; + if (GPU_transform_feedback_support()) { + tf_shgrp = DRW_shgroup_transform_feedback_create(tf_shader, g_tf_pass, vbo); + } + else { + tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); + + CurvesEvalCall *pr_call = MEM_new<CurvesEvalCall>(__func__); + pr_call->next = g_tf_calls; + pr_call->vbo = vbo; + pr_call->shgrp = tf_shgrp; + pr_call->vert_len = final_points_len; + g_tf_calls = pr_call; + DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); + DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); + DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); + } + BLI_assert(tf_shgrp != nullptr); drw_curves_cache_shgrp_attach_resources(tf_shgrp, cache, tex, subdiv); DRW_shgroup_call_procedural_points(tf_shgrp, nullptr, 
final_points_len); @@ -246,13 +242,14 @@ static void drw_curves_cache_update_transform_feedback(CurvesEvalCache *cache, c } } -static CurvesEvalCache *drw_curves_cache_get(Object *object, +static CurvesEvalCache *drw_curves_cache_get(Curves &curves, GPUMaterial *gpu_material, int subdiv, int thickness_res) { CurvesEvalCache *cache; - bool update = curves_ensure_procedural_data(object, &cache, gpu_material, subdiv, thickness_res); + const bool update = curves_ensure_procedural_data( + &curves, &cache, gpu_material, subdiv, thickness_res); if (update) { if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { @@ -268,12 +265,13 @@ static CurvesEvalCache *drw_curves_cache_get(Object *object, GPUVertBuf *DRW_curves_pos_buffer_get(Object *object) { const DRWContextState *draw_ctx = DRW_context_state_get(); - Scene *scene = draw_ctx->scene; + const Scene *scene = draw_ctx->scene; - int subdiv = scene->r.hair_subdiv; - int thickness_res = (scene->r.hair_type == SCE_HAIR_SHAPE_STRAND) ? 1 : 2; + const int subdiv = scene->r.hair_subdiv; + const int thickness_res = (scene->r.hair_type == SCE_HAIR_SHAPE_STRAND) ? 1 : 2; - CurvesEvalCache *cache = drw_curves_cache_get(object, nullptr, subdiv, thickness_res); + Curves &curves = *static_cast<Curves *>(object->data); + CurvesEvalCache *cache = drw_curves_cache_get(curves, nullptr, subdiv, thickness_res); return cache->final[subdiv].proc_buf; } @@ -303,15 +301,16 @@ DRWShadingGroup *DRW_shgroup_curves_create_sub(Object *object, GPUMaterial *gpu_material) { const DRWContextState *draw_ctx = DRW_context_state_get(); - Scene *scene = draw_ctx->scene; + const Scene *scene = draw_ctx->scene; CurvesUniformBufPool *pool = DST.vmempool->curves_ubos; CurvesInfosBuf &curves_infos = pool->alloc(); + Curves &curves_id = *static_cast<Curves *>(object->data); - int subdiv = scene->r.hair_subdiv; - int thickness_res = (scene->r.hair_type == SCE_HAIR_SHAPE_STRAND) ? 
1 : 2; + const int subdiv = scene->r.hair_subdiv; + const int thickness_res = (scene->r.hair_type == SCE_HAIR_SHAPE_STRAND) ? 1 : 2; CurvesEvalCache *curves_cache = drw_curves_cache_get( - object, gpu_material, subdiv, thickness_res); + curves_id, gpu_material, subdiv, thickness_res); DRWShadingGroup *shgrp = DRW_shgroup_create_sub(shgrp_parent); @@ -330,13 +329,10 @@ DRWShadingGroup *DRW_shgroup_curves_create_sub(Object *object, /* Use the radius of the root and tip of the first curve for now. This is a workaround that we * use for now because we can't use a per-point radius yet. */ - Curves &curves_id = *static_cast<Curves *>(object->data); const blender::bke::CurvesGeometry &curves = blender::bke::CurvesGeometry::wrap( curves_id.geometry); if (curves.curves_num() >= 1) { - CurveComponent curves_component; - curves_component.replace(&curves_id, GeometryOwnershipType::ReadOnly); - blender::VArray<float> radii = curves_component.attribute_get_for_read( + blender::VArray<float> radii = curves.attributes().lookup_or_default( "radius", ATTR_DOMAIN_POINT, 0.005f); const blender::IndexRange first_curve_points = curves.points_for_curve(0); const float first_radius = radii[first_curve_points.first()]; @@ -383,7 +379,7 @@ DRWShadingGroup *DRW_shgroup_curves_create_sub(Object *object, * attributes. */ const int index = attribute_index_in_material(gpu_material, request.attribute_name); if (index != -1) { - curves_infos.is_point_attribute[index] = request.domain == ATTR_DOMAIN_POINT; + curves_infos.is_point_attribute[index][0] = request.domain == ATTR_DOMAIN_POINT; } } @@ -411,82 +407,118 @@ void DRW_curves_update() /* Update legacy hair too, to avoid verbosity in callers. */ DRW_hair_update(); -#ifndef USE_TRANSFORM_FEEDBACK - /** - * Workaround to transform feedback not working on mac. - * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). 
- * - * So instead of using transform feedback we render to a texture, - * read back the result to system memory and re-upload as VBO data. - * It is really not ideal performance wise, but it is the simplest - * and the most local workaround that still uses the power of the GPU. - */ - - if (g_tf_calls == nullptr) { - return; - } + if (!GPU_transform_feedback_support()) { + /** + * Workaround to transform feedback not working on mac. + * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). + * + * So instead of using transform feedback we render to a texture, + * read back the result to system memory and re-upload as VBO data. + * It is really not ideal performance wise, but it is the simplest + * and the most local workaround that still uses the power of the GPU. + */ + + if (g_tf_calls == nullptr) { + return; + } - /* Search ideal buffer size. */ - uint max_size = 0; - for (CurvesEvalCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { - max_size = max_ii(max_size, pr_call->vert_len); - } + /* Search ideal buffer size. */ + uint max_size = 0; + for (CurvesEvalCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { + max_size = max_ii(max_size, pr_call->vert_len); + } + + /* Create target Texture / Frame-buffer */ + /* Don't use max size as it can be really heavy and fail. + * Do chunks of maximum 2048 * 2048 hair points. 
*/ + int width = 2048; + int height = min_ii(width, 1 + max_size / width); + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_RGBA32F, (DrawEngineType *)DRW_curves_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPUFrameBuffer *fb = nullptr; + GPU_framebuffer_ensure_config(&fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(tex), + }); + + float *data = static_cast<float *>( + MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer")); + + GPU_framebuffer_bind(fb); + while (g_tf_calls != nullptr) { + CurvesEvalCall *pr_call = g_tf_calls; + g_tf_calls = g_tf_calls->next; + + g_tf_id_offset = 0; + while (pr_call->vert_len > 0) { + int max_read_px_len = min_ii(width * height, pr_call->vert_len); + + DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); + /* Read back result to main memory. */ + GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); + /* Upload back to VBO. */ + GPU_vertbuf_use(pr_call->vbo); + GPU_vertbuf_update_sub(pr_call->vbo, + sizeof(float[4]) * g_tf_id_offset, + sizeof(float[4]) * max_read_px_len, + data); + + g_tf_id_offset += max_read_px_len; + pr_call->vert_len -= max_read_px_len; + } - /* Create target Texture / Frame-buffer */ - /* Don't use max size as it can be really heavy and fail. - * Do chunks of maximum 2048 * 2048 hair points. 
*/ - int width = 2048; - int height = min_ii(width, 1 + max_size / width); - GPUTexture *tex = DRW_texture_pool_query_2d( - width, height, GPU_RGBA32F, (DrawEngineType *)DRW_curves_update); - g_tf_target_height = height; - g_tf_target_width = width; - - GPUFrameBuffer *fb = nullptr; - GPU_framebuffer_ensure_config(&fb, - { - GPU_ATTACHMENT_NONE, - GPU_ATTACHMENT_TEXTURE(tex), - }); - - float *data = static_cast<float *>( - MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer")); - - GPU_framebuffer_bind(fb); - while (g_tf_calls != nullptr) { - CurvesEvalCall *pr_call = g_tf_calls; - g_tf_calls = g_tf_calls->next; - - g_tf_id_offset = 0; - while (pr_call->vert_len > 0) { - int max_read_px_len = min_ii(width * height, pr_call->vert_len); - - DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); - /* Read back result to main memory. */ - GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); - /* Upload back to VBO. */ - GPU_vertbuf_use(pr_call->vbo); - GPU_vertbuf_update_sub(pr_call->vbo, - sizeof(float[4]) * g_tf_id_offset, - sizeof(float[4]) * max_read_px_len, - data); - - g_tf_id_offset += max_read_px_len; - pr_call->vert_len -= max_read_px_len; + MEM_freeN(pr_call); } - MEM_freeN(pr_call); + MEM_freeN(data); + GPU_framebuffer_free(fb); } + else { + /* NOTE(Metal): If compute is not supported, bind a temporary frame-buffer to avoid + * side-effects from rendering in the active buffer. + * We also need to guarantee that a Frame-buffer is active to perform any rendering work, + * even if there is no output */ + GPUFrameBuffer *temp_fb = nullptr; + GPUFrameBuffer *prev_fb = nullptr; + if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) { + if (!GPU_compute_shader_support()) { + prev_fb = GPU_framebuffer_active_get(); + char errorOut[256]; + /* if the frame-buffer is invalid we need a dummy frame-buffer to be bound. 
*/ + if (!GPU_framebuffer_check_valid(prev_fb, errorOut)) { + int width = 64; + int height = 64; + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_DEPTH_COMPONENT32F, (DrawEngineType *)DRW_hair_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPU_framebuffer_ensure_config(&temp_fb, {GPU_ATTACHMENT_TEXTURE(tex)}); + + GPU_framebuffer_bind(temp_fb); + } + } + } - MEM_freeN(data); - GPU_framebuffer_free(fb); -#else - /* Just render the pass when using compute shaders or transform feedback. */ - DRW_draw_pass(g_tf_pass); - if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { - GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + /* Just render the pass when using compute shaders or transform feedback. */ + DRW_draw_pass(g_tf_pass); + if (drw_curves_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + + /* Release temporary frame-buffer. */ + if (temp_fb != nullptr) { + GPU_framebuffer_free(temp_fb); + } + /* Rebind existing frame-buffer */ + if (prev_fb != nullptr) { + GPU_framebuffer_bind(prev_fb); + } } -#endif } void DRW_curves_free() diff --git a/source/blender/draw/intern/draw_curves_private.h b/source/blender/draw/intern/draw_curves_private.h index 7d54e1089d6..31122ed5248 100644 --- a/source/blender/draw/intern/draw_curves_private.h +++ b/source/blender/draw/intern/draw_curves_private.h @@ -16,6 +16,12 @@ extern "C" { #endif +struct Curves; +struct GPUVertBuf; +struct GPUIndexBuf; +struct GPUBatch; +struct GPUTexture; + #define MAX_THICKRES 2 /* see eHairType */ #define MAX_HAIR_SUBDIV 4 /* see hair_subdiv rna */ @@ -25,11 +31,6 @@ typedef enum CurvesEvalShader { } CurvesEvalShader; #define CURVES_EVAL_SHADER_NUM 3 -struct GPUVertBuf; -struct GPUIndexBuf; -struct GPUBatch; -struct GPUTexture; - typedef struct CurvesEvalFinalCache { /* Output of the subdivision stage: vertex buffer sized to subdiv level. 
*/ GPUVertBuf *proc_buf; @@ -95,7 +96,7 @@ typedef struct CurvesEvalCache { /** * Ensure all necessary textures and buffers exist for GPU accelerated drawing. */ -bool curves_ensure_procedural_data(struct Object *object, +bool curves_ensure_procedural_data(struct Curves *curves, struct CurvesEvalCache **r_hair_cache, struct GPUMaterial *gpu_material, int subdiv, diff --git a/source/blender/draw/intern/draw_debug.c b/source/blender/draw/intern/draw_debug.c deleted file mode 100644 index b568119627e..00000000000 --- a/source/blender/draw/intern/draw_debug.c +++ /dev/null @@ -1,196 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2018 Blender Foundation. */ - -/** \file - * \ingroup draw - * - * \brief Simple API to draw debug shapes in the viewport. - */ - -#include "MEM_guardedalloc.h" - -#include "DNA_object_types.h" - -#include "BKE_object.h" - -#include "BLI_link_utils.h" - -#include "GPU_immediate.h" -#include "GPU_matrix.h" - -#include "draw_debug.h" -#include "draw_manager.h" - -/* --------- Register --------- */ - -/* Matrix applied to all points before drawing. Could be a stack if needed. 
*/ -static float g_modelmat[4][4]; - -void DRW_debug_modelmat_reset(void) -{ - unit_m4(g_modelmat); -} - -void DRW_debug_modelmat(const float modelmat[4][4]) -{ - copy_m4_m4(g_modelmat, modelmat); -} - -void DRW_debug_line_v3v3(const float v1[3], const float v2[3], const float color[4]) -{ - DRWDebugLine *line = MEM_mallocN(sizeof(DRWDebugLine), "DRWDebugLine"); - mul_v3_m4v3(line->pos[0], g_modelmat, v1); - mul_v3_m4v3(line->pos[1], g_modelmat, v2); - copy_v4_v4(line->color, color); - BLI_LINKS_PREPEND(DST.debug.lines, line); -} - -void DRW_debug_polygon_v3(const float (*v)[3], const int vert_len, const float color[4]) -{ - BLI_assert(vert_len > 1); - - for (int i = 0; i < vert_len; i++) { - DRW_debug_line_v3v3(v[i], v[(i + 1) % vert_len], color); - } -} - -void DRW_debug_m4(const float m[4][4]) -{ - float v0[3] = {0.0f, 0.0f, 0.0f}; - float v1[3] = {1.0f, 0.0f, 0.0f}; - float v2[3] = {0.0f, 1.0f, 0.0f}; - float v3[3] = {0.0f, 0.0f, 1.0f}; - - mul_m4_v3(m, v0); - mul_m4_v3(m, v1); - mul_m4_v3(m, v2); - mul_m4_v3(m, v3); - - DRW_debug_line_v3v3(v0, v1, (float[4]){1.0f, 0.0f, 0.0f, 1.0f}); - DRW_debug_line_v3v3(v0, v2, (float[4]){0.0f, 1.0f, 0.0f, 1.0f}); - DRW_debug_line_v3v3(v0, v3, (float[4]){0.0f, 0.0f, 1.0f, 1.0f}); -} - -void DRW_debug_bbox(const BoundBox *bbox, const float color[4]) -{ - DRW_debug_line_v3v3(bbox->vec[0], bbox->vec[1], color); - DRW_debug_line_v3v3(bbox->vec[1], bbox->vec[2], color); - DRW_debug_line_v3v3(bbox->vec[2], bbox->vec[3], color); - DRW_debug_line_v3v3(bbox->vec[3], bbox->vec[0], color); - - DRW_debug_line_v3v3(bbox->vec[4], bbox->vec[5], color); - DRW_debug_line_v3v3(bbox->vec[5], bbox->vec[6], color); - DRW_debug_line_v3v3(bbox->vec[6], bbox->vec[7], color); - DRW_debug_line_v3v3(bbox->vec[7], bbox->vec[4], color); - - DRW_debug_line_v3v3(bbox->vec[0], bbox->vec[4], color); - DRW_debug_line_v3v3(bbox->vec[1], bbox->vec[5], color); - DRW_debug_line_v3v3(bbox->vec[2], bbox->vec[6], color); - DRW_debug_line_v3v3(bbox->vec[3], 
bbox->vec[7], color); -} - -void DRW_debug_m4_as_bbox(const float m[4][4], const float color[4], const bool invert) -{ - BoundBox bb; - const float min[3] = {-1.0f, -1.0f, -1.0f}, max[3] = {1.0f, 1.0f, 1.0f}; - float project_matrix[4][4]; - if (invert) { - invert_m4_m4(project_matrix, m); - } - else { - copy_m4_m4(project_matrix, m); - } - - BKE_boundbox_init_from_minmax(&bb, min, max); - for (int i = 0; i < 8; i++) { - mul_project_m4_v3(project_matrix, bb.vec[i]); - } - DRW_debug_bbox(&bb, color); -} - -void DRW_debug_sphere(const float center[3], const float radius, const float color[4]) -{ - float size_mat[4][4]; - DRWDebugSphere *sphere = MEM_mallocN(sizeof(DRWDebugSphere), "DRWDebugSphere"); - /* Bake all transform into a Matrix4 */ - scale_m4_fl(size_mat, radius); - copy_m4_m4(sphere->mat, g_modelmat); - translate_m4(sphere->mat, center[0], center[1], center[2]); - mul_m4_m4m4(sphere->mat, sphere->mat, size_mat); - - copy_v4_v4(sphere->color, color); - BLI_LINKS_PREPEND(DST.debug.spheres, sphere); -} - -/* --------- Render --------- */ - -static void drw_debug_draw_lines(void) -{ - int count = BLI_linklist_count((LinkNode *)DST.debug.lines); - - if (count == 0) { - return; - } - - GPUVertFormat *vert_format = immVertexFormat(); - uint pos = GPU_vertformat_attr_add(vert_format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT); - uint col = GPU_vertformat_attr_add(vert_format, "color", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - - immBindBuiltinProgram(GPU_SHADER_3D_FLAT_COLOR); - - immBegin(GPU_PRIM_LINES, count * 2); - - while (DST.debug.lines) { - void *next = DST.debug.lines->next; - - immAttr4fv(col, DST.debug.lines->color); - immVertex3fv(pos, DST.debug.lines->pos[0]); - - immAttr4fv(col, DST.debug.lines->color); - immVertex3fv(pos, DST.debug.lines->pos[1]); - - MEM_freeN(DST.debug.lines); - DST.debug.lines = next; - } - immEnd(); - - immUnbindProgram(); -} - -static void drw_debug_draw_spheres(void) -{ - int count = BLI_linklist_count((LinkNode *)DST.debug.spheres); - 
- if (count == 0) { - return; - } - - float persmat[4][4]; - DRW_view_persmat_get(NULL, persmat, false); - - GPUBatch *empty_sphere = DRW_cache_empty_sphere_get(); - GPU_batch_program_set_builtin(empty_sphere, GPU_SHADER_3D_UNIFORM_COLOR); - while (DST.debug.spheres) { - void *next = DST.debug.spheres->next; - float MVP[4][4]; - - mul_m4_m4m4(MVP, persmat, DST.debug.spheres->mat); - GPU_batch_uniform_mat4(empty_sphere, "ModelViewProjectionMatrix", MVP); - GPU_batch_uniform_4fv(empty_sphere, "color", DST.debug.spheres->color); - GPU_batch_draw(empty_sphere); - - MEM_freeN(DST.debug.spheres); - DST.debug.spheres = next; - } -} - -void drw_debug_draw(void) -{ - drw_debug_draw_lines(); - drw_debug_draw_spheres(); -} - -void drw_debug_init(void) -{ - DRW_debug_modelmat_reset(); -} diff --git a/source/blender/draw/intern/draw_debug.cc b/source/blender/draw/intern/draw_debug.cc new file mode 100644 index 00000000000..b0662a42ea0 --- /dev/null +++ b/source/blender/draw/intern/draw_debug.cc @@ -0,0 +1,736 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2018 Blender Foundation. */ + +/** \file + * \ingroup draw + * + * \brief Simple API to draw debug shapes in the viewport. + */ + +#include "BKE_object.h" +#include "BLI_link_utils.h" +#include "GPU_batch.h" +#include "GPU_capabilities.h" +#include "GPU_debug.h" + +#include "draw_debug.h" +#include "draw_debug.hh" +#include "draw_manager.h" +#include "draw_shader.h" +#include "draw_shader_shared.h" + +#include <iomanip> + +#ifdef DEBUG +# define DRAW_DEBUG +#else +/* Uncomment to forcibly enable debug draw in release mode. 
*/ +//#define DRAW_DEBUG +#endif + +namespace blender::draw { + +/* -------------------------------------------------------------------- */ +/** \name Init and state + * \{ */ + +DebugDraw::DebugDraw() +{ + constexpr int circle_resolution = 16; + for (auto axis : IndexRange(3)) { + for (auto edge : IndexRange(circle_resolution)) { + for (auto vert : IndexRange(2)) { + const float angle = (2 * M_PI) * (edge + vert) / float(circle_resolution); + float point[3] = {cosf(angle), sinf(angle), 0.0f}; + sphere_verts_.append( + float3(point[(0 + axis) % 3], point[(1 + axis) % 3], point[(2 + axis) % 3])); + } + } + } + + constexpr int point_resolution = 4; + for (auto axis : IndexRange(3)) { + for (auto edge : IndexRange(point_resolution)) { + for (auto vert : IndexRange(2)) { + const float angle = (2 * M_PI) * (edge + vert) / float(point_resolution); + float point[3] = {cosf(angle), sinf(angle), 0.0f}; + point_verts_.append( + float3(point[(0 + axis) % 3], point[(1 + axis) % 3], point[(2 + axis) % 3])); + } + } + } +}; + +void DebugDraw::init() +{ + cpu_print_buf_.command.vertex_len = 0; + cpu_print_buf_.command.vertex_first = 0; + cpu_print_buf_.command.instance_len = 1; + cpu_print_buf_.command.instance_first_array = 0; + + cpu_draw_buf_.command.vertex_len = 0; + cpu_draw_buf_.command.vertex_first = 0; + cpu_draw_buf_.command.instance_len = 1; + cpu_draw_buf_.command.instance_first_array = 0; + + gpu_print_buf_.command.vertex_len = 0; + gpu_print_buf_.command.vertex_first = 0; + gpu_print_buf_.command.instance_len = 1; + gpu_print_buf_.command.instance_first_array = 0; + gpu_print_buf_used = false; + + gpu_draw_buf_.command.vertex_len = 0; + gpu_draw_buf_.command.vertex_first = 0; + gpu_draw_buf_.command.instance_len = 1; + gpu_draw_buf_.command.instance_first_array = 0; + gpu_draw_buf_used = false; + + modelmat_reset(); +} + +void DebugDraw::modelmat_reset() +{ + model_mat_ = float4x4::identity(); +} + +void DebugDraw::modelmat_set(const float modelmat[4][4]) +{ + 
model_mat_ = modelmat; +} + +GPUStorageBuf *DebugDraw::gpu_draw_buf_get() +{ + BLI_assert(GPU_shader_storage_buffer_objects_support()); + if (!gpu_draw_buf_used) { + gpu_draw_buf_used = true; + gpu_draw_buf_.push_update(); + } + return gpu_draw_buf_; +} + +GPUStorageBuf *DebugDraw::gpu_print_buf_get() +{ + BLI_assert(GPU_shader_storage_buffer_objects_support()); + if (!gpu_print_buf_used) { + gpu_print_buf_used = true; + gpu_print_buf_.push_update(); + } + return gpu_print_buf_; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw functions + * \{ */ + +void DebugDraw::draw_line(float3 v1, float3 v2, float4 color) +{ + draw_line(v1, v2, color_pack(color)); +} + +void DebugDraw::draw_polygon(Span<float3> poly_verts, float4 color) +{ + BLI_assert(!poly_verts.is_empty()); + + uint col = color_pack(color); + float3 v0 = model_mat_ * poly_verts.last(); + for (auto vert : poly_verts) { + float3 v1 = model_mat_ * vert; + draw_line(v0, v1, col); + v0 = v1; + } +} + +void DebugDraw::draw_matrix(const float4x4 m4) +{ + float3 v0 = float3(0.0f, 0.0f, 0.0f); + float3 v1 = float3(1.0f, 0.0f, 0.0f); + float3 v2 = float3(0.0f, 1.0f, 0.0f); + float3 v3 = float3(0.0f, 0.0f, 1.0f); + + mul_project_m4_v3(m4.ptr(), v0); + mul_project_m4_v3(m4.ptr(), v1); + mul_project_m4_v3(m4.ptr(), v2); + mul_project_m4_v3(m4.ptr(), v3); + + draw_line(v0, v1, float4(1.0f, 0.0f, 0.0f, 1.0f)); + draw_line(v0, v2, float4(0.0f, 1.0f, 0.0f, 1.0f)); + draw_line(v0, v3, float4(0.0f, 0.0f, 1.0f, 1.0f)); +} + +void DebugDraw::draw_bbox(const BoundBox &bbox, const float4 color) +{ + uint col = color_pack(color); + draw_line(bbox.vec[0], bbox.vec[1], col); + draw_line(bbox.vec[1], bbox.vec[2], col); + draw_line(bbox.vec[2], bbox.vec[3], col); + draw_line(bbox.vec[3], bbox.vec[0], col); + + draw_line(bbox.vec[4], bbox.vec[5], col); + draw_line(bbox.vec[5], bbox.vec[6], col); + draw_line(bbox.vec[6], bbox.vec[7], col); + draw_line(bbox.vec[7], bbox.vec[4], 
col); + + draw_line(bbox.vec[0], bbox.vec[4], col); + draw_line(bbox.vec[1], bbox.vec[5], col); + draw_line(bbox.vec[2], bbox.vec[6], col); + draw_line(bbox.vec[3], bbox.vec[7], col); +} + +void DebugDraw::draw_matrix_as_bbox(float4x4 mat, const float4 color) +{ + BoundBox bb; + const float min[3] = {-1.0f, -1.0f, -1.0f}, max[3] = {1.0f, 1.0f, 1.0f}; + BKE_boundbox_init_from_minmax(&bb, min, max); + for (auto i : IndexRange(8)) { + mul_project_m4_v3(mat.ptr(), bb.vec[i]); + } + draw_bbox(bb, color); +} + +void DebugDraw::draw_sphere(const float3 center, float radius, const float4 color) +{ + uint col = color_pack(color); + for (auto i : IndexRange(sphere_verts_.size() / 2)) { + float3 v0 = sphere_verts_[i * 2] * radius + center; + float3 v1 = sphere_verts_[i * 2 + 1] * radius + center; + draw_line(v0, v1, col); + } +} + +void DebugDraw::draw_point(const float3 center, float radius, const float4 color) +{ + uint col = color_pack(color); + for (auto i : IndexRange(point_verts_.size() / 2)) { + float3 v0 = point_verts_[i * 2] * radius + center; + float3 v1 = point_verts_[i * 2 + 1] * radius + center; + draw_line(v0, v1, col); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Print functions + * \{ */ + +template<> void DebugDraw::print_value<uint>(const uint &value) +{ + print_value_uint(value, false, false, true); +} +template<> void DebugDraw::print_value<int>(const int &value) +{ + print_value_uint(uint(abs(value)), false, (value < 0), false); +} +template<> void DebugDraw::print_value<bool>(const bool &value) +{ + print_string(value ? 
"true " : "false"); +} +template<> void DebugDraw::print_value<float>(const float &val) +{ + std::stringstream ss; + ss << std::setw(12) << std::to_string(val); + print_string(ss.str()); +} +template<> void DebugDraw::print_value<double>(const double &val) +{ + print_value(float(val)); +} + +template<> void DebugDraw::print_value_hex<uint>(const uint &value) +{ + print_value_uint(value, true, false, false); +} +template<> void DebugDraw::print_value_hex<int>(const int &value) +{ + print_value_uint(uint(value), true, false, false); +} +template<> void DebugDraw::print_value_hex<float>(const float &value) +{ + print_value_uint(*reinterpret_cast<const uint *>(&value), true, false, false); +} +template<> void DebugDraw::print_value_hex<double>(const double &val) +{ + print_value_hex(float(val)); +} + +template<> void DebugDraw::print_value_binary<uint>(const uint &value) +{ + print_value_binary(value); +} +template<> void DebugDraw::print_value_binary<int>(const int &value) +{ + print_value_binary(uint(value)); +} +template<> void DebugDraw::print_value_binary<float>(const float &value) +{ + print_value_binary(*reinterpret_cast<const uint *>(&value)); +} +template<> void DebugDraw::print_value_binary<double>(const double &val) +{ + print_value_binary(float(val)); +} + +template<> void DebugDraw::print_value<float2>(const float2 &value) +{ + print_no_endl("float2(", value[0], ", ", value[1], ")"); +} +template<> void DebugDraw::print_value<float3>(const float3 &value) +{ + print_no_endl("float3(", value[0], ", ", value[1], ", ", value[1], ")"); +} +template<> void DebugDraw::print_value<float4>(const float4 &value) +{ + print_no_endl("float4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +template<> void DebugDraw::print_value<int2>(const int2 &value) +{ + print_no_endl("int2(", value[0], ", ", value[1], ")"); +} +template<> void DebugDraw::print_value<int3>(const int3 &value) +{ + print_no_endl("int3(", value[0], ", ", value[1], ", ", value[1], 
")"); +} +template<> void DebugDraw::print_value<int4>(const int4 &value) +{ + print_no_endl("int4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +template<> void DebugDraw::print_value<uint2>(const uint2 &value) +{ + print_no_endl("uint2(", value[0], ", ", value[1], ")"); +} +template<> void DebugDraw::print_value<uint3>(const uint3 &value) +{ + print_no_endl("uint3(", value[0], ", ", value[1], ", ", value[1], ")"); +} +template<> void DebugDraw::print_value<uint4>(const uint4 &value) +{ + print_no_endl("uint4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Internals + * + * IMPORTANT: All of these are copied from the shader libs (common_debug_draw_lib.glsl & + * common_debug_print_lib.glsl). They need to be kept in sync to write the same data. + * \{ */ + +void DebugDraw::draw_line(float3 v1, float3 v2, uint color) +{ + DebugDrawBuf &buf = cpu_draw_buf_; + uint index = buf.command.vertex_len; + if (index + 2 < DRW_DEBUG_DRAW_VERT_MAX) { + buf.verts[index + 0] = vert_pack(model_mat_ * v1, color); + buf.verts[index + 1] = vert_pack(model_mat_ * v2, color); + buf.command.vertex_len += 2; + } +} + +/* Keep in sync with drw_debug_color_pack(). 
*/ +uint DebugDraw::color_pack(float4 color) +{ + color = math::clamp(color, 0.0f, 1.0f); + uint result = 0; + result |= uint(color.x * 255.0f) << 0u; + result |= uint(color.y * 255.0f) << 8u; + result |= uint(color.z * 255.0f) << 16u; + result |= uint(color.w * 255.0f) << 24u; + return result; +} + +DRWDebugVert DebugDraw::vert_pack(float3 pos, uint color) +{ + DRWDebugVert vert; + vert.pos0 = *reinterpret_cast<uint32_t *>(&pos.x); + vert.pos1 = *reinterpret_cast<uint32_t *>(&pos.y); + vert.pos2 = *reinterpret_cast<uint32_t *>(&pos.z); + vert.color = color; + return vert; +} + +void DebugDraw::print_newline() +{ + print_col_ = 0u; + print_row_ = ++cpu_print_buf_.command.instance_first_array; +} + +void DebugDraw::print_string_start(uint len) +{ + /* Break before word. */ + if (print_col_ + len > DRW_DEBUG_PRINT_WORD_WRAP_COLUMN) { + print_newline(); + } +} + +/* Copied from gpu_shader_dependency. */ +void DebugDraw::print_string(std::string str) +{ + size_t len_before_pad = str.length(); + /* Pad string to uint size to avoid out of bound reads. */ + while (str.length() % 4 != 0) { + str += " "; + } + + print_string_start(len_before_pad); + for (size_t i = 0; i < len_before_pad; i += 4) { + union { + uint8_t chars[4]; + uint32_t word; + }; + + chars[0] = *(reinterpret_cast<const uint8_t *>(str.c_str()) + i + 0); + chars[1] = *(reinterpret_cast<const uint8_t *>(str.c_str()) + i + 1); + chars[2] = *(reinterpret_cast<const uint8_t *>(str.c_str()) + i + 2); + chars[3] = *(reinterpret_cast<const uint8_t *>(str.c_str()) + i + 3); + + if (i + 4 > len_before_pad) { + chars[len_before_pad - i] = '\0'; + } + print_char4(word); + } +} + +/* Keep in sync with shader. */ +void DebugDraw::print_char4(uint data) +{ + /* Convert into char stream. */ + for (; data != 0u; data >>= 8u) { + uint char1 = data & 0xFFu; + /* Check for null terminator. */ + if (char1 == 0x00) { + break; + } + /* NOTE: Do not skip the header manually like in GPU. 
*/ + uint cursor = cpu_print_buf_.command.vertex_len++; + if (cursor < DRW_DEBUG_PRINT_MAX) { + /* For future usage. (i.e: Color) */ + uint flags = 0u; + uint col = print_col_++; + uint print_header = (flags << 24u) | (print_row_ << 16u) | (col << 8u); + cpu_print_buf_.char_array[cursor] = print_header | char1; + /* Break word. */ + if (print_col_ > DRW_DEBUG_PRINT_WORD_WRAP_COLUMN) { + print_newline(); + } + } + } +} + +void DebugDraw::print_append_char(uint char1, uint &char4) +{ + char4 = (char4 << 8u) | char1; +} + +void DebugDraw::print_append_digit(uint digit, uint &char4) +{ + const uint char_A = 0x41u; + const uint char_0 = 0x30u; + bool is_hexadecimal = digit > 9u; + char4 = (char4 << 8u) | (is_hexadecimal ? (char_A + digit - 10u) : (char_0 + digit)); +} + +void DebugDraw::print_append_space(uint &char4) +{ + char4 = (char4 << 8u) | 0x20u; +} + +void DebugDraw::print_value_binary(uint value) +{ + print_string("0b"); + print_string_start(10u * 4u); + uint digits[10] = {0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u}; + uint digit = 0u; + for (uint i = 0u; i < 32u; i++) { + print_append_digit(((value >> i) & 1u), digits[digit / 4u]); + digit++; + if ((i % 4u) == 3u) { + print_append_space(digits[digit / 4u]); + digit++; + } + } + /* Numbers are written from right to left. So we need to reverse the order. */ + for (int j = 9; j >= 0; j--) { + print_char4(digits[j]); + } +} + +void DebugDraw::print_value_uint(uint value, + const bool hex, + bool is_negative, + const bool is_unsigned) +{ + print_string_start(3u * 4u); + const uint blank_value = hex ? 0x30303030u : 0x20202020u; + const uint prefix = hex ? 0x78302020u : 0x20202020u; + uint digits[3] = {blank_value, blank_value, prefix}; + const uint base = hex ? 16u : 10u; + uint digit = 0u; + /* Add `u` suffix. */ + if (is_unsigned) { + print_append_char('u', digits[digit / 4u]); + digit++; + } + /* Number's digits. 
*/ + for (; value != 0u || digit == uint(is_unsigned); value /= base) { + print_append_digit(value % base, digits[digit / 4u]); + digit++; + } + /* Add negative sign. */ + if (is_negative) { + print_append_char('-', digits[digit / 4u]); + digit++; + } + /* Need to pad to uint alignment because we are issuing chars in "reverse". */ + for (uint i = digit % 4u; i < 4u && i > 0u; i++) { + print_append_space(digits[digit / 4u]); + digit++; + } + /* Numbers are written from right to left. So we need to reverse the order. */ + for (int j = 2; j >= 0; j--) { + print_char4(digits[j]); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Display + * \{ */ + +void DebugDraw::display_lines() +{ + if (cpu_draw_buf_.command.vertex_len == 0 && gpu_draw_buf_used == false) { + return; + } + GPU_debug_group_begin("Lines"); + cpu_draw_buf_.push_update(); + + float4x4 persmat; + const DRWView *view = DRW_view_get_active(); + DRW_view_persmat_get(view, persmat.ptr(), false); + + drw_state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS); + + GPUBatch *batch = drw_cache_procedural_lines_get(); + GPUShader *shader = DRW_shader_debug_draw_display_get(); + GPU_batch_set_shader(batch, shader); + int slot = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_VERTS); + GPU_shader_uniform_mat4(shader, "persmat", persmat.ptr()); + + if (gpu_draw_buf_used) { + GPU_debug_group_begin("GPU"); + GPU_storagebuf_bind(gpu_draw_buf_, slot); + GPU_batch_draw_indirect(batch, gpu_draw_buf_, 0); + GPU_storagebuf_unbind(gpu_draw_buf_); + GPU_debug_group_end(); + } + + GPU_debug_group_begin("CPU"); + GPU_storagebuf_bind(cpu_draw_buf_, slot); + GPU_batch_draw_indirect(batch, cpu_draw_buf_, 0); + GPU_storagebuf_unbind(cpu_draw_buf_); + GPU_debug_group_end(); + + GPU_debug_group_end(); +} + +void DebugDraw::display_prints() +{ + if (cpu_print_buf_.command.vertex_len == 0 && gpu_print_buf_used == false) { + return; + } + 
GPU_debug_group_begin("Prints"); + cpu_print_buf_.push_update(); + + drw_state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_PROGRAM_POINT_SIZE); + + GPUBatch *batch = drw_cache_procedural_points_get(); + GPUShader *shader = DRW_shader_debug_print_display_get(); + GPU_batch_set_shader(batch, shader); + int slot = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_PRINT); + + if (gpu_print_buf_used) { + GPU_debug_group_begin("GPU"); + GPU_storagebuf_bind(gpu_print_buf_, slot); + GPU_batch_draw_indirect(batch, gpu_print_buf_, 0); + GPU_storagebuf_unbind(gpu_print_buf_); + GPU_debug_group_end(); + } + + GPU_debug_group_begin("CPU"); + GPU_storagebuf_bind(cpu_print_buf_, slot); + GPU_batch_draw_indirect(batch, cpu_print_buf_, 0); + GPU_storagebuf_unbind(cpu_print_buf_); + GPU_debug_group_end(); + + GPU_debug_group_end(); +} + +void DebugDraw::display_to_view() +{ + GPU_debug_group_begin("DebugDraw"); + + display_lines(); + /* Print 3D shapes before text to avoid overlaps. */ + display_prints(); + /* Init again so we don't draw the same thing twice. */ + init(); + + GPU_debug_group_end(); +} + +} // namespace blender::draw + +blender::draw::DebugDraw *DRW_debug_get() +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return nullptr; + } + return reinterpret_cast<blender::draw::DebugDraw *>(DST.debug); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name C-API private + * \{ */ + +void drw_debug_draw() +{ +#ifdef DRAW_DEBUG + if (!GPU_shader_storage_buffer_objects_support() || DST.debug == nullptr) { + return; + } + /* TODO(@fclem): Convenience for now. Will have to move to #DRWManager. */ + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->display_to_view(); +#endif +} + +/** + * NOTE: Init is once per draw manager cycle. + */ +void drw_debug_init() +{ + /* Module should not be used in release builds. */ + /* TODO(@fclem): Hide the functions declarations without using `ifdefs` everywhere. 
*/ +#ifdef DRAW_DEBUG + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + /* TODO(@fclem): Convenience for now. Will have to move to #DRWManager. */ + if (DST.debug == nullptr) { + DST.debug = reinterpret_cast<DRWDebugModule *>(new blender::draw::DebugDraw()); + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->init(); +#endif +} + +void drw_debug_module_free(DRWDebugModule *module) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + if (module != nullptr) { + delete reinterpret_cast<blender::draw::DebugDraw *>(module); + } +} + +GPUStorageBuf *drw_debug_gpu_draw_buf_get() +{ + return reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->gpu_draw_buf_get(); +} + +GPUStorageBuf *drw_debug_gpu_print_buf_get() +{ + return reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->gpu_print_buf_get(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name C-API public + * \{ */ + +void DRW_debug_modelmat_reset() +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->modelmat_reset(); +} + +void DRW_debug_modelmat(const float modelmat[4][4]) +{ +#ifdef DRAW_DEBUG + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->modelmat_set(modelmat); +#else + UNUSED_VARS(modelmat); +#endif +} + +void DRW_debug_line_v3v3(const float v1[3], const float v2[3], const float color[4]) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_line(v1, v2, color); +} + +void DRW_debug_polygon_v3(const float (*v)[3], int vert_len, const float color[4]) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_polygon( + blender::Span<float3>((float3 *)v, vert_len), color); +} + +void 
DRW_debug_m4(const float m[4][4]) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_matrix(m); +} + +void DRW_debug_m4_as_bbox(const float m[4][4], bool invert, const float color[4]) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + blender::float4x4 m4 = m; + if (invert) { + m4 = m4.inverted(); + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_matrix_as_bbox(m4, color); +} + +void DRW_debug_bbox(const BoundBox *bbox, const float color[4]) +{ +#ifdef DRAW_DEBUG + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_bbox(*bbox, color); +#else + UNUSED_VARS(bbox, color); +#endif +} + +void DRW_debug_sphere(const float center[3], float radius, const float color[4]) +{ + if (!GPU_shader_storage_buffer_objects_support()) { + return; + } + reinterpret_cast<blender::draw::DebugDraw *>(DST.debug)->draw_sphere(center, radius, color); +} + +/** \} */ diff --git a/source/blender/draw/intern/draw_debug.h b/source/blender/draw/intern/draw_debug.h index 333d734edb9..9a56a12242e 100644 --- a/source/blender/draw/intern/draw_debug.h +++ b/source/blender/draw/intern/draw_debug.h @@ -3,21 +3,38 @@ /** \file * \ingroup draw + * + * \brief Simple API to draw debug shapes in the viewport. + * IMPORTANT: This is the legacy API for C. Use draw_debug.hh instead in new C++ code. */ #pragma once +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct DRWDebugModule DRWDebugModule; + struct BoundBox; void DRW_debug_modelmat_reset(void); void DRW_debug_modelmat(const float modelmat[4][4]); +/** + * IMPORTANT: For now there is a limit of DRW_DEBUG_DRAW_VERT_MAX that can be drawn + * using all the draw functions. 
+ */ void DRW_debug_line_v3v3(const float v1[3], const float v2[3], const float color[4]); void DRW_debug_polygon_v3(const float (*v)[3], int vert_len, const float color[4]); /** * \note g_modelmat is still applied on top. */ void DRW_debug_m4(const float m[4][4]); -void DRW_debug_m4_as_bbox(const float m[4][4], const float color[4], bool invert); +void DRW_debug_m4_as_bbox(const float m[4][4], bool invert, const float color[4]); void DRW_debug_bbox(const BoundBox *bbox, const float color[4]); void DRW_debug_sphere(const float center[3], float radius, const float color[4]); + +#ifdef __cplusplus +} +#endif diff --git a/source/blender/draw/intern/draw_debug.hh b/source/blender/draw/intern/draw_debug.hh new file mode 100644 index 00000000000..c83936bf1af --- /dev/null +++ b/source/blender/draw/intern/draw_debug.hh @@ -0,0 +1,198 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + * + * \brief Simple API to draw debug shapes and log in the viewport. + * + * Both CPU and GPU implementation are supported and symmetrical (meaning GPU shader can use it + * too, see common_debug_print/draw_lib.glsl). + * + * NOTE: CPU logging will overlap GPU logging on screen as it is drawn after. + */ + +#pragma once + +#include "BLI_math_vec_types.hh" +#include "BLI_string_ref.hh" +#include "BLI_vector.hh" +#include "DNA_object_types.h" +#include "DRW_gpu_wrapper.hh" + +namespace blender::draw { + +/* Shortcuts to avoid boilerplate code and match shader API. */ +#define drw_debug_line(...) DRW_debug_get()->draw_line(__VA_ARGS__) +#define drw_debug_polygon(...) DRW_debug_get()->draw_polygon(__VA_ARGS__) +#define drw_debug_bbox(...) DRW_debug_get()->draw_bbox(__VA_ARGS__) +#define drw_debug_sphere(...) DRW_debug_get()->draw_sphere(__VA_ARGS__) +#define drw_debug_point(...) DRW_debug_get()->draw_point(__VA_ARGS__) +#define drw_debug_matrix(...) 
DRW_debug_get()->draw_matrix(__VA_ARGS__) +#define drw_debug_matrix_as_bbox(...) DRW_debug_get()->draw_matrix_as_bbox(__VA_ARGS__) +#define drw_print(...) DRW_debug_get()->print(__VA_ARGS__) +#define drw_print_hex(...) DRW_debug_get()->print_hex(__VA_ARGS__) +#define drw_print_binary(...) DRW_debug_get()->print_binary(__VA_ARGS__) +#define drw_print_no_endl(...) DRW_debug_get()->print_no_endl(__VA_ARGS__) + +/* Will log variable along with its name, like the shader version of print(). */ +#define drw_print_id(v_) DRW_debug_get()->print(#v_, "= ", v_) +#define drw_print_id_no_endl(v_) DRW_debug_get()->print_no_endl(#v_, "= ", v_) + +class DebugDraw { + private: + using DebugDrawBuf = StorageBuffer<DRWDebugDrawBuffer>; + using DebugPrintBuf = StorageBuffer<DRWDebugPrintBuffer>; + + /** Data buffers containing all verts or chars to draw. */ + DebugDrawBuf cpu_draw_buf_ = {"DebugDrawBuf-CPU"}; + DebugDrawBuf gpu_draw_buf_ = {"DebugDrawBuf-GPU"}; + DebugPrintBuf cpu_print_buf_ = {"DebugPrintBuf-CPU"}; + DebugPrintBuf gpu_print_buf_ = {"DebugPrintBuf-GPU"}; + /** True if the gpu buffer have been requested and may contain data to draw. */ + bool gpu_print_buf_used = false; + bool gpu_draw_buf_used = false; + /** Matrix applied to all points before drawing. Could be a stack if needed. */ + float4x4 model_mat_; + /** Precomputed shapes verts. */ + Vector<float3> sphere_verts_; + Vector<float3> point_verts_; + /** Cursor position for print functionality. */ + uint print_col_ = 0; + uint print_row_ = 0; + + public: + DebugDraw(); + ~DebugDraw(){}; + + /** + * Resets all buffers and reset model matrix state. + * Not to be called by user. + */ + void init(); + + /** + * Resets model matrix state to identity. + */ + void modelmat_reset(); + /** + * Sets model matrix transform to apply to any vertex passed to drawing functions. + */ + void modelmat_set(const float modelmat[4][4]); + + /** + * Drawing functions that will draw wire-frames with the given color. 
+ */ + void draw_line(float3 v1, float3 v2, float4 color = {1, 0, 0, 1}); + void draw_polygon(Span<float3> poly_verts, float4 color = {1, 0, 0, 1}); + void draw_bbox(const BoundBox &bbox, const float4 color = {1, 0, 0, 1}); + void draw_sphere(const float3 center, float radius, const float4 color = {1, 0, 0, 1}); + void draw_point(const float3 center, float radius = 0.01f, const float4 color = {1, 0, 0, 1}); + /** + * Draw a matrix transformation as 3 colored axes. + */ + void draw_matrix(const float4x4 m4); + /** + * Draw a matrix as a 2 units length bounding box, centered on origin. + */ + void draw_matrix_as_bbox(float4x4 mat, const float4 color = {1, 0, 0, 1}); + + /** + * Will draw all debug shapes and text cached up until now to the current view / frame-buffer. + * Draw buffers will be emptied and ready for new debug data. + */ + void display_to_view(); + + /** + * Log variable or strings inside the viewport. + * Using a unique non string argument will print the variable name with it. + * Concatenate by using multiple arguments. i.e: `print("Looped ", n, "times.")`. + */ + template<typename... Ts> void print(StringRefNull str, Ts... args) + { + print_no_endl(str, args...); + print_newline(); + } + template<typename T> void print(const T &value) + { + print_value(value); + print_newline(); + } + template<typename T> void print_hex(const T &value) + { + print_value_hex(value); + print_newline(); + } + template<typename T> void print_binary(const T &value) + { + print_value_binary(value); + print_newline(); + } + + /** + * Same as `print()` but does not finish the line. + */ + void print_no_endl(std::string arg) + { + print_string(arg); + } + void print_no_endl(StringRef arg) + { + print_string(arg); + } + void print_no_endl(StringRefNull arg) + { + print_string(arg); + } + void print_no_endl(char const *arg) + { + print_string(StringRefNull(arg)); + } + template<typename T> void print_no_endl(T arg) + { + print_value(arg); + } + template<typename T, typename... 
Ts> void print_no_endl(T arg, Ts... args) + { + print_no_endl(arg); + print_no_endl(args...); + } + + /** + * Not to be called by user. Should become private. + */ + GPUStorageBuf *gpu_draw_buf_get(); + GPUStorageBuf *gpu_print_buf_get(); + + private: + uint color_pack(float4 color); + DRWDebugVert vert_pack(float3 pos, uint color); + + void draw_line(float3 v1, float3 v2, uint color); + + void print_newline(); + void print_string_start(uint len); + void print_string(std::string str); + void print_char4(uint data); + void print_append_char(uint char1, uint &char4); + void print_append_digit(uint digit, uint &char4); + void print_append_space(uint &char4); + void print_value_binary(uint value); + void print_value_uint(uint value, const bool hex, bool is_negative, const bool is_unsigned); + + template<typename T> void print_value(const T &value); + template<typename T> void print_value_hex(const T &value); + template<typename T> void print_value_binary(const T &value); + + void display_lines(); + void display_prints(); +}; + +} // namespace blender::draw + +/** + * Ease of use function to get the debug module. + * TODO(fclem): Should be removed once DRWManager is no longer global. + * IMPORTANT: Can return nullptr if storage buffer is not supported. + */ +blender::draw::DebugDraw *DRW_debug_get(); diff --git a/source/blender/draw/intern/draw_defines.h b/source/blender/draw/intern/draw_defines.h new file mode 100644 index 00000000000..3df7e47cffb --- /dev/null +++ b/source/blender/draw/intern/draw_defines.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. + */ + +/** \file + * \ingroup draw + * + * List of defines that are shared with the GPUShaderCreateInfos. We do this to avoid + * dragging larger headers into the createInfo pipeline which would cause problems. 
+ */ + +#pragma once + +#define DRW_VIEW_UBO_SLOT 0 + +#define DRW_RESOURCE_ID_SLOT 11 +#define DRW_OBJ_MAT_SLOT 10 +#define DRW_OBJ_INFOS_SLOT 9 +#define DRW_OBJ_ATTR_SLOT 8 + +#define DRW_DEBUG_PRINT_SLOT 15 +#define DRW_DEBUG_DRAW_SLOT 14 + +#define DRW_COMMAND_GROUP_SIZE 64 +#define DRW_FINALIZE_GROUP_SIZE 64 +/* Must be multiple of 32. Set to 32 for shader simplicity. */ +#define DRW_VISIBILITY_GROUP_SIZE 32 diff --git a/source/blender/draw/intern/draw_hair.cc b/source/blender/draw/intern/draw_hair.cc index 0a3c16e0d71..ceee1c7cb48 100644 --- a/source/blender/draw/intern/draw_hair.cc +++ b/source/blender/draw/intern/draw_hair.cc @@ -22,33 +22,29 @@ #include "GPU_batch.h" #include "GPU_capabilities.h" #include "GPU_compute.h" +#include "GPU_context.h" #include "GPU_material.h" #include "GPU_shader.h" #include "GPU_texture.h" #include "GPU_vertex_buffer.h" +#include "DRW_gpu_wrapper.hh" + #include "draw_hair_private.h" #include "draw_shader.h" - -#ifndef __APPLE__ -# define USE_TRANSFORM_FEEDBACK -# define USE_COMPUTE_SHADERS -#endif +#include "draw_shader_shared.h" BLI_INLINE eParticleRefineShaderType drw_hair_shader_type_get() { -#ifdef USE_COMPUTE_SHADERS if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) { return PART_REFINE_SHADER_COMPUTE; } -#endif -#ifdef USE_TRANSFORM_FEEDBACK - return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; -#endif + if (GPU_transform_feedback_support()) { + return PART_REFINE_SHADER_TRANSFORM_FEEDBACK; + } return PART_REFINE_SHADER_TRANSFORM_FEEDBACK_WORKAROUND; } -#ifndef USE_TRANSFORM_FEEDBACK struct ParticleRefineCall { struct ParticleRefineCall *next; GPUVertBuf *vbo; @@ -60,11 +56,11 @@ static ParticleRefineCall *g_tf_calls = nullptr; static int g_tf_id_offset; static int g_tf_target_width; static int g_tf_target_height; -#endif static GPUVertBuf *g_dummy_vbo = nullptr; static GPUTexture *g_dummy_texture = nullptr; -static DRWPass *g_tf_pass; /* XXX can be a problem with multiple DRWManager in the 
future */ +static DRWPass *g_tf_pass; /* XXX can be a problem with multiple #DRWManager in the future */ +static blender::draw::UniformBuffer<CurvesInfos> *g_dummy_curves_info = nullptr; static GPUShader *hair_refine_shader_get(ParticleRefineShader refinement) { @@ -73,26 +69,35 @@ static GPUShader *hair_refine_shader_get(ParticleRefineShader refinement) void DRW_hair_init(void) { -#if defined(USE_TRANSFORM_FEEDBACK) || defined(USE_COMPUTE_SHADERS) - g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_NO_DRAW); -#else - g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_WRITE_COLOR); -#endif + if (GPU_transform_feedback_support() || GPU_compute_shader_support()) { + g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_NO_DRAW); + } + else { + g_tf_pass = DRW_pass_create("Update Hair Pass", DRW_STATE_WRITE_COLOR); + } if (g_dummy_vbo == nullptr) { /* initialize vertex format */ GPUVertFormat format = {0}; uint dummy_id = GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); - g_dummy_vbo = GPU_vertbuf_create_with_format(&format); + g_dummy_vbo = GPU_vertbuf_create_with_format_ex( + &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); const float vert[4] = {0.0f, 0.0f, 0.0f, 0.0f}; GPU_vertbuf_data_alloc(g_dummy_vbo, 1); GPU_vertbuf_attr_fill(g_dummy_vbo, dummy_id, vert); - /* Create vbo immediately to bind to texture buffer. */ + /* Create VBO immediately to bind to texture buffer. 
*/ GPU_vertbuf_use(g_dummy_vbo); g_dummy_texture = GPU_texture_create_from_vertbuf("hair_dummy_attr", g_dummy_vbo); + + g_dummy_curves_info = MEM_new<blender::draw::UniformBuffer<CurvesInfos>>( + "g_dummy_curves_info"); + memset(g_dummy_curves_info->is_point_attribute, + 0, + sizeof(g_dummy_curves_info->is_point_attribute)); + g_dummy_curves_info->push_update(); } } @@ -135,22 +140,25 @@ static void drw_hair_particle_cache_update_transform_feedback(ParticleHairCache if (final_points_len > 0) { GPUShader *tf_shader = hair_refine_shader_get(PART_REFINE_CATMULL_ROM); -#ifdef USE_TRANSFORM_FEEDBACK - DRWShadingGroup *tf_shgrp = DRW_shgroup_transform_feedback_create( - tf_shader, g_tf_pass, cache->final[subdiv].proc_buf); -#else - DRWShadingGroup *tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); - - ParticleRefineCall *pr_call = (ParticleRefineCall *)MEM_mallocN(sizeof(*pr_call), __func__); - pr_call->next = g_tf_calls; - pr_call->vbo = cache->final[subdiv].proc_buf; - pr_call->shgrp = tf_shgrp; - pr_call->vert_len = final_points_len; - g_tf_calls = pr_call; - DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); - DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); - DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); -#endif + DRWShadingGroup *tf_shgrp = nullptr; + if (GPU_transform_feedback_support()) { + tf_shgrp = DRW_shgroup_transform_feedback_create( + tf_shader, g_tf_pass, cache->final[subdiv].proc_buf); + } + else { + tf_shgrp = DRW_shgroup_create(tf_shader, g_tf_pass); + + ParticleRefineCall *pr_call = (ParticleRefineCall *)MEM_mallocN(sizeof(*pr_call), __func__); + pr_call->next = g_tf_calls; + pr_call->vbo = cache->final[subdiv].proc_buf; + pr_call->shgrp = tf_shgrp; + pr_call->vert_len = final_points_len; + g_tf_calls = pr_call; + DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1); + DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1); + 
DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1); + } + BLI_assert(tf_shgrp != nullptr); drw_hair_particle_cache_shgrp_attach_resources(tf_shgrp, cache, subdiv); DRW_shgroup_call_procedural_points(tf_shgrp, nullptr, final_points_len); @@ -239,7 +247,7 @@ DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object, DRWShadingGroup *shgrp = DRW_shgroup_create_sub(shgrp_parent); - /* TODO: optimize this. Only bind the ones GPUMaterial needs. */ + /* TODO: optimize this. Only bind the ones #GPUMaterial needs. */ for (int i = 0; i < hair_cache->num_uv_layers; i++) { for (int n = 0; n < MAX_LAYER_NAME_CT && hair_cache->uv_layer_names[i][n][0] != '\0'; n++) { DRW_shgroup_uniform_texture(shgrp, hair_cache->uv_layer_names[i][n], hair_cache->uv_tex[i]); @@ -276,6 +284,8 @@ DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object, if (hair_cache->length_tex) { DRW_shgroup_uniform_texture(shgrp, "l", hair_cache->length_tex); } + + DRW_shgroup_uniform_block(shgrp, "drw_curves", *g_dummy_curves_info); DRW_shgroup_uniform_int(shgrp, "hairStrandsRes", &hair_cache->final[subdiv].strands_res, 1); DRW_shgroup_uniform_int_copy(shgrp, "hairThicknessRes", thickness_res); DRW_shgroup_uniform_float_copy(shgrp, "hairRadShape", hair_rad_shape); @@ -293,85 +303,122 @@ DRWShadingGroup *DRW_shgroup_hair_create_sub(Object *object, void DRW_hair_update() { -#ifndef USE_TRANSFORM_FEEDBACK - /** - * Workaround to transform feedback not working on mac. - * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). - * - * So instead of using transform feedback we render to a texture, - * read back the result to system memory and re-upload as VBO data. - * It is really not ideal performance wise, but it is the simplest - * and the most local workaround that still uses the power of the GPU. - */ - - if (g_tf_calls == nullptr) { - return; - } + if (!GPU_transform_feedback_support()) { + /** + * Workaround to transform feedback not working on mac. 
+ * On some system it crashes (see T58489) and on some other it renders garbage (see T60171). + * + * So instead of using transform feedback we render to a texture, + * read back the result to system memory and re-upload as VBO data. + * It is really not ideal performance wise, but it is the simplest + * and the most local workaround that still uses the power of the GPU. + */ + + if (g_tf_calls == nullptr) { + return; + } - /* Search ideal buffer size. */ - uint max_size = 0; - for (ParticleRefineCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { - max_size = max_ii(max_size, pr_call->vert_len); - } + /* Search ideal buffer size. */ + uint max_size = 0; + for (ParticleRefineCall *pr_call = g_tf_calls; pr_call; pr_call = pr_call->next) { + max_size = max_ii(max_size, pr_call->vert_len); + } + + /* Create target Texture / Frame-buffer */ + /* Don't use max size as it can be really heavy and fail. + * Do chunks of maximum 2048 * 2048 hair points. */ + int width = 2048; + int height = min_ii(width, 1 + max_size / width); + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_RGBA32F, (DrawEngineType *)DRW_hair_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPUFrameBuffer *fb = nullptr; + GPU_framebuffer_ensure_config(&fb, + { + GPU_ATTACHMENT_NONE, + GPU_ATTACHMENT_TEXTURE(tex), + }); + + float *data = (float *)MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer"); + + GPU_framebuffer_bind(fb); + while (g_tf_calls != nullptr) { + ParticleRefineCall *pr_call = g_tf_calls; + g_tf_calls = g_tf_calls->next; + + g_tf_id_offset = 0; + while (pr_call->vert_len > 0) { + int max_read_px_len = min_ii(width * height, pr_call->vert_len); + + DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); + /* Read back result to main memory. */ + GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); + /* Upload back to VBO. 
*/ + GPU_vertbuf_use(pr_call->vbo); + GPU_vertbuf_update_sub(pr_call->vbo, + sizeof(float[4]) * g_tf_id_offset, + sizeof(float[4]) * max_read_px_len, + data); + + g_tf_id_offset += max_read_px_len; + pr_call->vert_len -= max_read_px_len; + } - /* Create target Texture / Frame-buffer */ - /* Don't use max size as it can be really heavy and fail. - * Do chunks of maximum 2048 * 2048 hair points. */ - int width = 2048; - int height = min_ii(width, 1 + max_size / width); - GPUTexture *tex = DRW_texture_pool_query_2d( - width, height, GPU_RGBA32F, (DrawEngineType *)DRW_hair_update); - g_tf_target_height = height; - g_tf_target_width = width; - - GPUFrameBuffer *fb = nullptr; - GPU_framebuffer_ensure_config(&fb, - { - GPU_ATTACHMENT_NONE, - GPU_ATTACHMENT_TEXTURE(tex), - }); - - float *data = (float *)MEM_mallocN(sizeof(float[4]) * width * height, "tf fallback buffer"); - - GPU_framebuffer_bind(fb); - while (g_tf_calls != nullptr) { - ParticleRefineCall *pr_call = g_tf_calls; - g_tf_calls = g_tf_calls->next; - - g_tf_id_offset = 0; - while (pr_call->vert_len > 0) { - int max_read_px_len = min_ii(width * height, pr_call->vert_len); - - DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp); - /* Read back result to main memory. */ - GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, GPU_DATA_FLOAT, data); - /* Upload back to VBO. */ - GPU_vertbuf_use(pr_call->vbo); - GPU_vertbuf_update_sub(pr_call->vbo, - sizeof(float[4]) * g_tf_id_offset, - sizeof(float[4]) * max_read_px_len, - data); - - g_tf_id_offset += max_read_px_len; - pr_call->vert_len -= max_read_px_len; + MEM_freeN(pr_call); } - MEM_freeN(pr_call); + MEM_freeN(data); + GPU_framebuffer_free(fb); } + else { + /* NOTE(Metal): If compute is not supported, bind a temporary frame-buffer to avoid + * side-effects from rendering in the active buffer. + * We also need to guarantee that a frame-buffer is active to perform any rendering work, + * even if there is no output. 
*/ + GPUFrameBuffer *temp_fb = nullptr; + GPUFrameBuffer *prev_fb = nullptr; + if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) { + if (!GPU_compute_shader_support()) { + prev_fb = GPU_framebuffer_active_get(); + char errorOut[256]; + /* if the frame-buffer is invalid we need a dummy frame-buffer to be bound. */ + if (!GPU_framebuffer_check_valid(prev_fb, errorOut)) { + int width = 64; + int height = 64; + GPUTexture *tex = DRW_texture_pool_query_2d( + width, height, GPU_DEPTH_COMPONENT32F, (DrawEngineType *)DRW_hair_update); + g_tf_target_height = height; + g_tf_target_width = width; + + GPU_framebuffer_ensure_config(&temp_fb, {GPU_ATTACHMENT_TEXTURE(tex)}); + + GPU_framebuffer_bind(temp_fb); + } + } + } + + /* Just render the pass when using compute shaders or transform feedback. */ + DRW_draw_pass(g_tf_pass); + if (drw_hair_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } - MEM_freeN(data); - GPU_framebuffer_free(fb); -#else - /* Just render the pass when using compute shaders or transform feedback. */ - DRW_draw_pass(g_tf_pass); - if (drw_hair_shader_type_get() == PART_REFINE_SHADER_COMPUTE) { - GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + /* Release temporary frame-buffer. 
*/ + if (temp_fb != nullptr) { + GPU_framebuffer_free(temp_fb); + } + /* Rebind existing frame-buffer */ + if (prev_fb != nullptr) { + GPU_framebuffer_bind(prev_fb); + } } -#endif } void DRW_hair_free(void) { GPU_VERTBUF_DISCARD_SAFE(g_dummy_vbo); DRW_TEXTURE_FREE_SAFE(g_dummy_texture); + MEM_delete(g_dummy_curves_info); } diff --git a/source/blender/draw/intern/draw_hair_private.h b/source/blender/draw/intern/draw_hair_private.h index 5d84c8863f2..c7e9e1e22de 100644 --- a/source/blender/draw/intern/draw_hair_private.h +++ b/source/blender/draw/intern/draw_hair_private.h @@ -61,9 +61,9 @@ typedef struct ParticleHairCache { GPUTexture *uv_tex[MAX_MTFACE]; char uv_layer_names[MAX_MTFACE][MAX_LAYER_NAME_CT][MAX_LAYER_NAME_LEN]; - GPUVertBuf *proc_col_buf[MAX_MCOL]; - GPUTexture *col_tex[MAX_MCOL]; - char col_layer_names[MAX_MCOL][MAX_LAYER_NAME_CT][MAX_LAYER_NAME_LEN]; + GPUVertBuf **proc_col_buf; + GPUTexture **col_tex; + char (*col_layer_names)[MAX_LAYER_NAME_CT][MAX_LAYER_NAME_LEN]; int num_uv_layers; int num_col_layers; diff --git a/source/blender/draw/intern/draw_handle.hh b/source/blender/draw/intern/draw_handle.hh new file mode 100644 index 00000000000..5f96bfa5dcd --- /dev/null +++ b/source/blender/draw/intern/draw_handle.hh @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * A unique identifier for each object component. + * It is used to access each component data such as matrices and object attributes. + * It is valid only for the current draw, it is not persistent. + * + * The most significant bit is used to encode if the object needs to invert the front face winding + * because of its object matrix handedness. This is handy because this means sorting inside + * #DrawGroup command will put all inverted commands last. + * + * Default value of 0 points toward an non-cull-able object with unit bounding box centered at + * the origin. 
+ */ + +#include "draw_shader_shared.h" + +struct Object; +struct DupliObject; + +namespace blender::draw { + +struct ResourceHandle { + uint raw; + + ResourceHandle() = default; + ResourceHandle(uint raw_) : raw(raw_){}; + ResourceHandle(uint index, bool inverted_handedness) + { + raw = index; + SET_FLAG_FROM_TEST(raw, inverted_handedness, 0x80000000u); + } + + bool has_inverted_handedness() const + { + return (raw & 0x80000000u) != 0; + } + + uint resource_index() const + { + return (raw & 0x7FFFFFFFu); + } +}; + +/* TODO(fclem): Move to somewhere more appropriated after cleaning up the header dependencies. */ +struct ObjectRef { + Object *object; + /** Dupli object that corresponds to the current object. */ + DupliObject *dupli_object; + /** Object that created the dupli-list the current object is part of. */ + Object *dupli_parent; +}; + +}; // namespace blender::draw diff --git a/source/blender/draw/intern/draw_instance_data.c b/source/blender/draw/intern/draw_instance_data.c index 0e4e67f3320..ac2aea4524d 100644 --- a/source/blender/draw/intern/draw_instance_data.c +++ b/source/blender/draw/intern/draw_instance_data.c @@ -27,6 +27,7 @@ #include "BKE_duplilist.h" #include "RNA_access.h" +#include "RNA_path.h" #include "BLI_bitmap.h" #include "BLI_memblock.h" @@ -563,7 +564,8 @@ typedef struct DRWUniformAttrBuf { struct DRWUniformAttrBuf *next_empty; } DRWUniformAttrBuf; -static DRWUniformAttrBuf *drw_uniform_attrs_pool_ensure(GHash *table, GPUUniformAttrList *key) +static DRWUniformAttrBuf *drw_uniform_attrs_pool_ensure(GHash *table, + const GPUUniformAttrList *key) { void **pkey, **pval; @@ -641,23 +643,16 @@ static void drw_uniform_attribute_lookup(GPUUniformAttr *attr, { copy_v4_fl(r_data, 0); - char idprop_name[(sizeof(attr->name) * 2) + 4]; - { - char attr_name_esc[sizeof(attr->name) * 2]; - BLI_str_escape(attr_name_esc, attr->name, sizeof(attr_name_esc)); - SNPRINTF(idprop_name, "[\"%s\"]", attr_name_esc); - } - /* If requesting instance data, check the 
parent particle system and object. */ if (attr->use_dupli) { if (dupli_source && dupli_source->particle_system) { ParticleSettings *settings = dupli_source->particle_system->part; - if (drw_uniform_property_lookup((ID *)settings, idprop_name, r_data) || + if (drw_uniform_property_lookup((ID *)settings, attr->name_id_prop, r_data) || drw_uniform_property_lookup((ID *)settings, attr->name, r_data)) { return; } } - if (drw_uniform_property_lookup((ID *)dupli_parent, idprop_name, r_data) || + if (drw_uniform_property_lookup((ID *)dupli_parent, attr->name_id_prop, r_data) || drw_uniform_property_lookup((ID *)dupli_parent, attr->name, r_data)) { return; } @@ -665,9 +660,9 @@ static void drw_uniform_attribute_lookup(GPUUniformAttr *attr, /* Check the object and mesh. */ if (ob) { - if (drw_uniform_property_lookup((ID *)ob, idprop_name, r_data) || + if (drw_uniform_property_lookup((ID *)ob, attr->name_id_prop, r_data) || drw_uniform_property_lookup((ID *)ob, attr->name, r_data) || - drw_uniform_property_lookup((ID *)ob->data, idprop_name, r_data) || + drw_uniform_property_lookup((ID *)ob->data, attr->name_id_prop, r_data) || drw_uniform_property_lookup((ID *)ob->data, attr->name, r_data)) { return; } @@ -675,7 +670,7 @@ static void drw_uniform_attribute_lookup(GPUUniformAttr *attr, } void drw_uniform_attrs_pool_update(GHash *table, - GPUUniformAttrList *key, + const GPUUniformAttrList *key, DRWResourceHandle *handle, Object *ob, Object *dupli_parent, @@ -696,7 +691,8 @@ void drw_uniform_attrs_pool_update(GHash *table, } } -DRWSparseUniformBuf *DRW_uniform_attrs_pool_find_ubo(GHash *table, struct GPUUniformAttrList *key) +DRWSparseUniformBuf *DRW_uniform_attrs_pool_find_ubo(GHash *table, + const struct GPUUniformAttrList *key) { DRWUniformAttrBuf *buffer = BLI_ghash_lookup(table, key); return buffer ? 
&buffer->ubos : NULL; diff --git a/source/blender/draw/intern/draw_instance_data.h b/source/blender/draw/intern/draw_instance_data.h index 4b5cf63bb3b..9053544d98a 100644 --- a/source/blender/draw/intern/draw_instance_data.h +++ b/source/blender/draw/intern/draw_instance_data.h @@ -106,4 +106,4 @@ struct GHash *DRW_uniform_attrs_pool_new(void); void DRW_uniform_attrs_pool_flush_all(struct GHash *table); void DRW_uniform_attrs_pool_clear_all(struct GHash *table); struct DRWSparseUniformBuf *DRW_uniform_attrs_pool_find_ubo(struct GHash *table, - struct GPUUniformAttrList *key); + const struct GPUUniformAttrList *key); diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c index bc9d0a3d02a..9761aa8c789 100644 --- a/source/blender/draw/intern/draw_manager.c +++ b/source/blender/draw/intern/draw_manager.c @@ -43,6 +43,7 @@ #include "DNA_camera_types.h" #include "DNA_mesh_types.h" #include "DNA_meshdata_types.h" +#include "DNA_userdef_types.h" #include "DNA_world_types.h" #include "ED_gpencil.h" @@ -84,6 +85,7 @@ #include "draw_cache_impl.h" #include "engines/basic/basic_engine.h" +#include "engines/compositor/compositor_engine.h" #include "engines/eevee/eevee_engine.h" #include "engines/eevee_next/eevee_engine.h" #include "engines/external/external_engine.h" @@ -179,7 +181,7 @@ static void drw_task_graph_deinit(void) bool DRW_object_is_renderable(const Object *ob) { - BLI_assert((ob->base_flag & BASE_VISIBLE_DEPSGRAPH) != 0); + BLI_assert((ob->base_flag & BASE_ENABLED_AND_MAYBE_VISIBLE_IN_VIEWPORT) != 0); if (ob->type == OB_MESH) { if ((ob == DST.draw_ctx.object_edit) || DRW_object_is_in_edit_mode(ob)) { @@ -212,17 +214,6 @@ int DRW_object_visibility_in_active_context(const Object *ob) return BKE_object_visibility(ob, mode); } -bool DRW_object_is_flat_normal(const Object *ob) -{ - if (ob->type == OB_MESH) { - const Mesh *me = ob->data; - if (me->mpoly && me->mpoly[0].flag & ME_SMOOTH) { - return false; - } - } - return true; 
-} - bool DRW_object_use_hide_faces(const struct Object *ob) { if (ob->type == OB_MESH) { @@ -235,7 +226,7 @@ bool DRW_object_use_hide_faces(const struct Object *ob) return (me->editflag & ME_EDIT_PAINT_FACE_SEL) != 0; case OB_MODE_VERTEX_PAINT: case OB_MODE_WEIGHT_PAINT: - return (me->editflag & (ME_EDIT_PAINT_FACE_SEL | ME_EDIT_PAINT_VERT_SEL)) != 0; + return true; } } @@ -1010,6 +1001,8 @@ static void drw_engines_init(void) static void drw_engines_cache_init(void) { + DRW_manager_begin_sync(); + DRW_ENABLED_ENGINE_ITER (DST.view_data_active, engine, data) { if (data->text_draw_cache) { DRW_text_cache_destroy(data->text_draw_cache); @@ -1081,6 +1074,8 @@ static void drw_engines_cache_finish(void) engine->cache_finish(data); } } + + DRW_manager_end_sync(); } static void drw_engines_draw_scene(void) @@ -1225,6 +1220,31 @@ static void drw_engines_enable_editors(void) } } +static bool is_compositor_enabled(void) +{ + if (!U.experimental.use_realtime_compositor) { + return false; + } + + if (!(DST.draw_ctx.v3d->shading.flag & V3D_SHADING_COMPOSITOR)) { + return false; + } + + if (!(DST.draw_ctx.v3d->shading.type >= OB_MATERIAL)) { + return false; + } + + if (!DST.draw_ctx.scene->use_nodes) { + return false; + } + + if (!DST.draw_ctx.scene->nodetree) { + return false; + } + + return true; +} + static void drw_engines_enable(ViewLayer *UNUSED(view_layer), RenderEngineType *engine_type, bool gpencil_engine_needed) @@ -1237,6 +1257,11 @@ static void drw_engines_enable(ViewLayer *UNUSED(view_layer), if (gpencil_engine_needed && ((drawtype >= OB_SOLID) || !use_xray)) { use_drw_engine(&draw_engine_gpencil_type); } + + if (is_compositor_enabled()) { + use_drw_engine(&draw_engine_compositor_type); + } + drw_engines_enable_overlays(); #ifdef WITH_DRAW_DEBUG @@ -1299,13 +1324,14 @@ void DRW_notify_view_update(const DRWUpdateContext *update_ctx) /* Reset before using it. 
*/ drw_state_prepare_clean_for_draw(&DST); + BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx = (DRWContextState){ .region = region, .rv3d = rv3d, .v3d = v3d, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .engine_type = engine_type, .depsgraph = depsgraph, .object_mode = OB_MODE_OBJECT, @@ -1323,11 +1349,7 @@ void DRW_notify_view_update(const DRWUpdateContext *update_ctx) drw_engines_enable(view_layer, engine_type, gpencil_engine_needed); drw_engines_data_validate(); - DRW_ENABLED_ENGINE_ITER (DST.view_data_active, draw_engine, data) { - if (draw_engine->view_update) { - draw_engine->view_update(data); - } - } + DRW_view_data_engines_view_update(DST.view_data_active); drw_engines_disable(); } @@ -1356,13 +1378,14 @@ static void drw_notify_view_update_offscreen(struct Depsgraph *depsgraph, /* Reset before using it. */ drw_state_prepare_clean_for_draw(&DST); + BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx = (DRWContextState){ .region = region, .rv3d = rv3d, .v3d = v3d, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .engine_type = engine_type, .depsgraph = depsgraph, }; @@ -1379,11 +1402,7 @@ static void drw_notify_view_update_offscreen(struct Depsgraph *depsgraph, drw_engines_enable(view_layer, engine_type, gpencil_engine_needed); drw_engines_data_validate(); - DRW_ENABLED_ENGINE_ITER (DST.view_data_active, draw_engine, data) { - if (draw_engine->view_update) { - draw_engine->view_update(data); - } - } + DRW_view_data_engines_view_update(DST.view_data_active); drw_engines_disable(); } @@ -1608,11 +1627,11 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph, GPUViewport *viewport, const bContext *evil_C) { - Scene *scene = DEG_get_evaluated_scene(depsgraph); ViewLayer *view_layer = DEG_get_evaluated_view_layer(depsgraph); RegionView3D *rv3d = region->regiondata; + 
BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx.evil_C = evil_C; DST.draw_ctx = (DRWContextState){ .region = region, @@ -1620,7 +1639,7 @@ void DRW_draw_render_loop_ex(struct Depsgraph *depsgraph, .v3d = v3d, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .engine_type = engine_type, .depsgraph = depsgraph, @@ -2127,12 +2146,13 @@ void DRW_draw_render_loop_2d_ex(struct Depsgraph *depsgraph, Scene *scene = DEG_get_evaluated_scene(depsgraph); ViewLayer *view_layer = DEG_get_evaluated_view_layer(depsgraph); + BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx.evil_C = evil_C; DST.draw_ctx = (DRWContextState){ .region = region, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .depsgraph = depsgraph, .space_data = CTX_wm_space_data(evil_C), @@ -2333,7 +2353,9 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph, Scene *scene = DEG_get_evaluated_scene(depsgraph); RenderEngineType *engine_type = ED_view3d_engine_type(scene, v3d->shading.type); ViewLayer *view_layer = DEG_get_evaluated_view_layer(depsgraph); - Object *obact = OBACT(view_layer); + + BKE_view_layer_synced_ensure(scene, view_layer); + Object *obact = BKE_view_layer_active_object_get(view_layer); Object *obedit = use_obedit_skip ? 
NULL : OBEDIT_FROM_OBACT(obact); #ifndef USE_GPU_SELECT UNUSED_VARS(scene, view_layer, v3d, region, rect); @@ -2442,7 +2464,7 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph, drw_engines_world_update(scene); if (use_obedit) { - FOREACH_OBJECT_IN_MODE_BEGIN (view_layer, v3d, object_type, object_mode, ob_iter) { + FOREACH_OBJECT_IN_MODE_BEGIN (scene, view_layer, v3d, object_type, object_mode, ob_iter) { drw_engines_cache_populate(ob_iter); } FOREACH_OBJECT_IN_MODE_END; @@ -2463,7 +2485,7 @@ void DRW_draw_select_loop(struct Depsgraph *depsgraph, } if (use_pose_exception && (ob->mode & OB_MODE_POSE)) { - if ((ob->base_flag & BASE_VISIBLE_VIEWLAYER) == 0) { + if ((ob->base_flag & BASE_ENABLED_AND_VISIBLE_IN_DEFAULT_VIEWPORT) == 0) { continue; } } @@ -2564,13 +2586,14 @@ static void drw_draw_depth_loop_impl(struct Depsgraph *depsgraph, DST.options.is_depth = true; /* Instead of 'DRW_context_state_init(C, &DST.draw_ctx)', assign from args */ + BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx = (DRWContextState){ .region = region, .rv3d = rv3d, .v3d = v3d, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .engine_type = engine_type, .depsgraph = depsgraph, }; @@ -2683,7 +2706,7 @@ void DRW_draw_select_id(Depsgraph *depsgraph, ARegion *region, View3D *v3d, cons GPUViewport *viewport = WM_draw_region_get_viewport(region); if (!viewport) { /* Selection engine requires a viewport. - * TODO(germano): This should be done internally in the engine. */ + * TODO(@germano): This should be done internally in the engine. 
*/ sel_ctx->is_dirty = true; sel_ctx->objects_drawn_len = 0; sel_ctx->index_drawn_len = 1; @@ -2697,13 +2720,14 @@ void DRW_draw_select_id(Depsgraph *depsgraph, ARegion *region, View3D *v3d, cons drw_state_prepare_clean_for_draw(&DST); /* Instead of 'DRW_context_state_init(C, &DST.draw_ctx)', assign from args */ + BKE_view_layer_synced_ensure(scene, view_layer); DST.draw_ctx = (DRWContextState){ .region = region, .rv3d = region->regiondata, .v3d = v3d, .scene = scene, .view_layer = view_layer, - .obact = OBACT(view_layer), + .obact = BKE_view_layer_active_object_get(view_layer), .depsgraph = depsgraph, }; drw_task_graph_init(); @@ -2959,6 +2983,7 @@ void DRW_engines_register(void) DRW_engine_register(&draw_engine_overlay_type); DRW_engine_register(&draw_engine_select_type); DRW_engine_register(&draw_engine_basic_type); + DRW_engine_register(&draw_engine_compositor_type); #ifdef WITH_DRAW_DEBUG DRW_engine_register(&draw_engine_debug_select_type); #endif @@ -2968,9 +2993,6 @@ void DRW_engines_register(void) /* setup callbacks */ { - BKE_mball_batch_cache_dirty_tag_cb = DRW_mball_batch_cache_dirty_tag; - BKE_mball_batch_cache_free_cb = DRW_mball_batch_cache_free; - BKE_curve_batch_cache_dirty_tag_cb = DRW_curve_batch_cache_dirty_tag; BKE_curve_batch_cache_free_cb = DRW_curve_batch_cache_free; @@ -3039,6 +3061,9 @@ void DRW_engines_free(void) DRW_stats_free(); DRW_globals_free(); + drw_debug_module_free(DST.debug); + DST.debug = NULL; + DRW_UBO_FREE_SAFE(G_draw.block_ubo); DRW_UBO_FREE_SAFE(G_draw.view_ubo); DRW_TEXTURE_FREE_SAFE(G_draw.ramp); diff --git a/source/blender/draw/intern/draw_manager.cc b/source/blender/draw/intern/draw_manager.cc new file mode 100644 index 00000000000..169d86b2ea1 --- /dev/null +++ b/source/blender/draw/intern/draw_manager.cc @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. 
*/ + +/** \file + * \ingroup draw + */ + +#include "BKE_global.h" +#include "GPU_compute.h" + +#include "draw_debug.hh" +#include "draw_defines.h" +#include "draw_manager.h" +#include "draw_manager.hh" +#include "draw_pass.hh" +#include "draw_shader.h" + +namespace blender::draw { + +Manager::~Manager() +{ + for (GPUTexture *texture : acquired_textures) { + /* Decrease refcount and free if 0. */ + GPU_texture_free(texture); + } +} + +void Manager::begin_sync() +{ + /* TODO: This means the reference is kept until further redraw or manager tear-down. Instead, + * they should be released after each draw loop. But for now, mimics old DRW behavior. */ + for (GPUTexture *texture : acquired_textures) { + /* Decrease refcount and free if 0. */ + GPU_texture_free(texture); + } + + acquired_textures.clear(); + +#ifdef DEBUG + /* Detect uninitialized data. */ + memset(matrix_buf.data(), 0xF0, resource_len_ * sizeof(*matrix_buf.data())); + memset(bounds_buf.data(), 0xF0, resource_len_ * sizeof(*bounds_buf.data())); + memset(infos_buf.data(), 0xF0, resource_len_ * sizeof(*infos_buf.data())); +#endif + resource_len_ = 0; + attribute_len_ = 0; + /* TODO(fclem): Resize buffers if too big, but with an hysteresis threshold. */ + + object_active = DST.draw_ctx.obact; + + /* Init the 0 resource. */ + resource_handle(float4x4::identity()); +} + +void Manager::end_sync() +{ + GPU_debug_group_begin("Manager.end_sync"); + + matrix_buf.push_update(); + bounds_buf.push_update(); + infos_buf.push_update(); + attributes_buf.push_update(); + attributes_buf_legacy.push_update(); + + /* Useful for debugging the following resource finalize. But will trigger the drawing of the GPU + * debug draw/print buffers for every frame. Not nice for performance. */ + // debug_bind(); + + /* Dispatch compute to finalize the resources on GPU. Save a bit of CPU time. 
*/ + uint thread_groups = divide_ceil_u(resource_len_, DRW_FINALIZE_GROUP_SIZE); + GPUShader *shader = DRW_shader_draw_resource_finalize_get(); + GPU_shader_bind(shader); + GPU_shader_uniform_1i(shader, "resource_len", resource_len_); + GPU_storagebuf_bind(matrix_buf, GPU_shader_get_ssbo(shader, "matrix_buf")); + GPU_storagebuf_bind(bounds_buf, GPU_shader_get_ssbo(shader, "bounds_buf")); + GPU_storagebuf_bind(infos_buf, GPU_shader_get_ssbo(shader, "infos_buf")); + GPU_compute_dispatch(shader, thread_groups, 1, 1); + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + + GPU_debug_group_end(); +} + +void Manager::debug_bind() +{ +#ifdef DEBUG + if (DST.debug == nullptr) { + return; + } + GPU_storagebuf_bind(drw_debug_gpu_draw_buf_get(), DRW_DEBUG_DRAW_SLOT); + GPU_storagebuf_bind(drw_debug_gpu_print_buf_get(), DRW_DEBUG_PRINT_SLOT); +# ifndef DISABLE_DEBUG_SHADER_PRINT_BARRIER + /* Add a barrier to allow multiple shader writing to the same buffer. */ + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); +# endif +#endif +} + +void Manager::resource_bind() +{ + GPU_storagebuf_bind(matrix_buf, DRW_OBJ_MAT_SLOT); + GPU_storagebuf_bind(infos_buf, DRW_OBJ_INFOS_SLOT); + GPU_storagebuf_bind(attributes_buf, DRW_OBJ_ATTR_SLOT); + /* 2 is the hardcoded location of the uniform attr UBO. */ + /* TODO(@fclem): Remove this workaround. 
*/ + GPU_uniformbuf_bind(attributes_buf_legacy, 2); +} + +void Manager::submit(PassSimple &pass, View &view) +{ + view.bind(); + + debug_bind(); + + command::RecordingState state; + state.inverted_view = view.is_inverted(); + + pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_); + + resource_bind(); + + pass.submit(state); + + state.cleanup(); +} + +void Manager::submit(PassMain &pass, View &view) +{ + view.bind(); + + debug_bind(); + + bool freeze_culling = (U.experimental.use_viewport_debug && DST.draw_ctx.v3d && + (DST.draw_ctx.v3d->debug_flag & V3D_DEBUG_FREEZE_CULLING) != 0); + + view.compute_visibility(bounds_buf, resource_len_, freeze_culling); + + command::RecordingState state; + state.inverted_view = view.is_inverted(); + + pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_, view.visibility_buf_); + + resource_bind(); + + pass.submit(state); + + state.cleanup(); +} + +void Manager::submit(PassSortable &pass, View &view) +{ + pass.sort(); + + this->submit(static_cast<PassMain &>(pass), view); +} + +void Manager::submit(PassSimple &pass) +{ + debug_bind(); + + command::RecordingState state; + + pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_); + + resource_bind(); + + pass.submit(state); + + state.cleanup(); +} + +Manager::SubmitDebugOutput Manager::submit_debug(PassSimple &pass, View &view) +{ + submit(pass, view); + + pass.draw_commands_buf_.resource_id_buf_.read(); + + Manager::SubmitDebugOutput output; + output.resource_id = {pass.draw_commands_buf_.resource_id_buf_.data(), + pass.draw_commands_buf_.resource_id_count_}; + /* There is no visibility data for PassSimple. 
*/ + output.visibility = {(uint *)view.visibility_buf_.data(), 0}; + return output; +} + +Manager::SubmitDebugOutput Manager::submit_debug(PassMain &pass, View &view) +{ + submit(pass, view); + + GPU_finish(); + + pass.draw_commands_buf_.resource_id_buf_.read(); + view.visibility_buf_.read(); + + Manager::SubmitDebugOutput output; + output.resource_id = {pass.draw_commands_buf_.resource_id_buf_.data(), + pass.draw_commands_buf_.resource_id_count_}; + output.visibility = {(uint *)view.visibility_buf_.data(), divide_ceil_u(resource_len_, 32)}; + return output; +} + +Manager::DataDebugOutput Manager::data_debug() +{ + matrix_buf.read(); + bounds_buf.read(); + infos_buf.read(); + + Manager::DataDebugOutput output; + output.matrices = {matrix_buf.data(), resource_len_}; + output.bounds = {bounds_buf.data(), resource_len_}; + output.infos = {infos_buf.data(), resource_len_}; + return output; +} + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h index 6d384c599d8..4f71e665390 100644 --- a/source/blender/draw/intern/draw_manager.h +++ b/source/blender/draw/intern/draw_manager.h @@ -188,6 +188,7 @@ typedef enum { DRW_CMD_DRAW_INSTANCE = 2, DRW_CMD_DRAW_INSTANCE_RANGE = 3, DRW_CMD_DRAW_PROCEDURAL = 4, + DRW_CMD_DRAW_INDIRECT = 5, /* Compute Commands. 
*/ DRW_CMD_COMPUTE = 8, @@ -203,7 +204,7 @@ typedef enum { /* Needs to fit in 4bits */ } eDRWCommandType; -#define DRW_MAX_DRAW_CMD_TYPE DRW_CMD_DRAW_PROCEDURAL +#define DRW_MAX_DRAW_CMD_TYPE DRW_CMD_DRAW_INDIRECT typedef struct DRWCommandDraw { GPUBatch *batch; @@ -232,6 +233,12 @@ typedef struct DRWCommandDrawInstanceRange { uint inst_count; } DRWCommandDrawInstanceRange; +typedef struct DRWCommandDrawIndirect { + GPUBatch *batch; + DRWResourceHandle handle; + GPUStorageBuf *indirect_buf; +} DRWCommandDrawIndirect; + typedef struct DRWCommandCompute { int groups_x_len; int groups_y_len; @@ -286,6 +293,7 @@ typedef union DRWCommand { DRWCommandDrawInstance instance; DRWCommandDrawInstanceRange instance_range; DRWCommandDrawProcedural procedural; + DRWCommandDrawIndirect draw_indirect; DRWCommandCompute compute; DRWCommandComputeRef compute_ref; DRWCommandComputeIndirect compute_indirect; @@ -369,7 +377,7 @@ struct DRWUniform { /* DRW_UNIFORM_INT_COPY */ int ivalue[4]; /* DRW_UNIFORM_BLOCK_OBATTRS */ - struct GPUUniformAttrList *uniform_attrs; + const struct GPUUniformAttrList *uniform_attrs; }; int location; /* Uniform location or binding point for textures and UBO's. */ uint8_t type; /* #DRWUniformType */ @@ -395,7 +403,7 @@ struct DRWShadingGroup { DRWResourceHandle pass_handle; /* Memblock key to parent pass. */ /* Set of uniform attributes used by this shader. */ - struct GPUUniformAttrList *uniform_attrs; + const struct GPUUniformAttrList *uniform_attrs; }; /* This struct is used after cache populate if using the Z sorting. * It will not conflict with the above struct. 
*/ @@ -493,20 +501,6 @@ typedef struct DRWCommandSmallChunk { BLI_STATIC_ASSERT_ALIGN(DRWCommandChunk, 16); #endif -/* ------------- DRAW DEBUG ------------ */ - -typedef struct DRWDebugLine { - struct DRWDebugLine *next; /* linked list */ - float pos[2][3]; - float color[4]; -} DRWDebugLine; - -typedef struct DRWDebugSphere { - struct DRWDebugSphere *next; /* linked list */ - float mat[4][4]; - float color[4]; -} DRWDebugSphere; - /* ------------- Memory Pools ------------ */ /* Contains memory pools information */ @@ -533,10 +527,12 @@ typedef struct DRWData { void *volume_grids_ubos; /* VolumeUniformBufPool */ /** List of smoke textures to free after drawing. */ ListBase smoke_textures; - /** Texture pool to reuse temp texture across engines. */ - /* TODO(@fclem): The pool could be shared even between view-ports. */ + /** + * Texture pool to reuse temp texture across engines. + * TODO(@fclem): The pool could be shared even between view-ports. + */ struct DRWTexturePool *texture_pool; - /** Per stereo view data. Contains engine data and default framebuffers. */ + /** Per stereo view data. Contains engine data and default frame-buffers. */ struct DRWViewData *view_data[2]; /** Per draw-call curves object data. */ struct CurvesUniformBufPool *curves_ubos; @@ -646,11 +642,7 @@ typedef struct DRWManager { GPUDrawList *draw_list; - struct { - /* TODO(@fclem): optimize: use chunks. */ - DRWDebugLine *lines; - DRWDebugSphere *spheres; - } debug; + DRWDebugModule *debug; } DRWManager; extern DRWManager DST; /* TODO: get rid of this and allow multi-threaded rendering. 
*/ @@ -665,6 +657,9 @@ void drw_state_set(DRWState state); void drw_debug_draw(void); void drw_debug_init(void); +void drw_debug_module_free(DRWDebugModule *module); +GPUStorageBuf *drw_debug_gpu_draw_buf_get(void); +GPUStorageBuf *drw_debug_gpu_print_buf_get(void); eDRWCommandType command_type_get(const uint64_t *command_type_bits, int index); @@ -683,9 +678,10 @@ void drw_resource_buffer_finish(DRWData *vmempool); GPUBatch *drw_cache_procedural_points_get(void); GPUBatch *drw_cache_procedural_lines_get(void); GPUBatch *drw_cache_procedural_triangles_get(void); +GPUBatch *drw_cache_procedural_triangle_strips_get(void); void drw_uniform_attrs_pool_update(struct GHash *table, - struct GPUUniformAttrList *key, + const struct GPUUniformAttrList *key, DRWResourceHandle *handle, struct Object *ob, struct Object *dupli_parent, @@ -698,6 +694,9 @@ bool drw_engine_data_engines_data_validate(GPUViewport *viewport, void **engine_ void drw_engine_data_cache_release(GPUViewport *viewport); void drw_engine_data_free(GPUViewport *viewport); +void DRW_manager_begin_sync(void); +void DRW_manager_end_sync(void); + #ifdef __cplusplus } #endif diff --git a/source/blender/draw/intern/draw_manager.hh b/source/blender/draw/intern/draw_manager.hh new file mode 100644 index 00000000000..fbd3d28d3f4 --- /dev/null +++ b/source/blender/draw/intern/draw_manager.hh @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * `draw::Manager` is the interface between scene data and viewport engines. + * + * It holds per component data (`ObjectInfo`, `ObjectMatrices`, ...) indexed per `ResourceHandle`. + * + * \note It is currently work in progress and should replace the old global draw manager. 
+ */ + +#include "BLI_listbase_wrapper.hh" +#include "BLI_sys_types.h" +#include "GPU_material.h" + +#include "draw_resource.hh" +#include "draw_view.hh" + +#include <string> + +namespace blender::draw { + +/* Forward declarations. */ + +namespace detail { +template<typename T> class Pass; +} // namespace detail + +namespace command { +class DrawCommandBuf; +class DrawMultiBuf; +} // namespace command + +using PassSimple = detail::Pass<command::DrawCommandBuf>; +using PassMain = detail::Pass<command::DrawMultiBuf>; +class PassSortable; + +class Manager { + using ObjectMatricesBuf = StorageArrayBuffer<ObjectMatrices, 128>; + using ObjectBoundsBuf = StorageArrayBuffer<ObjectBounds, 128>; + using ObjectInfosBuf = StorageArrayBuffer<ObjectInfos, 128>; + using ObjectAttributeBuf = StorageArrayBuffer<ObjectAttribute, 128>; + /** + * TODO(@fclem): Remove once we get rid of old EEVEE code-base. + * `DRW_RESOURCE_CHUNK_LEN = 512`. + */ + using ObjectAttributeLegacyBuf = UniformArrayBuffer<float4, 8 * 512>; + + public: + struct SubmitDebugOutput { + /** Indexed by resource id. */ + Span<uint32_t> visibility; + /** Indexed by drawn instance. */ + Span<uint32_t> resource_id; + }; + + struct DataDebugOutput { + /** Indexed by resource id. */ + Span<ObjectMatrices> matrices; + /** Indexed by resource id. */ + Span<ObjectBounds> bounds; + /** Indexed by resource id. */ + Span<ObjectInfos> infos; + }; + + /** + * Buffers containing all object data. Referenced by resource index. + * Exposed as public members for shader access after sync. + */ + ObjectMatricesBuf matrix_buf; + ObjectBoundsBuf bounds_buf; + ObjectInfosBuf infos_buf; + + /** + * Object Attributes are reference by indirection data inside ObjectInfos. + * This is because attribute list is arbitrary. + */ + ObjectAttributeBuf attributes_buf; + /** + * TODO(@fclem): Remove once we get rid of old EEVEE code-base. + * Only here to satisfy bindings. 
+ */ + ObjectAttributeLegacyBuf attributes_buf_legacy; + + /** + * List of textures coming from Image data-blocks. + * They need to be reference-counted in order to avoid being freed in another thread. + */ + Vector<GPUTexture *> acquired_textures; + + private: + /** Number of resource handle recorded. */ + uint resource_len_ = 0; + /** Number of object attribute recorded. */ + uint attribute_len_ = 0; + + Object *object_active = nullptr; + + public: + Manager(){}; + ~Manager(); + + /** + * Create a new resource handle for the given object. Can be called multiple time with the + * same object **successively** without duplicating the data. + */ + ResourceHandle resource_handle(const ObjectRef ref); + /** + * Get resource id for a loose matrix. The draw-calls for this resource handle won't be culled + * and there won't be any associated object info / bounds. Assumes correct handedness / winding. + */ + ResourceHandle resource_handle(const float4x4 &model_matrix); + /** + * Get resource id for a loose matrix with bounds. The draw-calls for this resource handle will + * be culled bute there won't be any associated object info / bounds. Assumes correct handedness + * / winding. + */ + ResourceHandle resource_handle(const float4x4 &model_matrix, + const float3 &bounds_center, + const float3 &bounds_half_extent); + + /** + * Populate additional per resource data on demand. + */ + void extract_object_attributes(ResourceHandle handle, + const ObjectRef &ref, + Span<GPUMaterial *> materials); + + /** + * Submit a pass for drawing. All resource reference will be dereferenced and commands will be + * sent to GPU. + */ + void submit(PassSimple &pass, View &view); + void submit(PassMain &pass, View &view); + void submit(PassSortable &pass, View &view); + /** + * Variant without any view. Must not contain any shader using `draw_view` create info. + */ + void submit(PassSimple &pass); + + /** + * Submit a pass for drawing but read back all data buffers for inspection. 
+ */ + SubmitDebugOutput submit_debug(PassSimple &pass, View &view); + SubmitDebugOutput submit_debug(PassMain &pass, View &view); + + /** + * Check data buffers of the draw manager. Only to be used after end_sync(). + */ + DataDebugOutput data_debug(); + + /** + * Will acquire the texture using ref counting and release it after drawing. To be used for + * texture coming from blender Image. + */ + void acquire_texture(GPUTexture *texture) + { + GPU_texture_ref(texture); + acquired_textures.append(texture); + } + + /** TODO(fclem): The following should become private at some point. */ + void begin_sync(); + void end_sync(); + + void debug_bind(); + void resource_bind(); +}; + +inline ResourceHandle Manager::resource_handle(const ObjectRef ref) +{ + bool is_active_object = (ref.dupli_object ? ref.dupli_parent : ref.object) == object_active; + matrix_buf.get_or_resize(resource_len_).sync(*ref.object); + bounds_buf.get_or_resize(resource_len_).sync(*ref.object); + infos_buf.get_or_resize(resource_len_).sync(ref, is_active_object); + return ResourceHandle(resource_len_++, (ref.object->transflag & OB_NEG_SCALE) != 0); +} + +inline ResourceHandle Manager::resource_handle(const float4x4 &model_matrix) +{ + matrix_buf.get_or_resize(resource_len_).sync(model_matrix); + bounds_buf.get_or_resize(resource_len_).sync(); + infos_buf.get_or_resize(resource_len_).sync(); + return ResourceHandle(resource_len_++, false); +} + +inline ResourceHandle Manager::resource_handle(const float4x4 &model_matrix, + const float3 &bounds_center, + const float3 &bounds_half_extent) +{ + matrix_buf.get_or_resize(resource_len_).sync(model_matrix); + bounds_buf.get_or_resize(resource_len_).sync(bounds_center, bounds_half_extent); + infos_buf.get_or_resize(resource_len_).sync(); + return ResourceHandle(resource_len_++, false); +} + +inline void Manager::extract_object_attributes(ResourceHandle handle, + const ObjectRef &ref, + Span<GPUMaterial *> materials) +{ + ObjectInfos &infos = 
infos_buf.get_or_resize(handle.resource_index()); + infos.object_attrs_offset = attribute_len_; + + /* Simple cache solution to avoid duplicates. */ + Vector<uint32_t, 4> hash_cache; + + for (const GPUMaterial *mat : materials) { + const GPUUniformAttrList *attr_list = GPU_material_uniform_attributes(mat); + if (attr_list == nullptr) { + continue; + } + + LISTBASE_FOREACH (const GPUUniformAttr *, attr, &attr_list->list) { + /** WATCH: Linear Search. Avoid duplicate attributes across materials. */ + if ((mat != materials.first()) && (hash_cache.first_index_of_try(attr->hash_code) != -1)) { + /* Attribute has already been added to the attribute buffer by another material. */ + continue; + } + hash_cache.append(attr->hash_code); + if (attributes_buf.get_or_resize(attribute_len_).sync(ref, *attr)) { + infos.object_attrs_len++; + attribute_len_++; + } + } + } +} + +} // namespace blender::draw + +/* TODO(@fclem): This is for testing. The manager should be passed to the engine through the + * callbacks. */ +blender::draw::Manager *DRW_manager_get(); +blender::draw::ObjectRef DRW_object_ref_get(Object *object); diff --git a/source/blender/draw/intern/draw_manager_data.c b/source/blender/draw/intern/draw_manager_data.c index 188d9114cd7..c75049508f9 100644 --- a/source/blender/draw/intern/draw_manager_data.c +++ b/source/blender/draw/intern/draw_manager_data.c @@ -17,9 +17,14 @@ #include "BKE_pbvh.h" #include "BKE_volume.h" +/* For debug cursor position. */ +#include "WM_api.h" +#include "wm_window.h" + #include "DNA_curve_types.h" #include "DNA_mesh_types.h" #include "DNA_meta_types.h" +#include "DNA_screen_types.h" #include "BLI_alloca.h" #include "BLI_hash.h" @@ -39,6 +44,16 @@ #include "intern/gpu_codegen.h" +/** + * IMPORTANT: + * In order to be able to write to the same print buffer sequentially, we add a barrier to allow + * multiple shader calls writing to the same buffer. 
+ * However, this adds explicit synchronization events which might change the rest of the + * application behavior and hide some bugs. If you know you are using shader debug print in only + * one shader pass, you can comment this out to remove the aforementioned barrier. + */ +#define DISABLE_DEBUG_SHADER_PRINT_BARRIER + /* -------------------------------------------------------------------- */ /** \name Uniform Buffer Object (DRW_uniformbuffer) * \{ */ @@ -878,6 +893,17 @@ static void drw_command_draw_procedural(DRWShadingGroup *shgroup, cmd->vert_count = vert_count; } +static void drw_command_draw_indirect(DRWShadingGroup *shgroup, + GPUBatch *batch, + DRWResourceHandle handle, + GPUStorageBuf *indirect_buf) +{ + DRWCommandDrawIndirect *cmd = drw_command_create(shgroup, DRW_CMD_DRAW_INDIRECT); + cmd->batch = batch; + cmd->handle = handle; + cmd->indirect_buf = indirect_buf; +} + static void drw_command_set_select_id(DRWShadingGroup *shgroup, GPUVertBuf *buf, uint select_id) { /* Only one can be valid. 
*/ @@ -1005,6 +1031,7 @@ void DRW_shgroup_call_compute_indirect(DRWShadingGroup *shgroup, GPUStorageBuf * drw_command_compute_indirect(shgroup, indirect_buf); } + void DRW_shgroup_barrier(DRWShadingGroup *shgroup, eGPUBarrier type) { BLI_assert(GPU_compute_shader_support()); @@ -1044,6 +1071,38 @@ void DRW_shgroup_call_procedural_triangles(DRWShadingGroup *shgroup, Object *ob, drw_shgroup_call_procedural_add_ex(shgroup, geom, ob, tri_count * 3); } +void DRW_shgroup_call_procedural_indirect(DRWShadingGroup *shgroup, + GPUPrimType primitive_type, + Object *ob, + GPUStorageBuf *indirect_buf) +{ + struct GPUBatch *geom = NULL; + switch (primitive_type) { + case GPU_PRIM_POINTS: + geom = drw_cache_procedural_points_get(); + break; + case GPU_PRIM_LINES: + geom = drw_cache_procedural_lines_get(); + break; + case GPU_PRIM_TRIS: + geom = drw_cache_procedural_triangles_get(); + break; + case GPU_PRIM_TRI_STRIP: + geom = drw_cache_procedural_triangle_strips_get(); + break; + default: + BLI_assert_msg(0, + "Unsupported primitive type in DRW_shgroup_call_procedural_indirect. Add new " + "one as needed."); + break; + } + if (G.f & G_FLAG_PICKSEL) { + drw_command_set_select_id(shgroup, NULL, DST.select_id); + } + DRWResourceHandle handle = drw_resource_handle(shgroup, ob ? ob->obmat : NULL, ob); + drw_command_draw_indirect(shgroup, geom, handle, indirect_buf); +} + void DRW_shgroup_call_instances(DRWShadingGroup *shgroup, Object *ob, struct GPUBatch *geom, @@ -1129,16 +1188,15 @@ static void sculpt_draw_cb(DRWSculptCallbackData *scd, GPU_PBVH_Buffers *buffers DRW_shgroup_uniform_vec3( shgrp, "materialDiffuseColor", SCULPT_DEBUG_COLOR(scd->debug_node_nr++), 1); } + /* DRW_shgroup_call_no_cull reuses matrices calculations for all the drawcalls of this * object. 
*/ DRW_shgroup_call_no_cull(shgrp, geom, scd->ob); } } -static void sculpt_debug_cb(void *user_data, - const float bmin[3], - const float bmax[3], - PBVHNodeFlags flag) +static void sculpt_debug_cb( + PBVHNode *node, void *user_data, const float bmin[3], const float bmax[3], PBVHNodeFlags flag) { int *debug_node_nr = (int *)user_data; BoundBox bb; @@ -1153,7 +1211,10 @@ static void sculpt_debug_cb(void *user_data, } #else /* Color coded leaf bounds. */ if (flag & PBVH_Leaf) { - DRW_debug_bbox(&bb, SCULPT_DEBUG_COLOR((*debug_node_nr)++)); + int color = (*debug_node_nr)++; + color += BKE_pbvh_debug_draw_gen_get(node); + + DRW_debug_bbox(&bb, SCULPT_DEBUG_COLOR(color)); } #endif } @@ -1246,8 +1307,8 @@ static void drw_sculpt_generate_calls(DRWSculptCallbackData *scd) DRW_debug_modelmat(scd->ob->obmat); BKE_pbvh_draw_debug_cb( pbvh, - (void (*)( - void *d, const float min[3], const float max[3], PBVHNodeFlags f))sculpt_debug_cb, + (void (*)(PBVHNode * n, void *d, const float min[3], const float max[3], PBVHNodeFlags f)) + sculpt_debug_cb, &debug_node_nr); } } @@ -1466,6 +1527,27 @@ static void drw_shgroup_init(DRWShadingGroup *shgroup, GPUShader *shader) shgroup, view_ubo_location, DRW_UNIFORM_BLOCK, G_draw.view_ubo, 0, 0, 1); } +#ifdef DEBUG + int debug_print_location = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_PRINT); + if (debug_print_location != -1) { + GPUStorageBuf *buf = drw_debug_gpu_print_buf_get(); + drw_shgroup_uniform_create_ex( + shgroup, debug_print_location, DRW_UNIFORM_STORAGE_BLOCK, buf, 0, 0, 1); +# ifndef DISABLE_DEBUG_SHADER_PRINT_BARRIER + /* Add a barrier to allow multiple shader writing to the same buffer. 
*/ + DRW_shgroup_barrier(shgroup, GPU_BARRIER_SHADER_STORAGE); +# endif + } + + int debug_draw_location = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_VERTS); + if (debug_draw_location != -1) { + GPUStorageBuf *buf = drw_debug_gpu_draw_buf_get(); + drw_shgroup_uniform_create_ex( + shgroup, debug_draw_location, DRW_UNIFORM_STORAGE_BLOCK, buf, 0, 0, 1); + /* NOTE(fclem): No barrier as ordering is not important. */ + } +#endif + /* Not supported. */ BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODELVIEW_INV) == -1); BLI_assert(GPU_shader_get_builtin_uniform(shader, GPU_UNIFORM_MODELVIEW) == -1); @@ -1556,7 +1638,7 @@ void DRW_shgroup_add_material_resources(DRWShadingGroup *grp, struct GPUMaterial DRW_shgroup_uniform_block(grp, GPU_UBO_BLOCK_NAME, ubo); } - GPUUniformAttrList *uattrs = GPU_material_uniform_attributes(material); + const GPUUniformAttrList *uattrs = GPU_material_uniform_attributes(material); if (uattrs != NULL) { int loc = GPU_shader_get_uniform_block_binding(grp->shader, GPU_ATTRIBUTE_UBO_BLOCK_NAME); drw_shgroup_uniform_create_ex(grp, loc, DRW_UNIFORM_BLOCK_OBATTRS, uattrs, 0, 0, 1); @@ -1942,6 +2024,13 @@ DRWView *DRW_view_create(const float viewmat[4][4], copy_v4_fl4(view->storage.viewcamtexcofac, 1.0f, 1.0f, 0.0f, 0.0f); + if (DST.draw_ctx.evil_C && DST.draw_ctx.region) { + int region_origin[2] = {DST.draw_ctx.region->winrct.xmin, DST.draw_ctx.region->winrct.ymin}; + struct wmWindow *win = CTX_wm_window(DST.draw_ctx.evil_C); + wm_cursor_position_get(win, &view->storage.mouse_pixel[0], &view->storage.mouse_pixel[1]); + sub_v2_v2v2_int(view->storage.mouse_pixel, view->storage.mouse_pixel, region_origin); + } + DRW_view_update(view, viewmat, winmat, culling_viewmat, culling_winmat); return view; @@ -2041,6 +2130,14 @@ void DRW_view_update(DRWView *view, draw_frustum_bound_sphere_calc( &view->frustum_corners, viewinv, winmat, wininv, &view->frustum_bsphere); + /* TODO(fclem): Deduplicate. 
*/ + for (int i = 0; i < 8; i++) { + copy_v3_v3(view->storage.frustum_corners[i], view->frustum_corners.vec[i]); + } + for (int i = 0; i < 6; i++) { + copy_v4_v4(view->storage.frustum_planes[i], view->frustum_planes[i]); + } + #ifdef DRW_DEBUG_CULLING if (G.debug_value != 0) { DRW_debug_sphere( diff --git a/source/blender/draw/intern/draw_manager_exec.c b/source/blender/draw/intern/draw_manager_exec.c index e7e0e0ce41f..0e39cc1d3b9 100644 --- a/source/blender/draw/intern/draw_manager_exec.c +++ b/source/blender/draw/intern/draw_manager_exec.c @@ -318,6 +318,7 @@ void DRW_state_reset(void) DRW_state_reset_ex(DRW_STATE_DEFAULT); GPU_texture_unbind_all(); + GPU_texture_image_unbind_all(); GPU_uniformbuf_unbind_all(); GPU_storagebuf_unbind_all(); @@ -874,6 +875,25 @@ static void draw_call_single_do(DRWShadingGroup *shgroup, state->baseinst_loc); } +/* Not to be mistaken with draw_indirect_call which does batch many drawcalls together. This one + * only execute an indirect drawcall with user indirect buffer. */ +static void draw_call_indirect(DRWShadingGroup *shgroup, + DRWCommandsState *state, + GPUBatch *batch, + DRWResourceHandle handle, + GPUStorageBuf *indirect_buf) +{ + draw_call_batching_flush(shgroup, state); + draw_call_resource_bind(state, &handle); + + if (G.f & G_FLAG_PICKSEL) { + GPU_select_load_id(state->select_id); + } + + GPU_batch_set_shader(batch, shgroup->shader); + GPU_batch_draw_indirect(batch, indirect_buf, 0); +} + static void draw_call_batching_start(DRWCommandsState *state) { state->neg_scale = false; @@ -970,6 +990,7 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state) /* Unbinding can be costly. Skip in normal condition. 
*/ if (G.debug & G_DEBUG_GPU) { GPU_texture_unbind_all(); + GPU_texture_image_unbind_all(); GPU_uniformbuf_unbind_all(); GPU_storagebuf_unbind_all(); } @@ -996,12 +1017,13 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state) while ((cmd = draw_command_iter_step(&iter, &cmd_type))) { switch (cmd_type) { + case DRW_CMD_DRAW_PROCEDURAL: case DRW_CMD_DRWSTATE: case DRW_CMD_STENCIL: draw_call_batching_flush(shgroup, &state); break; case DRW_CMD_DRAW: - case DRW_CMD_DRAW_PROCEDURAL: + case DRW_CMD_DRAW_INDIRECT: case DRW_CMD_DRAW_INSTANCE: if (draw_call_is_culled(&cmd->instance.handle, DST.view_active)) { continue; @@ -1055,6 +1077,13 @@ static void draw_shgroup(DRWShadingGroup *shgroup, DRWState pass_state) 1, true); break; + case DRW_CMD_DRAW_INDIRECT: + draw_call_indirect(shgroup, + &state, + cmd->draw_indirect.batch, + cmd->draw_indirect.handle, + cmd->draw_indirect.indirect_buf); + break; case DRW_CMD_DRAW_INSTANCE: draw_call_single_do(shgroup, &state, diff --git a/source/blender/draw/intern/draw_manager_shader.c b/source/blender/draw/intern/draw_manager_shader.c index 4bc3898c5e7..1ada99093c6 100644 --- a/source/blender/draw/intern/draw_manager_shader.c +++ b/source/blender/draw/intern/draw_manager_shader.c @@ -297,6 +297,18 @@ GPUShader *DRW_shader_create_with_lib_ex(const char *vert, return sh; } +GPUShader *DRW_shader_create_compute_with_shaderlib(const char *comp, + const DRWShaderLibrary *lib, + const char *defines, + const char *name) +{ + char *comp_with_lib = DRW_shader_library_create_shader_string(lib, comp); + GPUShader *sh = GPU_shader_create_compute(comp_with_lib, NULL, defines, name); + MEM_SAFE_FREE(comp_with_lib); + + return sh; +} + GPUShader *DRW_shader_create_with_shaderlib_ex(const char *vert, const char *geom, const char *frag, diff --git a/source/blender/draw/intern/draw_pass.hh b/source/blender/draw/intern/draw_pass.hh new file mode 100644 index 00000000000..e1a0a6652ac --- /dev/null +++ 
b/source/blender/draw/intern/draw_pass.hh @@ -0,0 +1,1005 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * Passes record draw commands. Commands are executed only when a pass is submitted for execution. + * + * `PassMain`: + * Should be used on heavy load passes such as ones that may contain scene objects. Draw call + * submission is optimized for large number of draw calls. But has a significant overhead per + * #Pass. Use many #PassSub along with a main #Pass to reduce the overhead and allow groupings of + * commands. \note The draw call order inside a batch of multiple draw with the exact same state is + * not guaranteed and is not even deterministic. Use a #PassSimple or #PassSortable if ordering is + * needed. \note As of now, it is also quite limited in the type of draw command it can record + * (no custom vertex count, no custom first vertex). + * + * `PassSimple`: + * Does not have the overhead of #PassMain but does not have the culling and batching optimization. + * It should be used for passes that needs a few commands or that needs guaranteed draw call order. + * + * `Pass<T>::Sub`: + * A lightweight #Pass that lives inside a main #Pass. It can only be created from #Pass.sub() + * and is auto managed. This mean it can be created, filled and thrown away. A #PassSub reference + * is valid until the next #Pass.init() of the parent pass. Commands recorded inside a #PassSub are + * inserted inside the parent #Pass where the sub have been created during submission. + * + * `PassSortable`: + * This is a sort of `PassMain` augmented with a per sub-pass sorting value. They can't directly + * contain draw command, everything needs to be inside sub-passes. Sub-passes are automatically + * sorted before submission. + * + * \note A pass can be recorded once and resubmitted any number of time. 
This can be a good + * optimization for passes that are always the same for each frame. The only thing to be aware of + * is the life time of external resources. If a pass contains draw-calls with non default + * #ResourceHandle (not 0) or a reference to any non static resources + * (#GPUBatch, #PushConstant ref, #ResourceBind ref) it will have to be re-recorded + * if any of these reference becomes invalid. + */ + +#include "BKE_image.h" +#include "BLI_vector.hh" +#include "DRW_gpu_wrapper.hh" +#include "GPU_debug.h" +#include "GPU_material.h" + +#include "draw_command.hh" +#include "draw_handle.hh" +#include "draw_manager.hh" +#include "draw_pass.hh" +#include "draw_shader_shared.h" +#include "draw_state.h" + +#include "intern/gpu_codegen.h" + +namespace blender::draw { + +using namespace blender::draw; +using namespace blender::draw::command; + +class Manager; + +/* -------------------------------------------------------------------- */ +/** \name Pass API + * \{ */ + +namespace detail { + +/** + * Special container that never moves allocated items and has fast indexing. + */ +template<typename T, + /** Numbers of element of type T to allocate together. */ + int64_t block_size = 16> +class SubPassVector { + private: + Vector<std::unique_ptr<Vector<T, block_size>>, 0> blocks_; + + public: + void clear() + { + blocks_.clear(); + } + + int64_t append_and_get_index(T &&elem) + { + /* Do not go over the inline size so that existing members never move. */ + if (blocks_.is_empty() || blocks_.last()->size() == block_size) { + blocks_.append(std::make_unique<Vector<T, block_size>>()); + } + return blocks_.last()->append_and_get_index(std::move(elem)) + + (blocks_.size() - 1) * block_size; + } + + T &operator[](int64_t index) + { + return (*blocks_[index / block_size])[index % block_size]; + } + + const T &operator[](int64_t index) const + { + return (*blocks_[index / block_size])[index % block_size]; + } +}; + +/** + * Public API of a draw pass. 
+ */ +template< + /** Type of command buffer used to create the draw calls. */ + typename DrawCommandBufType> +class PassBase { + friend Manager; + + /** Will use texture own sampler state. */ + static constexpr eGPUSamplerState sampler_auto = GPU_SAMPLER_MAX; + + protected: + /** Highest level of the command stream. Split command stream in different command types. */ + Vector<command::Header, 0> headers_; + /** Commands referenced by headers (which contains their types). */ + Vector<command::Undetermined, 0> commands_; + /* Reference to draw commands buffer. Either own or from parent pass. */ + DrawCommandBufType &draw_commands_buf_; + /* Reference to sub-pass commands buffer. Either own or from parent pass. */ + SubPassVector<PassBase<DrawCommandBufType>> &sub_passes_; + /** Currently bound shader. Used for interface queries. */ + GPUShader *shader_; + + public: + const char *debug_name; + + PassBase(const char *name, + DrawCommandBufType &draw_command_buf, + SubPassVector<PassBase<DrawCommandBufType>> &sub_passes, + GPUShader *shader = nullptr) + : draw_commands_buf_(draw_command_buf), + sub_passes_(sub_passes), + shader_(shader), + debug_name(name){}; + + /** + * Reset the pass command pool. + * \note Implemented in derived class. Not a virtual function to avoid indirection. Here only for + * API readability listing. + */ + void init(); + + /** + * Create a sub-pass inside this pass. + */ + PassBase<DrawCommandBufType> &sub(const char *name); + + /** + * Changes the fixed function pipeline state. + * Starts as DRW_STATE_NO_DRAW at the start of a Pass submission. + * SubPass inherit previous pass state. + * + * IMPORTANT: This does not set the stencil mask/reference values. Add a call to state_stencil() + * to ensure correct behavior of stencil aware draws. + */ + void state_set(DRWState state); + + /** + * Clear the current frame-buffer. 
+ */ + void clear_color(float4 color); + void clear_depth(float depth); + void clear_stencil(uint8_t stencil); + void clear_depth_stencil(float depth, uint8_t stencil); + void clear_color_depth_stencil(float4 color, float depth, uint8_t stencil); + + /** + * Reminders: + * - (compare_mask & reference) is what is tested against (compare_mask & stencil_value) + * stencil_value being the value stored in the stencil buffer. + * - (write-mask & reference) is what gets written if the test condition is fulfilled. + */ + void state_stencil(uint8_t write_mask, uint8_t reference, uint8_t compare_mask); + + /** + * Bind a shader. Any following bind() or push_constant() call will use its interface. + */ + void shader_set(GPUShader *shader); + + /** + * Bind a material shader along with its associated resources. Any following bind() or + * push_constant() call will use its interface. + * IMPORTANT: Assumes material is compiled and can be used (no compilation error). + */ + void material_set(Manager &manager, GPUMaterial *material); + + /** + * Record a draw call. + * \note Setting the count or first to -1 will use the values from the batch. + * \note An instance or vertex count of 0 will discard the draw call. It will not be recorded. + */ + void draw(GPUBatch *batch, + uint instance_len = -1, + uint vertex_len = -1, + uint vertex_first = -1, + ResourceHandle handle = {0}); + + /** + * Shorter version for the common case. + * \note Implemented in derived class. Not a virtual function to avoid indirection. + */ + void draw(GPUBatch *batch, ResourceHandle handle); + + /** + * Record a procedural draw call. Geometry is **NOT** source from a GPUBatch. + * \note An instance or vertex count of 0 will discard the draw call. It will not be recorded. + */ + void draw_procedural(GPUPrimType primitive, + uint instance_len, + uint vertex_len, + uint vertex_first = -1, + ResourceHandle handle = {0}); + + /** + * Indirect variants. 
+ * \note If needed, the resource id need to also be set accordingly in the DrawCommand. + */ + void draw_indirect(GPUBatch *batch, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle = {0}); + void draw_procedural_indirect(GPUPrimType primitive, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle = {0}); + + /** + * Record a compute dispatch call. + */ + void dispatch(int3 group_len); + void dispatch(int3 *group_len); + void dispatch(StorageBuffer<DispatchCommand> &indirect_buffer); + + /** + * Record a barrier call to synchronize arbitrary load/store operation between draw calls. + */ + void barrier(eGPUBarrier type); + + /** + * Bind a shader resource. + * + * Reference versions are to be used when the resource might be resize / realloc or even change + * between the time it is referenced and the time it is dereferenced for drawing. + * + * IMPORTANT: Will keep a reference to the data and dereference it upon drawing. Make sure data + * still alive until pass submission. + * + * \note Variations using slot will not query a shader interface and can be used before + * binding a shader. 
+ */ + void bind_image(const char *name, GPUTexture *image); + void bind_image(const char *name, GPUTexture **image); + void bind_image(int slot, GPUTexture *image); + void bind_image(int slot, GPUTexture **image); + void bind_texture(const char *name, GPUTexture *texture, eGPUSamplerState state = sampler_auto); + void bind_texture(const char *name, GPUTexture **texture, eGPUSamplerState state = sampler_auto); + void bind_texture(int slot, GPUTexture *texture, eGPUSamplerState state = sampler_auto); + void bind_texture(int slot, GPUTexture **texture, eGPUSamplerState state = sampler_auto); + void bind_ssbo(const char *name, GPUStorageBuf *buffer); + void bind_ssbo(const char *name, GPUStorageBuf **buffer); + void bind_ssbo(int slot, GPUStorageBuf *buffer); + void bind_ssbo(int slot, GPUStorageBuf **buffer); + void bind_ubo(const char *name, GPUUniformBuf *buffer); + void bind_ubo(const char *name, GPUUniformBuf **buffer); + void bind_ubo(int slot, GPUUniformBuf *buffer); + void bind_ubo(int slot, GPUUniformBuf **buffer); + + /** + * Update a shader constant. + * + * Reference versions are to be used when the resource might change between the time it is + * referenced and the time it is dereferenced for drawing. + * + * IMPORTANT: Will keep a reference to the data and dereference it upon drawing. Make sure data + * still alive until pass submission. + * + * \note bool reference version is expected to take bool1 reference which is aliased to int. 
+ */ + void push_constant(const char *name, const float &data); + void push_constant(const char *name, const float2 &data); + void push_constant(const char *name, const float3 &data); + void push_constant(const char *name, const float4 &data); + void push_constant(const char *name, const int &data); + void push_constant(const char *name, const int2 &data); + void push_constant(const char *name, const int3 &data); + void push_constant(const char *name, const int4 &data); + void push_constant(const char *name, const bool &data); + void push_constant(const char *name, const float4x4 &data); + void push_constant(const char *name, const float *data, int array_len = 1); + void push_constant(const char *name, const float2 *data, int array_len = 1); + void push_constant(const char *name, const float3 *data, int array_len = 1); + void push_constant(const char *name, const float4 *data, int array_len = 1); + void push_constant(const char *name, const int *data, int array_len = 1); + void push_constant(const char *name, const int2 *data, int array_len = 1); + void push_constant(const char *name, const int3 *data, int array_len = 1); + void push_constant(const char *name, const int4 *data, int array_len = 1); + void push_constant(const char *name, const float4x4 *data); + + /** + * Turn the pass into a string for inspection. + */ + std::string serialize(std::string line_prefix = "") const; + + friend std::ostream &operator<<(std::ostream &stream, const PassBase &pass) + { + return stream << pass.serialize(); + } + + protected: + /** + * Internal Helpers + */ + + int push_constant_offset(const char *name); + + void clear(eGPUFrameBufferBits planes, float4 color, float depth, uint8_t stencil); + + GPUBatch *procedural_batch_get(GPUPrimType primitive); + + /** + * Return a new command recorded with the given type. 
+ */ + command::Undetermined &create_command(command::Type type); + + void submit(command::RecordingState &state) const; +}; + +template<typename DrawCommandBufType> class Pass : public detail::PassBase<DrawCommandBufType> { + public: + using Sub = detail::PassBase<DrawCommandBufType>; + + private: + /** Sub-passes referenced by headers. */ + SubPassVector<detail::PassBase<DrawCommandBufType>> sub_passes_main_; + /** Draws are recorded as indirect draws for compatibility with the multi-draw pipeline. */ + DrawCommandBufType draw_commands_buf_main_; + + public: + Pass(const char *name) + : detail::PassBase<DrawCommandBufType>(name, draw_commands_buf_main_, sub_passes_main_){}; + + void init() + { + this->headers_.clear(); + this->commands_.clear(); + this->sub_passes_.clear(); + this->draw_commands_buf_.clear(); + } +}; // namespace blender::draw + +} // namespace detail + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Pass types + * \{ */ + +/** + * Normal pass type. No visibility or draw-call optimization. + */ +// using PassSimple = detail::Pass<DrawCommandBuf>; + +/** + * Main pass type. + * Optimized for many draw calls and sub-pass. + * + * IMPORTANT: To be used only for passes containing lots of draw calls since it has a potentially + * high overhead due to batching and culling optimizations. + */ +// using PassMain = detail::Pass<DrawMultiBuf>; + +/** + * Special pass type for rendering transparent objects. + * The base level can only be composed of sub passes that will be ordered by a sorting value. + */ +class PassSortable : public PassMain { + friend Manager; + + private: + /** Sorting value associated with each sub pass. 
*/ + Vector<float> sorting_values_; + + bool sorted_ = false; + + public: + PassSortable(const char *name_) : PassMain(name_){}; + + void init() + { + sorting_values_.clear(); + sorted_ = false; + PassMain::init(); + } + + PassMain::Sub &sub(const char *name, float sorting_value) + { + int64_t index = sub_passes_.append_and_get_index( + PassBase(name, draw_commands_buf_, sub_passes_, shader_)); + headers_.append({Type::SubPass, static_cast<uint>(index)}); + sorting_values_.append(sorting_value); + return sub_passes_[index]; + } + + std::string serialize(std::string line_prefix = "") const + { + if (sorted_ == false) { + const_cast<PassSortable *>(this)->sort(); + } + return PassMain::serialize(line_prefix); + } + + protected: + void sort() + { + if (sorted_ == false) { + std::sort(headers_.begin(), headers_.end(), [&](Header &a, Header &b) { + BLI_assert(a.type == Type::SubPass && b.type == Type::SubPass); + float a_val = sorting_values_[a.index]; + float b_val = sorting_values_[b.index]; + return a_val < b_val || (a_val == b_val && a.index < b.index); + }); + sorted_ = true; + } + } +}; + +/** \} */ + +namespace detail { + +/* -------------------------------------------------------------------- */ +/** \name PassBase Implementation + * \{ */ + +template<class T> inline command::Undetermined &PassBase<T>::create_command(command::Type type) +{ + int64_t index = commands_.append_and_get_index({}); + headers_.append({type, static_cast<uint>(index)}); + return commands_[index]; +} + +template<class T> +inline void PassBase<T>::clear(eGPUFrameBufferBits planes, + float4 color, + float depth, + uint8_t stencil) +{ + create_command(command::Type::Clear).clear = {(uint8_t)planes, stencil, depth, color}; +} + +template<class T> inline GPUBatch *PassBase<T>::procedural_batch_get(GPUPrimType primitive) +{ + switch (primitive) { + case GPU_PRIM_POINTS: + return drw_cache_procedural_points_get(); + case GPU_PRIM_LINES: + return drw_cache_procedural_lines_get(); + case 
GPU_PRIM_TRIS: + return drw_cache_procedural_triangles_get(); + case GPU_PRIM_TRI_STRIP: + return drw_cache_procedural_triangle_strips_get(); + default: + /* Add new one as needed. */ + BLI_assert_unreachable(); + return nullptr; + } +} + +template<class T> inline PassBase<T> &PassBase<T>::sub(const char *name) +{ + int64_t index = sub_passes_.append_and_get_index( + PassBase(name, draw_commands_buf_, sub_passes_, shader_)); + headers_.append({command::Type::SubPass, static_cast<uint>(index)}); + return sub_passes_[index]; +} + +template<class T> void PassBase<T>::submit(command::RecordingState &state) const +{ + GPU_debug_group_begin(debug_name); + + for (const command::Header &header : headers_) { + switch (header.type) { + default: + case Type::None: + break; + case Type::SubPass: + sub_passes_[header.index].submit(state); + break; + case command::Type::ShaderBind: + commands_[header.index].shader_bind.execute(state); + break; + case command::Type::ResourceBind: + commands_[header.index].resource_bind.execute(); + break; + case command::Type::PushConstant: + commands_[header.index].push_constant.execute(state); + break; + case command::Type::Draw: + commands_[header.index].draw.execute(state); + break; + case command::Type::DrawMulti: + commands_[header.index].draw_multi.execute(state); + break; + case command::Type::DrawIndirect: + commands_[header.index].draw_indirect.execute(state); + break; + case command::Type::Dispatch: + commands_[header.index].dispatch.execute(state); + break; + case command::Type::DispatchIndirect: + commands_[header.index].dispatch_indirect.execute(state); + break; + case command::Type::Barrier: + commands_[header.index].barrier.execute(); + break; + case command::Type::Clear: + commands_[header.index].clear.execute(); + break; + case command::Type::StateSet: + commands_[header.index].state_set.execute(state); + break; + case command::Type::StencilSet: + commands_[header.index].stencil_set.execute(); + break; + } + } + + 
GPU_debug_group_end(); +} + +template<class T> std::string PassBase<T>::serialize(std::string line_prefix) const +{ + std::stringstream ss; + ss << line_prefix << "." << debug_name << std::endl; + line_prefix += " "; + for (const command::Header &header : headers_) { + switch (header.type) { + default: + case Type::None: + break; + case Type::SubPass: + ss << sub_passes_[header.index].serialize(line_prefix); + break; + case Type::ShaderBind: + ss << line_prefix << commands_[header.index].shader_bind.serialize() << std::endl; + break; + case Type::ResourceBind: + ss << line_prefix << commands_[header.index].resource_bind.serialize() << std::endl; + break; + case Type::PushConstant: + ss << line_prefix << commands_[header.index].push_constant.serialize() << std::endl; + break; + case Type::Draw: + ss << line_prefix << commands_[header.index].draw.serialize() << std::endl; + break; + case Type::DrawMulti: + ss << commands_[header.index].draw_multi.serialize(line_prefix); + break; + case Type::DrawIndirect: + ss << line_prefix << commands_[header.index].draw_indirect.serialize() << std::endl; + break; + case Type::Dispatch: + ss << line_prefix << commands_[header.index].dispatch.serialize() << std::endl; + break; + case Type::DispatchIndirect: + ss << line_prefix << commands_[header.index].dispatch_indirect.serialize() << std::endl; + break; + case Type::Barrier: + ss << line_prefix << commands_[header.index].barrier.serialize() << std::endl; + break; + case Type::Clear: + ss << line_prefix << commands_[header.index].clear.serialize() << std::endl; + break; + case Type::StateSet: + ss << line_prefix << commands_[header.index].state_set.serialize() << std::endl; + break; + case Type::StencilSet: + ss << line_prefix << commands_[header.index].stencil_set.serialize() << std::endl; + break; + } + } + return ss.str(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw calls + * \{ */ + +template<class T> +inline 
void PassBase<T>::draw( + GPUBatch *batch, uint instance_len, uint vertex_len, uint vertex_first, ResourceHandle handle) +{ + if (instance_len == 0 || vertex_len == 0) { + return; + } + BLI_assert(shader_); + draw_commands_buf_.append_draw( + headers_, commands_, batch, instance_len, vertex_len, vertex_first, handle); +} + +template<class T> inline void PassBase<T>::draw(GPUBatch *batch, ResourceHandle handle) +{ + this->draw(batch, -1, -1, -1, handle); +} + +template<class T> +inline void PassBase<T>::draw_procedural(GPUPrimType primitive, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) +{ + this->draw(procedural_batch_get(primitive), instance_len, vertex_len, vertex_first, handle); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Indirect draw calls + * \{ */ + +template<class T> +inline void PassBase<T>::draw_indirect(GPUBatch *batch, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle) +{ + BLI_assert(shader_); + create_command(Type::DrawIndirect).draw_indirect = {batch, &indirect_buffer, handle}; +} + +template<class T> +inline void PassBase<T>::draw_procedural_indirect( + GPUPrimType primitive, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle) +{ + this->draw_indirect(procedural_batch_get(primitive), indirect_buffer, handle); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Compute Dispatch Implementation + * \{ */ + +template<class T> inline void PassBase<T>::dispatch(int3 group_len) +{ + BLI_assert(shader_); + create_command(Type::Dispatch).dispatch = {group_len}; +} + +template<class T> inline void PassBase<T>::dispatch(int3 *group_len) +{ + BLI_assert(shader_); + create_command(Type::Dispatch).dispatch = {group_len}; +} + +template<class T> +inline void PassBase<T>::dispatch(StorageBuffer<DispatchCommand> &indirect_buffer) +{ + BLI_assert(shader_); + 
create_command(Type::DispatchIndirect).dispatch_indirect = {&indirect_buffer}; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Clear Implementation + * \{ */ + +template<class T> inline void PassBase<T>::clear_color(float4 color) +{ + this->clear(GPU_COLOR_BIT, color, 0.0f, 0); +} + +template<class T> inline void PassBase<T>::clear_depth(float depth) +{ + this->clear(GPU_DEPTH_BIT, float4(0.0f), depth, 0); +} + +template<class T> inline void PassBase<T>::clear_stencil(uint8_t stencil) +{ + this->clear(GPU_STENCIL_BIT, float4(0.0f), 0.0f, stencil); +} + +template<class T> inline void PassBase<T>::clear_depth_stencil(float depth, uint8_t stencil) +{ + this->clear(GPU_DEPTH_BIT | GPU_STENCIL_BIT, float4(0.0f), depth, stencil); +} + +template<class T> +inline void PassBase<T>::clear_color_depth_stencil(float4 color, float depth, uint8_t stencil) +{ + this->clear(GPU_DEPTH_BIT | GPU_STENCIL_BIT | GPU_COLOR_BIT, color, depth, stencil); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Barrier Implementation + * \{ */ + +template<class T> inline void PassBase<T>::barrier(eGPUBarrier type) +{ + create_command(Type::Barrier).barrier = {type}; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name State Implementation + * \{ */ + +template<class T> inline void PassBase<T>::state_set(DRWState state) +{ + create_command(Type::StateSet).state_set = {state}; +} + +template<class T> +inline void PassBase<T>::state_stencil(uint8_t write_mask, uint8_t reference, uint8_t compare_mask) +{ + create_command(Type::StencilSet).stencil_set = {write_mask, reference, compare_mask}; +} + +template<class T> inline void PassBase<T>::shader_set(GPUShader *shader) +{ + shader_ = shader; + create_command(Type::ShaderBind).shader_bind = {shader}; +} + +template<class T> inline void PassBase<T>::material_set(Manager &manager, GPUMaterial 
*material) +{ + GPUPass *gpupass = GPU_material_get_pass(material); + shader_set(GPU_pass_shader_get(gpupass)); + + /* Bind all textures needed by the material. */ + ListBase textures = GPU_material_textures(material); + for (GPUMaterialTexture *tex : ListBaseWrapper<GPUMaterialTexture>(textures)) { + if (tex->ima) { + /* Image */ + ImageUser *iuser = tex->iuser_available ? &tex->iuser : nullptr; + if (tex->tiled_mapping_name[0]) { + GPUTexture *tiles = BKE_image_get_gpu_tiles(tex->ima, iuser, nullptr); + manager.acquire_texture(tiles); + bind_texture(tex->sampler_name, tiles, (eGPUSamplerState)tex->sampler_state); + + GPUTexture *tile_map = BKE_image_get_gpu_tilemap(tex->ima, iuser, nullptr); + manager.acquire_texture(tile_map); + bind_texture(tex->tiled_mapping_name, tile_map, (eGPUSamplerState)tex->sampler_state); + } + else { + GPUTexture *texture = BKE_image_get_gpu_texture(tex->ima, iuser, nullptr); + manager.acquire_texture(texture); + bind_texture(tex->sampler_name, texture, (eGPUSamplerState)tex->sampler_state); + } + } + else if (tex->colorband) { + /* Color Ramp */ + bind_texture(tex->sampler_name, *tex->colorband); + } + } + + GPUUniformBuf *ubo = GPU_material_uniform_buffer_get(material); + if (ubo != nullptr) { + bind_ubo(GPU_UBO_BLOCK_NAME, ubo); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Resource bind Implementation + * \{ */ + +template<class T> inline int PassBase<T>::push_constant_offset(const char *name) +{ + return GPU_shader_get_uniform(shader_, name); +} + +template<class T> inline void PassBase<T>::bind_ssbo(const char *name, GPUStorageBuf *buffer) +{ + this->bind_ssbo(GPU_shader_get_ssbo(shader_, name), buffer); +} + +template<class T> inline void PassBase<T>::bind_ubo(const char *name, GPUUniformBuf *buffer) +{ + this->bind_ubo(GPU_shader_get_uniform_block_binding(shader_, name), buffer); +} + +template<class T> +inline void PassBase<T>::bind_texture(const char *name, + 
GPUTexture *texture, + eGPUSamplerState state) +{ + this->bind_texture(GPU_shader_get_texture_binding(shader_, name), texture, state); +} + +template<class T> inline void PassBase<T>::bind_image(const char *name, GPUTexture *image) +{ + this->bind_image(GPU_shader_get_texture_binding(shader_, name), image); +} + +template<class T> inline void PassBase<T>::bind_ssbo(int slot, GPUStorageBuf *buffer) +{ + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> inline void PassBase<T>::bind_ubo(int slot, GPUUniformBuf *buffer) +{ + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> +inline void PassBase<T>::bind_texture(int slot, GPUTexture *texture, eGPUSamplerState state) +{ + create_command(Type::ResourceBind).resource_bind = {slot, texture, state}; +} + +template<class T> inline void PassBase<T>::bind_image(int slot, GPUTexture *image) +{ + create_command(Type::ResourceBind).resource_bind = {slot, as_image(image)}; +} + +template<class T> inline void PassBase<T>::bind_ssbo(const char *name, GPUStorageBuf **buffer) +{ + this->bind_ssbo(GPU_shader_get_ssbo(shader_, name), buffer); +} + +template<class T> inline void PassBase<T>::bind_ubo(const char *name, GPUUniformBuf **buffer) +{ + this->bind_ubo(GPU_shader_get_uniform_block_binding(shader_, name), buffer); +} + +template<class T> +inline void PassBase<T>::bind_texture(const char *name, + GPUTexture **texture, + eGPUSamplerState state) +{ + this->bind_texture(GPU_shader_get_texture_binding(shader_, name), texture, state); +} + +template<class T> inline void PassBase<T>::bind_image(const char *name, GPUTexture **image) +{ + this->bind_image(GPU_shader_get_texture_binding(shader_, name), image); +} + +template<class T> inline void PassBase<T>::bind_ssbo(int slot, GPUStorageBuf **buffer) +{ + + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> inline void PassBase<T>::bind_ubo(int slot, GPUUniformBuf 
**buffer) +{ + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> +inline void PassBase<T>::bind_texture(int slot, GPUTexture **texture, eGPUSamplerState state) +{ + create_command(Type::ResourceBind).resource_bind = {slot, texture, state}; +} + +template<class T> inline void PassBase<T>::bind_image(int slot, GPUTexture **image) +{ + create_command(Type::ResourceBind).resource_bind = {slot, as_image(image)}; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Push Constant Implementation + * \{ */ + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float2 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float3 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float4 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const int &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const int2 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const int3 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void 
PassBase<T>::push_constant(const char *name, const int4 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const bool &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float2 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float3 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float4 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int2 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int3 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int4 *data, int array_len) +{ + 
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float4x4 *data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float4x4 &data) +{ + /* WORKAROUND: Push 3 consecutive commands to hold the 64 bytes of the float4x4. + * This assumes that all commands are always stored in flat array of memory. */ + Undetermined commands[3]; + + PushConstant &cmd = commands[0].push_constant; + cmd.location = push_constant_offset(name); + cmd.array_len = 1; + cmd.comp_len = 16; + cmd.type = PushConstant::Type::FloatValue; + /* Copy overrides the next 2 commands. We append them as Type::None to not evaluate them. */ + *reinterpret_cast<float4x4 *>(&cmd.float4_value) = data; + + create_command(Type::PushConstant) = commands[0]; + create_command(Type::None) = commands[1]; + create_command(Type::None) = commands[2]; +} + +/** \} */ + +} // namespace detail + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_resource.cc b/source/blender/draw/intern/draw_resource.cc new file mode 100644 index 00000000000..689df4edb31 --- /dev/null +++ b/source/blender/draw/intern/draw_resource.cc @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#include "DNA_particle_types.h" +#include "RNA_access.h" +#include "RNA_path.h" +#include "RNA_types.h" + +#include "draw_handle.hh" +#include "draw_manager.hh" +#include "draw_shader_shared.h" + +/* -------------------------------------------------------------------- */ +/** \name ObjectAttributes + * \{ */ + +/** + * Extract object attribute from RNA property. + * Returns true if the attribute was correctly extracted. 
+ * This function mirrors lookup_property in cycles/blender/blender_object.cpp + */ +bool ObjectAttribute::id_property_lookup(ID *id, const char *name) +{ + PointerRNA ptr, id_ptr; + PropertyRNA *prop; + + if (id == nullptr) { + return false; + } + + RNA_id_pointer_create(id, &id_ptr); + + if (!RNA_path_resolve(&id_ptr, name, &ptr, &prop)) { + return false; + } + + if (prop == nullptr) { + return false; + } + + PropertyType type = RNA_property_type(prop); + int array_len = RNA_property_array_length(&ptr, prop); + + if (array_len == 0) { + float value; + + if (type == PROP_FLOAT) { + value = RNA_property_float_get(&ptr, prop); + } + else if (type == PROP_INT) { + value = RNA_property_int_get(&ptr, prop); + } + else { + return false; + } + + *reinterpret_cast<float4 *>(&data_x) = float4(value, value, value, 1.0f); + return true; + } + + if (type == PROP_FLOAT && array_len <= 4) { + *reinterpret_cast<float4 *>(&data_x) = float4(0.0f, 0.0f, 0.0f, 1.0f); + RNA_property_float_get_array(&ptr, prop, &data_x); + return true; + } + return false; +} + +/** + * Go through all possible source of the given object uniform attribute. + * Returns true if the attribute was correctly filled. + * This function mirrors lookup_instance_property in cycles/blender/blender_object.cpp + */ +bool ObjectAttribute::sync(const blender::draw::ObjectRef &ref, const GPUUniformAttr &attr) +{ + hash_code = attr.hash_code; + + /* If requesting instance data, check the parent particle system and object. 
*/ + if (attr.use_dupli) { + if ((ref.dupli_object != nullptr) && (ref.dupli_object->particle_system != nullptr)) { + ParticleSettings *settings = ref.dupli_object->particle_system->part; + if (this->id_property_lookup((ID *)settings, attr.name_id_prop) || + this->id_property_lookup((ID *)settings, attr.name)) { + return true; + } + } + if (this->id_property_lookup((ID *)ref.dupli_parent, attr.name_id_prop) || + this->id_property_lookup((ID *)ref.dupli_parent, attr.name)) { + return true; + } + } + + /* Check the object and mesh. */ + if (ref.object != nullptr) { + if (this->id_property_lookup((ID *)ref.object, attr.name_id_prop) || + this->id_property_lookup((ID *)ref.object, attr.name) || + this->id_property_lookup((ID *)ref.object->data, attr.name_id_prop) || + this->id_property_lookup((ID *)ref.object->data, attr.name)) { + return true; + } + } + return false; +} + +/** \} */ diff --git a/source/blender/draw/intern/draw_resource.hh b/source/blender/draw/intern/draw_resource.hh new file mode 100644 index 00000000000..2df38e32ed2 --- /dev/null +++ b/source/blender/draw/intern/draw_resource.hh @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * Component / Object level resources like object attributes, matrices, visibility etc... + * Each of them are reference by resource index (#ResourceHandle). 
+ */ + +#include "BKE_curve.h" +#include "BKE_duplilist.h" +#include "BKE_mesh.h" +#include "BKE_object.h" +#include "BKE_volume.h" +#include "BLI_hash.h" +#include "DNA_curve_types.h" +#include "DNA_layer_types.h" +#include "DNA_meta_types.h" +#include "DNA_object_types.h" + +#include "draw_handle.hh" +#include "draw_manager.hh" +#include "draw_shader_shared.h" + +/* -------------------------------------------------------------------- */ +/** \name ObjectMatrices + * \{ */ + +inline void ObjectMatrices::sync(const Object &object) +{ + model = object.obmat; + model_inverse = object.imat; +} + +inline void ObjectMatrices::sync(const float4x4 &model_matrix) +{ + model = model_matrix; + model_inverse = model_matrix.inverted(); +} + +inline std::ostream &operator<<(std::ostream &stream, const ObjectMatrices &matrices) +{ + stream << "ObjectMatrices(" << std::endl; + stream << "model=" << matrices.model << ", " << std::endl; + stream << "model_inverse=" << matrices.model_inverse << ")" << std::endl; + return stream; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name ObjectInfos + * \{ */ + +ENUM_OPERATORS(eObjectInfoFlag, OBJECT_NEGATIVE_SCALE) + +inline void ObjectInfos::sync() +{ + object_attrs_len = 0; + object_attrs_offset = 0; + + flag = eObjectInfoFlag::OBJECT_NO_INFO; +} + +inline void ObjectInfos::sync(const blender::draw::ObjectRef ref, bool is_active_object) +{ + object_attrs_len = 0; + object_attrs_offset = 0; + + color = ref.object->color; + index = ref.object->index; + SET_FLAG_FROM_TEST(flag, is_active_object, eObjectInfoFlag::OBJECT_ACTIVE); + SET_FLAG_FROM_TEST( + flag, ref.object->base_flag & BASE_SELECTED, eObjectInfoFlag::OBJECT_SELECTED); + SET_FLAG_FROM_TEST( + flag, ref.object->base_flag & BASE_FROM_DUPLI, eObjectInfoFlag::OBJECT_FROM_DUPLI); + SET_FLAG_FROM_TEST( + flag, ref.object->base_flag & BASE_FROM_SET, eObjectInfoFlag::OBJECT_FROM_SET); + SET_FLAG_FROM_TEST( + flag, ref.object->transflag 
& OB_NEG_SCALE, eObjectInfoFlag::OBJECT_NEGATIVE_SCALE); + + if (ref.dupli_object == nullptr) { + /* TODO(fclem): this is rather costly to do at draw time. Maybe we can + * put it in ob->runtime and make depsgraph ensure it is up to date. */ + random = BLI_hash_int_2d(BLI_hash_string(ref.object->id.name + 2), 0) * + (1.0f / (float)0xFFFFFFFF); + } + else { + random = ref.dupli_object->random_id * (1.0f / (float)0xFFFFFFFF); + } + /* Default values. Set if needed. */ + random = 0.0f; + + if (ref.object->data == nullptr) { + orco_add = float3(0.0f); + orco_mul = float3(1.0f); + return; + } + + switch (GS(reinterpret_cast<ID *>(ref.object->data)->name)) { + case ID_VO: { + BoundBox &bbox = *BKE_volume_boundbox_get(ref.object); + orco_add = (float3(bbox.vec[6]) + float3(bbox.vec[0])) * 0.5f; /* Center. */ + orco_mul = float3(bbox.vec[6]) - float3(bbox.vec[0]); /* Size. */ + break; + } + case ID_ME: { + BKE_mesh_texspace_get(static_cast<Mesh *>(ref.object->data), orco_add, orco_mul); + break; + } + case ID_CU_LEGACY: { + Curve &cu = *static_cast<Curve *>(ref.object->data); + BKE_curve_texspace_ensure(&cu); + orco_add = cu.loc; + orco_mul = cu.size; + break; + } + case ID_MB: { + MetaBall &mb = *static_cast<MetaBall *>(ref.object->data); + orco_add = mb.loc; + orco_mul = mb.size; + break; + } + default: + orco_add = float3(0.0f); + orco_mul = float3(1.0f); + break; + } +} + +inline std::ostream &operator<<(std::ostream &stream, const ObjectInfos &infos) +{ + stream << "ObjectInfos("; + if (infos.flag == eObjectInfoFlag::OBJECT_NO_INFO) { + stream << "skipped)" << std::endl; + return stream; + } + stream << "orco_add=" << infos.orco_add << ", "; + stream << "orco_mul=" << infos.orco_mul << ", "; + stream << "color=" << infos.color << ", "; + stream << "index=" << infos.index << ", "; + stream << "random=" << infos.random << ", "; + stream << "flag=" << infos.flag << ")" << std::endl; + return stream; +} + +/** \} */ + +/* 
-------------------------------------------------------------------- */ +/** \name ObjectBounds + * \{ */ + +inline void ObjectBounds::sync() +{ + bounding_sphere.w = -1.0f; /* Disable test. */ +} + +inline void ObjectBounds::sync(Object &ob) +{ + const BoundBox *bbox = BKE_object_boundbox_get(&ob); + if (bbox == nullptr) { + bounding_sphere.w = -1.0f; /* Disable test. */ + return; + } + *reinterpret_cast<float3 *>(&bounding_corners[0]) = bbox->vec[0]; + *reinterpret_cast<float3 *>(&bounding_corners[1]) = bbox->vec[4]; + *reinterpret_cast<float3 *>(&bounding_corners[2]) = bbox->vec[3]; + *reinterpret_cast<float3 *>(&bounding_corners[3]) = bbox->vec[1]; + bounding_sphere.w = 0.0f; /* Enable test. */ +} + +inline void ObjectBounds::sync(const float3 ¢er, const float3 &size) +{ + *reinterpret_cast<float3 *>(&bounding_corners[0]) = center - size; + *reinterpret_cast<float3 *>(&bounding_corners[1]) = center + float3(+size.x, -size.y, -size.z); + *reinterpret_cast<float3 *>(&bounding_corners[2]) = center + float3(-size.x, +size.y, -size.z); + *reinterpret_cast<float3 *>(&bounding_corners[3]) = center + float3(-size.x, -size.y, +size.z); + bounding_sphere.w = 0.0; /* Enable test. 
*/ +} + +inline std::ostream &operator<<(std::ostream &stream, const ObjectBounds &bounds) +{ + stream << "ObjectBounds("; + if (bounds.bounding_sphere.w == -1.0f) { + stream << "skipped)" << std::endl; + return stream; + } + stream << std::endl; + stream << ".bounding_corners[0]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[0]) << std::endl; + stream << ".bounding_corners[1]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[1]) << std::endl; + stream << ".bounding_corners[2]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[2]) << std::endl; + stream << ".bounding_corners[3]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[3]) << std::endl; + stream << ".sphere=(pos=" << float3(bounds.bounding_sphere) + << ", rad=" << bounds.bounding_sphere.w << std::endl; + stream << ")" << std::endl; + return stream; +} + +/** \} */ diff --git a/source/blender/draw/intern/draw_shader.cc b/source/blender/draw/intern/draw_shader.cc index 001ceb0ae8d..960348b4a94 100644 --- a/source/blender/draw/intern/draw_shader.cc +++ b/source/blender/draw/intern/draw_shader.cc @@ -17,13 +17,15 @@ #include "draw_shader.h" extern "C" char datatoc_common_hair_lib_glsl[]; - extern "C" char datatoc_common_hair_refine_vert_glsl[]; -extern "C" char datatoc_common_hair_refine_comp_glsl[]; -extern "C" char datatoc_gpu_shader_3D_smooth_color_frag_glsl[]; static struct { struct GPUShader *hair_refine_sh[PART_REFINE_MAX_SHADER]; + struct GPUShader *debug_print_display_sh; + struct GPUShader *debug_draw_display_sh; + struct GPUShader *draw_visibility_compute_sh; + struct GPUShader *draw_resource_finalize_sh; + struct GPUShader *draw_command_generate_sh; } e_data = {{nullptr}}; /* -------------------------------------------------------------------- */ @@ -109,6 +111,47 @@ GPUShader *DRW_shader_curves_refine_get(CurvesEvalShader type, eParticleRefineSh return e_data.hair_refine_sh[type]; } +GPUShader *DRW_shader_debug_print_display_get() +{ + if 
(e_data.debug_print_display_sh == nullptr) { + e_data.debug_print_display_sh = GPU_shader_create_from_info_name("draw_debug_print_display"); + } + return e_data.debug_print_display_sh; +} + +GPUShader *DRW_shader_debug_draw_display_get() +{ + if (e_data.debug_draw_display_sh == nullptr) { + e_data.debug_draw_display_sh = GPU_shader_create_from_info_name("draw_debug_draw_display"); + } + return e_data.debug_draw_display_sh; +} + +GPUShader *DRW_shader_draw_visibility_compute_get() +{ + if (e_data.draw_visibility_compute_sh == nullptr) { + e_data.draw_visibility_compute_sh = GPU_shader_create_from_info_name( + "draw_visibility_compute"); + } + return e_data.draw_visibility_compute_sh; +} + +GPUShader *DRW_shader_draw_resource_finalize_get() +{ + if (e_data.draw_resource_finalize_sh == nullptr) { + e_data.draw_resource_finalize_sh = GPU_shader_create_from_info_name("draw_resource_finalize"); + } + return e_data.draw_resource_finalize_sh; +} + +GPUShader *DRW_shader_draw_command_generate_get() +{ + if (e_data.draw_command_generate_sh == nullptr) { + e_data.draw_command_generate_sh = GPU_shader_create_from_info_name("draw_command_generate"); + } + return e_data.draw_command_generate_sh; +} + /** \} */ void DRW_shaders_free() @@ -116,4 +159,9 @@ void DRW_shaders_free() for (int i = 0; i < PART_REFINE_MAX_SHADER; i++) { DRW_SHADER_FREE_SAFE(e_data.hair_refine_sh[i]); } + DRW_SHADER_FREE_SAFE(e_data.debug_print_display_sh); + DRW_SHADER_FREE_SAFE(e_data.debug_draw_display_sh); + DRW_SHADER_FREE_SAFE(e_data.draw_visibility_compute_sh); + DRW_SHADER_FREE_SAFE(e_data.draw_resource_finalize_sh); + DRW_SHADER_FREE_SAFE(e_data.draw_command_generate_sh); } diff --git a/source/blender/draw/intern/draw_shader.h b/source/blender/draw/intern/draw_shader.h index 63d755cc334..3b8c0425fa9 100644 --- a/source/blender/draw/intern/draw_shader.h +++ b/source/blender/draw/intern/draw_shader.h @@ -30,6 +30,12 @@ struct GPUShader *DRW_shader_hair_refine_get(ParticleRefineShader refinement, 
struct GPUShader *DRW_shader_curves_refine_get(CurvesEvalShader type, eParticleRefineShaderType sh_type); +struct GPUShader *DRW_shader_debug_print_display_get(void); +struct GPUShader *DRW_shader_debug_draw_display_get(void); +struct GPUShader *DRW_shader_draw_visibility_compute_get(void); +struct GPUShader *DRW_shader_draw_resource_finalize_get(void); +struct GPUShader *DRW_shader_draw_command_generate_get(void); + void DRW_shaders_free(void); #ifdef __cplusplus diff --git a/source/blender/draw/intern/draw_shader_shared.h b/source/blender/draw/intern/draw_shader_shared.h index 94c0c53dab7..bedbedcf438 100644 --- a/source/blender/draw/intern/draw_shader_shared.h +++ b/source/blender/draw/intern/draw_shader_shared.h @@ -1,14 +1,42 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef GPU_SHADER +# pragma once + # include "GPU_shader.h" # include "GPU_shader_shared_utils.h" +# include "draw_defines.h" typedef struct ViewInfos ViewInfos; typedef struct ObjectMatrices ObjectMatrices; typedef struct ObjectInfos ObjectInfos; +typedef struct ObjectBounds ObjectBounds; typedef struct VolumeInfos VolumeInfos; typedef struct CurvesInfos CurvesInfos; +typedef struct ObjectAttribute ObjectAttribute; +typedef struct DrawCommand DrawCommand; +typedef struct DispatchCommand DispatchCommand; +typedef struct DRWDebugPrintBuffer DRWDebugPrintBuffer; +typedef struct DRWDebugVert DRWDebugVert; +typedef struct DRWDebugDrawBuffer DRWDebugDrawBuffer; + +# ifdef __cplusplus +/* C++ only forward declarations. */ +struct Object; +struct ID; +struct GPUUniformAttr; + +namespace blender::draw { + +struct ObjectRef; + +} // namespace blender::draw + +# else /* __cplusplus */ +/* C only forward declarations. */ +typedef enum eObjectInfoFlag eObjectInfoFlag; + +# endif #endif #define DRW_SHADER_SHARED_H @@ -40,9 +68,18 @@ struct ViewInfos { float2 viewport_size_inverse; /** Frustum culling data. */ - /** NOTE: vec3 arrays are padded to vec4. */ + /** \note vec3 array padded to vec4. 
*/ float4 frustum_corners[8]; float4 frustum_planes[6]; + float4 frustum_bound_sphere; + + /** For debugging purpose */ + /* Mouse pixel. */ + int2 mouse_pixel; + + /** True if facing needs to be inverted. */ + bool1 is_inverted; + int _pad0; }; BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) @@ -60,23 +97,89 @@ BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) # define CameraTexCoFactors drw_view.viewcamtexcofac #endif +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug draw shapes + * \{ */ + struct ObjectMatrices { - float4x4 drw_modelMatrix; - float4x4 drw_modelMatrixInverse; + float4x4 model; + float4x4 model_inverse; + +#if !defined(GPU_SHADER) && defined(__cplusplus) + void sync(const Object &object); + void sync(const float4x4 &model_matrix); +#endif +}; +BLI_STATIC_ASSERT_ALIGN(ObjectMatrices, 16) + +enum eObjectInfoFlag { + OBJECT_SELECTED = (1u << 0u), + OBJECT_FROM_DUPLI = (1u << 1u), + OBJECT_FROM_SET = (1u << 2u), + OBJECT_ACTIVE = (1u << 3u), + OBJECT_NEGATIVE_SCALE = (1u << 4u), + /* Avoid skipped info to change culling. */ + OBJECT_NO_INFO = ~OBJECT_NEGATIVE_SCALE }; -BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) struct ObjectInfos { - float4 drw_OrcoTexCoFactors[2]; - float4 drw_ObjectColor; - float4 drw_Infos; +#if defined(GPU_SHADER) && !defined(DRAW_FINALIZE_SHADER) + /* TODO Rename to struct member for glsl too. */ + float4 orco_mul_bias[2]; + float4 color; + float4 infos; +#else + /** Uploaded as center + size. Converted to mul+bias to local coord. 
*/ + float3 orco_add; + uint object_attrs_offset; + float3 orco_mul; + uint object_attrs_len; + + float4 color; + uint index; + uint _pad2; + float random; + eObjectInfoFlag flag; +#endif + +#if !defined(GPU_SHADER) && defined(__cplusplus) + void sync(); + void sync(const blender::draw::ObjectRef ref, bool is_active_object); +#endif }; -BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) +BLI_STATIC_ASSERT_ALIGN(ObjectInfos, 16) + +struct ObjectBounds { + /** + * Uploaded as vertex (0, 4, 3, 1) of the bbox in local space, matching XYZ axis order. + * Then processed by GPU and stored as (0, 4-0, 3-0, 1-0) in world space for faster culling. + */ + float4 bounding_corners[4]; + /** Bounding sphere derived from the bounding corner. Computed on GPU. */ + float4 bounding_sphere; + /** Radius of the inscribed sphere derived from the bounding corner. Computed on GPU. */ +#define _inner_sphere_radius bounding_corners[3].w + +#if !defined(GPU_SHADER) && defined(__cplusplus) + void sync(); + void sync(Object &ob); + void sync(const float3 ¢er, const float3 &size); +#endif +}; +BLI_STATIC_ASSERT_ALIGN(ObjectBounds, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Object attributes + * \{ */ struct VolumeInfos { - /* Object to grid-space. */ + /** Object to grid-space. */ float4x4 grids_xform[DRW_GRID_PER_VOLUME_MAX]; - /* NOTE: vec4 for alignment. Only float3 needed. */ + /** \note vec4 for alignment. Only float3 needed. */ float4 color_mul; float density_scale; float temperature_mul; @@ -86,13 +189,127 @@ struct VolumeInfos { BLI_STATIC_ASSERT_ALIGN(VolumeInfos, 16) struct CurvesInfos { - /* Per attribute scope, follows loading order. - * NOTE: uint as bool in GLSL is 4 bytes. */ - uint is_point_attribute[DRW_ATTRIBUTE_PER_CURVES_MAX]; - int _pad; + /** Per attribute scope, follows loading order. + * \note uint as bool in GLSL is 4 bytes. + * \note GLSL pad arrays of scalar to 16 bytes (std140). 
*/ + uint4 is_point_attribute[DRW_ATTRIBUTE_PER_CURVES_MAX]; }; BLI_STATIC_ASSERT_ALIGN(CurvesInfos, 16) -#define OrcoTexCoFactors (drw_infos[resource_id].drw_OrcoTexCoFactors) -#define ObjectInfo (drw_infos[resource_id].drw_Infos) -#define ObjectColor (drw_infos[resource_id].drw_ObjectColor) +#pragma pack(push, 4) +struct ObjectAttribute { + /* Workaround the padding cost from alignment requirements. + * (see GL spec : 7.6.2.2 Standard Uniform Block Layout) */ + float data_x, data_y, data_z, data_w; + uint hash_code; + +#if !defined(GPU_SHADER) && defined(__cplusplus) + bool sync(const blender::draw::ObjectRef &ref, const GPUUniformAttr &attr); + bool id_property_lookup(ID *id, const char *name); +#endif +}; +#pragma pack(pop) +/** \note we only align to 4 bytes and fetch data manually so make sure + * C++ compiler gives us the same size. */ +BLI_STATIC_ASSERT_ALIGN(ObjectAttribute, 20) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Indirect commands structures. + * \{ */ + +struct DrawCommand { + /* TODO(fclem): Rename */ + uint vertex_len; + uint instance_len; + uint vertex_first; +#if defined(GPU_SHADER) + uint base_index; + /** \note base_index is i_first for non-indexed draw-calls. */ +# define _instance_first_array base_index +#else + union { + uint base_index; + /* Use this instead of instance_first_indexed for non indexed draw calls. */ + uint instance_first_array; + }; +#endif + + uint instance_first_indexed; + + uint _pad0, _pad1, _pad2; +}; +BLI_STATIC_ASSERT_ALIGN(DrawCommand, 16) + +struct DispatchCommand { + uint num_groups_x; + uint num_groups_y; + uint num_groups_z; + uint _pad0; +}; +BLI_STATIC_ASSERT_ALIGN(DispatchCommand, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug print + * \{ */ + +/* Take the header (DrawCommand) into account. 
*/ +#define DRW_DEBUG_PRINT_MAX (8 * 1024) - 4 +/** \note Cannot be more than 255 (because of column encoding). */ +#define DRW_DEBUG_PRINT_WORD_WRAP_COLUMN 120u + +/* The debug print buffer is laid-out as the following struct. + * But we use plain array in shader code instead because of driver issues. */ +struct DRWDebugPrintBuffer { + DrawCommand command; + /** Each character is encoded as 3 `uchar` with char_index, row and column position. */ + uint char_array[DRW_DEBUG_PRINT_MAX]; +}; +BLI_STATIC_ASSERT_ALIGN(DRWDebugPrintBuffer, 16) + +/* Use number of char as vertex count. Equivalent to `DRWDebugPrintBuffer.command.v_count`. */ +#define drw_debug_print_cursor drw_debug_print_buf[0] +/* Reuse first instance as row index as we don't use instancing. Equivalent to + * `DRWDebugPrintBuffer.command.i_first`. */ +#define drw_debug_print_row_shared drw_debug_print_buf[3] +/** Offset to the first data. Equal to: `sizeof(DrawCommand) / sizeof(uint)`. + * This is needed because we bind the whole buffer as a `uint` array. */ +#define drw_debug_print_offset 8 + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug draw shapes + * \{ */ + +struct DRWDebugVert { + /* This is a weird layout, but needed to be able to use DRWDebugVert as + * a DrawCommand and avoid alignment issues. See drw_debug_verts_buf[] definition. */ + uint pos0; + uint pos1; + uint pos2; + uint color; +}; +BLI_STATIC_ASSERT_ALIGN(DRWDebugVert, 16) + +/* Take the header (DrawCommand) into account. */ +#define DRW_DEBUG_DRAW_VERT_MAX (64 * 1024) - 1 + +/* The debug draw buffer is laid-out as the following struct. + * But we use plain array in shader code instead because of driver issues. */ +struct DRWDebugDrawBuffer { + DrawCommand command; + DRWDebugVert verts[DRW_DEBUG_DRAW_VERT_MAX]; +}; +BLI_STATIC_ASSERT_ALIGN(DRWDebugPrintBuffer, 16) + +/* Equivalent to `DRWDebugDrawBuffer.command.v_count`. 
*/ +#define drw_debug_draw_v_count drw_debug_verts_buf[0].pos0 +/** Offset to the first data. Equal to: `sizeof(DrawCommand) / sizeof(DRWDebugVert)`. + * This is needed because we bind the whole buffer as a `DRWDebugVert` array. */ +#define drw_debug_draw_offset 2 + +/** \} */ diff --git a/source/blender/draw/intern/draw_state.h b/source/blender/draw/intern/draw_state.h new file mode 100644 index 00000000000..bf1e63e0852 --- /dev/null +++ b/source/blender/draw/intern/draw_state.h @@ -0,0 +1,225 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +/** \file + * \ingroup draw + * + * Internal Pipeline State tracking. It is higher level than GPU state as everything fits a single + * enum. + */ + +/** + * DRWState is a bit-mask that stores the current render state and the desired render state. Based + * on the differences the minimum state changes can be invoked to setup the desired render state. + * + * The Write Stencil, Stencil test, Depth test and Blend state options are mutual exclusive + * therefore they aren't ordered as a bit mask. + */ +typedef enum { + /** To be used for compute passes. */ + DRW_STATE_NO_DRAW = 0, + /** Write mask */ + DRW_STATE_WRITE_DEPTH = (1 << 0), + DRW_STATE_WRITE_COLOR = (1 << 1), + /* Write Stencil. These options are mutual exclusive and packed into 2 bits */ + DRW_STATE_WRITE_STENCIL = (1 << 2), + DRW_STATE_WRITE_STENCIL_SHADOW_PASS = (2 << 2), + DRW_STATE_WRITE_STENCIL_SHADOW_FAIL = (3 << 2), + /** Depth test. These options are mutual exclusive and packed into 3 bits */ + DRW_STATE_DEPTH_ALWAYS = (1 << 4), + DRW_STATE_DEPTH_LESS = (2 << 4), + DRW_STATE_DEPTH_LESS_EQUAL = (3 << 4), + DRW_STATE_DEPTH_EQUAL = (4 << 4), + DRW_STATE_DEPTH_GREATER = (5 << 4), + DRW_STATE_DEPTH_GREATER_EQUAL = (6 << 4), + /** Culling test */ + DRW_STATE_CULL_BACK = (1 << 7), + DRW_STATE_CULL_FRONT = (1 << 8), + /** Stencil test. 
These options are mutually exclusive and packed into 2 bits. */ + DRW_STATE_STENCIL_ALWAYS = (1 << 9), + DRW_STATE_STENCIL_EQUAL = (2 << 9), + DRW_STATE_STENCIL_NEQUAL = (3 << 9), + + /** Blend state. These options are mutual exclusive and packed into 4 bits */ + DRW_STATE_BLEND_ADD = (1 << 11), + /** Same as additive but let alpha accumulate without pre-multiply. */ + DRW_STATE_BLEND_ADD_FULL = (2 << 11), + /** Standard alpha blending. */ + DRW_STATE_BLEND_ALPHA = (3 << 11), + /** Use that if color is already pre-multiply by alpha. */ + DRW_STATE_BLEND_ALPHA_PREMUL = (4 << 11), + DRW_STATE_BLEND_BACKGROUND = (5 << 11), + DRW_STATE_BLEND_OIT = (6 << 11), + DRW_STATE_BLEND_MUL = (7 << 11), + DRW_STATE_BLEND_SUB = (8 << 11), + /** Use dual source blending. WARNING: Only one color buffer allowed. */ + DRW_STATE_BLEND_CUSTOM = (9 << 11), + DRW_STATE_LOGIC_INVERT = (10 << 11), + DRW_STATE_BLEND_ALPHA_UNDER_PREMUL = (11 << 11), + + DRW_STATE_IN_FRONT_SELECT = (1 << 27), + DRW_STATE_SHADOW_OFFSET = (1 << 28), + DRW_STATE_CLIP_PLANES = (1 << 29), + DRW_STATE_FIRST_VERTEX_CONVENTION = (1 << 30), + /** DO NOT USE. Assumed always enabled. Only used internally. 
*/ + DRW_STATE_PROGRAM_POINT_SIZE = (1u << 31), +} DRWState; + +ENUM_OPERATORS(DRWState, DRW_STATE_PROGRAM_POINT_SIZE); + +#define DRW_STATE_DEFAULT \ + (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_LESS_EQUAL) +#define DRW_STATE_BLEND_ENABLED \ + (DRW_STATE_BLEND_ADD | DRW_STATE_BLEND_ADD_FULL | DRW_STATE_BLEND_ALPHA | \ + DRW_STATE_BLEND_ALPHA_PREMUL | DRW_STATE_BLEND_BACKGROUND | DRW_STATE_BLEND_OIT | \ + DRW_STATE_BLEND_MUL | DRW_STATE_BLEND_SUB | DRW_STATE_BLEND_CUSTOM | DRW_STATE_LOGIC_INVERT) +#define DRW_STATE_RASTERIZER_ENABLED \ + (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL | \ + DRW_STATE_WRITE_STENCIL_SHADOW_PASS | DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) +#define DRW_STATE_DEPTH_TEST_ENABLED \ + (DRW_STATE_DEPTH_ALWAYS | DRW_STATE_DEPTH_LESS | DRW_STATE_DEPTH_LESS_EQUAL | \ + DRW_STATE_DEPTH_EQUAL | DRW_STATE_DEPTH_GREATER | DRW_STATE_DEPTH_GREATER_EQUAL) +#define DRW_STATE_STENCIL_TEST_ENABLED \ + (DRW_STATE_STENCIL_ALWAYS | DRW_STATE_STENCIL_EQUAL | DRW_STATE_STENCIL_NEQUAL) +#define DRW_STATE_WRITE_STENCIL_ENABLED \ + (DRW_STATE_WRITE_STENCIL | DRW_STATE_WRITE_STENCIL_SHADOW_PASS | \ + DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus + +namespace blender::draw { + +/* -------------------------------------------------------------------- */ +/** \name DRWState to GPU state conversion + * \{ */ + +static inline eGPUWriteMask to_write_mask(DRWState state) +{ + eGPUWriteMask write_mask = GPU_WRITE_NONE; + if (state & DRW_STATE_WRITE_DEPTH) { + write_mask |= GPU_WRITE_DEPTH; + } + if (state & DRW_STATE_WRITE_COLOR) { + write_mask |= GPU_WRITE_COLOR; + } + if (state & DRW_STATE_WRITE_STENCIL_ENABLED) { + write_mask |= GPU_WRITE_STENCIL; + } + return write_mask; +} + +static inline eGPUFaceCullTest to_face_cull_test(DRWState state) +{ + switch (state & (DRW_STATE_CULL_BACK | DRW_STATE_CULL_FRONT)) { + case DRW_STATE_CULL_BACK: + return GPU_CULL_BACK; + case 
DRW_STATE_CULL_FRONT: + return GPU_CULL_FRONT; + default: + return GPU_CULL_NONE; + } +} + +static inline eGPUDepthTest to_depth_test(DRWState state) +{ + switch (state & DRW_STATE_DEPTH_TEST_ENABLED) { + case DRW_STATE_DEPTH_LESS: + return GPU_DEPTH_LESS; + case DRW_STATE_DEPTH_LESS_EQUAL: + return GPU_DEPTH_LESS_EQUAL; + case DRW_STATE_DEPTH_EQUAL: + return GPU_DEPTH_EQUAL; + case DRW_STATE_DEPTH_GREATER: + return GPU_DEPTH_GREATER; + case DRW_STATE_DEPTH_GREATER_EQUAL: + return GPU_DEPTH_GREATER_EQUAL; + case DRW_STATE_DEPTH_ALWAYS: + return GPU_DEPTH_ALWAYS; + default: + return GPU_DEPTH_NONE; + } +} + +static inline eGPUStencilOp to_stencil_op(DRWState state) +{ + switch (state & DRW_STATE_WRITE_STENCIL_ENABLED) { + case DRW_STATE_WRITE_STENCIL: + return GPU_STENCIL_OP_REPLACE; + case DRW_STATE_WRITE_STENCIL_SHADOW_PASS: + return GPU_STENCIL_OP_COUNT_DEPTH_PASS; + case DRW_STATE_WRITE_STENCIL_SHADOW_FAIL: + return GPU_STENCIL_OP_COUNT_DEPTH_FAIL; + default: + return GPU_STENCIL_OP_NONE; + } +} + +static inline eGPUStencilTest to_stencil_test(DRWState state) +{ + switch (state & DRW_STATE_STENCIL_TEST_ENABLED) { + case DRW_STATE_STENCIL_ALWAYS: + return GPU_STENCIL_ALWAYS; + case DRW_STATE_STENCIL_EQUAL: + return GPU_STENCIL_EQUAL; + case DRW_STATE_STENCIL_NEQUAL: + return GPU_STENCIL_NEQUAL; + default: + return GPU_STENCIL_NONE; + } +} + +static inline eGPUBlend to_blend(DRWState state) +{ + switch (state & DRW_STATE_BLEND_ENABLED) { + case DRW_STATE_BLEND_ADD: + return GPU_BLEND_ADDITIVE; + case DRW_STATE_BLEND_ADD_FULL: + return GPU_BLEND_ADDITIVE_PREMULT; + case DRW_STATE_BLEND_ALPHA: + return GPU_BLEND_ALPHA; + case DRW_STATE_BLEND_ALPHA_PREMUL: + return GPU_BLEND_ALPHA_PREMULT; + case DRW_STATE_BLEND_BACKGROUND: + return GPU_BLEND_BACKGROUND; + case DRW_STATE_BLEND_OIT: + return GPU_BLEND_OIT; + case DRW_STATE_BLEND_MUL: + return GPU_BLEND_MULTIPLY; + case DRW_STATE_BLEND_SUB: + return GPU_BLEND_SUBTRACT; + case DRW_STATE_BLEND_CUSTOM: + return 
GPU_BLEND_CUSTOM; + case DRW_STATE_LOGIC_INVERT: + return GPU_BLEND_INVERT; + case DRW_STATE_BLEND_ALPHA_UNDER_PREMUL: + return GPU_BLEND_ALPHA_UNDER_PREMUL; + default: + return GPU_BLEND_NONE; + } +} + +static inline eGPUProvokingVertex to_provoking_vertex(DRWState state) +{ + switch (state & DRW_STATE_FIRST_VERTEX_CONVENTION) { + case DRW_STATE_FIRST_VERTEX_CONVENTION: + return GPU_VERTEX_FIRST; + default: + return GPU_VERTEX_LAST; + } +} + +/** \} */ + +}; // namespace blender::draw + +#endif diff --git a/source/blender/draw/intern/draw_subdivision.h b/source/blender/draw/intern/draw_subdivision.h index 2d9f4713feb..37b025e761d 100644 --- a/source/blender/draw/intern/draw_subdivision.h +++ b/source/blender/draw/intern/draw_subdivision.h @@ -177,6 +177,10 @@ typedef struct DRWSubdivCache { /* UBO to store settings for the various compute shaders. */ struct GPUUniformBuf *ubo; + + /* Extra flags, passed to the UBO. */ + bool is_edit_mode; + bool use_hide; } DRWSubdivCache; /* Only frees the data of the cache, caller is responsible to free the cache itself if necessary. 
@@ -195,6 +199,7 @@ void DRW_create_subdivision(struct Object *ob, const float obmat[4][4], const bool do_final, const bool do_uvedit, + const bool do_cage, const ToolSettings *ts, const bool use_hide); diff --git a/source/blender/draw/intern/draw_texture_pool.cc b/source/blender/draw/intern/draw_texture_pool.cc index b36cb5c809e..017ecec7be2 100644 --- a/source/blender/draw/intern/draw_texture_pool.cc +++ b/source/blender/draw/intern/draw_texture_pool.cc @@ -160,6 +160,19 @@ void DRW_texture_pool_texture_release(DRWTexturePool *pool, GPUTexture *tmp_tex) pool->tmp_tex_released.append(tmp_tex); } +void DRW_texture_pool_take_texture_ownership(DRWTexturePool *pool, GPUTexture *tex) +{ + pool->tmp_tex_acquired.remove_first_occurrence_and_reorder(tex); +} + +void DRW_texture_pool_give_texture_ownership(DRWTexturePool *pool, GPUTexture *tex) +{ + BLI_assert(pool->tmp_tex_acquired.first_index_of_try(tex) == -1 && + pool->tmp_tex_released.first_index_of_try(tex) == -1 && + pool->tmp_tex_pruned.first_index_of_try(tex) == -1); + pool->tmp_tex_acquired.append(tex); +} + void DRW_texture_pool_reset(DRWTexturePool *pool) { pool->last_user_id = -1; diff --git a/source/blender/draw/intern/draw_texture_pool.h b/source/blender/draw/intern/draw_texture_pool.h index 1c30ea88552..9fbbf630833 100644 --- a/source/blender/draw/intern/draw_texture_pool.h +++ b/source/blender/draw/intern/draw_texture_pool.h @@ -26,6 +26,7 @@ void DRW_texture_pool_free(DRWTexturePool *pool); /** * Try to find a texture corresponding to params into the texture pool. * If no texture was found, create one and add it to the pool. + * DEPRECATED: Use DRW_texture_pool_texture_acquire instead and do it just before rendering. */ GPUTexture *DRW_texture_pool_query( DRWTexturePool *pool, int width, int height, eGPUTextureFormat format, void *user); @@ -40,6 +41,22 @@ GPUTexture *DRW_texture_pool_texture_acquire(DRWTexturePool *pool, * Releases a previously acquired texture. 
*/ void DRW_texture_pool_texture_release(DRWTexturePool *pool, GPUTexture *tmp_tex); + +/** + * This effectively remove a texture from the texture pool, giving full ownership to the caller. + * The given texture needs to be been acquired through DRW_texture_pool_texture_acquire(). + * IMPORTANT: This removes the need for a DRW_texture_pool_texture_release() call on this texture. + */ +void DRW_texture_pool_take_texture_ownership(DRWTexturePool *pool, GPUTexture *tex); +/** + * This Inserts a texture into the texture pool, giving full ownership to the texture pool. + * The texture needs not to be in the pool already. + * The texture may be reused in a latter call to DRW_texture_pool_texture_acquire(); + * IMPORTANT: DRW_texture_pool_texture_release() still needs to be called on this texture + * after usage. + */ +void DRW_texture_pool_give_texture_ownership(DRWTexturePool *pool, GPUTexture *tex); + /** * Resets the user bits for each texture in the pool and delete unused ones. */ diff --git a/source/blender/draw/intern/draw_view.c b/source/blender/draw/intern/draw_view.c index 817f97cbea4..35ff8891a0f 100644 --- a/source/blender/draw/intern/draw_view.c +++ b/source/blender/draw/intern/draw_view.c @@ -175,7 +175,7 @@ void DRW_draw_cursor(void) GPU_matrix_scale_2f(U.widget_unit, U.widget_unit); GPUBatch *cursor_batch = DRW_cache_cursor_get(is_aligned); - GPUShader *shader = GPU_shader_get_builtin_shader(GPU_SHADER_2D_FLAT_COLOR); + GPUShader *shader = GPU_shader_get_builtin_shader(GPU_SHADER_3D_FLAT_COLOR); GPU_batch_set_shader(cursor_batch, shader); GPU_batch_draw(cursor_batch); @@ -241,7 +241,7 @@ void DRW_draw_cursor_2d_ex(const ARegion *region, const float cursor[2]) GPUBatch *cursor_batch = DRW_cache_cursor_get(true); - GPUShader *shader = GPU_shader_get_builtin_shader(GPU_SHADER_2D_FLAT_COLOR); + GPUShader *shader = GPU_shader_get_builtin_shader(GPU_SHADER_3D_FLAT_COLOR); GPU_batch_set_shader(cursor_batch, shader); GPU_batch_draw(cursor_batch); diff --git 
a/source/blender/draw/intern/draw_view.cc b/source/blender/draw/intern/draw_view.cc new file mode 100644 index 00000000000..cb0e1370c28 --- /dev/null +++ b/source/blender/draw/intern/draw_view.cc @@ -0,0 +1,334 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#include "BLI_math_geom.h" +#include "GPU_compute.h" +#include "GPU_debug.h" + +#include "draw_debug.hh" +#include "draw_shader.h" +#include "draw_view.hh" + +namespace blender::draw { + +void View::sync(const float4x4 &view_mat, const float4x4 &win_mat) +{ + data_.viewmat = view_mat; + data_.viewinv = view_mat.inverted(); + data_.winmat = win_mat; + data_.wininv = win_mat.inverted(); + data_.persmat = data_.winmat * data_.viewmat; + data_.persinv = data_.persmat.inverted(); + /* Should not be used anymore. */ + data_.viewcamtexcofac = float4(1.0f, 1.0f, 0.0f, 0.0f); + + data_.is_inverted = (is_negative_m4(view_mat.ptr()) == is_negative_m4(win_mat.ptr())); + + update_view_vectors(); + + BoundBox &bound_box = *reinterpret_cast<BoundBox *>(&data_.frustum_corners); + BoundSphere &bound_sphere = *reinterpret_cast<BoundSphere *>(&data_.frustum_bound_sphere); + frustum_boundbox_calc(bound_box); + frustum_culling_planes_calc(); + frustum_culling_sphere_calc(bound_box, bound_sphere); + + dirty_ = true; +} + +void View::frustum_boundbox_calc(BoundBox &bbox) +{ + /* Extract the 8 corners from a Projection Matrix. */ +#if 0 /* Equivalent to this but it has accuracy problems. 
*/ + BKE_boundbox_init_from_minmax(&bbox, float3(-1.0f),float3(1.0f)); + for (int i = 0; i < 8; i++) { + mul_project_m4_v3(data_.wininv.ptr(), bbox.vec[i]); + } +#endif + + float left, right, bottom, top, near, far; + bool is_persp = data_.winmat[3][3] == 0.0f; + + projmat_dimensions(data_.winmat.ptr(), &left, &right, &bottom, &top, &near, &far); + + bbox.vec[0][2] = bbox.vec[3][2] = bbox.vec[7][2] = bbox.vec[4][2] = -near; + bbox.vec[0][0] = bbox.vec[3][0] = left; + bbox.vec[4][0] = bbox.vec[7][0] = right; + bbox.vec[0][1] = bbox.vec[4][1] = bottom; + bbox.vec[7][1] = bbox.vec[3][1] = top; + + /* Get the coordinates of the far plane. */ + if (is_persp) { + float sca_far = far / near; + left *= sca_far; + right *= sca_far; + bottom *= sca_far; + top *= sca_far; + } + + bbox.vec[1][2] = bbox.vec[2][2] = bbox.vec[6][2] = bbox.vec[5][2] = -far; + bbox.vec[1][0] = bbox.vec[2][0] = left; + bbox.vec[6][0] = bbox.vec[5][0] = right; + bbox.vec[1][1] = bbox.vec[5][1] = bottom; + bbox.vec[2][1] = bbox.vec[6][1] = top; + + /* Transform into world space. */ + for (int i = 0; i < 8; i++) { + mul_m4_v3(data_.viewinv.ptr(), bbox.vec[i]); + } +} + +void View::frustum_culling_planes_calc() +{ + planes_from_projmat(data_.persmat.ptr(), + data_.frustum_planes[0], + data_.frustum_planes[5], + data_.frustum_planes[1], + data_.frustum_planes[3], + data_.frustum_planes[4], + data_.frustum_planes[2]); + + /* Normalize. */ + for (int p = 0; p < 6; p++) { + data_.frustum_planes[p].w /= normalize_v3(data_.frustum_planes[p]); + } +} + +void View::frustum_culling_sphere_calc(const BoundBox &bbox, BoundSphere &bsphere) +{ + /* Extract Bounding Sphere */ + if (data_.winmat[3][3] != 0.0f) { + /* Orthographic */ + /* The most extreme points on the near and far plane. (normalized device coords). 
*/ + const float *nearpoint = bbox.vec[0]; + const float *farpoint = bbox.vec[6]; + + /* just use median point */ + mid_v3_v3v3(bsphere.center, farpoint, nearpoint); + bsphere.radius = len_v3v3(bsphere.center, farpoint); + } + else if (data_.winmat[2][0] == 0.0f && data_.winmat[2][1] == 0.0f) { + /* Perspective with symmetrical frustum. */ + + /* We obtain the center and radius of the circumscribed circle of the + * isosceles trapezoid composed by the diagonals of the near and far clipping plane */ + + /* center of each clipping plane */ + float mid_min[3], mid_max[3]; + mid_v3_v3v3(mid_min, bbox.vec[3], bbox.vec[4]); + mid_v3_v3v3(mid_max, bbox.vec[2], bbox.vec[5]); + + /* square length of the diagonals of each clipping plane */ + float a_sq = len_squared_v3v3(bbox.vec[3], bbox.vec[4]); + float b_sq = len_squared_v3v3(bbox.vec[2], bbox.vec[5]); + + /* distance squared between clipping planes */ + float h_sq = len_squared_v3v3(mid_min, mid_max); + + float fac = (4 * h_sq + b_sq - a_sq) / (8 * h_sq); + + /* The goal is to get the smallest sphere, + * not the sphere that passes through each corner */ + CLAMP(fac, 0.0f, 1.0f); + + interp_v3_v3v3(bsphere.center, mid_min, mid_max, fac); + + /* distance from the center to one of the points of the far plane (1, 2, 5, 6) */ + bsphere.radius = len_v3v3(bsphere.center, bbox.vec[1]); + } + else { + /* Perspective with asymmetrical frustum. */ + + /* We put the sphere center on the line that goes from origin + * to the center of the far clipping plane. 
*/ + + /* Detect which of the corner of the far clipping plane is the farthest to the origin */ + float nfar[4]; /* most extreme far point in NDC space */ + float farxy[2]; /* far-point projection onto the near plane */ + float farpoint[3] = {0.0f}; /* most extreme far point in camera coordinate */ + float nearpoint[3]; /* most extreme near point in camera coordinate */ + float farcenter[3] = {0.0f}; /* center of far clipping plane in camera coordinate */ + float F = -1.0f, N; /* square distance of far and near point to origin */ + float f, n; /* distance of far and near point to z axis. f is always > 0 but n can be < 0 */ + float e, s; /* far and near clipping distance (<0) */ + float c; /* slope of center line = distance of far clipping center + * to z axis / far clipping distance. */ + float z; /* projection of sphere center on z axis (<0) */ + + /* Find farthest corner and center of far clip plane. */ + float corner[3] = {1.0f, 1.0f, 1.0f}; /* in clip space */ + for (int i = 0; i < 4; i++) { + float point[3]; + mul_v3_project_m4_v3(point, data_.wininv.ptr(), corner); + float len = len_squared_v3(point); + if (len > F) { + copy_v3_v3(nfar, corner); + copy_v3_v3(farpoint, point); + F = len; + } + add_v3_v3(farcenter, point); + /* rotate by 90 degree to walk through the 4 points of the far clip plane */ + float tmp = corner[0]; + corner[0] = -corner[1]; + corner[1] = tmp; + } + + /* the far center is the average of the far clipping points */ + mul_v3_fl(farcenter, 0.25f); + /* the extreme near point is the opposite point on the near clipping plane */ + copy_v3_fl3(nfar, -nfar[0], -nfar[1], -1.0f); + mul_v3_project_m4_v3(nearpoint, data_.wininv.ptr(), nfar); + /* this is a frustum projection */ + N = len_squared_v3(nearpoint); + e = farpoint[2]; + s = nearpoint[2]; + /* distance to view Z axis */ + f = len_v2(farpoint); + /* get corresponding point on the near plane */ + mul_v2_v2fl(farxy, farpoint, s / e); + /* this formula preserve the sign of n */ + 
sub_v2_v2(nearpoint, farxy); + n = f * s / e - len_v2(nearpoint); + c = len_v2(farcenter) / e; + /* the big formula, it simplifies to (F-N)/(2(e-s)) for the symmetric case */ + z = (F - N) / (2.0f * (e - s + c * (f - n))); + + bsphere.center[0] = farcenter[0] * z / e; + bsphere.center[1] = farcenter[1] * z / e; + bsphere.center[2] = z; + + /* For XR, the view matrix may contain a scale factor. Then, transforming only the center + * into world space after calculating the radius will result in incorrect behavior. */ + mul_m4_v3(data_.viewinv.ptr(), bsphere.center); /* Transform to world space. */ + mul_m4_v3(data_.viewinv.ptr(), farpoint); + bsphere.radius = len_v3v3(bsphere.center, farpoint); + } +} + +void View::set_clip_planes(Span<float4> planes) +{ + BLI_assert(planes.size() <= ARRAY_SIZE(data_.clip_planes)); + int i = 0; + for (const auto &plane : planes) { + data_.clip_planes[i++] = plane; + } +} + +void View::update_viewport_size() +{ + float4 viewport; + GPU_viewport_size_get_f(viewport); + float2 viewport_size = float2(viewport.z, viewport.w); + if (assign_if_different(data_.viewport_size, viewport_size)) { + dirty_ = true; + } +} + +void View::update_view_vectors() +{ + bool is_persp = data_.winmat[3][3] == 0.0f; + + /* Near clip distance. */ + data_.viewvecs[0][3] = (is_persp) ? -data_.winmat[3][2] / (data_.winmat[2][2] - 1.0f) : + -(data_.winmat[3][2] + 1.0f) / data_.winmat[2][2]; + + /* Far clip distance. */ + data_.viewvecs[1][3] = (is_persp) ? -data_.winmat[3][2] / (data_.winmat[2][2] + 1.0f) : + -(data_.winmat[3][2] - 1.0f) / data_.winmat[2][2]; + + /* View vectors for the corners of the view frustum. 
+ * Can be used to recreate the world space position easily */ + float3 view_vecs[4] = { + {-1.0f, -1.0f, -1.0f}, + {1.0f, -1.0f, -1.0f}, + {-1.0f, 1.0f, -1.0f}, + {-1.0f, -1.0f, 1.0f}, + }; + + /* Convert the view vectors to view space */ + for (int i = 0; i < 4; i++) { + mul_project_m4_v3(data_.wininv.ptr(), view_vecs[i]); + /* Normalized trick see: + * http://www.derschmale.com/2014/01/26/reconstructing-positions-from-the-depth-buffer */ + if (is_persp) { + view_vecs[i].x /= view_vecs[i].z; + view_vecs[i].y /= view_vecs[i].z; + } + } + + /** + * - If orthographic: + * `view_vecs[0]` is the near-bottom-left corner of the frustum and + * `view_vecs[1]` is the vector going from the near-bottom-left corner to + * the far-top-right corner. + * - If perspective: + * `view_vecs[0].xy` and `view_vecs[1].xy` are respectively the bottom-left corner + * when `Z = 1`, and top-left corner if `Z = 1`. + * `view_vecs[0].z` the near clip distance and `view_vecs[1].z` is the (signed) + * distance from the near plane to the far clip plane. + */ + copy_v3_v3(data_.viewvecs[0], view_vecs[0]); + + /* we need to store the differences */ + data_.viewvecs[1][0] = view_vecs[1][0] - view_vecs[0][0]; + data_.viewvecs[1][1] = view_vecs[2][1] - view_vecs[0][1]; + data_.viewvecs[1][2] = view_vecs[3][2] - view_vecs[0][2]; +} + +void View::bind() +{ + update_viewport_size(); + + if (dirty_) { + dirty_ = false; + data_.push_update(); + } + + GPU_uniformbuf_bind(data_, DRW_VIEW_UBO_SLOT); +} + +void View::compute_visibility(ObjectBoundsBuf &bounds, uint resource_len, bool debug_freeze) +{ + if (debug_freeze && frozen_ == false) { + data_freeze_ = static_cast<ViewInfos>(data_); + data_freeze_.push_update(); + } +#ifdef DEBUG + if (debug_freeze) { + drw_debug_matrix_as_bbox(data_freeze_.persinv, float4(0, 1, 0, 1)); + } +#endif + frozen_ = debug_freeze; + + GPU_debug_group_begin("View.compute_visibility"); + + /* TODO(fclem): Early out if visibility hasn't changed. 
*/ + /* TODO(fclem): Resize to nearest pow2 to reduce fragmentation. */ + visibility_buf_.resize(divide_ceil_u(resource_len, 128)); + + uint32_t data = 0xFFFFFFFFu; + GPU_storagebuf_clear(visibility_buf_, GPU_R32UI, GPU_DATA_UINT, &data); + + if (do_visibility_) { + GPUShader *shader = DRW_shader_draw_visibility_compute_get(); + GPU_shader_bind(shader); + GPU_shader_uniform_1i(shader, "resource_len", resource_len); + GPU_storagebuf_bind(bounds, GPU_shader_get_ssbo(shader, "bounds_buf")); + GPU_storagebuf_bind(visibility_buf_, GPU_shader_get_ssbo(shader, "visibility_buf")); + GPU_uniformbuf_bind((frozen_) ? data_freeze_ : data_, DRW_VIEW_UBO_SLOT); + GPU_compute_dispatch(shader, divide_ceil_u(resource_len, DRW_VISIBILITY_GROUP_SIZE), 1, 1); + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + + if (frozen_) { + /* Bind back the non frozen data. */ + GPU_uniformbuf_bind(data_, DRW_VIEW_UBO_SLOT); + } + + GPU_debug_group_end(); +} + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_view.hh b/source/blender/draw/intern/draw_view.hh new file mode 100644 index 00000000000..27e7a7a0028 --- /dev/null +++ b/source/blender/draw/intern/draw_view.hh @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + */ + +#include "DRW_gpu_wrapper.hh" +#include "DRW_render.h" + +#include "draw_shader_shared.h" + +namespace blender::draw { + +class Manager; + +/* TODO: de-duplicate. */ +using ObjectBoundsBuf = StorageArrayBuffer<ObjectBounds, 128>; +/** \note Using uint4 for declaration but bound as uint. */ +using VisibilityBuf = StorageArrayBuffer<uint4, 1, true>; + +class View { + friend Manager; + + private: + UniformBuffer<ViewInfos> data_; + /** Frozen version of data_ used for debugging culling. */ + UniformBuffer<ViewInfos> data_freeze_; + /** Result of the visibility computation. 1 bit per resource ID. 
*/ + VisibilityBuf visibility_buf_; + + const char *debug_name_; + + bool do_visibility_ = true; + bool dirty_ = true; + bool frozen_ = false; + + public: + View(const char *name) : visibility_buf_(name), debug_name_(name){}; + /* For compatibility with old system. Will be removed at some point. */ + View(const char *name, const DRWView *view) : visibility_buf_(name), debug_name_(name) + { + float4x4 view_mat, win_mat; + DRW_view_viewmat_get(view, view_mat.ptr(), false); + DRW_view_winmat_get(view, win_mat.ptr(), false); + this->sync(view_mat, win_mat); + } + + void set_clip_planes(Span<float4> planes); + + void sync(const float4x4 &view_mat, const float4x4 &win_mat); + + bool is_persp() const + { + return data_.winmat[3][3] == 0.0f; + } + + bool is_inverted() const + { + return data_.is_inverted; + } + + float far_clip() const + { + if (is_persp()) { + return -data_.winmat[3][2] / (data_.winmat[2][2] + 1.0f); + } + return -(data_.winmat[3][2] - 1.0f) / data_.winmat[2][2]; + } + + float near_clip() const + { + if (is_persp()) { + return -data_.winmat[3][2] / (data_.winmat[2][2] - 1.0f); + } + return -(data_.winmat[3][2] + 1.0f) / data_.winmat[2][2]; + } + + private: + /** Called from draw manager. 
*/ + void bind(); + void compute_visibility(ObjectBoundsBuf &bounds, uint resource_len, bool debug_freeze); + + void update_view_vectors(); + void update_viewport_size(); + + void frustum_boundbox_calc(BoundBox &bbox); + void frustum_culling_planes_calc(); + void frustum_culling_sphere_calc(const BoundBox &bbox, BoundSphere &bsphere); +}; + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_view_data.cc b/source/blender/draw/intern/draw_view_data.cc index 3dc28dc9a9a..58d826e0218 100644 --- a/source/blender/draw/intern/draw_view_data.cc +++ b/source/blender/draw/intern/draw_view_data.cc @@ -7,6 +7,7 @@ #include "BLI_vector.hh" +#include "GPU_capabilities.h" #include "GPU_viewport.h" #include "DRW_render.h" @@ -16,6 +17,7 @@ #include "draw_manager_text.h" #include "draw_manager.h" +#include "draw_manager.hh" #include "draw_view_data.h" using namespace blender; @@ -33,6 +35,22 @@ struct DRWViewData { Vector<ViewportEngineData> engines; Vector<ViewportEngineData *> enabled_engines; + + /** New per view/viewport manager. Null if not supported by current hardware. */ + draw::Manager *manager = nullptr; + + DRWViewData() + { + /* Only for GL >= 4.3 implementation for now. 
*/ + if (GPU_shader_storage_buffer_objects_support() && GPU_compute_shader_support()) { + manager = new draw::Manager(); + } + }; + + ~DRWViewData() + { + delete manager; + }; }; DRWViewData *DRW_view_data_create(ListBase *engine_types) @@ -197,6 +215,16 @@ void DRW_view_data_free_unused(DRWViewData *view_data) } } +void DRW_view_data_engines_view_update(DRWViewData *view_data) +{ + for (ViewportEngineData &engine_data : view_data->engines) { + DrawEngineType *draw_engine = engine_data.engine_type->draw_engine; + if (draw_engine->view_update) { + draw_engine->view_update(&engine_data); + } + } +} + double *DRW_view_data_cache_time_get(DRWViewData *view_data) { return &view_data->cache_time; @@ -227,3 +255,31 @@ ViewportEngineData *DRW_view_data_enabled_engine_iter_step(DRWEngineIterator *it ViewportEngineData *engine = iterator->engines[iterator->id++]; return engine; } + +draw::Manager *DRW_manager_get() +{ + BLI_assert(DST.view_data_active->manager); + return reinterpret_cast<draw::Manager *>(DST.view_data_active->manager); +} + +draw::ObjectRef DRW_object_ref_get(Object *object) +{ + BLI_assert(DST.view_data_active->manager); + return {object, DST.dupli_source, DST.dupli_parent}; +} + +void DRW_manager_begin_sync() +{ + if (DST.view_data_active->manager == nullptr) { + return; + } + reinterpret_cast<draw::Manager *>(DST.view_data_active->manager)->begin_sync(); +} + +void DRW_manager_end_sync() +{ + if (DST.view_data_active->manager == nullptr) { + return; + } + reinterpret_cast<draw::Manager *>(DST.view_data_active->manager)->end_sync(); +} diff --git a/source/blender/draw/intern/draw_view_data.h b/source/blender/draw/intern/draw_view_data.h index 918b9e81f87..f2c34c15f08 100644 --- a/source/blender/draw/intern/draw_view_data.h +++ b/source/blender/draw/intern/draw_view_data.h @@ -107,6 +107,7 @@ ViewportEngineData *DRW_view_data_engine_data_get_ensure(DRWViewData *view_data, void DRW_view_data_use_engine(DRWViewData *view_data, struct DrawEngineType 
*engine_type); void DRW_view_data_reset(DRWViewData *view_data); void DRW_view_data_free_unused(DRWViewData *view_data); +void DRW_view_data_engines_view_update(DRWViewData *view_data); double *DRW_view_data_cache_time_get(DRWViewData *view_data); DefaultFramebufferList *DRW_view_data_default_framebuffer_list_get(DRWViewData *view_data); DefaultTextureList *DRW_view_data_default_texture_list_get(DRWViewData *view_data); diff --git a/source/blender/draw/intern/draw_volume.cc b/source/blender/draw/intern/draw_volume.cc index c4e58ab24cb..8f4383a98d8 100644 --- a/source/blender/draw/intern/draw_volume.cc +++ b/source/blender/draw/intern/draw_volume.cc @@ -89,6 +89,10 @@ void DRW_volume_free(void) static GPUTexture *grid_default_texture(eGPUDefaultValue default_value) { + if (g_data.dummy_one == nullptr) { + drw_volume_globals_init(); + } + switch (default_value) { case GPU_DEFAULT_0: return g_data.dummy_zero; diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh.hh b/source/blender/draw/intern/mesh_extractors/extract_mesh.hh index 8052b277d45..10b94291e35 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh.hh +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh.hh @@ -29,7 +29,6 @@ struct DRWSubdivCache; enum eMRExtractType { MR_EXTRACT_BMESH, - MR_EXTRACT_MAPPED, MR_EXTRACT_MESH, }; @@ -81,11 +80,18 @@ struct MeshRenderData { BMFace *efa_act_uv; /* Data created on-demand (usually not for #BMesh based data). 
*/ MLoopTri *mlooptri; + const int *material_indices; const float (*vert_normals)[3]; const float (*poly_normals)[3]; + const bool *hide_vert; + const bool *hide_edge; + const bool *hide_poly; float (*loop_normals)[3]; int *lverts, *ledges; + const char *active_color_name; + const char *default_color_name; + struct { int *tri_first_index; int *mat_tri_len; @@ -93,6 +99,82 @@ struct MeshRenderData { } poly_sorted; }; +BLI_INLINE const Mesh *editmesh_final_or_this(const Object *object, const Mesh *me) +{ + if (me->edit_mesh != nullptr) { + Mesh *editmesh_eval_final = BKE_object_get_editmesh_eval_final(object); + if (editmesh_eval_final != nullptr) { + return editmesh_eval_final; + } + } + + return me; +} + +BLI_INLINE const CustomData *mesh_cd_ldata_get_from_mesh(const Mesh *me) +{ + switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: + case ME_WRAPPER_TYPE_MDATA: + return &me->ldata; + break; + case ME_WRAPPER_TYPE_BMESH: + return &me->edit_mesh->bm->ldata; + break; + } + + BLI_assert(0); + return &me->ldata; +} + +BLI_INLINE const CustomData *mesh_cd_pdata_get_from_mesh(const Mesh *me) +{ + switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: + case ME_WRAPPER_TYPE_MDATA: + return &me->pdata; + break; + case ME_WRAPPER_TYPE_BMESH: + return &me->edit_mesh->bm->pdata; + break; + } + + BLI_assert(0); + return &me->pdata; +} + +BLI_INLINE const CustomData *mesh_cd_edata_get_from_mesh(const Mesh *me) +{ + switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: + case ME_WRAPPER_TYPE_MDATA: + return &me->edata; + break; + case ME_WRAPPER_TYPE_BMESH: + return &me->edit_mesh->bm->edata; + break; + } + + BLI_assert(0); + return &me->edata; +} + +BLI_INLINE const CustomData *mesh_cd_vdata_get_from_mesh(const Mesh *me) +{ + switch ((eMeshWrapperType)me->runtime.wrapper_type) { + case ME_WRAPPER_TYPE_SUBD: + case ME_WRAPPER_TYPE_MDATA: + return &me->vdata; + break; + case 
ME_WRAPPER_TYPE_BMESH: + return &me->edit_mesh->bm->vdata; + break; + } + + BLI_assert(0); + return &me->vdata; +} + BLI_INLINE BMFace *bm_original_face_get(const MeshRenderData *mr, int idx) { return ((mr->p_origindex != NULL) && (mr->p_origindex[idx] != ORIGINDEX_NONE) && mr->bm) ? diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc index 9824602b129..2f2e59c8c3b 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_edituv.cc @@ -22,7 +22,7 @@ struct MeshExtract_EditUvElem_Data { }; static void extract_edituv_tris_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -59,17 +59,15 @@ static void extract_edituv_tris_iter_looptri_mesh(const MeshRenderData *mr, void *_data) { MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); - const MPoly *mp = &mr->mpoly[mlt->poly]; - edituv_tri_add(data, - (mp->flag & ME_HIDE) != 0, - (mp->flag & ME_FACE_SEL) != 0, - mlt->tri[0], - mlt->tri[1], - mlt->tri[2]); + const BMFace *efa = bm_original_face_get(mr, mlt->poly); + const bool mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + const bool mp_select = (efa) ? 
BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + + edituv_tri_add(data, mp_hidden, mp_select, mlt->tri[0], mlt->tri[1], mlt->tri[2]); } static void extract_edituv_tris_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -117,7 +115,7 @@ static void extract_edituv_tris_iter_subdiv_bm(const DRWSubdivCache *UNUSED(subd } static void extract_edituv_tris_iter_subdiv_mesh(const DRWSubdivCache *UNUSED(subdiv_cache), - const MeshRenderData *UNUSED(mr), + const MeshRenderData *mr, void *_data, uint subdiv_quad_index, const MPoly *coarse_quad) @@ -125,24 +123,17 @@ static void extract_edituv_tris_iter_subdiv_mesh(const DRWSubdivCache *UNUSED(su MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); const uint loop_idx = subdiv_quad_index * 4; - edituv_tri_add(data, - (coarse_quad->flag & ME_HIDE) != 0, - (coarse_quad->flag & ME_FACE_SEL) != 0, - loop_idx, - loop_idx + 1, - loop_idx + 2); + const BMFace *efa = bm_original_face_get(mr, coarse_quad - mr->mpoly); + const bool mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + const bool mp_select = (efa) ? 
BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; - edituv_tri_add(data, - (coarse_quad->flag & ME_HIDE) != 0, - (coarse_quad->flag & ME_FACE_SEL) != 0, - loop_idx, - loop_idx + 2, - loop_idx + 3); + edituv_tri_add(data, mp_hidden, mp_select, loop_idx, loop_idx + 1, loop_idx + 2); + edituv_tri_add(data, mp_hidden, mp_select, loop_idx, loop_idx + 2, loop_idx + 3); } static void extract_edituv_tris_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -176,7 +167,7 @@ constexpr MeshExtract create_extractor_edituv_tris() * \{ */ static void extract_edituv_lines_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -214,12 +205,24 @@ static void extract_edituv_lines_iter_poly_bm(const MeshRenderData *UNUSED(mr), static void extract_edituv_lines_iter_poly_mesh(const MeshRenderData *mr, const MPoly *mp, - const int UNUSED(mp_index), + const int mp_index, void *_data) { MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; + + bool mp_hidden, mp_select; + if (mr->bm) { + const BMFace *efa = bm_original_face_get(mr, mp_index); + mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + mp_select = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + } + else { + mp_hidden = (mr->hide_poly) ? mr->hide_poly[mp_index] : false; + mp_select = (mp->flag & ME_FACE_SEL) != 0; + } + for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { const MLoop *ml = &mloop[ml_index]; @@ -227,16 +230,12 @@ static void extract_edituv_lines_iter_poly_mesh(const MeshRenderData *mr, const int ml_index_next = (ml_index == ml_index_last) ? 
mp->loopstart : (ml_index + 1); const bool real_edge = (mr->e_origindex == nullptr || mr->e_origindex[ml->e] != ORIGINDEX_NONE); - edituv_edge_add(data, - (mp->flag & ME_HIDE) != 0 || !real_edge, - (mp->flag & ME_FACE_SEL) != 0, - ml_index, - ml_index_next); + edituv_edge_add(data, mp_hidden || !real_edge, mp_select, ml_index, ml_index_next); } } static void extract_edituv_lines_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -266,6 +265,9 @@ static void extract_edituv_lines_iter_subdiv_bm(const DRWSubdivCache *subdiv_cac MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); int *subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index); + const bool mp_hidden = BM_elem_flag_test_bool(coarse_poly, BM_ELEM_HIDDEN); + const bool mp_select = BM_elem_flag_test_bool(coarse_poly, BM_ELEM_SELECT); + uint start_loop_idx = subdiv_quad_index * 4; uint end_loop_idx = (subdiv_quad_index + 1) * 4; for (uint loop_idx = start_loop_idx; loop_idx < end_loop_idx; loop_idx++) { @@ -274,8 +276,8 @@ static void extract_edituv_lines_iter_subdiv_bm(const DRWSubdivCache *subdiv_cac (mr->e_origindex == nullptr || mr->e_origindex[edge_origindex] != ORIGINDEX_NONE)); edituv_edge_add(data, - BM_elem_flag_test_bool(coarse_poly, BM_ELEM_HIDDEN) != 0 || !real_edge, - BM_elem_flag_test_bool(coarse_poly, BM_ELEM_SELECT) != 0, + mp_hidden || !real_edge, + mp_select, loop_idx, (loop_idx + 1 == end_loop_idx) ? 
start_loop_idx : (loop_idx + 1)); } @@ -290,6 +292,17 @@ static void extract_edituv_lines_iter_subdiv_mesh(const DRWSubdivCache *subdiv_c MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); int *subdiv_loop_edge_index = (int *)GPU_vertbuf_get_data(subdiv_cache->edges_orig_index); + bool mp_hidden, mp_select; + if (mr->bm) { + const BMFace *efa = bm_original_face_get(mr, coarse_poly - mr->mpoly); + mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + mp_select = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + } + else { + mp_hidden = (mr->hide_poly) ? mr->hide_poly[coarse_poly - mr->mpoly] : false; + mp_select = (coarse_poly->flag & ME_FACE_SEL) != 0; + } + uint start_loop_idx = subdiv_quad_index * 4; uint end_loop_idx = (subdiv_quad_index + 1) * 4; for (uint loop_idx = start_loop_idx; loop_idx < end_loop_idx; loop_idx++) { @@ -298,8 +311,8 @@ static void extract_edituv_lines_iter_subdiv_mesh(const DRWSubdivCache *subdiv_c (mr->e_origindex == nullptr || mr->e_origindex[edge_origindex] != ORIGINDEX_NONE)); edituv_edge_add(data, - (coarse_poly->flag & ME_HIDE) != 0 || !real_edge, - (coarse_poly->flag & ME_FACE_SEL) != 0, + mp_hidden || !real_edge, + mp_select, loop_idx, (loop_idx + 1 == end_loop_idx) ? 
start_loop_idx : (loop_idx + 1)); } @@ -307,7 +320,7 @@ static void extract_edituv_lines_iter_subdiv_mesh(const DRWSubdivCache *subdiv_c static void extract_edituv_lines_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -341,7 +354,7 @@ constexpr MeshExtract create_extractor_edituv_lines() * \{ */ static void extract_edituv_points_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -378,23 +391,27 @@ static void extract_edituv_points_iter_poly_bm(const MeshRenderData *UNUSED(mr), static void extract_edituv_points_iter_poly_mesh(const MeshRenderData *mr, const MPoly *mp, - const int UNUSED(mp_index), + const int mp_index, void *_data) { MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + + const BMFace *efa = bm_original_face_get(mr, mp_index); + const bool mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + const bool mp_select = (efa) ? 
BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { const MLoop *ml = &mloop[ml_index]; const bool real_vert = !mr->v_origindex || mr->v_origindex[ml->v] != ORIGINDEX_NONE; - edituv_point_add( - data, ((mp->flag & ME_HIDE) != 0) || !real_vert, (mp->flag & ME_FACE_SEL) != 0, ml_index); + edituv_point_add(data, mp_hidden || !real_vert, mp_select, ml_index); } } static void extract_edituv_points_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -444,22 +461,23 @@ static void extract_edituv_points_iter_subdiv_mesh(const DRWSubdivCache *subdiv_ MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); int *subdiv_loop_vert_index = (int *)GPU_vertbuf_get_data(subdiv_cache->verts_orig_index); + const BMFace *efa = bm_original_face_get(mr, coarse_quad - mr->mpoly); + const bool mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + const bool mp_select = (efa) ? 
BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + uint start_loop_idx = subdiv_quad_index * 4; uint end_loop_idx = (subdiv_quad_index + 1) * 4; for (uint i = start_loop_idx; i < end_loop_idx; i++) { const int vert_origindex = subdiv_loop_vert_index[i]; const bool real_vert = !mr->v_origindex || (vert_origindex != -1 && mr->v_origindex[vert_origindex] != ORIGINDEX_NONE); - edituv_point_add(data, - ((coarse_quad->flag & ME_HIDE) != 0) || !real_vert, - (coarse_quad->flag & ME_FACE_SEL) != 0, - i); + edituv_point_add(data, mp_hidden || !real_vert, mp_select, i); } } static void extract_edituv_points_finish_subdiv(const struct DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -493,7 +511,7 @@ constexpr MeshExtract create_extractor_edituv_points() * \{ */ static void extract_edituv_fdots_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -533,6 +551,11 @@ static void extract_edituv_fdots_iter_poly_mesh(const MeshRenderData *mr, void *_data) { MeshExtract_EditUvElem_Data *data = static_cast<MeshExtract_EditUvElem_Data *>(_data); + + const BMFace *efa = bm_original_face_get(mr, mp_index); + const bool mp_hidden = (efa) ? BM_elem_flag_test_bool(efa, BM_ELEM_HIDDEN) : true; + const bool mp_select = (efa) ? 
BM_elem_flag_test_bool(efa, BM_ELEM_SELECT) : false; + if (mr->use_subsurf_fdots) { const BLI_bitmap *facedot_tags = mr->me->runtime.subsurf_face_dot_tags; @@ -543,21 +566,17 @@ static void extract_edituv_fdots_iter_poly_mesh(const MeshRenderData *mr, const bool real_fdot = !mr->p_origindex || (mr->p_origindex[mp_index] != ORIGINDEX_NONE); const bool subd_fdot = BLI_BITMAP_TEST(facedot_tags, ml->v); - edituv_facedot_add(data, - ((mp->flag & ME_HIDE) != 0) || !real_fdot || !subd_fdot, - (mp->flag & ME_FACE_SEL) != 0, - mp_index); + edituv_facedot_add(data, mp_hidden || !real_fdot || !subd_fdot, mp_select, mp_index); } } else { const bool real_fdot = !mr->p_origindex || (mr->p_origindex[mp_index] != ORIGINDEX_NONE); - edituv_facedot_add( - data, ((mp->flag & ME_HIDE) != 0) || !real_fdot, (mp->flag & ME_FACE_SEL) != 0, mp_index); + edituv_facedot_add(data, mp_hidden || !real_fdot, mp_select, mp_index); } } static void extract_edituv_fdots_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_fdots.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_fdots.cc index 4eebea1b79f..8dc00617039 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_fdots.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_fdots.cc @@ -15,7 +15,7 @@ namespace blender::draw { * \{ */ static void extract_fdots_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *tls_data) { @@ -42,6 +42,8 @@ static void extract_fdots_iter_poly_mesh(const MeshRenderData *mr, const int mp_index, void *_userdata) { + const bool hidden = mr->use_hide && mr->hide_poly && mr->hide_poly[mp - mr->mpoly]; + GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(_userdata); if (mr->use_subsurf_fdots) { const BLI_bitmap *facedot_tags = 
mr->me->runtime.subsurf_face_dot_tags; @@ -50,7 +52,7 @@ static void extract_fdots_iter_poly_mesh(const MeshRenderData *mr, const int ml_index_end = mp->loopstart + mp->totloop; for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { const MLoop *ml = &mloop[ml_index]; - if (BLI_BITMAP_TEST(facedot_tags, ml->v) && !(mr->use_hide && (mp->flag & ME_HIDE))) { + if (BLI_BITMAP_TEST(facedot_tags, ml->v) && !hidden) { GPU_indexbuf_set_point_vert(elb, mp_index, mp_index); return; } @@ -58,7 +60,7 @@ static void extract_fdots_iter_poly_mesh(const MeshRenderData *mr, GPU_indexbuf_set_point_restart(elb, mp_index); } else { - if (!(mr->use_hide && (mp->flag & ME_HIDE))) { + if (!hidden) { GPU_indexbuf_set_point_vert(elb, mp_index, mp_index); } else { @@ -68,7 +70,7 @@ static void extract_fdots_iter_poly_mesh(const MeshRenderData *mr, } static void extract_fdots_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_userdata) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc index 4e89b34c0a0..9c564c2cdda 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines.cc @@ -18,7 +18,7 @@ namespace blender::draw { * \{ */ static void extract_lines_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *tls_data) { @@ -58,16 +58,13 @@ static void extract_lines_iter_poly_mesh(const MeshRenderData *mr, GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(data); /* Using poly & loop iterator would complicate accessing the adjacent loop. 
*/ const MLoop *mloop = mr->mloop; - const MEdge *medge = mr->medge; - if (mr->use_hide || (mr->extract_type == MR_EXTRACT_MAPPED) || (mr->e_origindex != nullptr)) { + if (mr->use_hide || (mr->e_origindex != nullptr)) { const int ml_index_last = mp->loopstart + (mp->totloop - 1); int ml_index = ml_index_last, ml_index_next = mp->loopstart; do { const MLoop *ml = &mloop[ml_index]; - const MEdge *med = &medge[ml->e]; - if (!((mr->use_hide && (med->flag & ME_HIDE)) || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->e_origindex) && - (mr->e_origindex[ml->e] == ORIGINDEX_NONE)))) { + if (!((mr->use_hide && mr->hide_edge && mr->hide_edge[ml->e]) || + ((mr->e_origindex) && (mr->e_origindex[ml->e] == ORIGINDEX_NONE)))) { GPU_indexbuf_set_line_verts(elb, ml->e, ml_index, ml_index_next); } else { @@ -111,9 +108,8 @@ static void extract_lines_iter_ledge_mesh(const MeshRenderData *mr, GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(data); const int l_index_offset = mr->edge_len + ledge_index; const int e_index = mr->ledges[ledge_index]; - if (!((mr->use_hide && (med->flag & ME_HIDE)) || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->e_origindex) && - (mr->e_origindex[e_index] == ORIGINDEX_NONE)))) { + if (!((mr->use_hide && mr->hide_edge && mr->hide_edge[med - mr->medge]) || + ((mr->e_origindex) && (mr->e_origindex[e_index] == ORIGINDEX_NONE)))) { const int l_index = mr->loop_len + ledge_index * 2; GPU_indexbuf_set_line_verts(elb, l_index_offset, l_index, l_index + 1); } @@ -132,7 +128,7 @@ static void extract_lines_task_reduce(void *_userdata_to, void *_userdata_from) } static void extract_lines_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *data) { @@ -143,7 +139,7 @@ static void extract_lines_finish(const MeshRenderData *UNUSED(mr), static void extract_lines_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache 
*UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buffer, void *UNUSED(data)) { @@ -183,17 +179,54 @@ static void extract_lines_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, uint *flags_data = static_cast<uint *>(GPU_vertbuf_get_data(flags)); - if (mr->extract_type == MR_EXTRACT_MESH) { - const MEdge *medge = mr->medge; - for (DRWSubdivLooseEdge edge : loose_edges) { - *flags_data++ = (medge[edge.coarse_edge_index].flag & ME_HIDE) != 0; + switch (mr->extract_type) { + case MR_EXTRACT_MESH: { + if (mr->e_origindex == nullptr) { + const bool *hide_edge = mr->hide_edge; + if (hide_edge) { + for (DRWSubdivLooseEdge edge : loose_edges) { + *flags_data++ = hide_edge[edge.coarse_edge_index]; + } + } + else { + MutableSpan<uint>(flags_data, loose_edges.size()).fill(0); + } + } + else { + if (mr->bm) { + for (DRWSubdivLooseEdge edge : loose_edges) { + const BMEdge *bm_edge = bm_original_edge_get(mr, edge.coarse_edge_index); + *flags_data++ = BM_elem_flag_test_bool(bm_edge, BM_ELEM_HIDDEN) != 0; + } + } + else { + const bool *hide_edge = mr->hide_edge; + if (hide_edge) { + for (DRWSubdivLooseEdge edge : loose_edges) { + int e = edge.coarse_edge_index; + + if (mr->e_origindex && mr->e_origindex[e] != ORIGINDEX_NONE) { + *flags_data++ = hide_edge[edge.coarse_edge_index]; + } + else { + *flags_data++ = false; + } + } + } + else { + MutableSpan<uint>(flags_data, loose_edges.size()).fill(0); + } + } + } + break; } - } - else { - BMesh *bm = mr->bm; - for (DRWSubdivLooseEdge edge : loose_edges) { - const BMEdge *bm_edge = BM_edge_at_index(bm, edge.coarse_edge_index); - *flags_data++ = BM_elem_flag_test_bool(bm_edge, BM_ELEM_HIDDEN) != 0; + case MR_EXTRACT_BMESH: { + BMesh *bm = mr->bm; + for (DRWSubdivLooseEdge edge : loose_edges) { + const BMEdge *bm_edge = BM_edge_at_index(bm, edge.coarse_edge_index); + *flags_data++ = BM_elem_flag_test_bool(bm_edge, BM_ELEM_HIDDEN) != 0; + } + break; } } @@ -229,7 +262,7 @@ constexpr MeshExtract create_extractor_lines() /** \name 
Extract Lines and Loose Edges Sub Buffer * \{ */ -static void extract_lines_loose_subbuffer(const MeshRenderData *mr, struct MeshBatchCache *cache) +static void extract_lines_loose_subbuffer(const MeshRenderData *mr, MeshBatchCache *cache) { BLI_assert(cache->final.buff.ibo.lines); /* Multiply by 2 because these are edges indices. */ @@ -241,7 +274,7 @@ static void extract_lines_loose_subbuffer(const MeshRenderData *mr, struct MeshB } static void extract_lines_with_lines_loose_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *data) { @@ -253,7 +286,7 @@ static void extract_lines_with_lines_loose_finish(const MeshRenderData *mr, static void extract_lines_with_lines_loose_finish_subdiv(const struct DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *UNUSED(buf), void *UNUSED(_data)) { @@ -292,7 +325,7 @@ constexpr MeshExtract create_extractor_lines_with_lines_loose() * \{ */ static void extract_lines_loose_only_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *UNUSED(tls_data)) { @@ -303,7 +336,7 @@ static void extract_lines_loose_only_init(const MeshRenderData *mr, static void extract_lines_loose_only_init_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc index 9ba9453dada..d6c246c51a9 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_adjacency.cc @@ -42,7 +42,7 @@ static void line_adjacency_data_init(MeshExtract_LineAdjacency_Data *data, } static void 
extract_lines_adjacency_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *tls_data) { @@ -119,20 +119,21 @@ static void extract_lines_adjacency_iter_looptri_mesh(const MeshRenderData *mr, void *_data) { MeshExtract_LineAdjacency_Data *data = static_cast<MeshExtract_LineAdjacency_Data *>(_data); - const MPoly *mp = &mr->mpoly[mlt->poly]; - if (!(mr->use_hide && (mp->flag & ME_HIDE))) { - lines_adjacency_triangle(mr->mloop[mlt->tri[0]].v, - mr->mloop[mlt->tri[1]].v, - mr->mloop[mlt->tri[2]].v, - mlt->tri[0], - mlt->tri[1], - mlt->tri[2], - data); + const bool hidden = mr->use_hide && mr->hide_poly && mr->hide_poly[mlt->poly]; + if (hidden) { + return; } + lines_adjacency_triangle(mr->mloop[mlt->tri[0]].v, + mr->mloop[mlt->tri[1]].v, + mr->mloop[mlt->tri[2]].v, + mlt->tri[0], + mlt->tri[1], + mlt->tri[2], + data); } static void extract_lines_adjacency_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *_data) { @@ -166,7 +167,7 @@ static void extract_lines_adjacency_finish(const MeshRenderData *UNUSED(mr), static void extract_lines_adjacency_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *_data) { @@ -222,7 +223,7 @@ static void extract_lines_adjacency_iter_subdiv_mesh(const DRWSubdivCache *subdi static void extract_lines_adjacency_finish_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_paint_mask.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_paint_mask.cc index 713a533492f..31e5c515129 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_paint_mask.cc +++ 
b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_lines_paint_mask.cc @@ -26,7 +26,7 @@ struct MeshExtract_LinePaintMask_Data { }; static void extract_lines_paint_mask_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -47,10 +47,8 @@ static void extract_lines_paint_mask_iter_poly_mesh(const MeshRenderData *mr, const MLoop *ml = &mloop[ml_index]; const int e_index = ml->e; - const MEdge *me = &mr->medge[e_index]; - if (!((mr->use_hide && (me->flag & ME_HIDE)) || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->e_origindex) && - (mr->e_origindex[e_index] == ORIGINDEX_NONE)))) { + if (!((mr->use_hide && mr->hide_edge && mr->hide_edge[e_index]) || + ((mr->e_origindex) && (mr->e_origindex[e_index] == ORIGINDEX_NONE)))) { const int ml_index_last = mp->totloop + mp->loopstart - 1; const int ml_index_other = (ml_index == ml_index_last) ? mp->loopstart : (ml_index + 1); @@ -78,7 +76,7 @@ static void extract_lines_paint_mask_iter_poly_mesh(const MeshRenderData *mr, } static void extract_lines_paint_mask_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { @@ -122,11 +120,10 @@ static void extract_lines_paint_mask_iter_subdiv_mesh(const DRWSubdivCache *subd GPU_indexbuf_set_line_restart(&data->elb, subdiv_edge_index); } else { - const MEdge *me = &mr->medge[coarse_edge_index]; - if (!((mr->use_hide && (me->flag & ME_HIDE)) || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->e_origindex) && - (mr->e_origindex[coarse_edge_index] == ORIGINDEX_NONE)))) { - const uint ml_index_other = (loop_idx == end_loop_idx) ? start_loop_idx : loop_idx + 1; + if (!((mr->use_hide && mr->hide_edge && mr->hide_edge[coarse_edge_index]) || + ((mr->e_origindex) && (mr->e_origindex[coarse_edge_index] == ORIGINDEX_NONE)))) { + const uint ml_index_other = (loop_idx == (end_loop_idx - 1)) ? 
start_loop_idx : + loop_idx + 1; if (coarse_quad->flag & ME_FACE_SEL) { if (BLI_BITMAP_TEST_AND_SET_ATOMIC(data->select_map, coarse_edge_index)) { /* Hide edge as it has more than 2 selected loop. */ @@ -154,7 +151,7 @@ static void extract_lines_paint_mask_iter_subdiv_mesh(const DRWSubdivCache *subd static void extract_lines_paint_mask_finish_subdiv( const struct DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc index e746b37fd30..48eeb86e5ee 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_points.cc @@ -19,7 +19,7 @@ namespace blender::draw { * \{ */ static void extract_points_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *tls_data) { @@ -43,10 +43,9 @@ BLI_INLINE void vert_set_mesh(GPUIndexBufBuilder *elb, const int v_index, const int l_index) { - const MVert *mv = &mr->mvert[v_index]; - if (!((mr->use_hide && (mv->flag & ME_HIDE)) || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->v_origindex) && - (mr->v_origindex[v_index] == ORIGINDEX_NONE)))) { + const bool hidden = mr->use_hide && mr->hide_vert && mr->hide_vert[v_index]; + + if (!(hidden || ((mr->v_origindex) && (mr->v_origindex[v_index] == ORIGINDEX_NONE)))) { GPU_indexbuf_set_point_vert(elb, v_index, l_index); } else { @@ -131,7 +130,7 @@ static void extract_points_task_reduce(void *_userdata_to, void *_userdata_from) } static void extract_points_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_userdata) { @@ -142,7 +141,7 @@ static void extract_points_finish(const MeshRenderData *UNUSED(mr), 
static void extract_points_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buffer), void *data) { @@ -181,8 +180,7 @@ static void extract_points_iter_subdiv_common(GPUIndexBufBuilder *elb, } } else { - const MVert *mv = &mr->mvert[coarse_vertex_index]; - if (mr->use_hide && (mv->flag & ME_HIDE)) { + if (mr->use_hide && mr->hide_vert && mr->hide_vert[coarse_vertex_index]) { GPU_indexbuf_set_point_restart(elb, coarse_vertex_index); continue; } @@ -285,7 +283,7 @@ static void extract_points_loose_geom_subdiv(const DRWSubdivCache *subdiv_cache, static void extract_points_finish_subdiv(const DRWSubdivCache *UNUSED(subdiv_cache), const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_userdata) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc index 4c8d1d0002a..2e3e6c7b6b1 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_ibo_tris.cc @@ -25,7 +25,7 @@ static void extract_tris_mat_task_reduce(void *_userdata_to, void *_userdata_fro * \{ */ static void extract_tris_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -81,7 +81,7 @@ static void extract_tris_iter_poly_mesh(const MeshRenderData *mr, } static void extract_tris_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *_data) { @@ -111,7 +111,7 @@ static void extract_tris_finish(const MeshRenderData *mr, static void extract_tris_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { @@ 
-157,7 +157,7 @@ constexpr MeshExtract create_extractor_tris() * \{ */ static void extract_tris_single_mat_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(ibo), void *tls_data) { @@ -189,17 +189,17 @@ static void extract_tris_single_mat_iter_looptri_mesh(const MeshRenderData *mr, void *_data) { GPUIndexBufBuilder *elb = static_cast<GPUIndexBufBuilder *>(_data); - const MPoly *mp = &mr->mpoly[mlt->poly]; - if (!(mr->use_hide && (mp->flag & ME_HIDE))) { - GPU_indexbuf_set_tri_verts(elb, mlt_index, mlt->tri[0], mlt->tri[1], mlt->tri[2]); + const bool hidden = mr->use_hide && mr->hide_poly && mr->hide_poly[mlt->poly]; + if (hidden) { + GPU_indexbuf_set_tri_restart(elb, mlt_index); } else { - GPU_indexbuf_set_tri_restart(elb, mlt_index); + GPU_indexbuf_set_tri_verts(elb, mlt_index, mlt->tri[0], mlt->tri[1], mlt->tri[2]); } } static void extract_tris_single_mat_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc index fb6b5e1904b..64ade020418 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_attributes.cc @@ -9,6 +9,7 @@ #include <functional> +#include "BLI_color.hh" #include "BLI_math_vec_types.hh" #include "BLI_string.h" @@ -57,7 +58,6 @@ template<typename AttributeType, typename VBOType> struct AttributeTypeConverter } }; -/* Similar to the one in #extract_mesh_vcol_vbo.cc */ struct gpuMeshCol { ushort r, g, b, a; }; @@ -74,6 +74,18 @@ template<> struct AttributeTypeConverter<MPropCol, gpuMeshCol> { } }; +template<> struct AttributeTypeConverter<ColorGeometry4b, gpuMeshCol> { + static gpuMeshCol convert_value(ColorGeometry4b value) + { + gpuMeshCol result; + result.r = 
unit_float_to_ushort_clamp(BLI_color_from_srgb_table[value.r]); + result.g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[value.g]); + result.b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[value.b]); + result.a = unit_float_to_ushort_clamp(value.a * (1.0f / 255.0f)); + return result; + } +}; + /* Return the number of component for the attribute's value type, or 0 if is it unsupported. */ static uint gpu_component_size_for_attribute_type(eCustomDataType type) { @@ -90,6 +102,7 @@ static uint gpu_component_size_for_attribute_type(eCustomDataType type) case CD_PROP_FLOAT3: return 3; case CD_PROP_COLOR: + case CD_PROP_BYTE_COLOR: return 4; default: return 0; @@ -102,6 +115,7 @@ static GPUVertFetchMode get_fetch_mode_for_type(eCustomDataType type) case CD_PROP_INT32: return GPU_FETCH_INT_TO_FLOAT; case CD_PROP_COLOR: + case CD_PROP_BYTE_COLOR: return GPU_FETCH_INT_TO_FLOAT_UNIT; default: return GPU_FETCH_FLOAT; @@ -114,13 +128,14 @@ static GPUVertCompType get_comp_type_for_type(eCustomDataType type) case CD_PROP_INT32: return GPU_COMP_I32; case CD_PROP_COLOR: + case CD_PROP_BYTE_COLOR: return GPU_COMP_U16; default: return GPU_COMP_F32; } } -static void init_vbo_for_attribute(const MeshRenderData *mr, +static void init_vbo_for_attribute(const MeshRenderData &mr, GPUVertBuf *vbo, const DRW_AttributeRequest &request, bool build_on_device, @@ -132,11 +147,8 @@ static void init_vbo_for_attribute(const MeshRenderData *mr, /* We should not be here if the attribute type is not supported. 
*/ BLI_assert(comp_size != 0); - const CustomData *custom_data = get_custom_data_for_domain(mr, request.domain); char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTR_NAME]; - const char *layer_name = CustomData_get_layer_name( - custom_data, request.cd_type, request.layer_index); - GPU_vertformat_safe_attr_name(layer_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME); + GPU_vertformat_safe_attr_name(request.attribute_name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME); /* Attributes use auto-name. */ BLI_snprintf(attr_name, sizeof(attr_name), "a%s", attr_safe_name); @@ -144,6 +156,13 @@ static void init_vbo_for_attribute(const MeshRenderData *mr, GPU_vertformat_deinterleave(&format); GPU_vertformat_attr_add(&format, attr_name, comp_type, comp_size, fetch_mode); + if (mr.active_color_name && STREQ(request.attribute_name, mr.active_color_name)) { + GPU_vertformat_alias_add(&format, "ac"); + } + if (mr.default_color_name && STREQ(request.attribute_name, mr.default_color_name)) { + GPU_vertformat_alias_add(&format, "c"); + } + if (build_on_device) { GPU_vertbuf_init_build_on_device(vbo, &format, len); } @@ -258,18 +277,15 @@ static void extract_attr_generic(const MeshRenderData *mr, } } -static void extract_attr_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, - void *buf, - void *UNUSED(tls_data), - int index) +static void extract_attr_init( + const MeshRenderData *mr, MeshBatchCache *cache, void *buf, void *UNUSED(tls_data), int index) { const DRW_Attributes *attrs_used = &cache->attr_used; const DRW_AttributeRequest &request = attrs_used->requests[index]; GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); - init_vbo_for_attribute(mr, vbo, request, false, static_cast<uint32_t>(mr->loop_len)); + init_vbo_for_attribute(*mr, vbo, request, false, static_cast<uint32_t>(mr->loop_len)); /* TODO(@kevindietrich): float3 is used for scalar attributes as the implicit conversion done by * OpenGL to vec4 for a scalar `s` will produce a `vec4(s, 0, 0, 1)`. 
However, following the @@ -297,6 +313,9 @@ static void extract_attr_init(const MeshRenderData *mr, case CD_PROP_COLOR: extract_attr_generic<MPropCol, gpuMeshCol>(mr, vbo, request); break; + case CD_PROP_BYTE_COLOR: + extract_attr_generic<ColorGeometry4b, gpuMeshCol>(mr, vbo, request); + break; default: BLI_assert_unreachable(); } @@ -345,17 +364,24 @@ static void extract_attr_init_subdiv(const DRWSubdivCache *subdiv_cache, case CD_PROP_COLOR: extract_attr_generic<MPropCol, gpuMeshCol>(mr, src_data, request); break; + case CD_PROP_BYTE_COLOR: + extract_attr_generic<ColorGeometry4b, gpuMeshCol>(mr, src_data, request); + break; default: BLI_assert_unreachable(); } GPUVertBuf *dst_buffer = static_cast<GPUVertBuf *>(buffer); - init_vbo_for_attribute(mr, dst_buffer, request, true, subdiv_cache->num_subdiv_loops); + init_vbo_for_attribute(*mr, dst_buffer, request, true, subdiv_cache->num_subdiv_loops); /* Ensure data is uploaded properly. */ GPU_vertbuf_tag_dirty(src_data); - draw_subdiv_interp_custom_data( - subdiv_cache, src_data, dst_buffer, static_cast<int>(dimensions), 0, false); + draw_subdiv_interp_custom_data(subdiv_cache, + src_data, + dst_buffer, + static_cast<int>(dimensions), + 0, + ELEM(request.cd_type, CD_PROP_COLOR, CD_PROP_BYTE_COLOR)); GPU_vertbuf_discard(src_data); } @@ -364,13 +390,13 @@ static void extract_attr_init_subdiv(const DRWSubdivCache *subdiv_cache, * extract. The overall API does not allow us to pass this in a convenient way. 
*/ #define EXTRACT_INIT_WRAPPER(index) \ static void extract_attr_init##index( \ - const MeshRenderData *mr, struct MeshBatchCache *cache, void *buf, void *tls_data) \ + const MeshRenderData *mr, MeshBatchCache *cache, void *buf, void *tls_data) \ { \ extract_attr_init(mr, cache, buf, tls_data, index); \ } \ static void extract_attr_init_subdiv##index(const DRWSubdivCache *subdiv_cache, \ const MeshRenderData *mr, \ - struct MeshBatchCache *cache, \ + MeshBatchCache *cache, \ void *buf, \ void *tls_data) \ { \ diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc index a11f740239a..50c37f6397c 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc @@ -43,7 +43,7 @@ static float loop_edge_factor_get(const float f_no[3], } static void extract_edge_fac_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -167,14 +167,14 @@ static void extract_edge_fac_iter_ledge_mesh(const MeshRenderData *mr, } static void extract_edge_fac_finish(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *_data) { GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); MeshExtract_EdgeFac_Data *data = static_cast<MeshExtract_EdgeFac_Data *>(_data); - if (GPU_crappy_amd_driver()) { + if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) { /* Some AMD drivers strangely crash with VBO's with a one byte format. * To workaround we reinitialize the VBO with another format and convert * all bytes to floats. 
*/ @@ -206,7 +206,7 @@ static GPUVertFormat *get_subdiv_edge_fac_format() { static GPUVertFormat format = {0}; if (format.attr_len == 0) { - if (GPU_crappy_amd_driver()) { + if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) { GPU_vertformat_attr_add(&format, "wd", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); } else { @@ -218,7 +218,7 @@ static GPUVertFormat *get_subdiv_edge_fac_format() static void extract_edge_fac_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { @@ -268,7 +268,7 @@ static void extract_edge_fac_loose_geom_subdiv(const DRWSubdivCache *subdiv_cach uint offset = subdiv_cache->num_subdiv_loops; for (int i = 0; i < loose_geom.edge_len; i++) { - if (GPU_crappy_amd_driver()) { + if (GPU_crappy_amd_driver() || GPU_minimum_per_vertex_stride() > 1) { float loose_edge_fac[2] = {1.0f, 1.0f}; GPU_vertbuf_update_sub(vbo, offset * sizeof(float), sizeof(loose_edge_fac), loose_edge_fac); } diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc index 3bb706e82cd..27fd6546b8c 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edit_data.cc @@ -112,7 +112,7 @@ static GPUVertFormat *get_edit_data_format() } static void extract_edit_data_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc index 6d54fce2a0d..0b9043e3289 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_data.cc @@ -43,7 
+43,7 @@ static void extract_edituv_data_init_common(const MeshRenderData *mr, } static void extract_edituv_data_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc index 5d6dd14b57a..e4714aabf34 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_angle.cc @@ -74,7 +74,7 @@ static void edituv_get_edituv_stretch_angle(float auv[2][2], } static void extract_edituv_stretch_angle_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -97,7 +97,7 @@ static void extract_edituv_stretch_angle_init(const MeshRenderData *mr, data->cd_ofs = CustomData_get_offset(&mr->bm->ldata, CD_MLOOPUV); } else { - BLI_assert(ELEM(mr->extract_type, MR_EXTRACT_MAPPED, MR_EXTRACT_MESH)); + BLI_assert(mr->extract_type == MR_EXTRACT_MESH); data->luv = (const MLoopUV *)CustomData_get_layer(&mr->me->ldata, CD_MLOOPUV); } } @@ -212,7 +212,7 @@ static GPUVertFormat *get_edituv_stretch_angle_format_subdiv() static void extract_edituv_stretch_angle_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(tls_data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc index 70dcc24f946..9679c0523f8 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edituv_stretch_area.cc @@ -20,14 +20,14 @@ namespace 
blender::draw { * \{ */ static void extract_edituv_stretch_area_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); static GPUVertFormat format = {0}; if (format.attr_len == 0) { - GPU_vertformat_attr_add(&format, "ratio", GPU_COMP_I16, 1, GPU_FETCH_INT_TO_FLOAT_UNIT); + GPU_vertformat_attr_add(&format, "ratio", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); } GPU_vertbuf_init_with_format(vbo, &format); @@ -37,15 +37,14 @@ static void extract_edituv_stretch_area_init(const MeshRenderData *mr, BLI_INLINE float area_ratio_get(float area, float uvarea) { if (area >= FLT_EPSILON && uvarea >= FLT_EPSILON) { - /* Tag inversion by using the sign. */ - return (area > uvarea) ? (uvarea / area) : -(area / uvarea); + return uvarea / area; } return 0.0f; } -BLI_INLINE float area_ratio_to_stretch(float ratio, float tot_ratio, float inv_tot_ratio) +BLI_INLINE float area_ratio_to_stretch(float ratio, float tot_ratio) { - ratio *= (ratio > 0.0f) ? tot_ratio : -inv_tot_ratio; + ratio *= tot_ratio; return (ratio > 1.0f) ? 
(1.0f / ratio) : ratio; } @@ -72,7 +71,7 @@ static void compute_area_ratio(const MeshRenderData *mr, } } else { - BLI_assert(ELEM(mr->extract_type, MR_EXTRACT_MAPPED, MR_EXTRACT_MESH)); + BLI_assert(mr->extract_type == MR_EXTRACT_MESH); const MLoopUV *uv_data = (const MLoopUV *)CustomData_get_layer(&mr->me->ldata, CD_MLOOPUV); const MPoly *mp = mr->mpoly; for (int mp_index = 0; mp_index < mr->poly_len; mp_index++, mp++) { @@ -89,7 +88,7 @@ static void compute_area_ratio(const MeshRenderData *mr, } static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *UNUSED(data)) { @@ -97,14 +96,8 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, float *area_ratio = static_cast<float *>(MEM_mallocN(sizeof(float) * mr->poly_len, __func__)); compute_area_ratio(mr, area_ratio, cache->tot_area, cache->tot_uv_area); - /* Convert in place to avoid an extra allocation */ - uint16_t *poly_stretch = (uint16_t *)area_ratio; - for (int mp_index = 0; mp_index < mr->poly_len; mp_index++) { - poly_stretch[mp_index] = area_ratio[mp_index] * SHRT_MAX; - } - /* Copy face data for each loop. 
*/ - uint16_t *loop_stretch = (uint16_t *)GPU_vertbuf_get_data(vbo); + float *loop_stretch = (float *)GPU_vertbuf_get_data(vbo); if (mr->extract_type == MR_EXTRACT_BMESH) { BMFace *efa; @@ -112,16 +105,16 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, int f, l_index = 0; BM_ITER_MESH_INDEX (efa, &f_iter, mr->bm, BM_FACES_OF_MESH, f) { for (int i = 0; i < efa->len; i++, l_index++) { - loop_stretch[l_index] = poly_stretch[f]; + loop_stretch[l_index] = area_ratio[f]; } } } else { - BLI_assert(ELEM(mr->extract_type, MR_EXTRACT_MAPPED, MR_EXTRACT_MESH)); + BLI_assert(mr->extract_type == MR_EXTRACT_MESH); const MPoly *mp = mr->mpoly; for (int mp_index = 0, l_index = 0; mp_index < mr->poly_len; mp_index++, mp++) { for (int i = 0; i < mp->totloop; i++, l_index++) { - loop_stretch[l_index] = poly_stretch[mp_index]; + loop_stretch[l_index] = area_ratio[mp_index]; } } } @@ -131,7 +124,7 @@ static void extract_edituv_stretch_area_finish(const MeshRenderData *mr, static void extract_edituv_stretch_area_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_edituv_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_edituv_data.cc index 64bec0adad4..27d1975d67b 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_edituv_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_edituv_data.cc @@ -21,7 +21,7 @@ struct MeshExtract_EditUVFdotData_Data { }; static void extract_fdots_edituv_data_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_nor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_nor.cc index 
8d189db9f12..c47cde63630 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_nor.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_nor.cc @@ -19,7 +19,7 @@ namespace blender::draw { #define NOR_AND_FLAG_HIDDEN -2 static void extract_fdots_nor_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { @@ -34,7 +34,7 @@ static void extract_fdots_nor_init(const MeshRenderData *mr, } static void extract_fdots_nor_finish(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(data)) { @@ -48,8 +48,7 @@ static void extract_fdots_nor_finish(const MeshRenderData *mr, for (int f = 0; f < mr->poly_len; f++) { efa = BM_face_at_index(mr->bm, f); const bool is_face_hidden = BM_elem_flag_test(efa, BM_ELEM_HIDDEN); - if (is_face_hidden || (mr->extract_type == MR_EXTRACT_MAPPED && mr->p_origindex && - mr->p_origindex[f] == ORIGINDEX_NONE)) { + if (is_face_hidden || (mr->p_origindex && mr->p_origindex[f] == ORIGINDEX_NONE)) { nor[f] = GPU_normal_convert_i10_v3(invalid_normal); nor[f].w = NOR_AND_FLAG_HIDDEN; } @@ -66,8 +65,7 @@ static void extract_fdots_nor_finish(const MeshRenderData *mr, for (int f = 0; f < mr->poly_len; f++) { efa = bm_original_face_get(mr, f); const bool is_face_hidden = efa && BM_elem_flag_test(efa, BM_ELEM_HIDDEN); - if (is_face_hidden || (mr->extract_type == MR_EXTRACT_MAPPED && mr->p_origindex && - mr->p_origindex[f] == ORIGINDEX_NONE)) { + if (is_face_hidden || (mr->p_origindex && mr->p_origindex[f] == ORIGINDEX_NONE)) { nor[f] = GPU_normal_convert_i10_v3(invalid_normal); nor[f].w = NOR_AND_FLAG_HIDDEN; } @@ -101,7 +99,7 @@ constexpr MeshExtract create_extractor_fdots_nor() * \{ */ static void extract_fdots_nor_hq_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { @@ 
-116,7 +114,7 @@ static void extract_fdots_nor_hq_init(const MeshRenderData *mr, } static void extract_fdots_nor_hq_finish(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(data)) { @@ -130,8 +128,7 @@ static void extract_fdots_nor_hq_finish(const MeshRenderData *mr, for (int f = 0; f < mr->poly_len; f++) { efa = BM_face_at_index(mr->bm, f); const bool is_face_hidden = BM_elem_flag_test(efa, BM_ELEM_HIDDEN); - if (is_face_hidden || (mr->extract_type == MR_EXTRACT_MAPPED && mr->p_origindex && - mr->p_origindex[f] == ORIGINDEX_NONE)) { + if (is_face_hidden || (mr->p_origindex && mr->p_origindex[f] == ORIGINDEX_NONE)) { normal_float_to_short_v3(&nor[f * 4], invalid_normal); nor[f * 4 + 3] = NOR_AND_FLAG_HIDDEN; } @@ -148,8 +145,7 @@ static void extract_fdots_nor_hq_finish(const MeshRenderData *mr, for (int f = 0; f < mr->poly_len; f++) { efa = bm_original_face_get(mr, f); const bool is_face_hidden = efa && BM_elem_flag_test(efa, BM_ELEM_HIDDEN); - if (is_face_hidden || (mr->extract_type == MR_EXTRACT_MAPPED && mr->p_origindex && - mr->p_origindex[f] == ORIGINDEX_NONE)) { + if (is_face_hidden || (mr->p_origindex && mr->p_origindex[f] == ORIGINDEX_NONE)) { normal_float_to_short_v3(&nor[f * 4], invalid_normal); nor[f * 4 + 3] = NOR_AND_FLAG_HIDDEN; } diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc index 822b5928c49..c391cb6ca5a 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_pos.cc @@ -36,7 +36,7 @@ static GPUVertFormat *get_fdots_nor_format_subdiv() } static void extract_fdots_pos_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -101,7 +101,7 @@ static void 
extract_fdots_pos_iter_poly_mesh(const MeshRenderData *mr, static void extract_fdots_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_uv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_uv.cc index de21c63e5fd..b0403cf7c4c 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_uv.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_fdots_uv.cc @@ -22,7 +22,7 @@ struct MeshExtract_FdotUV_Data { }; static void extract_fdots_uv_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc index 42a9a58bbe4..01d07fa5f83 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_lnor.cc @@ -16,7 +16,7 @@ namespace blender::draw { * \{ */ static void extract_lnor_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -62,6 +62,8 @@ static void extract_lnor_iter_poly_mesh(const MeshRenderData *mr, const int mp_index, void *data) { + const bool hidden = mr->hide_poly && mr->hide_poly[mp_index]; + const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { @@ -78,10 +80,10 @@ static void extract_lnor_iter_poly_mesh(const MeshRenderData *mr, } /* Flag for paint mode overlay. - * Only use MR_EXTRACT_MAPPED in edit mode where it is used to display the edge-normals. 
+ * Only use origindex in edit mode where it is used to display the edge-normals. * In paint mode it will use the un-mapped data to draw the wire-frame. */ - if (mp->flag & ME_HIDE || (mr->edit_bmesh && mr->extract_type == MR_EXTRACT_MAPPED && - (mr->v_origindex) && mr->v_origindex[ml->v] == ORIGINDEX_NONE)) { + if (hidden || + (mr->edit_bmesh && (mr->v_origindex) && mr->v_origindex[ml->v] == ORIGINDEX_NONE)) { lnor_data->w = -1; } else if (mp->flag & ME_FACE_SEL) { @@ -105,7 +107,7 @@ static GPUVertFormat *get_subdiv_lnor_format() static void extract_lnor_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { @@ -141,7 +143,7 @@ struct gpuHQNor { }; static void extract_lnor_hq_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -185,6 +187,8 @@ static void extract_lnor_hq_iter_poly_mesh(const MeshRenderData *mr, const int mp_index, void *data) { + const bool hidden = mr->hide_poly && mr->hide_poly[mp_index]; + const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { @@ -201,10 +205,10 @@ static void extract_lnor_hq_iter_poly_mesh(const MeshRenderData *mr, } /* Flag for paint mode overlay. - * Only use #MR_EXTRACT_MAPPED in edit mode where it is used to display the edge-normals. + * Only use origindex in edit mode where it is used to display the edge-normals. * In paint mode it will use the un-mapped data to draw the wire-frame. 
*/ - if (mp->flag & ME_HIDE || (mr->edit_bmesh && mr->extract_type == MR_EXTRACT_MAPPED && - (mr->v_origindex) && mr->v_origindex[ml->v] == ORIGINDEX_NONE)) { + if (hidden || + (mr->edit_bmesh && (mr->v_origindex) && mr->v_origindex[ml->v] == ORIGINDEX_NONE)) { lnor_data->w = -1; } else if (mp->flag & ME_FACE_SEL) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_mesh_analysis.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_mesh_analysis.cc index b57e2f6b807..fe2a02b6b63 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_mesh_analysis.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_mesh_analysis.cc @@ -23,7 +23,7 @@ namespace blender::draw { * \{ */ static void extract_mesh_analysis_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { @@ -259,7 +259,8 @@ static void statvis_calc_thickness(const MeshRenderData *mr, float *r_thickness) } struct BVHTree_OverlapData { - const Mesh *me; + const MVert *verts; + const MLoop *loops; const MLoopTri *mlooptri; float epsilon; }; @@ -267,7 +268,6 @@ struct BVHTree_OverlapData { static bool bvh_overlap_cb(void *userdata, int index_a, int index_b, int UNUSED(thread)) { struct BVHTree_OverlapData *data = static_cast<struct BVHTree_OverlapData *>(userdata); - const Mesh *me = data->me; const MLoopTri *tri_a = &data->mlooptri[index_a]; const MLoopTri *tri_b = &data->mlooptri[index_b]; @@ -276,12 +276,12 @@ static bool bvh_overlap_cb(void *userdata, int index_a, int index_b, int UNUSED( return false; } - const float *tri_a_co[3] = {me->mvert[me->mloop[tri_a->tri[0]].v].co, - me->mvert[me->mloop[tri_a->tri[1]].v].co, - me->mvert[me->mloop[tri_a->tri[2]].v].co}; - const float *tri_b_co[3] = {me->mvert[me->mloop[tri_b->tri[0]].v].co, - me->mvert[me->mloop[tri_b->tri[1]].v].co, - me->mvert[me->mloop[tri_b->tri[2]].v].co}; + const float *tri_a_co[3] = 
{data->verts[data->loops[tri_a->tri[0]].v].co, + data->verts[data->loops[tri_a->tri[1]].v].co, + data->verts[data->loops[tri_a->tri[2]].v].co}; + const float *tri_b_co[3] = {data->verts[data->loops[tri_b->tri[0]].v].co, + data->verts[data->loops[tri_b->tri[1]].v].co, + data->verts[data->loops[tri_b->tri[2]].v].co}; float ix_pair[2][3]; int verts_shared = 0; @@ -342,7 +342,8 @@ static void statvis_calc_intersect(const MeshRenderData *mr, float *r_intersect) BVHTree *tree = BKE_bvhtree_from_mesh_get(&treeData, mr->me, BVHTREE_FROM_LOOPTRI, 4); struct BVHTree_OverlapData data = {nullptr}; - data.me = mr->me; + data.verts = mr->mvert; + data.loops = mr->mloop; data.mlooptri = mr->mlooptri; data.epsilon = BLI_bvhtree_get_epsilon(tree); @@ -587,7 +588,7 @@ static void statvis_calc_sharp(const MeshRenderData *mr, float *r_sharp) } static void extract_analysis_iter_finish_mesh(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_orco.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_orco.cc index 68d838e9e62..4fcbdb1fc7c 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_orco.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_orco.cc @@ -19,7 +19,7 @@ struct MeshExtract_Orco_Data { }; static void extract_orco_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc index 313744bdd27..a822845c688 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_pos_nor.cc @@ -28,7 +28,7 @@ struct MeshExtract_PosNor_Data { }; static void 
extract_pos_nor_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -83,10 +83,11 @@ static void extract_pos_nor_iter_poly_bm(const MeshRenderData *mr, static void extract_pos_nor_iter_poly_mesh(const MeshRenderData *mr, const MPoly *mp, - const int UNUSED(mp_index), + const int mp_index, void *_data) { MeshExtract_PosNor_Data *data = static_cast<MeshExtract_PosNor_Data *>(_data); + const bool poly_hidden = mr->hide_poly && mr->hide_poly[mp_index]; const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; @@ -95,12 +96,12 @@ static void extract_pos_nor_iter_poly_mesh(const MeshRenderData *mr, PosNorLoop *vert = &data->vbo_data[ml_index]; const MVert *mv = &mr->mvert[ml->v]; + const bool vert_hidden = mr->hide_vert && mr->hide_vert[ml->v]; copy_v3_v3(vert->pos, mv->co); vert->nor = data->normals[ml->v].low; /* Flag for paint mode overlay. */ - if (mp->flag & ME_HIDE || mv->flag & ME_HIDE || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->v_origindex) && - (mr->v_origindex[ml->v] == ORIGINDEX_NONE))) { + if (poly_hidden || vert_hidden || + ((mr->v_origindex) && (mr->v_origindex[ml->v] == ORIGINDEX_NONE))) { vert->nor.w = -1; } else if (mv->flag & SELECT) { @@ -171,7 +172,7 @@ static void extract_pos_nor_iter_lvert_mesh(const MeshRenderData *mr, } static void extract_pos_nor_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *_data) { @@ -201,7 +202,7 @@ static GPUVertFormat *get_custom_normals_format() static void extract_pos_nor_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { @@ -372,7 +373,7 @@ struct MeshExtract_PosNorHQ_Data { }; static void extract_pos_nor_hq_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + 
MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -432,20 +433,22 @@ static void extract_pos_nor_hq_iter_poly_mesh(const MeshRenderData *mr, void *_data) { MeshExtract_PosNorHQ_Data *data = static_cast<MeshExtract_PosNorHQ_Data *>(_data); + const bool poly_hidden = mr->hide_poly && mr->hide_poly[mp - mr->mpoly]; + const MLoop *mloop = mr->mloop; const int ml_index_end = mp->loopstart + mp->totloop; for (int ml_index = mp->loopstart; ml_index < ml_index_end; ml_index += 1) { const MLoop *ml = &mloop[ml_index]; + const bool vert_hidden = mr->hide_vert && mr->hide_vert[ml->v]; PosNorHQLoop *vert = &data->vbo_data[ml_index]; const MVert *mv = &mr->mvert[ml->v]; copy_v3_v3(vert->pos, mv->co); copy_v3_v3_short(vert->nor, data->normals[ml->v].high); /* Flag for paint mode overlay. */ - if (mp->flag & ME_HIDE || mv->flag & ME_HIDE || - ((mr->extract_type == MR_EXTRACT_MAPPED) && (mr->v_origindex) && - (mr->v_origindex[ml->v] == ORIGINDEX_NONE))) { + if (poly_hidden || vert_hidden || + ((mr->v_origindex) && (mr->v_origindex[ml->v] == ORIGINDEX_NONE))) { vert->nor[3] = -1; } else if (mv->flag & SELECT) { @@ -521,7 +524,7 @@ static void extract_pos_nor_hq_iter_lvert_mesh(const MeshRenderData *mr, } static void extract_pos_nor_hq_finish(const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *UNUSED(buf), void *_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc index 0d959e324f8..6202fdd312d 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_sculpt_data.cc @@ -9,6 +9,7 @@ #include "BLI_string.h" +#include "BKE_mesh.h" #include "BKE_paint.h" #include "draw_subdivision.h" @@ -31,7 +32,7 @@ static GPUVertFormat *get_sculpt_data_format() } static void extract_sculpt_data_init(const 
MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { @@ -113,7 +114,7 @@ static void extract_sculpt_data_init(const MeshRenderData *mr, static void extract_sculpt_data_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buffer, void *UNUSED(data)) { @@ -128,6 +129,9 @@ static void extract_sculpt_data_init_subdiv(const DRWSubdivCache *subdiv_cache, GPUVertBuf *subdiv_mask_vbo = nullptr; const float *cd_mask = (const float *)CustomData_get_layer(cd_vdata, CD_PAINT_MASK); + const Span<MPoly> coarse_polys = coarse_mesh->polys(); + const Span<MLoop> coarse_loops = coarse_mesh->loops(); + if (cd_mask) { GPUVertFormat mask_format = {0}; GPU_vertformat_attr_add(&mask_format, "msk", GPU_COMP_F32, 1, GPU_FETCH_FLOAT); @@ -138,11 +142,11 @@ static void extract_sculpt_data_init_subdiv(const DRWSubdivCache *subdiv_cache, float *v_mask = static_cast<float *>(GPU_vertbuf_get_data(mask_vbo)); for (int i = 0; i < coarse_mesh->totpoly; i++) { - const MPoly *mpoly = &coarse_mesh->mpoly[i]; + const MPoly *mpoly = &coarse_polys[i]; for (int loop_index = mpoly->loopstart; loop_index < mpoly->loopstart + mpoly->totloop; loop_index++) { - const MLoop *ml = &coarse_mesh->mloop[loop_index]; + const MLoop *ml = &coarse_loops[loop_index]; *v_mask++ = cd_mask[ml->v]; } } diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc index 6230e1974be..9e0d171c9e4 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_select_idx.cc @@ -30,7 +30,7 @@ static void extract_select_idx_init_impl(const MeshRenderData *UNUSED(mr), } static void extract_select_idx_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), 
+ MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { @@ -366,7 +366,7 @@ constexpr MeshExtract create_extractor_vert_idx() } static void extract_fdot_idx_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *tls_data) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_skin_roots.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_skin_roots.cc index a275f247cad..f7655658bdd 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_skin_roots.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_skin_roots.cc @@ -19,7 +19,7 @@ struct SkinRootData { }; static void extract_skin_roots_init(const MeshRenderData *mr, - struct MeshBatchCache *UNUSED(cache), + MeshBatchCache *UNUSED(cache), void *buf, void *UNUSED(tls_data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_tan.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_tan.cc index 83453d6ef38..049fa416523 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_tan.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_tan.cc @@ -25,7 +25,7 @@ namespace blender::draw { * \{ */ static void extract_tan_init_common(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, GPUVertFormat *format, GPUVertCompType comp_type, GPUVertFetchMode fetch_mode, @@ -161,7 +161,7 @@ static void extract_tan_init_common(const MeshRenderData *mr, } static void extract_tan_ex_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, GPUVertBuf *vbo, const bool do_hq) { @@ -235,7 +235,7 @@ static void extract_tan_ex_init(const MeshRenderData *mr, } static void extract_tan_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *UNUSED(tls_data)) { @@ -254,7 +254,7 @@ static GPUVertFormat *get_coarse_tan_format() static void 
extract_tan_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { @@ -344,7 +344,7 @@ constexpr MeshExtract create_extractor_tan() * \{ */ static void extract_tan_hq_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *UNUSED(tls_data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc index ddb8ed9b25b..6606912850d 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc @@ -19,7 +19,7 @@ namespace blender::draw { /* Initialize the vertex format to be used for UVs. Return true if any UV layer is * found, false otherwise. */ static bool mesh_extract_uv_format_init(GPUVertFormat *format, - struct MeshBatchCache *cache, + MeshBatchCache *cache, CustomData *cd_ldata, eMRExtractType extract_type, uint32_t &r_uv_layers) @@ -72,7 +72,7 @@ static bool mesh_extract_uv_format_init(GPUVertFormat *format, } static void extract_uv_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *UNUSED(tls_data)) { @@ -120,7 +120,7 @@ static void extract_uv_init(const MeshRenderData *mr, static void extract_uv_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *UNUSED(mr), - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *UNUSED(data)) { diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc deleted file mode 100644 index 84ab20f8f90..00000000000 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_vcol.cc +++ /dev/null @@ -1,387 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2021 Blender Foundation. 
All rights reserved. */ - -/** \file - * \ingroup draw - */ - -#include "MEM_guardedalloc.h" - -#include "BKE_attribute.h" -#include "BLI_string.h" -#include "BLI_vector.hh" - -#include "draw_subdivision.h" -#include "extract_mesh.hh" - -namespace blender::draw { - -struct VColRef { - const CustomDataLayer *layer; - eAttrDomain domain; -}; - -/** Get all vcol layers as AttributeRefs. - * - * \param vcol_layers: bitmask to filter vcol layers by, each bit - * corresponds to the integer position of the attribute - * within the global color attribute list. - */ -static Vector<VColRef> get_vcol_refs(const CustomData *cd_vdata, - const CustomData *cd_ldata, - const uint vcol_layers) -{ - Vector<VColRef> refs; - uint layeri = 0; - - auto buildList = [&](const CustomData *cdata, eAttrDomain domain) { - for (int i = 0; i < cdata->totlayer; i++) { - const CustomDataLayer *layer = cdata->layers + i; - - if (!(CD_TYPE_AS_MASK(layer->type) & CD_MASK_COLOR_ALL)) { - continue; - } - - if (layer->flag & CD_FLAG_TEMPORARY) { - continue; - } - - if (!(vcol_layers & (1UL << layeri))) { - layeri++; - continue; - } - - VColRef ref = {}; - ref.domain = domain; - ref.layer = layer; - - refs.append(ref); - layeri++; - } - }; - - buildList(cd_vdata, ATTR_DOMAIN_POINT); - buildList(cd_ldata, ATTR_DOMAIN_CORNER); - - return refs; -} - -/* ---------------------------------------------------------------------- */ -/** \name Extract VCol - * \{ */ - -/* Initialize the common vertex format for vcol for coarse and subdivided meshes. 
*/ -static void init_vcol_format(GPUVertFormat *format, - const MeshBatchCache *cache, - const CustomData *cd_vdata, - const CustomData *cd_ldata, - const CustomDataLayer *active, - const CustomDataLayer *render) -{ - GPU_vertformat_deinterleave(format); - - const uint32_t vcol_layers = cache->cd_used.vcol; - - Vector<VColRef> refs = get_vcol_refs(cd_vdata, cd_ldata, vcol_layers); - - for (const VColRef &ref : refs) { - char attr_name[32], attr_safe_name[GPU_MAX_SAFE_ATTR_NAME]; - - GPU_vertformat_safe_attr_name(ref.layer->name, attr_safe_name, GPU_MAX_SAFE_ATTR_NAME); - - /* VCol layer name. */ - BLI_snprintf(attr_name, sizeof(attr_name), "a%s", attr_safe_name); - GPU_vertformat_attr_add(format, attr_name, GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); - - /* Active layer name. */ - if (ref.layer == active) { - GPU_vertformat_alias_add(format, "ac"); - } - - /* Active render layer name. */ - if (ref.layer == render) { - GPU_vertformat_alias_add(format, "c"); - } - } -} - -/* Vertex format for vertex colors, only used during the coarse data upload for the subdivision - * case. */ -static GPUVertFormat *get_coarse_vcol_format() -{ - static GPUVertFormat format = {0}; - if (format.attr_len == 0) { - GPU_vertformat_attr_add(&format, "cCol", GPU_COMP_U16, 4, GPU_FETCH_INT_TO_FLOAT_UNIT); - GPU_vertformat_alias_add(&format, "c"); - GPU_vertformat_alias_add(&format, "ac"); - } - return &format; -} - -struct gpuMeshVcol { - ushort r, g, b, a; -}; - -static void extract_vcol_init(const MeshRenderData *mr, - struct MeshBatchCache *cache, - void *buf, - void *UNUSED(tls_data)) -{ - GPUVertBuf *vbo = static_cast<GPUVertBuf *>(buf); - GPUVertFormat format = {0}; - - const CustomData *cd_vdata = (mr->extract_type == MR_EXTRACT_BMESH) ? &mr->bm->vdata : - &mr->me->vdata; - const CustomData *cd_ldata = (mr->extract_type == MR_EXTRACT_BMESH) ? 
&mr->bm->ldata : - &mr->me->ldata; - - Mesh me_query = blender::dna::shallow_zero_initialize(); - - BKE_id_attribute_copy_domains_temp( - ID_ME, cd_vdata, nullptr, cd_ldata, nullptr, nullptr, &me_query.id); - - const CustomDataLayer *active_color = BKE_id_attributes_active_color_get(&me_query.id); - const CustomDataLayer *render_color = BKE_id_attributes_render_color_get(&me_query.id); - - const uint32_t vcol_layers = cache->cd_used.vcol; - init_vcol_format(&format, cache, cd_vdata, cd_ldata, active_color, render_color); - - GPU_vertbuf_init_with_format(vbo, &format); - GPU_vertbuf_data_alloc(vbo, mr->loop_len); - - gpuMeshVcol *vcol_data = (gpuMeshVcol *)GPU_vertbuf_get_data(vbo); - - Vector<VColRef> refs = get_vcol_refs(cd_vdata, cd_ldata, vcol_layers); - - for (const VColRef &ref : refs) { - const CustomData *cdata = ref.domain == ATTR_DOMAIN_POINT ? cd_vdata : cd_ldata; - - if (mr->extract_type == MR_EXTRACT_BMESH) { - int cd_ofs = ref.layer->offset; - - if (cd_ofs == -1) { - vcol_data += ref.domain == ATTR_DOMAIN_POINT ? mr->bm->totvert : mr->bm->totloop; - continue; - } - - BMIter iter; - const bool is_byte = ref.layer->type == CD_PROP_BYTE_COLOR; - const bool is_point = ref.domain == ATTR_DOMAIN_POINT; - - BMFace *f; - BM_ITER_MESH (f, &iter, mr->bm, BM_FACES_OF_MESH) { - const BMLoop *l_iter = f->l_first; - do { - const BMElem *elem = is_point ? 
reinterpret_cast<const BMElem *>(l_iter->v) : - reinterpret_cast<const BMElem *>(l_iter); - if (is_byte) { - const MLoopCol *mloopcol = (const MLoopCol *)BM_ELEM_CD_GET_VOID_P(elem, cd_ofs); - vcol_data->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->r]); - vcol_data->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->g]); - vcol_data->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mloopcol->b]); - vcol_data->a = unit_float_to_ushort_clamp(mloopcol->a * (1.0f / 255.0f)); - vcol_data++; - } - else { - const MPropCol *mpcol = (const MPropCol *)BM_ELEM_CD_GET_VOID_P(elem, cd_ofs); - vcol_data->r = unit_float_to_ushort_clamp(mpcol->color[0]); - vcol_data->g = unit_float_to_ushort_clamp(mpcol->color[1]); - vcol_data->b = unit_float_to_ushort_clamp(mpcol->color[2]); - vcol_data->a = unit_float_to_ushort_clamp(mpcol->color[3]); - vcol_data++; - } - } while ((l_iter = l_iter->next) != f->l_first); - } - } - else { - int totloop = mr->loop_len; - const int idx = CustomData_get_named_layer_index(cdata, ref.layer->type, ref.layer->name); - - const MLoopCol *mcol = nullptr; - const MPropCol *pcol = nullptr; - const MLoop *mloop = mr->mloop; - - if (ref.layer->type == CD_PROP_COLOR) { - pcol = static_cast<const MPropCol *>(cdata->layers[idx].data); - } - else { - mcol = static_cast<const MLoopCol *>(cdata->layers[idx].data); - } - - const bool is_corner = ref.domain == ATTR_DOMAIN_CORNER; - - for (int i = 0; i < totloop; i++, mloop++) { - const int v_i = is_corner ? 
i : mloop->v; - - if (mcol) { - vcol_data->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[v_i].r]); - vcol_data->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[v_i].g]); - vcol_data->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[v_i].b]); - vcol_data->a = unit_float_to_ushort_clamp(mcol[v_i].a * (1.0f / 255.0f)); - vcol_data++; - } - else if (pcol) { - vcol_data->r = unit_float_to_ushort_clamp(pcol[v_i].color[0]); - vcol_data->g = unit_float_to_ushort_clamp(pcol[v_i].color[1]); - vcol_data->b = unit_float_to_ushort_clamp(pcol[v_i].color[2]); - vcol_data->a = unit_float_to_ushort_clamp(pcol[v_i].color[3]); - vcol_data++; - } - } - } - } -} - -static void extract_vcol_init_subdiv(const DRWSubdivCache *subdiv_cache, - const MeshRenderData *mr, - struct MeshBatchCache *cache, - void *buffer, - void *UNUSED(data)) -{ - GPUVertBuf *dst_buffer = static_cast<GPUVertBuf *>(buffer); - const Mesh *coarse_mesh = subdiv_cache->mesh; - - bool extract_bmesh = mr->extract_type == MR_EXTRACT_BMESH; - - const CustomData *cd_vdata = extract_bmesh ? &coarse_mesh->edit_mesh->bm->vdata : - &coarse_mesh->vdata; - const CustomData *cd_ldata = extract_bmesh ? &coarse_mesh->edit_mesh->bm->ldata : - &coarse_mesh->ldata; - const int totloop = extract_bmesh ? 
coarse_mesh->edit_mesh->bm->totloop : coarse_mesh->totloop; - - Mesh me_query = blender::dna::shallow_copy(*coarse_mesh); - BKE_id_attribute_copy_domains_temp( - ID_ME, cd_vdata, nullptr, cd_ldata, nullptr, nullptr, &me_query.id); - - const CustomDataLayer *active_color = BKE_id_attributes_active_color_get(&me_query.id); - const CustomDataLayer *render_color = BKE_id_attributes_render_color_get(&me_query.id); - - GPUVertFormat format = {0}; - init_vcol_format( - &format, cache, &coarse_mesh->vdata, &coarse_mesh->ldata, active_color, render_color); - - GPU_vertbuf_init_build_on_device(dst_buffer, &format, subdiv_cache->num_subdiv_loops); - - GPUVertBuf *src_data = GPU_vertbuf_calloc(); - /* Dynamic as we upload and interpolate layers one at a time. */ - GPU_vertbuf_init_with_format_ex(src_data, get_coarse_vcol_format(), GPU_USAGE_DYNAMIC); - - GPU_vertbuf_data_alloc(src_data, totloop); - - gpuMeshVcol *mesh_vcol = (gpuMeshVcol *)GPU_vertbuf_get_data(src_data); - - const uint vcol_layers = cache->cd_used.vcol; - - Vector<VColRef> refs = get_vcol_refs(cd_vdata, cd_ldata, vcol_layers); - - /* Index of the vertex color layer in the compact buffer. Used vertex color layers are stored in - * a single buffer. */ - int pack_layer_index = 0; - for (const VColRef &ref : refs) { - /* Include stride in offset, we use a stride of 2 since colors are packed into 2 uints. */ - const int dst_offset = (int)subdiv_cache->num_subdiv_loops * 2 * pack_layer_index++; - - const CustomData *cdata = ref.domain == ATTR_DOMAIN_POINT ? 
cd_vdata : cd_ldata; - int layer_i = CustomData_get_named_layer_index(cdata, ref.layer->type, ref.layer->name); - - if (layer_i == -1) { - printf("%s: missing color layer %s\n", __func__, ref.layer->name); - continue; - } - - gpuMeshVcol *vcol = mesh_vcol; - - const bool is_vert = ref.domain == ATTR_DOMAIN_POINT; - - if (extract_bmesh) { - BMesh *bm = coarse_mesh->edit_mesh->bm; - BMIter iter; - BMFace *f; - int cd_ofs = cdata->layers[layer_i].offset; - const bool is_byte = ref.layer->type == CD_PROP_BYTE_COLOR; - - BM_ITER_MESH (f, &iter, bm, BM_FACES_OF_MESH) { - const BMLoop *l_iter = f->l_first; - - do { - const BMElem *elem = is_vert ? reinterpret_cast<const BMElem *>(l_iter->v) : - reinterpret_cast<const BMElem *>(l_iter); - - if (is_byte) { - const MLoopCol *mcol2 = static_cast<const MLoopCol *>( - BM_ELEM_CD_GET_VOID_P(elem, cd_ofs)); - - vcol->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol2->r]); - vcol->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol2->g]); - vcol->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol2->b]); - vcol->a = unit_float_to_ushort_clamp(mcol2->a * (1.0f / 255.0f)); - } - else { - const MPropCol *pcol2 = static_cast<const MPropCol *>( - BM_ELEM_CD_GET_VOID_P(elem, cd_ofs)); - - vcol->r = unit_float_to_ushort_clamp(pcol2->color[0]); - vcol->g = unit_float_to_ushort_clamp(pcol2->color[1]); - vcol->b = unit_float_to_ushort_clamp(pcol2->color[2]); - vcol->a = unit_float_to_ushort_clamp(pcol2->color[3]); - } - - vcol++; - } while ((l_iter = l_iter->next) != f->l_first); - } - } - else { - const MLoop *ml = coarse_mesh->mloop; - const MLoopCol *mcol = nullptr; - const MPropCol *pcol = nullptr; - - if (ref.layer->type == CD_PROP_COLOR) { - pcol = static_cast<const MPropCol *>(cdata->layers[layer_i].data); - } - else { - mcol = static_cast<const MLoopCol *>(cdata->layers[layer_i].data); - } - - for (int ml_index = 0; ml_index < coarse_mesh->totloop; ml_index++, vcol++, ml++) { - int idx = is_vert ? 
ml->v : ml_index; - - if (mcol) { - vcol->r = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[idx].r]); - vcol->g = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[idx].g]); - vcol->b = unit_float_to_ushort_clamp(BLI_color_from_srgb_table[mcol[idx].b]); - vcol->a = unit_float_to_ushort_clamp(mcol[idx].a * (1.0f / 255.0f)); - } - else if (pcol) { - vcol->r = unit_float_to_ushort_clamp(pcol[idx].color[0]); - vcol->g = unit_float_to_ushort_clamp(pcol[idx].color[1]); - vcol->b = unit_float_to_ushort_clamp(pcol[idx].color[2]); - vcol->a = unit_float_to_ushort_clamp(pcol[idx].color[3]); - } - } - } - - /* Ensure data is uploaded properly. */ - GPU_vertbuf_tag_dirty(src_data); - draw_subdiv_interp_custom_data(subdiv_cache, src_data, dst_buffer, 4, dst_offset, true); - } - - GPU_vertbuf_discard(src_data); -} - -constexpr MeshExtract create_extractor_vcol() -{ - MeshExtract extractor = {nullptr}; - extractor.init = extract_vcol_init; - extractor.init_subdiv = extract_vcol_init_subdiv; - extractor.data_type = MR_DATA_NONE; - extractor.data_size = 0; - extractor.use_threading = false; - extractor.mesh_buffer_offset = offsetof(MeshBufferList, vbo.vcol); - return extractor; -} - -/** \} */ - -} // namespace blender::draw - -const MeshExtract extract_vcol = blender::draw::create_extractor_vcol(); diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc index c64cca4dff5..4db5a8c23a4 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_weights.cc @@ -8,6 +8,7 @@ #include "MEM_guardedalloc.h" #include "BKE_deform.h" +#include "BKE_mesh.h" #include "draw_subdivision.h" #include "extract_mesh.hh" @@ -79,7 +80,7 @@ static float evaluate_vertex_weight(const MDeformVert *dvert, const DRW_MeshWeig } static void extract_weights_init(const MeshRenderData *mr, - struct 
MeshBatchCache *cache, + MeshBatchCache *cache, void *buf, void *tls_data) { @@ -105,7 +106,7 @@ static void extract_weights_init(const MeshRenderData *mr, data->cd_ofs = CustomData_get_offset(&mr->bm->vdata, CD_MDEFORMVERT); } else { - data->dvert = (const MDeformVert *)CustomData_get_layer(&mr->me->vdata, CD_MDEFORMVERT); + data->dvert = mr->me->deform_verts().data(); data->cd_ofs = -1; } } @@ -154,7 +155,7 @@ static void extract_weights_iter_poly_mesh(const MeshRenderData *mr, static void extract_weights_init_subdiv(const DRWSubdivCache *subdiv_cache, const MeshRenderData *mr, - struct MeshBatchCache *cache, + MeshBatchCache *cache, void *buffer, void *_data) { @@ -171,8 +172,9 @@ static void extract_weights_init_subdiv(const DRWSubdivCache *subdiv_cache, extract_weights_init(mr, cache, coarse_weights, _data); if (mr->extract_type != MR_EXTRACT_BMESH) { - for (int i = 0; i < coarse_mesh->totpoly; i++) { - const MPoly *mpoly = &coarse_mesh->mpoly[i]; + const Span<MPoly> coarse_polys = coarse_mesh->polys(); + for (const int i : coarse_polys.index_range()) { + const MPoly *mpoly = &coarse_polys[i]; extract_weights_iter_poly_mesh(mr, mpoly, i, _data); } } diff --git a/source/blender/draw/intern/shaders/common_aabb_lib.glsl b/source/blender/draw/intern/shaders/common_aabb_lib.glsl new file mode 100644 index 00000000000..b5f664a6779 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_aabb_lib.glsl @@ -0,0 +1,59 @@ + +#pragma BLENDER_REQUIRE(common_shape_lib.glsl) + +/* ---------------------------------------------------------------------- */ +/** \name Axis Aligned Bound Box + * \{ */ + +struct AABB { + vec3 min, max; +}; + +AABB aabb_init_min_max() +{ + AABB aabb; + aabb.min = vec3(1.0e30); + aabb.max = vec3(-1.0e30); + return aabb; +} + +void aabb_merge(inout AABB aabb, vec3 v) +{ + aabb.min = min(aabb.min, v); + aabb.max = max(aabb.max, v); +} + +/** + * Return true if there is any intersection. 
+ */ +bool aabb_intersect(AABB a, AABB b) +{ + return all(greaterThanEqual(min(a.max, b.max), max(a.min, b.min))); +} + +/** + * Compute intersect intersection volume of \a a and \a b. + * Return true if the resulting volume is not empty. + */ +bool aabb_clip(AABB a, AABB b, out AABB c) +{ + c.min = max(a.min, b.min); + c.max = min(a.max, b.max); + return all(greaterThanEqual(c.max, c.min)); +} + +Box aabb_to_box(AABB aabb) +{ + Box box; + box.corners[0] = aabb.min; + box.corners[1] = vec3(aabb.max.x, aabb.min.y, aabb.min.z); + box.corners[2] = vec3(aabb.max.x, aabb.max.y, aabb.min.z); + box.corners[3] = vec3(aabb.min.x, aabb.max.y, aabb.min.z); + box.corners[4] = vec3(aabb.min.x, aabb.min.y, aabb.max.z); + box.corners[5] = vec3(aabb.max.x, aabb.min.y, aabb.max.z); + box.corners[6] = aabb.max; + box.corners[7] = vec3(aabb.min.x, aabb.max.y, aabb.max.z); + return box; +} + +/** \} */ diff --git a/source/blender/draw/intern/shaders/common_attribute_lib.glsl b/source/blender/draw/intern/shaders/common_attribute_lib.glsl index ce5e49c7f63..6b5b6fcc846 100644 --- a/source/blender/draw/intern/shaders/common_attribute_lib.glsl +++ b/source/blender/draw/intern/shaders/common_attribute_lib.glsl @@ -25,3 +25,4 @@ float attr_load_float(sampler3D tex); float attr_load_temperature_post(float attr); vec4 attr_load_color_post(vec4 attr); +vec4 attr_load_uniform(vec4 attr, const uint attr_hash); diff --git a/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl b/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl new file mode 100644 index 00000000000..3287897e73c --- /dev/null +++ b/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl @@ -0,0 +1,215 @@ + +/** + * Debugging drawing library + * + * Quick way to draw debug geometry. All input should be in world space and + * will be rendered in the default view. No additional setup required. + **/ + +/** Global switch option. 
*/ +bool drw_debug_draw_enable = true; +const vec4 drw_debug_default_color = vec4(1.0, 0.0, 0.0, 1.0); + +/* -------------------------------------------------------------------- */ +/** \name Internals + * \{ */ + +uint drw_debug_start_draw(uint v_needed) +{ + uint vertid = atomicAdd(drw_debug_draw_v_count, v_needed); + vertid += drw_debug_draw_offset; + return vertid; +} + +uint drw_debug_color_pack(vec4 color) +{ + color = clamp(color, 0.0, 1.0); + uint result = 0; + result |= uint(color.x * 255.0) << 0u; + result |= uint(color.y * 255.0) << 8u; + result |= uint(color.z * 255.0) << 16u; + result |= uint(color.w * 255.0) << 24u; + return result; +} + +void drw_debug_line(inout uint vertid, vec3 v1, vec3 v2, uint color) +{ + drw_debug_verts_buf[vertid++] = DRWDebugVert( + floatBitsToUint(v1.x), floatBitsToUint(v1.y), floatBitsToUint(v1.z), color); + drw_debug_verts_buf[vertid++] = DRWDebugVert( + floatBitsToUint(v2.x), floatBitsToUint(v2.y), floatBitsToUint(v2.z), color); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name API + * \{ */ + +/** + * Draw a line. + */ +void drw_debug_line(vec3 v1, vec3 v2, vec4 color) +{ + if (!drw_debug_draw_enable) { + return; + } + const uint v_needed = 2; + uint vertid = drw_debug_start_draw(v_needed); + if (vertid + v_needed < DRW_DEBUG_DRAW_VERT_MAX) { + drw_debug_line(vertid, v1, v2, drw_debug_color_pack(color)); + } +} +void drw_debug_line(vec3 v1, vec3 v2) +{ + drw_debug_line(v1, v2, drw_debug_default_color); +} + +/** + * Draw a quad contour. 
+ */ +void drw_debug_quad(vec3 v1, vec3 v2, vec3 v3, vec3 v4, vec4 color) +{ + if (!drw_debug_draw_enable) { + return; + } + const uint v_needed = 8; + uint vertid = drw_debug_start_draw(v_needed); + if (vertid + v_needed < DRW_DEBUG_DRAW_VERT_MAX) { + uint pcolor = drw_debug_color_pack(color); + drw_debug_line(vertid, v1, v2, pcolor); + drw_debug_line(vertid, v2, v3, pcolor); + drw_debug_line(vertid, v3, v4, pcolor); + drw_debug_line(vertid, v4, v1, pcolor); + } +} +void drw_debug_quad(vec3 v1, vec3 v2, vec3 v3, vec3 v4) +{ + drw_debug_quad(v1, v2, v3, v4, drw_debug_default_color); +} + +/** + * Draw a point as octahedron wireframe. + */ +void drw_debug_point(vec3 p, float radius, vec4 color) +{ + if (!drw_debug_draw_enable) { + return; + } + vec3 c = vec3(radius, -radius, 0); + vec3 v1 = p + c.xzz; + vec3 v2 = p + c.zxz; + vec3 v3 = p + c.yzz; + vec3 v4 = p + c.zyz; + vec3 v5 = p + c.zzx; + vec3 v6 = p + c.zzy; + + const uint v_needed = 12 * 2; + uint vertid = drw_debug_start_draw(v_needed); + if (vertid + v_needed < DRW_DEBUG_DRAW_VERT_MAX) { + uint pcolor = drw_debug_color_pack(color); + drw_debug_line(vertid, v1, v2, pcolor); + drw_debug_line(vertid, v2, v3, pcolor); + drw_debug_line(vertid, v3, v4, pcolor); + drw_debug_line(vertid, v4, v1, pcolor); + drw_debug_line(vertid, v1, v5, pcolor); + drw_debug_line(vertid, v2, v5, pcolor); + drw_debug_line(vertid, v3, v5, pcolor); + drw_debug_line(vertid, v4, v5, pcolor); + drw_debug_line(vertid, v1, v6, pcolor); + drw_debug_line(vertid, v2, v6, pcolor); + drw_debug_line(vertid, v3, v6, pcolor); + drw_debug_line(vertid, v4, v6, pcolor); + } +} +void drw_debug_point(vec3 p, float radius) +{ + drw_debug_point(p, radius, drw_debug_default_color); +} +void drw_debug_point(vec3 p) +{ + drw_debug_point(p, 0.01); +} + +/** + * Draw a sphere wireframe as 3 axes circle. 
+ */ +void drw_debug_sphere(vec3 p, float radius, vec4 color) +{ + if (!drw_debug_draw_enable) { + return; + } + const int circle_resolution = 16; + const uint v_needed = circle_resolution * 2 * 3; + uint vertid = drw_debug_start_draw(v_needed); + if (vertid + v_needed < DRW_DEBUG_DRAW_VERT_MAX) { + uint pcolor = drw_debug_color_pack(color); + for (int axis = 0; axis < 3; axis++) { + for (int edge = 0; edge < circle_resolution; edge++) { + float angle1 = (2.0 * 3.141592) * float(edge + 0) / float(circle_resolution); + vec3 p1 = vec3(cos(angle1), sin(angle1), 0.0) * radius; + p1 = vec3(p1[(0 + axis) % 3], p1[(1 + axis) % 3], p1[(2 + axis) % 3]); + + float angle2 = (2.0 * 3.141592) * float(edge + 1) / float(circle_resolution); + vec3 p2 = vec3(cos(angle2), sin(angle2), 0.0) * radius; + p2 = vec3(p2[(0 + axis) % 3], p2[(1 + axis) % 3], p2[(2 + axis) % 3]); + + drw_debug_line(vertid, p + p1, p + p2, pcolor); + } + } + } +} +void drw_debug_sphere(vec3 p, float radius) +{ + drw_debug_sphere(p, radius, drw_debug_default_color); +} + +/** + * Draw a matrix transformation as 3 colored axes (X red, Y green, Z blue). + */ +void drw_debug_matrix(mat4 mat, vec4 color) +{ + vec4 p[4] = vec4[4](vec4(0, 0, 0, 1), vec4(1, 0, 0, 1), vec4(0, 1, 0, 1), vec4(0, 0, 1, 1)); + for (int i = 0; i < 4; i++) { + p[i] = mat * p[i]; + p[i].xyz /= p[i].w; + } + drw_debug_line(p[0].xyz, p[1].xyz, vec4(1, 0, 0, 1)); + drw_debug_line(p[0].xyz, p[2].xyz, vec4(0, 1, 0, 1)); + drw_debug_line(p[0].xyz, p[3].xyz, vec4(0, 0, 1, 1)); +} +void drw_debug_matrix(mat4 mat) +{ + drw_debug_matrix(mat, drw_debug_default_color); +} + +/** + * Draw a matrix as a 2 units length bounding box, centered on origin. 
+ */ +void drw_debug_matrix_as_bbox(mat4 mat, vec4 color) +{ + vec4 p[8] = vec4[8](vec4(-1, -1, -1, 1), + vec4(1, -1, -1, 1), + vec4(1, 1, -1, 1), + vec4(-1, 1, -1, 1), + vec4(-1, -1, 1, 1), + vec4(1, -1, 1, 1), + vec4(1, 1, 1, 1), + vec4(-1, 1, 1, 1)); + for (int i = 0; i < 8; i++) { + p[i] = mat * p[i]; + p[i].xyz /= p[i].w; + } + drw_debug_quad(p[0].xyz, p[1].xyz, p[2].xyz, p[3].xyz, color); + drw_debug_line(p[0].xyz, p[4].xyz, color); + drw_debug_line(p[1].xyz, p[5].xyz, color); + drw_debug_line(p[2].xyz, p[6].xyz, color); + drw_debug_line(p[3].xyz, p[7].xyz, color); + drw_debug_quad(p[4].xyz, p[5].xyz, p[6].xyz, p[7].xyz, color); +} +void drw_debug_matrix_as_bbox(mat4 mat) +{ + drw_debug_matrix_as_bbox(mat, drw_debug_default_color); +} + +/** \} */ diff --git a/source/blender/draw/intern/shaders/common_debug_print_lib.glsl b/source/blender/draw/intern/shaders/common_debug_print_lib.glsl new file mode 100644 index 00000000000..89d1729b52d --- /dev/null +++ b/source/blender/draw/intern/shaders/common_debug_print_lib.glsl @@ -0,0 +1,388 @@ + +/** + * Debug print implementation for shaders. + * + * `print()`: + * Log variable or strings inside the viewport. + * Using a unique non string argument will print the variable name with it. + * Concatenate by using multiple arguments. i.e: `print("Looped ", n, "times.")`. + * `drw_print_no_endl()`: + * Same as `print()` but does not finish the line. + * `drw_print_value()`: + * Display only the value of a variable. Does not finish the line. + * `drw_print_value_hex()`: + * Display only the hex representation of a variable. Does not finish the line. + * `drw_print_value_binary()`: Display only the binary representation of a + * variable. Does not finish the line. + * + * IMPORTANT: As it is now, it is not yet thread safe. Only print from one thread. You can use the + * IS_DEBUG_MOUSE_FRAGMENT macro in fragment shader to filter using mouse position or + * IS_FIRST_INVOCATION in compute shaders. 
+ * + * NOTE: Floating point representation might not be very precise (see drw_print_value(float)). + * + * IMPORTANT: Multiple drawcalls can write to the buffer in sequence (if they are from different + * shgroups). However, we add barriers to support this case and it might change the application + * behavior. Uncomment DISABLE_DEBUG_SHADER_drw_print_BARRIER to remove the barriers if that + * happens. But then you are limited to a single invocation output. + * + * IMPORTANT: All of these are copied to the CPU debug libs (draw_debug.cc). They need to be kept + * in sync to write the same data. + */ + +/** Global switch option when you want to silence all prints from all shaders at once. */ +bool drw_debug_print_enable = true; + +/* Set drw_print_col to max value so we will start by creating a new line and get the correct + * threadsafe row. */ +uint drw_print_col = DRW_DEBUG_PRINT_WORD_WRAP_COLUMN; +uint drw_print_row = 0u; + +void drw_print_newline() +{ + if (!drw_debug_print_enable) { + return; + } + drw_print_col = 0u; + drw_print_row = atomicAdd(drw_debug_print_row_shared, 1u) + 1u; +} + +void drw_print_string_start(uint len) +{ + if (!drw_debug_print_enable) { + return; + } + /* Break before word. */ + if (drw_print_col + len > DRW_DEBUG_PRINT_WORD_WRAP_COLUMN) { + drw_print_newline(); + } +} + +void drw_print_char4(uint data) +{ + if (!drw_debug_print_enable) { + return; + } + /* Convert into char stream. */ + for (; data != 0u; data >>= 8u) { + uint char1 = data & 0xFFu; + /* Check for null terminator. */ + if (char1 == 0x00) { + break; + } + uint cursor = atomicAdd(drw_debug_print_cursor, 1u); + cursor += drw_debug_print_offset; + if (cursor < DRW_DEBUG_PRINT_MAX) { + /* For future usage. (i.e: Color) */ + uint flags = 0u; + uint col = drw_print_col++; + uint drw_print_header = (flags << 24u) | (drw_print_row << 16u) | (col << 8u); + drw_debug_print_buf[cursor] = drw_print_header | char1; + /* Break word. 
*/ + if (drw_print_col > DRW_DEBUG_PRINT_WORD_WRAP_COLUMN) { + drw_print_newline(); + } + } + } +} + +/** + * NOTE(fclem): Strange behavior emerge when trying to increment the digit + * counter inside the append function. It looks like the compiler does not see + * it is referenced as an index for char4 and thus do not capture the right + * reference. I do not know if this is undefined behavior. As a matter of + * precaution, we implement all the append function separately. This behavior + * was observed on both Mesa & amdgpu-pro. + */ +/* Using ascii char code. Expect char1 to be less or equal to 0xFF. Appends chars to the right. */ +void drw_print_append_char(uint char1, inout uint char4) +{ + char4 = (char4 << 8u) | char1; +} + +void drw_print_append_digit(uint digit, inout uint char4) +{ + const uint char_A = 0x41u; + const uint char_0 = 0x30u; + bool is_hexadecimal = digit > 9u; + char4 = (char4 << 8u) | (is_hexadecimal ? (char_A + digit - 10u) : (char_0 + digit)); +} + +void drw_print_append_space(inout uint char4) +{ + char4 = (char4 << 8u) | 0x20u; +} + +void drw_print_value_binary(uint value) +{ + drw_print_no_endl("0b"); + drw_print_string_start(10u * 4u); + uint digits[10] = uint[10](0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u); + uint digit = 0u; + for (uint i = 0u; i < 32u; i++) { + drw_print_append_digit(((value >> i) & 1u), digits[digit / 4u]); + digit++; + if ((i % 4u) == 3u) { + drw_print_append_space(digits[digit / 4u]); + digit++; + } + } + /* Numbers are written from right to left. So we need to reverse the order. */ + for (int j = 9; j >= 0; j--) { + drw_print_char4(digits[j]); + } +} + +void drw_print_value_binary(int value) +{ + drw_print_value_binary(uint(value)); +} + +void drw_print_value_binary(float value) +{ + drw_print_value_binary(floatBitsToUint(value)); +} + +void drw_print_value_uint(uint value, const bool hex, bool is_negative, const bool is_unsigned) +{ + drw_print_string_start(3u * 4u); + const uint blank_value = hex ? 
0x30303030u : 0x20202020u; + const uint prefix = hex ? 0x78302020u : 0x20202020u; + uint digits[3] = uint[3](blank_value, blank_value, prefix); + const uint base = hex ? 16u : 10u; + uint digit = 0u; + /* Add `u` suffix. */ + if (is_unsigned) { + drw_print_append_char('u', digits[digit / 4u]); + digit++; + } + /* Number's digits. */ + for (; value != 0u || digit == uint(is_unsigned); value /= base) { + drw_print_append_digit(value % base, digits[digit / 4u]); + digit++; + } + /* Add negative sign. */ + if (is_negative) { + drw_print_append_char('-', digits[digit / 4u]); + digit++; + } + /* Need to pad to uint alignment because we are issuing chars in "reverse". */ + for (uint i = digit % 4u; i < 4u && i > 0u; i++) { + drw_print_append_space(digits[digit / 4u]); + digit++; + } + /* Numbers are written from right to left. So we need to reverse the order. */ + for (int j = 2; j >= 0; j--) { + drw_print_char4(digits[j]); + } +} + +void drw_print_value_hex(uint value) +{ + drw_print_value_uint(value, true, false, false); +} + +void drw_print_value_hex(int value) +{ + drw_print_value_uint(uint(value), true, false, false); +} + +void drw_print_value_hex(float value) +{ + drw_print_value_uint(floatBitsToUint(value), true, false, false); +} + +void drw_print_value(uint value) +{ + drw_print_value_uint(value, false, false, true); +} + +void drw_print_value(int value) +{ + drw_print_value_uint(uint(abs(value)), false, (value < 0), false); +} + +void drw_print_value(bool value) +{ + if (value) { + drw_print_no_endl("true "); + } + else { + drw_print_no_endl("false"); + } +} + +/* NOTE(@fclem): This is homebrew and might not be 100% accurate (accuracy has + * not been tested and might dependent on compiler implementation). If unsure, + * use drw_print_value_hex and transcribe the value manually with another tool. */ +void drw_print_value(float val) +{ + /* We pad the string to match normal float values length. 
*/ + if (isnan(val)) { + drw_print_no_endl(" NaN"); + return; + } + if (isinf(val)) { + if (sign(val) < 0.0) { + drw_print_no_endl(" -Inf"); + } + else { + drw_print_no_endl(" Inf"); + } + return; + } + + /* Adjusted for significant digits (6) with sign (1), decimal separator (1) + * and exponent (4). */ + const float significant_digits = 6.0; + drw_print_string_start(3u * 4u); + uint digits[3] = uint[3](0x20202020u, 0x20202020u, 0x20202020u); + + float exponent = floor(log(abs(val)) / log(10.0)); + bool display_exponent = exponent >= (significant_digits) || + exponent <= (-significant_digits + 1.0); + + float int_significant_digits = min(exponent + 1.0, significant_digits); + float dec_significant_digits = max(0.0, significant_digits - int_significant_digits); + /* Power to get to the rounding point. */ + float rounding_power = dec_significant_digits; + + if (val == 0.0 || isinf(exponent)) { + display_exponent = false; + int_significant_digits = dec_significant_digits = 1.0; + } + /* Remap to keep significant numbers count. */ + if (display_exponent) { + int_significant_digits = 1.0; + dec_significant_digits = significant_digits - int_significant_digits; + rounding_power = -exponent + dec_significant_digits; + } + /* Round at the last significant digit. */ + val = round(val * pow(10.0, rounding_power)); + /* Get back to final exponent. */ + val *= pow(10.0, -dec_significant_digits); + + float int_part; + float dec_part = modf(val, int_part); + + dec_part *= pow(10.0, dec_significant_digits); + + const uint base = 10u; + uint digit = 0u; + /* Exponent */ + uint value = uint(abs(exponent)); + if (display_exponent) { + for (int i = 0; value != 0u || i == 0; i++, value /= base) { + drw_print_append_digit(value % base, digits[digit / 4u]); + digit++; + } + /* Exponent sign. */ + uint sign_char = (exponent < 0.0) ? '-' : '+'; + drw_print_append_char(sign_char, digits[digit / 4u]); + digit++; + /* Exponent `e` suffix. 
*/ + drw_print_append_char(0x65u, digits[digit / 4u]); + digit++; + } + /* Decimal part. */ + value = uint(abs(dec_part)); +#if 0 /* We don't do that because it makes unstable values really hard to \ + read. */ + /* Trim trailing zeros. */ + while ((value % base) == 0u) { + value /= base; + if (value == 0u) { + break; + } + } +#endif + if (value != 0u) { + for (int i = 0; value != 0u || i == 0; i++, value /= base) { + drw_print_append_digit(value % base, digits[digit / 4u]); + digit++; + } + /* Point separator. */ + drw_print_append_char('.', digits[digit / 4u]); + digit++; + } + /* Integer part. */ + value = uint(abs(int_part)); + for (int i = 0; value != 0u || i == 0; i++, value /= base) { + drw_print_append_digit(value % base, digits[digit / 4u]); + digit++; + } + /* Negative sign. */ + if (val < 0.0) { + drw_print_append_char('-', digits[digit / 4u]); + digit++; + } + /* Need to pad to uint alignment because we are issuing chars in "reverse". */ + for (uint i = digit % 4u; i < 4u && i > 0u; i++) { + drw_print_append_space(digits[digit / 4u]); + digit++; + } + /* Numbers are written from right to left. So we need to reverse the order. 
*/ + for (int j = 2; j >= 0; j--) { + drw_print_char4(digits[j]); + } +} + +void drw_print_value(vec2 value) +{ + drw_print_no_endl("vec2(", value[0], ", ", value[1], ")"); +} + +void drw_print_value(vec3 value) +{ + drw_print_no_endl("vec3(", value[0], ", ", value[1], ", ", value[2], ")"); +} + +void drw_print_value(vec4 value) +{ + drw_print_no_endl("vec4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +void drw_print_value(ivec2 value) +{ + drw_print_no_endl("ivec2(", value[0], ", ", value[1], ")"); +} + +void drw_print_value(ivec3 value) +{ + drw_print_no_endl("ivec3(", value[0], ", ", value[1], ", ", value[2], ")"); +} + +void drw_print_value(ivec4 value) +{ + drw_print_no_endl("ivec4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +void drw_print_value(uvec2 value) +{ + drw_print_no_endl("uvec2(", value[0], ", ", value[1], ")"); +} + +void drw_print_value(uvec3 value) +{ + drw_print_no_endl("uvec3(", value[0], ", ", value[1], ", ", value[2], ")"); +} + +void drw_print_value(uvec4 value) +{ + drw_print_no_endl("uvec4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} + +void drw_print_value(bvec2 value) +{ + drw_print_no_endl("bvec2(", value[0], ", ", value[1], ")"); +} + +void drw_print_value(bvec3 value) +{ + drw_print_no_endl("bvec3(", value[0], ", ", value[1], ", ", value[2], ")"); +} + +void drw_print_value(bvec4 value) +{ + drw_print_no_endl("bvec4(", value[0], ", ", value[1], ", ", value[2], ", ", value[3], ")"); +} diff --git a/source/blender/draw/intern/shaders/common_debug_shape_lib.glsl b/source/blender/draw/intern/shaders/common_debug_shape_lib.glsl new file mode 100644 index 00000000000..538c55ce544 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_debug_shape_lib.glsl @@ -0,0 +1,57 @@ + +/** + * Debug drawing of shapes. 
+ */ + +#pragma BLENDER_REQUIRE(common_debug_draw_lib.glsl) +#pragma BLENDER_REQUIRE(common_shape_lib.glsl) + +void drw_debug(Box shape, vec4 color) +{ + drw_debug_quad(shape.corners[0], shape.corners[1], shape.corners[2], shape.corners[3], color); + drw_debug_line(shape.corners[0], shape.corners[4], color); + drw_debug_line(shape.corners[1], shape.corners[5], color); + drw_debug_line(shape.corners[2], shape.corners[6], color); + drw_debug_line(shape.corners[3], shape.corners[7], color); + drw_debug_quad(shape.corners[4], shape.corners[5], shape.corners[6], shape.corners[7], color); +} +void drw_debug(Box shape) +{ + drw_debug(shape, drw_debug_default_color); +} + +void drw_debug(Frustum shape, vec4 color) +{ + drw_debug_quad(shape.corners[0], shape.corners[1], shape.corners[2], shape.corners[3], color); + drw_debug_line(shape.corners[0], shape.corners[4], color); + drw_debug_line(shape.corners[1], shape.corners[5], color); + drw_debug_line(shape.corners[2], shape.corners[6], color); + drw_debug_line(shape.corners[3], shape.corners[7], color); + drw_debug_quad(shape.corners[4], shape.corners[5], shape.corners[6], shape.corners[7], color); +} +void drw_debug(Frustum shape) +{ + drw_debug(shape, drw_debug_default_color); +} + +void drw_debug(Pyramid shape, vec4 color) +{ + drw_debug_line(shape.corners[0], shape.corners[1], color); + drw_debug_line(shape.corners[0], shape.corners[2], color); + drw_debug_line(shape.corners[0], shape.corners[3], color); + drw_debug_line(shape.corners[0], shape.corners[4], color); + drw_debug_quad(shape.corners[1], shape.corners[2], shape.corners[3], shape.corners[4], color); +} +void drw_debug(Pyramid shape) +{ + drw_debug(shape, drw_debug_default_color); +} + +void drw_debug(Sphere shape, vec4 color) +{ + drw_debug_sphere(shape.center, shape.radius, color); +} +void drw_debug(Sphere shape) +{ + drw_debug(shape, drw_debug_default_color); +} diff --git a/source/blender/draw/intern/shaders/common_hair_lib.glsl 
b/source/blender/draw/intern/shaders/common_hair_lib.glsl index e235da91e8d..b82df4a51dc 100644 --- a/source/blender/draw/intern/shaders/common_hair_lib.glsl +++ b/source/blender/draw/intern/shaders/common_hair_lib.glsl @@ -164,16 +164,15 @@ float hair_shaperadius(float shape, float root, float tip, float time) in float dummy; # endif -void hair_get_pos_tan_binor_time(bool is_persp, - mat4 invmodel_mat, - vec3 camera_pos, - vec3 camera_z, - out vec3 wpos, - out vec3 wtan, - out vec3 wbinor, - out float time, - out float thickness, - out float thick_time) +void hair_get_center_pos_tan_binor_time(bool is_persp, + mat4 invmodel_mat, + vec3 camera_pos, + vec3 camera_z, + out vec3 wpos, + out vec3 wtan, + out vec3 wbinor, + out float time, + out float thickness) { int id = hair_get_base_id(); vec4 data = texelFetch(hairPointBuffer, id); @@ -202,15 +201,27 @@ void hair_get_pos_tan_binor_time(bool is_persp, wbinor = normalize(cross(camera_vec, wtan)); thickness = hair_shaperadius(hairRadShape, hairRadRoot, hairRadTip, time); +} +void hair_get_pos_tan_binor_time(bool is_persp, + mat4 invmodel_mat, + vec3 camera_pos, + vec3 camera_z, + out vec3 wpos, + out vec3 wtan, + out vec3 wbinor, + out float time, + out float thickness, + out float thick_time) +{ + hair_get_center_pos_tan_binor_time( + is_persp, invmodel_mat, camera_pos, camera_z, wpos, wtan, wbinor, time, thickness); if (hairThicknessRes > 1) { thick_time = float(gl_VertexID % hairThicknessRes) / float(hairThicknessRes - 1); thick_time = thickness * (thick_time * 2.0 - 1.0); - /* Take object scale into account. * NOTE: This only works fine with uniform scaling. 
*/ float scale = 1.0 / length(mat3(invmodel_mat) * wbinor); - wpos += wbinor * thick_time * scale; } else { diff --git a/source/blender/draw/intern/shaders/common_intersect_lib.glsl b/source/blender/draw/intern/shaders/common_intersect_lib.glsl new file mode 100644 index 00000000000..83223f89277 --- /dev/null +++ b/source/blender/draw/intern/shaders/common_intersect_lib.glsl @@ -0,0 +1,466 @@ + +/** + * Intersection library used for culling. + * Results are meant to be conservative. + */ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) +#pragma BLENDER_REQUIRE(common_shape_lib.glsl) + +/* ---------------------------------------------------------------------- */ +/** \name Plane extraction functions. + * \{ */ + +/** \a v1 and \a v2 are vectors on the plane. \a p is a point on the plane. */ +vec4 isect_plane_setup(vec3 p, vec3 v1, vec3 v2) +{ + vec3 normal_to_plane = normalize(cross(v1, v2)); + return vec4(normal_to_plane, -dot(normal_to_plane, p)); +} + +struct IsectPyramid { + vec3 corners[5]; + vec4 planes[5]; +}; + +IsectPyramid isect_data_setup(Pyramid shape) +{ + vec3 A1 = shape.corners[1] - shape.corners[0]; + vec3 A2 = shape.corners[2] - shape.corners[0]; + vec3 A3 = shape.corners[3] - shape.corners[0]; + vec3 A4 = shape.corners[4] - shape.corners[0]; + vec3 S4 = shape.corners[4] - shape.corners[1]; + vec3 S2 = shape.corners[2] - shape.corners[1]; + + IsectPyramid data; + data.planes[0] = isect_plane_setup(shape.corners[0], A2, A1); + data.planes[1] = isect_plane_setup(shape.corners[0], A3, A2); + data.planes[2] = isect_plane_setup(shape.corners[0], A4, A3); + data.planes[3] = isect_plane_setup(shape.corners[0], A1, A4); + data.planes[4] = isect_plane_setup(shape.corners[1], S2, S4); + for (int i = 0; i < 5; i++) { + data.corners[i] = shape.corners[i]; + } + return data; +} + +struct IsectBox { + vec3 corners[8]; + vec4 planes[6]; +}; + +IsectBox isect_data_setup(Box shape) +{ + vec3 A1 = shape.corners[1] - 
shape.corners[0]; + vec3 A3 = shape.corners[3] - shape.corners[0]; + vec3 A4 = shape.corners[4] - shape.corners[0]; + + IsectBox data; + data.planes[0] = isect_plane_setup(shape.corners[0], A3, A1); + data.planes[1] = isect_plane_setup(shape.corners[0], A4, A3); + data.planes[2] = isect_plane_setup(shape.corners[0], A1, A4); + /* Assumes that the box is actually a box! */ + data.planes[3] = vec4(-data.planes[0].xyz, -dot(-data.planes[0].xyz, shape.corners[6])); + data.planes[4] = vec4(-data.planes[1].xyz, -dot(-data.planes[1].xyz, shape.corners[6])); + data.planes[5] = vec4(-data.planes[2].xyz, -dot(-data.planes[2].xyz, shape.corners[6])); + for (int i = 0; i < 8; i++) { + data.corners[i] = shape.corners[i]; + } + return data; +} + +/* Construct box from 1 corner point + 3 side vectors. */ +IsectBox isect_data_setup(vec3 origin, vec3 side_x, vec3 side_y, vec3 side_z) +{ + IsectBox data; + data.corners[0] = origin; + data.corners[1] = origin + side_x; + data.corners[2] = origin + side_y + side_x; + data.corners[3] = origin + side_y; + data.corners[4] = data.corners[0] + side_z; + data.corners[5] = data.corners[1] + side_z; + data.corners[6] = data.corners[2] + side_z; + data.corners[7] = data.corners[3] + side_z; + + data.planes[0] = isect_plane_setup(data.corners[0], side_y, side_z); + data.planes[1] = isect_plane_setup(data.corners[0], side_x, side_y); + data.planes[2] = isect_plane_setup(data.corners[0], side_z, side_x); + /* Assumes that the box is actually a box! 
*/ + data.planes[3] = vec4(-data.planes[0].xyz, -dot(-data.planes[0].xyz, data.corners[6])); + data.planes[4] = vec4(-data.planes[1].xyz, -dot(-data.planes[1].xyz, data.corners[6])); + data.planes[5] = vec4(-data.planes[2].xyz, -dot(-data.planes[2].xyz, data.corners[6])); + + return data; +} + +struct IsectFrustum { + vec3 corners[8]; + vec4 planes[6]; +}; + +IsectFrustum isect_data_setup(Frustum shape) +{ + vec3 A1 = shape.corners[1] - shape.corners[0]; + vec3 A3 = shape.corners[3] - shape.corners[0]; + vec3 A4 = shape.corners[4] - shape.corners[0]; + vec3 B5 = shape.corners[5] - shape.corners[6]; + vec3 B7 = shape.corners[7] - shape.corners[6]; + vec3 B2 = shape.corners[2] - shape.corners[6]; + + IsectFrustum data; + data.planes[0] = isect_plane_setup(shape.corners[0], A3, A1); + data.planes[1] = isect_plane_setup(shape.corners[0], A4, A3); + data.planes[2] = isect_plane_setup(shape.corners[0], A1, A4); + data.planes[3] = isect_plane_setup(shape.corners[6], B7, B5); + data.planes[4] = isect_plane_setup(shape.corners[6], B5, B2); + data.planes[5] = isect_plane_setup(shape.corners[6], B2, B7); + for (int i = 0; i < 8; i++) { + data.corners[i] = shape.corners[i]; + } + return data; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name View Intersection functions. + * \{ */ + +bool intersect_view(Pyramid pyramid) +{ + bool intersects = true; + + /* Do Pyramid vertices vs Frustum planes. */ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 5; ++v) { + float test = dot(drw_view.frustum_planes[p], vec4(pyramid.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + /* Now do Frustum vertices vs Pyramid planes. 
*/ + IsectPyramid i_pyramid = isect_data_setup(pyramid); + for (int p = 0; p < 5; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_pyramid.planes[p], vec4(drw_view.frustum_corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + return intersects; +} + +bool intersect_view(Box box) +{ + bool intersects = true; + + /* Do Box vertices vs Frustum planes. */ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(drw_view.frustum_planes[p], vec4(box.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + /* Now do Frustum vertices vs Box planes. */ + IsectBox i_box = isect_data_setup(box); + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_box.planes[p], vec4(drw_view.frustum_corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + return intersects; +} + +bool intersect_view(IsectBox i_box) +{ + bool intersects = true; + + /* Do Box vertices vs Frustum planes. 
*/ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(drw_view.frustum_planes[p], vec4(i_box.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_box.planes[p], vec4(drw_view.frustum_corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + return intersects; +} + +bool intersect_view(Sphere sphere) +{ + bool intersects = true; + + for (int p = 0; p < 6 && intersects; ++p) { + float dist_to_plane = dot(drw_view.frustum_planes[p], vec4(sphere.center, 1.0)); + if (dist_to_plane < -sphere.radius) { + intersects = false; + } + } + /* TODO reject false positive. */ + return intersects; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Shape vs. Shape Intersection functions. + * \{ */ + +bool intersect(IsectPyramid i_pyramid, Box box) +{ + bool intersects = true; + + /* Do Box vertices vs Pyramid planes. 
*/ + for (int p = 0; p < 5; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_pyramid.planes[p], vec4(box.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + /* Now do Pyramid vertices vs Box planes. */ + IsectBox i_box = isect_data_setup(box); + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 5; ++v) { + float test = dot(i_box.planes[p], vec4(i_pyramid.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + return intersects; +} + +bool intersect(IsectFrustum i_frustum, Pyramid pyramid) +{ + bool intersects = true; + + /* Do Pyramid vertices vs Frustum planes. */ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 5; ++v) { + float test = dot(i_frustum.planes[p], vec4(pyramid.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + /* Now do Frustum vertices vs Pyramid planes. 
*/ + IsectPyramid i_pyramid = isect_data_setup(pyramid); + for (int p = 0; p < 5; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_pyramid.planes[p], vec4(i_frustum.corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + return intersects; +} + +bool intersect(IsectFrustum i_frustum, Box box) +{ + bool intersects = true; + + /* Do Box vertices vs Frustum planes. */ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_frustum.planes[p], vec4(box.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + /* Now do Frustum vertices vs Box planes. 
*/ + IsectBox i_box = isect_data_setup(box); + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_box.planes[p], vec4(i_frustum.corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + return intersects; +} + +bool intersect(IsectFrustum i_frustum, Sphere sphere) +{ + bool intersects = true; + for (int p = 0; p < 6; ++p) { + float dist_to_plane = dot(i_frustum.planes[p], vec4(sphere.center, 1.0)); + if (dist_to_plane < -sphere.radius) { + intersects = false; + break; + } + } + return intersects; +} + +bool intersect(Cone cone, Sphere sphere) +{ + /** + * Following "Improve Tile-based Light Culling with Spherical-sliced Cone" + * by Eric Zhang + * https://lxjk.github.io/2018/03/25/Improve-Tile-based-Light-Culling-with-Spherical-sliced-Cone.html + */ + float sphere_distance = length(sphere.center); + float sphere_distance_rcp = safe_rcp(sphere_distance); + float sphere_sin = saturate(sphere.radius * sphere_distance_rcp); + float sphere_cos = sqrt(1.0 - sphere_sin * sphere_sin); + float cone_aperture_sin = sqrt(1.0 - cone.angle_cos * cone.angle_cos); + + float cone_sphere_center_cos = dot(sphere.center * sphere_distance_rcp, cone.direction); + /* cos(A+B) = cos(A) * cos(B) - sin(A) * sin(B). */ + float cone_sphere_angle_sum_cos = (sphere.radius > sphere_distance) ? + -1.0 : + (cone.angle_cos * sphere_cos - + cone_aperture_sin * sphere_sin); + /* Comparing cosines instead of angles since we are interested + * only in the monotonic region [0 .. M_PI / 2]. This saves costly acos() calls. 
*/ + bool intersects = (cone_sphere_center_cos >= cone_sphere_angle_sum_cos); + + return intersects; +} + +bool intersect(Circle circle_a, Circle circle_b) +{ + return distance_squared(circle_a.center, circle_b.center) < + sqr(circle_a.radius + circle_b.radius); +} + +/** \} */ diff --git a/source/blender/draw/intern/shaders/common_math_geom_lib.glsl b/source/blender/draw/intern/shaders/common_math_geom_lib.glsl index 6d4452c18c8..71460c39285 100644 --- a/source/blender/draw/intern/shaders/common_math_geom_lib.glsl +++ b/source/blender/draw/intern/shaders/common_math_geom_lib.glsl @@ -5,63 +5,88 @@ /** \name Math intersection & projection functions. * \{ */ -float point_plane_projection_dist(vec3 lineorigin, vec3 planeorigin, vec3 planenormal) +vec4 plane_from_quad(vec3 v0, vec3 v1, vec3 v2, vec3 v3) { - return dot(planenormal, planeorigin - lineorigin); + vec3 nor = normalize(cross(v2 - v1, v0 - v1) + cross(v0 - v3, v2 - v3)); + return vec4(nor, -dot(nor, v2)); } -float line_plane_intersect_dist(vec3 lineorigin, - vec3 linedirection, - vec3 planeorigin, - vec3 planenormal) +vec4 plane_from_tri(vec3 v0, vec3 v1, vec3 v2) { - return dot(planenormal, planeorigin - lineorigin) / dot(planenormal, linedirection); + vec3 nor = normalize(cross(v2 - v1, v0 - v1)); + return vec4(nor, -dot(nor, v2)); } -float line_plane_intersect_dist(vec3 lineorigin, vec3 linedirection, vec4 plane) +float point_plane_projection_dist(vec3 line_origin, vec3 plane_origin, vec3 plane_normal) +{ + return dot(plane_normal, plane_origin - line_origin); +} + +float point_line_projection_dist(vec2 point, vec2 line_origin, vec2 line_normal) +{ + return dot(line_normal, line_origin - point); +} + +float line_plane_intersect_dist(vec3 line_origin, + vec3 line_direction, + vec3 plane_origin, + vec3 plane_normal) +{ + return dot(plane_normal, plane_origin - line_origin) / dot(plane_normal, line_direction); +} + +float line_plane_intersect_dist(vec3 line_origin, vec3 line_direction, vec4 plane) { vec3 
plane_co = plane.xyz * (-plane.w / len_squared(plane.xyz)); - vec3 h = lineorigin - plane_co; - return -dot(plane.xyz, h) / dot(plane.xyz, linedirection); + vec3 h = line_origin - plane_co; + return -dot(plane.xyz, h) / dot(plane.xyz, line_direction); } -vec3 line_plane_intersect(vec3 lineorigin, vec3 linedirection, vec3 planeorigin, vec3 planenormal) +vec3 line_plane_intersect(vec3 line_origin, + vec3 line_direction, + vec3 plane_origin, + vec3 plane_normal) { - float dist = line_plane_intersect_dist(lineorigin, linedirection, planeorigin, planenormal); - return lineorigin + linedirection * dist; + float dist = line_plane_intersect_dist(line_origin, line_direction, plane_origin, plane_normal); + return line_origin + line_direction * dist; } -vec3 line_plane_intersect(vec3 lineorigin, vec3 linedirection, vec4 plane) +vec3 line_plane_intersect(vec3 line_origin, vec3 line_direction, vec4 plane) { - float dist = line_plane_intersect_dist(lineorigin, linedirection, plane); - return lineorigin + linedirection * dist; + float dist = line_plane_intersect_dist(line_origin, line_direction, plane); + return line_origin + line_direction * dist; } -float line_aligned_plane_intersect_dist(vec3 lineorigin, vec3 linedirection, vec3 planeorigin) +float line_aligned_plane_intersect_dist(vec3 line_origin, vec3 line_direction, vec3 plane_origin) { /* aligned plane normal */ - vec3 L = planeorigin - lineorigin; - float diskdist = length(L); - vec3 planenormal = -normalize(L); - return -diskdist / dot(planenormal, linedirection); + vec3 L = plane_origin - line_origin; + float disk_dist = length(L); + vec3 plane_normal = -normalize(L); + return -disk_dist / dot(plane_normal, line_direction); } -vec3 line_aligned_plane_intersect(vec3 lineorigin, vec3 linedirection, vec3 planeorigin) +vec3 line_aligned_plane_intersect(vec3 line_origin, vec3 line_direction, vec3 plane_origin) { - float dist = line_aligned_plane_intersect_dist(lineorigin, linedirection, planeorigin); + float dist = 
line_aligned_plane_intersect_dist(line_origin, line_direction, plane_origin); if (dist < 0) { /* if intersection is behind we fake the intersection to be * really far and (hopefully) not inside the radius of interest */ dist = 1e16; } - return lineorigin + linedirection * dist; + return line_origin + line_direction * dist; } -float line_unit_sphere_intersect_dist(vec3 lineorigin, vec3 linedirection) +/** + * Returns intersection distance between the unit sphere and the line + * with the assumption that \a line_origin is contained in the unit sphere. + * It will always returns the farthest intersection. + */ +float line_unit_sphere_intersect_dist(vec3 line_origin, vec3 line_direction) { - float a = dot(linedirection, linedirection); - float b = dot(linedirection, lineorigin); - float c = dot(lineorigin, lineorigin) - 1; + float a = dot(line_direction, line_direction); + float b = dot(line_direction, line_origin); + float c = dot(line_origin, line_origin) - 1; float dist = 1e15; float determinant = b * b - a * c; @@ -72,22 +97,63 @@ float line_unit_sphere_intersect_dist(vec3 lineorigin, vec3 linedirection) return dist; } -float line_unit_box_intersect_dist(vec3 lineorigin, vec3 linedirection) +/** + * Returns minimum intersection distance between the unit box and the line + * with the assumption that \a line_origin is contained in the unit box. + * In other words, it will always returns the farthest intersection. 
+ */ +float line_unit_box_intersect_dist(vec3 line_origin, vec3 line_direction) { /* https://seblagarde.wordpress.com/2012/09/29/image-based-lighting-approaches-and-parallax-corrected-cubemap/ */ - vec3 firstplane = (vec3(1.0) - lineorigin) / linedirection; - vec3 secondplane = (vec3(-1.0) - lineorigin) / linedirection; - vec3 furthestplane = max(firstplane, secondplane); + vec3 first_plane = (vec3(1.0) - line_origin) / line_direction; + vec3 second_plane = (vec3(-1.0) - line_origin) / line_direction; + vec3 farthest_plane = max(first_plane, second_plane); + + return min_v3(farthest_plane); +} + +float line_unit_box_intersect_dist_safe(vec3 line_origin, vec3 line_direction) +{ + vec3 safe_line_direction = max(vec3(1e-8), abs(line_direction)) * + select(vec3(1.0), -vec3(1.0), lessThan(line_direction, vec3(0.0))); + return line_unit_box_intersect_dist(line_origin, safe_line_direction); +} + +/** + * Same as line_unit_box_intersect_dist but for 2D case. + */ +float line_unit_square_intersect_dist(vec2 line_origin, vec2 line_direction) +{ + vec2 first_plane = (vec2(1.0) - line_origin) / line_direction; + vec2 second_plane = (vec2(-1.0) - line_origin) / line_direction; + vec2 farthest_plane = max(first_plane, second_plane); - return min_v3(furthestplane); + return min_v2(farthest_plane); } -float line_unit_box_intersect_dist_safe(vec3 lineorigin, vec3 linedirection) +float line_unit_square_intersect_dist_safe(vec2 line_origin, vec2 line_direction) { - vec3 safe_linedirection = max(vec3(1e-8), abs(linedirection)) * - select(vec3(1.0), -vec3(1.0), lessThan(linedirection, vec3(0.0))); - return line_unit_box_intersect_dist(lineorigin, safe_linedirection); + vec2 safe_line_direction = max(vec2(1e-8), abs(line_direction)) * + select(vec2(1.0), -vec2(1.0), lessThan(line_direction, vec2(0.0))); + return line_unit_square_intersect_dist(line_origin, safe_line_direction); +} + +/** + * Returns clipping distance (intersection with the nearest plane) with the given axis-aligned + * 
bound box along \a line_direction. + * Safe even if \a line_direction is degenerate. + * It assumes that an intersection exists (i.e: that \a line_direction points towards the AABB). + */ +float line_aabb_clipping_dist(vec3 line_origin, vec3 line_direction, vec3 aabb_min, vec3 aabb_max) +{ + vec3 safe_dir = select(line_direction, vec3(1e-5), lessThan(abs(line_direction), vec3(1e-5))); + vec3 dir_inv = 1.0 / safe_dir; + + vec3 first_plane = (aabb_min - line_origin) * dir_inv; + vec3 second_plane = (aabb_max - line_origin) * dir_inv; + vec3 nearest_plane = min(first_plane, second_plane); + return max_v3(nearest_plane); } /** \} */ @@ -98,8 +164,8 @@ float line_unit_box_intersect_dist_safe(vec3 lineorigin, vec3 linedirection) void make_orthonormal_basis(vec3 N, out vec3 T, out vec3 B) { - vec3 UpVector = abs(N.z) < 0.99999 ? vec3(0.0, 0.0, 1.0) : vec3(1.0, 0.0, 0.0); - T = normalize(cross(UpVector, N)); + vec3 up_vector = abs(N.z) < 0.99999 ? vec3(0.0, 0.0, 1.0) : vec3(1.0, 0.0, 0.0); + T = normalize(cross(up_vector, N)); B = cross(N, T); } diff --git a/source/blender/draw/intern/shaders/common_math_lib.glsl b/source/blender/draw/intern/shaders/common_math_lib.glsl index 51f3c890df8..5842df424be 100644 --- a/source/blender/draw/intern/shaders/common_math_lib.glsl +++ b/source/blender/draw/intern/shaders/common_math_lib.glsl @@ -17,6 +17,7 @@ #define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ #define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ #define FLT_MAX 3.402823e+38 +#define FLT_MIN 1.175494e-38 vec3 mul(mat3 m, vec3 v) { @@ -116,8 +117,8 @@ bool flag_test(int flag, int val) { return (flag & val) != 0; } void set_flag_from_test(inout uint value, bool test, uint flag) { if (test) { value |= flag; } else { value &= ~flag; } } void set_flag_from_test(inout int value, bool test, int flag) { if (test) { value |= flag; } else { value &= ~flag; } } -#define weighted_sum(val0, val1, val2, val3, weights) ((val0 * weights[0] + val1 * weights[1] + val2 * weights[2] + 
val3 * weights[3]) * safe_rcp(sum(weights))); -#define weighted_sum_array(val, weights) ((val[0] * weights[0] + val[1] * weights[1] + val[2] * weights[2] + val[3] * weights[3]) * safe_rcp(sum(weights))); +#define weighted_sum(val0, val1, val2, val3, weights) ((val0 * weights[0] + val1 * weights[1] + val2 * weights[2] + val3 * weights[3]) * safe_rcp(sum(weights))) +#define weighted_sum_array(val, weights) ((val[0] * weights[0] + val[1] * weights[1] + val[2] * weights[2] + val[3] * weights[3]) * safe_rcp(sum(weights))) /* clang-format on */ @@ -130,12 +131,17 @@ void set_flag_from_test(inout int value, bool test, int flag) { if (test) { valu #define in_texture_range(texel, tex) \ (all(greaterThanEqual(texel, ivec2(0))) && all(lessThan(texel, textureSize(tex, 0).xy))) -uint divide_ceil_u(uint visible_count, uint divisor) +uint divide_ceil(uint visible_count, uint divisor) { return (visible_count + (divisor - 1u)) / divisor; } -int divide_ceil_i(int visible_count, int divisor) +int divide_ceil(int visible_count, int divisor) +{ + return (visible_count + (divisor - 1)) / divisor; +} + +ivec2 divide_ceil(ivec2 visible_count, ivec2 divisor) { return (visible_count + (divisor - 1)) / divisor; } diff --git a/source/blender/draw/intern/shaders/common_shape_lib.glsl b/source/blender/draw/intern/shaders/common_shape_lib.glsl new file mode 100644 index 00000000000..f2c8bf0faaf --- /dev/null +++ b/source/blender/draw/intern/shaders/common_shape_lib.glsl @@ -0,0 +1,202 @@ + +#pragma BLENDER_REQUIRE(common_math_geom_lib.glsl) + +/** + * Geometric shape structures. + * Some constructors might seems redundant but are here to make the API cleaner and + * allow for more than one constructor per type. 
+ */ + +/* ---------------------------------------------------------------------- */ +/** \name Circle + * \{ */ + +struct Circle { + vec2 center; + float radius; +}; + +Circle shape_circle(vec2 center, float radius) +{ + return Circle(center, radius); +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Sphere + * \{ */ + +struct Sphere { + vec3 center; + float radius; +}; + +Sphere shape_sphere(vec3 center, float radius) +{ + return Sphere(center, radius); +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Box + * \{ */ + +struct Box { + vec3 corners[8]; +}; + +/* Construct box from 4 basis points. */ +Box shape_box(vec3 v000, vec3 v100, vec3 v010, vec3 v001) +{ + v100 -= v000; + v010 -= v000; + v001 -= v000; + Box box; + box.corners[0] = v000; + box.corners[1] = v000 + v100; + box.corners[2] = v000 + v010 + v100; + box.corners[3] = v000 + v010; + box.corners[4] = box.corners[0] + v001; + box.corners[5] = box.corners[1] + v001; + box.corners[6] = box.corners[2] + v001; + box.corners[7] = box.corners[3] + v001; + return box; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Square Pyramid + * \{ */ + +struct Pyramid { + /* Apex is the first. Base vertices are in clockwise order from front view. */ + vec3 corners[5]; +}; + +/** + * Regular Square Pyramid (can be oblique). + * Use this corner order. 
+ * (Top-Down View of the pyramid) + * <pre> + * + * Y + * | + * | + * .-----X + * + * 4-----------3 + * | \ / | + * | \ / | + * | 0 | + * | / \ | + * | / \ | + * 1-----------2 + * </pre> + * base_corner_00 is vertex 1 + * base_corner_01 is vertex 2 + * base_corner_10 is vertex 4 + */ +Pyramid shape_pyramid(vec3 apex, vec3 base_corner_00, vec3 base_corner_01, vec3 base_corner_10) +{ + Pyramid pyramid; + pyramid.corners[0] = apex; + pyramid.corners[1] = base_corner_00; + pyramid.corners[2] = base_corner_01; + pyramid.corners[3] = base_corner_10 + (base_corner_01 - base_corner_00); + pyramid.corners[4] = base_corner_10; + return pyramid; +} + +/** + * Regular Square Pyramid. + * <pre> + * + * Y + * | + * | + * .-----X + * + * 4-----Y-----3 + * | \ | / | + * | \ | / | + * | 0-----X + * | / \ | + * | / \ | + * 1-----------2 + * </pre> + * base_center_pos_x is vector from base center to X + * base_center_pos_y is vector from base center to Y + */ +Pyramid shape_pyramid_non_oblique(vec3 apex, + vec3 base_center, + vec3 base_center_pos_x, + vec3 base_center_pos_y) +{ + Pyramid pyramid; + pyramid.corners[0] = apex; + pyramid.corners[1] = base_center - base_center_pos_x - base_center_pos_y; + pyramid.corners[2] = base_center + base_center_pos_x - base_center_pos_y; + pyramid.corners[3] = base_center + base_center_pos_x + base_center_pos_y; + pyramid.corners[4] = base_center - base_center_pos_x + base_center_pos_y; + return pyramid; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Frustum + * \{ */ + +struct Frustum { + vec3 corners[8]; +}; + +/** + * Use this corner order. 
+ * <pre> + * + * Z Y + * | / + * |/ + * .-----X + * 2----------6 + * /| /| + * / | / | + * 1----------5 | + * | | | | + * | 3-------|--7 + * | / | / + * |/ |/ + * 0----------4 + * </pre> + */ +Frustum shape_frustum(vec3 corners[8]) +{ + Frustum frustum; + for (int i = 0; i < 8; i++) { + frustum.corners[i] = corners[i]; + } + return frustum; +} + +/** \} */ + +/* ---------------------------------------------------------------------- */ +/** \name Cone + * \{ */ + +/* Cone at orign with no height. */ +struct Cone { + vec3 direction; + float angle_cos; +}; + +Cone shape_cone(vec3 direction, float angle_cosine) +{ + return Cone(direction, angle_cosine); +} + +/** \} */ diff --git a/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl index 3244b7960d8..eacdf8e6333 100644 --- a/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl +++ b/source/blender/draw/intern/shaders/common_subdiv_ibo_lines_comp.glsl @@ -35,7 +35,7 @@ void emit_line(uint line_offset, uint quad_index, uint start_loop_index, uint co uint coarse_quad_index = coarse_polygon_index_from_subdiv_quad_index(quad_index, coarse_poly_count); - if (is_face_hidden(coarse_quad_index) || + if (use_hide && is_face_hidden(coarse_quad_index) || (input_origindex[vertex_index] == ORIGINDEX_NONE && optimal_display)) { output_lines[line_offset + 0] = 0xffffffff; output_lines[line_offset + 1] = 0xffffffff; diff --git a/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl index ce3c8478d3f..a46d69eca88 100644 --- a/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl +++ b/source/blender/draw/intern/shaders/common_subdiv_ibo_tris_comp.glsl @@ -45,7 +45,7 @@ void main() int triangle_loop_index = (int(quad_index) + mat_offset) * 6; #endif - if (is_face_hidden(coarse_quad_index)) { + if (use_hide && 
is_face_hidden(coarse_quad_index)) { output_tris[triangle_loop_index + 0] = 0xffffffff; output_tris[triangle_loop_index + 1] = 0xffffffff; output_tris[triangle_loop_index + 2] = 0xffffffff; diff --git a/source/blender/draw/intern/shaders/common_subdiv_lib.glsl b/source/blender/draw/intern/shaders/common_subdiv_lib.glsl index d76a7369f79..4183b4a1cd3 100644 --- a/source/blender/draw/intern/shaders/common_subdiv_lib.glsl +++ b/source/blender/draw/intern/shaders/common_subdiv_lib.glsl @@ -36,6 +36,10 @@ layout(std140) uniform shader_data /* Total number of elements to process. */ uint total_dispatch_size; + + bool is_edit_mode; + + bool use_hide; }; uint get_global_invocation_index() diff --git a/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl index e146ccb343a..81e346863c2 100644 --- a/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl +++ b/source/blender/draw/intern/shaders/common_subdiv_patch_evaluation_comp.glsl @@ -427,7 +427,7 @@ void main() output_nors[coarse_quad_index] = fnor; # endif - if (is_face_hidden(coarse_quad_index)) { + if (use_hide && is_face_hidden(coarse_quad_index)) { output_indices[coarse_quad_index] = 0xffffffff; } else { diff --git a/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl b/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl index f5c4c7895aa..97c07704c06 100644 --- a/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl +++ b/source/blender/draw/intern/shaders/common_subdiv_vbo_lnor_comp.glsl @@ -26,6 +26,23 @@ bool is_face_selected(uint coarse_quad_index) return (extra_coarse_face_data[coarse_quad_index] & coarse_face_select_mask) != 0; } +bool is_face_hidden(uint coarse_quad_index) +{ + return (extra_coarse_face_data[coarse_quad_index] & coarse_face_hidden_mask) != 0; +} + +/* Flag for paint mode overlay and normals drawing in edit-mode. 
*/ +float get_loop_flag(uint coarse_quad_index, int vert_origindex) +{ + if (is_face_hidden(coarse_quad_index) || (is_edit_mode && vert_origindex == -1)) { + return -1.0; + } + if (is_face_selected(coarse_quad_index)) { + return 1.0; + } + return 0.0; +} + void main() { /* We execute for each quad. */ @@ -44,7 +61,11 @@ void main() /* Face is smooth, use vertex normals. */ for (int i = 0; i < 4; i++) { PosNorLoop pos_nor_loop = pos_nor[start_loop_index + i]; - output_lnor[start_loop_index + i] = get_normal_and_flag(pos_nor_loop); + int origindex = input_vert_origindex[start_loop_index + i]; + LoopNormal loop_normal = get_normal_and_flag(pos_nor_loop); + loop_normal.flag = get_loop_flag(coarse_quad_index, origindex); + + output_lnor[start_loop_index + i] = loop_normal; } } else { @@ -68,11 +89,7 @@ void main() for (int i = 0; i < 4; i++) { int origindex = input_vert_origindex[start_loop_index + i]; - float flag = 0.0; - if (origindex == -1) { - flag = -1.0; - } - loop_normal.flag = flag; + loop_normal.flag = get_loop_flag(coarse_quad_index, origindex); output_lnor[start_loop_index + i] = loop_normal; } diff --git a/source/blender/draw/intern/shaders/common_view_lib.glsl b/source/blender/draw/intern/shaders/common_view_lib.glsl index 8eecaa46b58..6521476c3a7 100644 --- a/source/blender/draw/intern/shaders/common_view_lib.glsl +++ b/source/blender/draw/intern/shaders/common_view_lib.glsl @@ -37,6 +37,9 @@ layout(std140) uniform viewBlock # endif #endif +#define IS_DEBUG_MOUSE_FRAGMENT (ivec2(gl_FragCoord) == drw_view.mouse_pixel) +#define IS_FIRST_INVOCATION (gl_GlobalInvocationID == uvec3(0)) + #define ViewNear (ViewVecs[0].w) #define ViewFar (ViewVecs[1].w) @@ -152,7 +155,11 @@ uniform int drw_ResourceID; # define PASS_RESOURCE_ID # elif defined(GPU_VERTEX_SHADER) -# define resource_id gpu_InstanceIndex +# if defined(UNIFORM_RESOURCE_ID_NEW) +# define resource_id drw_ResourceID +# else +# define resource_id gpu_InstanceIndex +# endif # define PASS_RESOURCE_ID 
drw_ResourceID_iface.resource_index = resource_id; # elif defined(GPU_GEOMETRY_SHADER) @@ -200,8 +207,8 @@ flat in int resourceIDFrag; # ifndef DRW_SHADER_SHARED_H struct ObjectMatrices { - mat4 drw_modelMatrix; - mat4 drw_modelMatrixInverse; + mat4 model; + mat4 model_inverse; }; # endif /* DRW_SHADER_SHARED_H */ @@ -211,8 +218,8 @@ layout(std140) uniform modelBlock ObjectMatrices drw_matrices[DRW_RESOURCE_CHUNK_LEN]; }; -# define ModelMatrix (drw_matrices[resource_id].drw_modelMatrix) -# define ModelMatrixInverse (drw_matrices[resource_id].drw_modelMatrixInverse) +# define ModelMatrix (drw_matrices[resource_id].model) +# define ModelMatrixInverse (drw_matrices[resource_id].model_inverse) # endif /* USE_GPU_SHADER_CREATE_INFO */ #else /* GPU_INTEL */ diff --git a/source/blender/draw/intern/shaders/draw_command_generate_comp.glsl b/source/blender/draw/intern/shaders/draw_command_generate_comp.glsl new file mode 100644 index 00000000000..3e640540777 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_command_generate_comp.glsl @@ -0,0 +1,84 @@ + +/** + * Convert DrawPrototype into draw commands. + */ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +#define atomicAddAndGet(dst, val) (atomicAdd(dst, val) + val) + +/* This is only called by the last thread executed over the group's prototype draws. */ +void write_draw_call(DrawGroup group, uint group_id) +{ + DrawCommand cmd; + cmd.vertex_len = group.vertex_len; + cmd.vertex_first = group.vertex_first; + if (group.base_index != -1) { + cmd.base_index = group.base_index; + cmd.instance_first_indexed = group.start; + } + else { + cmd._instance_first_array = group.start; + } + /* Back-facing command. */ + cmd.instance_len = group_buf[group_id].back_facing_counter; + command_buf[group_id * 2 + 0] = cmd; + /* Front-facing command. */ + cmd.instance_len = group_buf[group_id].front_facing_counter; + command_buf[group_id * 2 + 1] = cmd; + + /* Reset the counters for a next command gen dispatch. 
Avoids resending the whole data just + * for this purpose. Only the last thread will execute this so it is thread-safe. */ + group_buf[group_id].front_facing_counter = 0u; + group_buf[group_id].back_facing_counter = 0u; + group_buf[group_id].total_counter = 0u; +} + +void main() +{ + uint proto_id = gl_GlobalInvocationID.x; + if (proto_id >= prototype_len) { + return; + } + + DrawPrototype proto = prototype_buf[proto_id]; + uint group_id = proto.group_id; + bool is_inverted = (proto.resource_handle & 0x80000000u) != 0; + uint resource_index = (proto.resource_handle & 0x7FFFFFFFu); + + /* Visibility test result. */ + bool is_visible = ((visibility_buf[resource_index / 32u] & (1u << (resource_index % 32u)))) != 0; + + DrawGroup group = group_buf[group_id]; + + if (!is_visible) { + /* Skip the draw but still count towards the completion. */ + if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) { + write_draw_call(group, group_id); + } + return; + } + + uint back_facing_len = group.len - group.front_facing_len; + uint front_facing_len = group.front_facing_len; + uint dst_index = group.start; + if (is_inverted) { + uint offset = atomicAdd(group_buf[group_id].back_facing_counter, proto.instance_len); + dst_index += offset; + if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) { + write_draw_call(group, group_id); + } + } + else { + uint offset = atomicAdd(group_buf[group_id].front_facing_counter, proto.instance_len); + dst_index += back_facing_len + offset; + if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) { + write_draw_call(group, group_id); + } + } + + for (uint i = dst_index; i < dst_index + proto.instance_len; i++) { + /* Fill resource_id buffer for each instance of this draw */ + resource_id_buf[i] = resource_index; + } +} diff --git a/source/blender/draw/intern/shaders/draw_debug_draw_display_frag.glsl 
b/source/blender/draw/intern/shaders/draw_debug_draw_display_frag.glsl new file mode 100644 index 00000000000..3fc5294b024 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_debug_draw_display_frag.glsl @@ -0,0 +1,9 @@ + +/** + * Display debug edge list. + **/ + +void main() +{ + out_color = interp.color; +} diff --git a/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl b/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl new file mode 100644 index 00000000000..4061dda5d1c --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl @@ -0,0 +1,15 @@ + +/** + * Display debug edge list. + **/ + +void main() +{ + /* Skip the first vertex containing header data. */ + DRWDebugVert vert = drw_debug_verts_buf[gl_VertexID + 2]; + vec3 pos = uintBitsToFloat(uvec3(vert.pos0, vert.pos1, vert.pos2)); + vec4 col = vec4((uvec4(vert.color) >> uvec4(0, 8, 16, 24)) & 0xFFu) / 255.0; + + interp.color = col; + gl_Position = persmat * vec4(pos, 1.0); +} diff --git a/source/blender/draw/intern/shaders/draw_debug_info.hh b/source/blender/draw/intern/shaders/draw_debug_info.hh new file mode 100644 index 00000000000..ce450bb1210 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_debug_info.hh @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "draw_defines.h" +#include "gpu_shader_create_info.hh" + +/* -------------------------------------------------------------------- */ +/** \name Debug print + * + * Allows print() function to have logging support inside shaders. 
+ * \{ */ + +GPU_SHADER_CREATE_INFO(draw_debug_print) + .typedef_source("draw_shader_shared.h") + .storage_buf(DRW_DEBUG_PRINT_SLOT, Qualifier::READ_WRITE, "uint", "drw_debug_print_buf[]"); + +GPU_SHADER_INTERFACE_INFO(draw_debug_print_display_iface, "").flat(Type::UINT, "char_index"); + +GPU_SHADER_CREATE_INFO(draw_debug_print_display) + .do_static_compilation(true) + .typedef_source("draw_shader_shared.h") + .storage_buf(7, Qualifier::READ, "uint", "drw_debug_print_buf[]") + .vertex_out(draw_debug_print_display_iface) + .fragment_out(0, Type::VEC4, "out_color") + .vertex_source("draw_debug_print_display_vert.glsl") + .fragment_source("draw_debug_print_display_frag.glsl") + .additional_info("draw_view"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug draw shapes + * + * Allows to draw lines and points just like the DRW_debug module functions. + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_debug_draw) + .typedef_source("draw_shader_shared.h") + .storage_buf(DRW_DEBUG_DRAW_SLOT, + Qualifier::READ_WRITE, + "DRWDebugVert", + "drw_debug_verts_buf[]"); + +GPU_SHADER_INTERFACE_INFO(draw_debug_draw_display_iface, "interp").flat(Type::VEC4, "color"); + +GPU_SHADER_CREATE_INFO(draw_debug_draw_display) + .do_static_compilation(true) + .typedef_source("draw_shader_shared.h") + .storage_buf(6, Qualifier::READ, "DRWDebugVert", "drw_debug_verts_buf[]") + .vertex_out(draw_debug_draw_display_iface) + .fragment_out(0, Type::VEC4, "out_color") + .push_constant(Type::MAT4, "persmat") + .vertex_source("draw_debug_draw_display_vert.glsl") + .fragment_source("draw_debug_draw_display_frag.glsl") + .additional_info("draw_view"); + +/** \} */ diff --git a/source/blender/draw/intern/shaders/draw_debug_print_display_frag.glsl b/source/blender/draw/intern/shaders/draw_debug_print_display_frag.glsl new file mode 100644 index 00000000000..4e0d980637f --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_debug_print_display_frag.glsl 
@@ -0,0 +1,133 @@ + +/** + * Display characters using an ascii table. + **/ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +bool char_intersect(uvec2 bitmap_position) +{ + /* Using 8x8 = 64bits = uvec2. */ + uvec2 ascii_bitmap[96] = uvec2[96](uvec2(0x00000000u, 0x00000000u), + uvec2(0x18001800u, 0x183c3c18u), + uvec2(0x00000000u, 0x36360000u), + uvec2(0x7f363600u, 0x36367f36u), + uvec2(0x301f0c00u, 0x0c3e031eu), + uvec2(0x0c666300u, 0x00633318u), + uvec2(0x3b336e00u, 0x1c361c6eu), + uvec2(0x00000000u, 0x06060300u), + uvec2(0x060c1800u, 0x180c0606u), + uvec2(0x180c0600u, 0x060c1818u), + uvec2(0x3c660000u, 0x00663cffu), + uvec2(0x0c0c0000u, 0x000c0c3fu), + uvec2(0x000c0c06u, 0x00000000u), + uvec2(0x00000000u, 0x0000003fu), + uvec2(0x000c0c00u, 0x00000000u), + uvec2(0x06030100u, 0x6030180cu), + uvec2(0x6f673e00u, 0x3e63737bu), + uvec2(0x0c0c3f00u, 0x0c0e0c0cu), + uvec2(0x06333f00u, 0x1e33301cu), + uvec2(0x30331e00u, 0x1e33301cu), + uvec2(0x7f307800u, 0x383c3633u), + uvec2(0x30331e00u, 0x3f031f30u), + uvec2(0x33331e00u, 0x1c06031fu), + uvec2(0x0c0c0c00u, 0x3f333018u), + uvec2(0x33331e00u, 0x1e33331eu), + uvec2(0x30180e00u, 0x1e33333eu), + uvec2(0x000c0c00u, 0x000c0c00u), + uvec2(0x000c0c06u, 0x000c0c00u), + uvec2(0x060c1800u, 0x180c0603u), + uvec2(0x003f0000u, 0x00003f00u), + uvec2(0x180c0600u, 0x060c1830u), + uvec2(0x0c000c00u, 0x1e333018u), + uvec2(0x7b031e00u, 0x3e637b7bu), + uvec2(0x3f333300u, 0x0c1e3333u), + uvec2(0x66663f00u, 0x3f66663eu), + uvec2(0x03663c00u, 0x3c660303u), + uvec2(0x66361f00u, 0x1f366666u), + uvec2(0x16467f00u, 0x7f46161eu), + uvec2(0x16060f00u, 0x7f46161eu), + uvec2(0x73667c00u, 0x3c660303u), + uvec2(0x33333300u, 0x3333333fu), + uvec2(0x0c0c1e00u, 0x1e0c0c0cu), + uvec2(0x33331e00u, 0x78303030u), + uvec2(0x36666700u, 0x6766361eu), + uvec2(0x46667f00u, 0x0f060606u), + uvec2(0x6b636300u, 0x63777f7fu), + uvec2(0x73636300u, 0x63676f7bu), + uvec2(0x63361c00u, 0x1c366363u), + uvec2(0x06060f00u, 0x3f66663eu), + uvec2(0x3b1e3800u, 0x1e333333u), + 
uvec2(0x36666700u, 0x3f66663eu), + uvec2(0x38331e00u, 0x1e33070eu), + uvec2(0x0c0c1e00u, 0x3f2d0c0cu), + uvec2(0x33333f00u, 0x33333333u), + uvec2(0x331e0c00u, 0x33333333u), + uvec2(0x7f776300u, 0x6363636bu), + uvec2(0x1c366300u, 0x6363361cu), + uvec2(0x0c0c1e00u, 0x3333331eu), + uvec2(0x4c667f00u, 0x7f633118u), + uvec2(0x06061e00u, 0x1e060606u), + uvec2(0x30604000u, 0x03060c18u), + uvec2(0x18181e00u, 0x1e181818u), + uvec2(0x00000000u, 0x081c3663u), + uvec2(0x000000ffu, 0x00000000u), + uvec2(0x00000000u, 0x0c0c1800u), + uvec2(0x3e336e00u, 0x00001e30u), + uvec2(0x66663b00u, 0x0706063eu), + uvec2(0x03331e00u, 0x00001e33u), + uvec2(0x33336e00u, 0x3830303eu), + uvec2(0x3f031e00u, 0x00001e33u), + uvec2(0x06060f00u, 0x1c36060fu), + uvec2(0x333e301fu, 0x00006e33u), + uvec2(0x66666700u, 0x0706366eu), + uvec2(0x0c0c1e00u, 0x0c000e0cu), + uvec2(0x3033331eu, 0x30003030u), + uvec2(0x1e366700u, 0x07066636u), + uvec2(0x0c0c1e00u, 0x0e0c0c0cu), + uvec2(0x7f6b6300u, 0x0000337fu), + uvec2(0x33333300u, 0x00001f33u), + uvec2(0x33331e00u, 0x00001e33u), + uvec2(0x663e060fu, 0x00003b66u), + uvec2(0x333e3078u, 0x00006e33u), + uvec2(0x66060f00u, 0x00003b6eu), + uvec2(0x1e301f00u, 0x00003e03u), + uvec2(0x0c2c1800u, 0x080c3e0cu), + uvec2(0x33336e00u, 0x00003333u), + uvec2(0x331e0c00u, 0x00003333u), + uvec2(0x7f7f3600u, 0x0000636bu), + uvec2(0x1c366300u, 0x00006336u), + uvec2(0x333e301fu, 0x00003333u), + uvec2(0x0c263f00u, 0x00003f19u), + uvec2(0x0c0c3800u, 0x380c0c07u), + uvec2(0x18181800u, 0x18181800u), + uvec2(0x0c0c0700u, 0x070c0c38u), + uvec2(0x00000000u, 0x6e3b0000u), + uvec2(0x00000000u, 0x00000000u)); + + if (!in_range_inclusive(bitmap_position, uvec2(0), uvec2(7))) { + return false; + } + uint char_bits = ascii_bitmap[char_index][bitmap_position.y >> 2u & 1u]; + char_bits = (char_bits >> ((bitmap_position.y & 3u) * 8u + bitmap_position.x)); + return (char_bits & 1u) != 0u; +} + +void main() +{ + uvec2 bitmap_position = uvec2(gl_PointCoord.xy * 8.0); + /* Point coord start from top 
left corner. But layout is from bottom to top. */ + bitmap_position.y = 7 - bitmap_position.y; + + if (char_intersect(bitmap_position)) { + out_color = vec4(1); + } + else if (char_intersect(bitmap_position + uvec2(0, 1))) { + /* Shadow */ + out_color = vec4(0, 0, 0, 1); + } + else { + /* Transparent background, for ease of reading. */ + out_color = vec4(0, 0, 0, 0.2); + } +} diff --git a/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl b/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl new file mode 100644 index 00000000000..cb379056e2b --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl @@ -0,0 +1,29 @@ + +/** + * Display characters using an ascii table. Outputs one point per character. + **/ + +#pragma BLENDER_REQUIRE(common_view_lib.glsl) + +void main() +{ + /* Skip the first 8 uints, which contain header data. */ + uint char_data = drw_debug_print_buf[gl_VertexID + 8]; + char_index = (char_data & 0xFFu) - 0x20u; + + /* Discard invalid chars. */ + if (char_index >= 96u) { + gl_Position = vec4(-1); + gl_PointSize = 0.0; + return; + } + uint row = (char_data >> 16u) & 0xFFu; + uint col = (char_data >> 8u) & 0xFFu; + + float char_size = 16.0; + /* Change anchor point to the top left. 
*/ + vec2 pos_on_screen = char_size * vec2(col, row) + char_size * 4; + gl_Position = vec4( + pos_on_screen * drw_view.viewport_size_inverse * vec2(2.0, -2.0) - vec2(1.0, -1.0), 0, 1); + gl_PointSize = char_size; +} diff --git a/source/blender/draw/intern/shaders/draw_object_infos_info.hh b/source/blender/draw/intern/shaders/draw_object_infos_info.hh index 8fd55ea351f..31fee018fbc 100644 --- a/source/blender/draw/intern/shaders/draw_object_infos_info.hh +++ b/source/blender/draw/intern/shaders/draw_object_infos_info.hh @@ -1,10 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "draw_defines.h" #include "gpu_shader_create_info.hh" GPU_SHADER_CREATE_INFO(draw_object_infos) .typedef_source("draw_shader_shared.h") .define("OBINFO_LIB") + .define("OrcoTexCoFactors", "(drw_infos[resource_id].orco_mul_bias)") + .define("ObjectInfo", "(drw_infos[resource_id].infos)") + .define("ObjectColor", "(drw_infos[resource_id].color)") .uniform_buf(1, "ObjectInfos", "drw_infos[DRW_RESOURCE_CHUNK_LEN]", Frequency::BATCH); GPU_SHADER_CREATE_INFO(draw_volume_infos) @@ -14,3 +18,19 @@ GPU_SHADER_CREATE_INFO(draw_volume_infos) GPU_SHADER_CREATE_INFO(draw_curves_infos) .typedef_source("draw_shader_shared.h") .uniform_buf(2, "CurvesInfos", "drw_curves", Frequency::BATCH); + +GPU_SHADER_CREATE_INFO(draw_object_infos_new) + .typedef_source("draw_shader_shared.h") + .define("OBINFO_LIB") + .define("OrcoTexCoFactors", "(drw_infos[resource_id].orco_mul_bias)") + .define("ObjectInfo", "(drw_infos[resource_id].infos)") + .define("ObjectColor", "(drw_infos[resource_id].color)") + .storage_buf(DRW_OBJ_INFOS_SLOT, Qualifier::READ, "ObjectInfos", "drw_infos[]"); + +/** \note Requires draw_object_infos_new. 
*/ +GPU_SHADER_CREATE_INFO(draw_object_attribute_new) + .define("OBATTR_LIB") + .define("ObjectAttributeStart", "(drw_infos[resource_id].orco_mul_bias[0].w)") + .define("ObjectAttributeLen", "(drw_infos[resource_id].orco_mul_bias[1].w)") + .storage_buf(DRW_OBJ_ATTR_SLOT, Qualifier::READ, "ObjectAttribute", "drw_attrs[]") + .additional_info("draw_object_infos_new"); diff --git a/source/blender/draw/intern/shaders/draw_resource_finalize_comp.glsl b/source/blender/draw/intern/shaders/draw_resource_finalize_comp.glsl new file mode 100644 index 00000000000..511d4e49651 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_resource_finalize_comp.glsl @@ -0,0 +1,64 @@ + +/** + * Finish computation of a few draw resource after sync. + */ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +void main() +{ + uint resource_id = gl_GlobalInvocationID.x; + if (resource_id >= resource_len) { + return; + } + + mat4 model_mat = matrix_buf[resource_id].model; + ObjectInfos infos = infos_buf[resource_id]; + ObjectBounds bounds = bounds_buf[resource_id]; + + if (bounds.bounding_sphere.w != -1.0) { + /* Convert corners to origin + sides in world space. */ + vec3 p0 = bounds.bounding_corners[0].xyz; + vec3 p01 = bounds.bounding_corners[1].xyz - p0; + vec3 p02 = bounds.bounding_corners[2].xyz - p0; + vec3 p03 = bounds.bounding_corners[3].xyz - p0; + /* Avoid flat box. */ + p01.x = max(p01.x, 1e-4); + p02.y = max(p02.y, 1e-4); + p03.z = max(p03.z, 1e-4); + vec3 diagonal = p01 + p02 + p03; + vec3 center = p0 + diagonal * 0.5; + float min_axis = min_v3(abs(diagonal)); + bounds_buf[resource_id].bounding_sphere.xyz = transform_point(model_mat, center); + /* We have to apply scaling to the diagonal. 
*/ + bounds_buf[resource_id].bounding_sphere.w = length(transform_direction(model_mat, diagonal)) * + 0.5; + bounds_buf[resource_id]._inner_sphere_radius = min_axis; + bounds_buf[resource_id].bounding_corners[0].xyz = transform_point(model_mat, p0); + bounds_buf[resource_id].bounding_corners[1].xyz = transform_direction(model_mat, p01); + bounds_buf[resource_id].bounding_corners[2].xyz = transform_direction(model_mat, p02); + bounds_buf[resource_id].bounding_corners[3].xyz = transform_direction(model_mat, p03); + /* Always have correct handedness in the corners vectors. */ + if (flag_test(infos.flag, OBJECT_NEGATIVE_SCALE)) { + bounds_buf[resource_id].bounding_corners[0].xyz += + bounds_buf[resource_id].bounding_corners[1].xyz; + bounds_buf[resource_id].bounding_corners[1].xyz = + -bounds_buf[resource_id].bounding_corners[1].xyz; + } + + /* TODO: Bypass test for very large objects (see T67319). */ + if (bounds_buf[resource_id].bounding_sphere.w > 1e12) { + bounds_buf[resource_id].bounding_sphere.w = -1.0; + } + } + + vec3 loc = infos.orco_add; /* Box center. */ + vec3 size = infos.orco_mul; /* Box half-extent. */ + /* This is what the original computation looks like. + * Simplify to a nice MADD in shading code. 
*/ + // orco = (pos - loc) / size; + // orco = pos * (1.0 / size) + (-loc / size); + vec3 size_inv = safe_rcp(size); + infos_buf[resource_id].orco_add = -loc * size_inv; + infos_buf[resource_id].orco_mul = size_inv; +} diff --git a/source/blender/draw/intern/shaders/draw_view_info.hh b/source/blender/draw/intern/shaders/draw_view_info.hh index 0400521c53d..c522c607791 100644 --- a/source/blender/draw/intern/shaders/draw_view_info.hh +++ b/source/blender/draw/intern/shaders/draw_view_info.hh @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "draw_defines.h" #include "gpu_shader_create_info.hh" /* -------------------------------------------------------------------- */ @@ -44,13 +45,13 @@ GPU_SHADER_CREATE_INFO(draw_resource_handle) * \{ */ GPU_SHADER_CREATE_INFO(draw_view) - .uniform_buf(0, "ViewInfos", "drw_view", Frequency::PASS) + .uniform_buf(DRW_VIEW_UBO_SLOT, "ViewInfos", "drw_view", Frequency::PASS) .typedef_source("draw_shader_shared.h"); GPU_SHADER_CREATE_INFO(draw_modelmat) .uniform_buf(8, "ObjectMatrices", "drw_matrices[DRW_RESOURCE_CHUNK_LEN]", Frequency::BATCH) - .define("ModelMatrix", "(drw_matrices[resource_id].drw_modelMatrix)") - .define("ModelMatrixInverse", "(drw_matrices[resource_id].drw_modelMatrixInverse)") + .define("ModelMatrix", "(drw_matrices[resource_id].model)") + .define("ModelMatrixInverse", "(drw_matrices[resource_id].model_inverse)") .additional_info("draw_view"); GPU_SHADER_CREATE_INFO(draw_modelmat_legacy) @@ -136,3 +137,77 @@ GPU_SHADER_CREATE_INFO(draw_gpencil) .additional_info("draw_modelmat", "draw_resource_id_uniform", "draw_object_infos"); /** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Internal Draw Manager usage + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_resource_finalize) + .do_static_compilation(true) + .typedef_source("draw_shader_shared.h") + .define("DRAW_FINALIZE_SHADER") + .local_group_size(DRW_FINALIZE_GROUP_SIZE) + .storage_buf(0, 
Qualifier::READ, "ObjectMatrices", "matrix_buf[]") + .storage_buf(1, Qualifier::READ_WRITE, "ObjectBounds", "bounds_buf[]") + .storage_buf(2, Qualifier::READ_WRITE, "ObjectInfos", "infos_buf[]") + .push_constant(Type::INT, "resource_len") + .compute_source("draw_resource_finalize_comp.glsl"); + +GPU_SHADER_CREATE_INFO(draw_visibility_compute) + .do_static_compilation(true) + .local_group_size(DRW_VISIBILITY_GROUP_SIZE) + .storage_buf(0, Qualifier::READ, "ObjectBounds", "bounds_buf[]") + .storage_buf(1, Qualifier::READ_WRITE, "uint", "visibility_buf[]") + .push_constant(Type::INT, "resource_len") + .compute_source("draw_visibility_comp.glsl") + .additional_info("draw_view"); + +GPU_SHADER_CREATE_INFO(draw_command_generate) + .do_static_compilation(true) + .typedef_source("draw_shader_shared.h") + .typedef_source("draw_command_shared.hh") + .local_group_size(DRW_COMMAND_GROUP_SIZE) + .storage_buf(0, Qualifier::READ_WRITE, "DrawGroup", "group_buf[]") + .storage_buf(1, Qualifier::READ, "uint", "visibility_buf[]") + .storage_buf(2, Qualifier::READ, "DrawPrototype", "prototype_buf[]") + .storage_buf(3, Qualifier::WRITE, "DrawCommand", "command_buf[]") + .storage_buf(DRW_RESOURCE_ID_SLOT, Qualifier::WRITE, "uint", "resource_id_buf[]") + .push_constant(Type::INT, "prototype_len") + .compute_source("draw_command_generate_comp.glsl"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw Resource ID + * New implementation using gl_BaseInstance and storage buffers. + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_resource_id_new) + .define("UNIFORM_RESOURCE_ID_NEW") + .storage_buf(DRW_RESOURCE_ID_SLOT, Qualifier::READ, "int", "resource_id_buf[]") + .define("drw_ResourceID", "resource_id_buf[gpu_BaseInstance + gl_InstanceID]"); + +/** + * Workaround the lack of gl_BaseInstance by binding the resource_id_buf as vertex buf. 
+ */ +GPU_SHADER_CREATE_INFO(draw_resource_id_fallback) + .define("UNIFORM_RESOURCE_ID_NEW") + .vertex_in(15, Type::INT, "drw_ResourceID"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw Object Resources + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_modelmat_new) + .typedef_source("draw_shader_shared.h") + .storage_buf(DRW_OBJ_MAT_SLOT, Qualifier::READ, "ObjectMatrices", "drw_matrix_buf[]") + .define("drw_ModelMatrixInverse", "drw_matrix_buf[resource_id].model_inverse") + .define("drw_ModelMatrix", "drw_matrix_buf[resource_id].model") + /* TODO For compatibility with old shaders. To be removed. */ + .define("ModelMatrixInverse", "drw_ModelMatrixInverse") + .define("ModelMatrix", "drw_ModelMatrix") + .additional_info("draw_resource_id_new"); + +/** \} */ diff --git a/source/blender/draw/intern/shaders/draw_visibility_comp.glsl b/source/blender/draw/intern/shaders/draw_visibility_comp.glsl new file mode 100644 index 00000000000..86add2d1fe2 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_visibility_comp.glsl @@ -0,0 +1,46 @@ + +/** + * Compute visibility of each resource bounds for a given view. + */ +/* TODO(fclem): This could be augmented by a 2 pass occlusion culling system. 
*/ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(common_intersect_lib.glsl) + +shared uint shared_result; + +void mask_visibility_bit() +{ + uint bit = 1u << gl_LocalInvocationID.x; + atomicAnd(visibility_buf[gl_WorkGroupID.x], ~bit); +} + +void main() +{ + if (gl_GlobalInvocationID.x >= resource_len) { + return; + } + + ObjectBounds bounds = bounds_buf[gl_GlobalInvocationID.x]; + + if (bounds.bounding_sphere.w != -1.0) { + IsectBox box = isect_data_setup(bounds.bounding_corners[0].xyz, + bounds.bounding_corners[1].xyz, + bounds.bounding_corners[2].xyz, + bounds.bounding_corners[3].xyz); + Sphere bounding_sphere = Sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w); + Sphere inscribed_sphere = Sphere(bounds.bounding_sphere.xyz, bounds._inner_sphere_radius); + + if (intersect_view(inscribed_sphere) == true) { + /* Visible. */ + } + else if (intersect_view(bounding_sphere) == false) { + /* Not visible. */ + mask_visibility_bit(); + } + else if (intersect_view(box) == false) { + /* Not visible. */ + mask_visibility_bit(); + } + } +} |