diff options
48 files changed, 4742 insertions, 165 deletions
diff --git a/release/scripts/addons b/release/scripts/addons -Subproject 25ffc6f430fc995b1c046b01acba1c3e6c1896b +Subproject 67f1fbca1482d9d9362a4001332e785c3fd5d23 diff --git a/release/scripts/startup/bl_ui/space_userpref.py b/release/scripts/startup/bl_ui/space_userpref.py index cd11938e146..49f0fef5849 100644 --- a/release/scripts/startup/bl_ui/space_userpref.py +++ b/release/scripts/startup/bl_ui/space_userpref.py @@ -2327,6 +2327,7 @@ class USERPREF_PT_experimental_debugging(ExperimentalPanel, Panel): ({"property": "use_cycles_debug"}, None), ({"property": "show_asset_debug_info"}, None), ({"property": "use_asset_indexing"}, None), + ({"property": "use_viewport_debug"}, None), ), ) diff --git a/release/scripts/startup/bl_ui/space_view3d.py b/release/scripts/startup/bl_ui/space_view3d.py index b1b5738aecd..b2fa8e4d64f 100644 --- a/release/scripts/startup/bl_ui/space_view3d.py +++ b/release/scripts/startup/bl_ui/space_view3d.py @@ -7809,6 +7809,25 @@ class VIEW3D_PT_curves_sculpt_grow_shrink_scaling(Panel): layout.prop(brush.curves_sculpt_settings, "minimum_length") +class VIEW3D_PT_viewport_debug(Panel): + bl_space_type = 'VIEW_3D' + bl_region_type = 'HEADER' + bl_parent_id = 'VIEW3D_PT_overlay' + bl_label = "Viewport Debug" + + @classmethod + def poll(cls, context): + prefs = context.preferences + return prefs.experimental.use_viewport_debug + + def draw(self, context): + layout = self.layout + view = context.space_data + overlay = view.overlay + + layout.prop(overlay, "use_debug_freeze_view_culling") + + classes = ( VIEW3D_HT_header, VIEW3D_HT_tool_header, @@ -8046,6 +8065,7 @@ classes = ( TOPBAR_PT_annotation_layers, VIEW3D_PT_curves_sculpt_add_shape, VIEW3D_PT_curves_sculpt_grow_shrink_scaling, + VIEW3D_PT_viewport_debug, ) diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt index 939e302b3d2..5704c9e6774 100644 --- a/source/blender/draw/CMakeLists.txt +++ b/source/blender/draw/CMakeLists.txt @@ -79,19 +79,21 @@ set(SRC 
intern/draw_cache_impl_subdivision.cc intern/draw_cache_impl_volume.c intern/draw_color_management.cc + intern/draw_command.cc intern/draw_common.c intern/draw_curves.cc intern/draw_debug.cc intern/draw_fluid.c intern/draw_hair.cc intern/draw_instance_data.c - intern/draw_manager.c intern/draw_manager_data.c intern/draw_manager_exec.c intern/draw_manager_profiling.c intern/draw_manager_shader.c intern/draw_manager_text.c intern/draw_manager_texture.c + intern/draw_manager.c + intern/draw_manager.cc intern/draw_select_buffer.c intern/draw_shader.cc intern/draw_texture_pool.cc @@ -206,28 +208,32 @@ set(SRC intern/DRW_gpu_wrapper.hh intern/DRW_render.h intern/draw_attributes.h - intern/draw_cache.h intern/draw_cache_extract.hh intern/draw_cache_impl.h intern/draw_cache_inline.h + intern/draw_cache.h intern/draw_color_management.h - intern/draw_common.h + intern/draw_command.hh intern/draw_common_shader_shared.h + intern/draw_common.h intern/draw_curves_private.h intern/draw_debug.h intern/draw_debug.hh intern/draw_hair_private.h intern/draw_instance_data.h - intern/draw_manager.h intern/draw_manager_profiling.h intern/draw_manager_testing.h intern/draw_manager_text.h - intern/draw_shader.h + intern/draw_manager.h + intern/draw_manager.hh + intern/draw_pass.hh intern/draw_shader_shared.h + intern/draw_shader.h intern/draw_subdivision.h intern/draw_texture_pool.h - intern/draw_view.h intern/draw_view_data.h + intern/draw_view.cc + intern/draw_view.h intern/mesh_extractors/extract_mesh.hh intern/smaa_textures.h engines/basic/basic_engine.h @@ -496,14 +502,19 @@ set(GLSL_SRC intern/shaders/common_subdiv_vbo_sculpt_data_comp.glsl intern/shaders/common_view_clipping_lib.glsl intern/shaders/common_view_lib.glsl + intern/shaders/draw_command_generate_comp.glsl intern/shaders/draw_debug_draw_display_frag.glsl intern/shaders/draw_debug_draw_display_vert.glsl intern/shaders/draw_debug_info.hh intern/shaders/draw_debug_print_display_frag.glsl 
intern/shaders/draw_debug_print_display_vert.glsl + intern/shaders/draw_resource_finalize_comp.glsl + intern/shaders/draw_visibility_comp.glsl intern/draw_common_shader_shared.h + intern/draw_command_shared.hh intern/draw_shader_shared.h + intern/draw_defines.h engines/gpencil/shaders/gpencil_frag.glsl engines/gpencil/shaders/gpencil_vert.glsl @@ -708,6 +719,7 @@ if(WITH_GTESTS) if(WITH_OPENGL_DRAW_TESTS) set(TEST_SRC tests/draw_testing.cc + tests/draw_pass_test.cc tests/shaders_test.cc tests/draw_testing.hh diff --git a/source/blender/draw/intern/DRW_gpu_wrapper.hh b/source/blender/draw/intern/DRW_gpu_wrapper.hh index 8ed6594c31e..d9122657144 100644 --- a/source/blender/draw/intern/DRW_gpu_wrapper.hh +++ b/source/blender/draw/intern/DRW_gpu_wrapper.hh @@ -238,6 +238,11 @@ class StorageCommon : public DataBuffer<T, len, false>, NonMovable, NonCopyable GPU_storagebuf_clear_to_zero(ssbo_); } + void read() + { + GPU_storagebuf_read(ssbo_, this->data_); + } + operator GPUStorageBuf *() const { return ssbo_; @@ -850,6 +855,32 @@ class TextureFromPool : public Texture, NonMovable { GPUTexture *stencil_view() = delete; }; +/** + * Dummy type to bind texture as image. + * It is just a GPUTexture in disguise. 
+ */ +class Image {}; + +static inline Image *as_image(GPUTexture *tex) +{ + return reinterpret_cast<Image *>(tex); +} + +static inline Image **as_image(GPUTexture **tex) +{ + return reinterpret_cast<Image **>(tex); +} + +static inline GPUTexture *as_texture(Image *img) +{ + return reinterpret_cast<GPUTexture *>(img); +} + +static inline GPUTexture **as_texture(Image **img) +{ + return reinterpret_cast<GPUTexture **>(img); +} + /** \} */ /* -------------------------------------------------------------------- */ diff --git a/source/blender/draw/intern/DRW_render.h b/source/blender/draw/intern/DRW_render.h index 30c1144739e..7b80ffd2b88 100644 --- a/source/blender/draw/intern/DRW_render.h +++ b/source/blender/draw/intern/DRW_render.h @@ -41,6 +41,7 @@ #include "draw_debug.h" #include "draw_manager_profiling.h" +#include "draw_state.h" #include "draw_view_data.h" #include "MEM_guardedalloc.h" @@ -288,83 +289,6 @@ void DRW_shader_library_free(DRWShaderLibrary *lib); /* Batches */ -/** - * DRWState is a bit-mask that stores the current render state and the desired render state. Based - * on the differences the minimum state changes can be invoked to setup the desired render state. - * - * The Write Stencil, Stencil test, Depth test and Blend state options are mutual exclusive - * therefore they aren't ordered as a bit mask. - */ -typedef enum { - /** To be used for compute passes. */ - DRW_STATE_NO_DRAW = 0, - /** Write mask */ - DRW_STATE_WRITE_DEPTH = (1 << 0), - DRW_STATE_WRITE_COLOR = (1 << 1), - /* Write Stencil. These options are mutual exclusive and packed into 2 bits */ - DRW_STATE_WRITE_STENCIL = (1 << 2), - DRW_STATE_WRITE_STENCIL_SHADOW_PASS = (2 << 2), - DRW_STATE_WRITE_STENCIL_SHADOW_FAIL = (3 << 2), - /** Depth test. 
These options are mutual exclusive and packed into 3 bits */ - DRW_STATE_DEPTH_ALWAYS = (1 << 4), - DRW_STATE_DEPTH_LESS = (2 << 4), - DRW_STATE_DEPTH_LESS_EQUAL = (3 << 4), - DRW_STATE_DEPTH_EQUAL = (4 << 4), - DRW_STATE_DEPTH_GREATER = (5 << 4), - DRW_STATE_DEPTH_GREATER_EQUAL = (6 << 4), - /** Culling test */ - DRW_STATE_CULL_BACK = (1 << 7), - DRW_STATE_CULL_FRONT = (1 << 8), - /** Stencil test. These options are mutually exclusive and packed into 2 bits. */ - DRW_STATE_STENCIL_ALWAYS = (1 << 9), - DRW_STATE_STENCIL_EQUAL = (2 << 9), - DRW_STATE_STENCIL_NEQUAL = (3 << 9), - - /** Blend state. These options are mutual exclusive and packed into 4 bits */ - DRW_STATE_BLEND_ADD = (1 << 11), - /** Same as additive but let alpha accumulate without pre-multiply. */ - DRW_STATE_BLEND_ADD_FULL = (2 << 11), - /** Standard alpha blending. */ - DRW_STATE_BLEND_ALPHA = (3 << 11), - /** Use that if color is already pre-multiply by alpha. */ - DRW_STATE_BLEND_ALPHA_PREMUL = (4 << 11), - DRW_STATE_BLEND_BACKGROUND = (5 << 11), - DRW_STATE_BLEND_OIT = (6 << 11), - DRW_STATE_BLEND_MUL = (7 << 11), - DRW_STATE_BLEND_SUB = (8 << 11), - /** Use dual source blending. WARNING: Only one color buffer allowed. */ - DRW_STATE_BLEND_CUSTOM = (9 << 11), - DRW_STATE_LOGIC_INVERT = (10 << 11), - DRW_STATE_BLEND_ALPHA_UNDER_PREMUL = (11 << 11), - - DRW_STATE_IN_FRONT_SELECT = (1 << 27), - DRW_STATE_SHADOW_OFFSET = (1 << 28), - DRW_STATE_CLIP_PLANES = (1 << 29), - DRW_STATE_FIRST_VERTEX_CONVENTION = (1 << 30), - /** DO NOT USE. Assumed always enabled. Only used internally. 
*/ - DRW_STATE_PROGRAM_POINT_SIZE = (1u << 31), -} DRWState; - -ENUM_OPERATORS(DRWState, DRW_STATE_PROGRAM_POINT_SIZE); - -#define DRW_STATE_DEFAULT \ - (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_LESS_EQUAL) -#define DRW_STATE_BLEND_ENABLED \ - (DRW_STATE_BLEND_ADD | DRW_STATE_BLEND_ADD_FULL | DRW_STATE_BLEND_ALPHA | \ - DRW_STATE_BLEND_ALPHA_PREMUL | DRW_STATE_BLEND_BACKGROUND | DRW_STATE_BLEND_OIT | \ - DRW_STATE_BLEND_MUL | DRW_STATE_BLEND_SUB | DRW_STATE_BLEND_CUSTOM | DRW_STATE_LOGIC_INVERT) -#define DRW_STATE_RASTERIZER_ENABLED \ - (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL | \ - DRW_STATE_WRITE_STENCIL_SHADOW_PASS | DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) -#define DRW_STATE_DEPTH_TEST_ENABLED \ - (DRW_STATE_DEPTH_ALWAYS | DRW_STATE_DEPTH_LESS | DRW_STATE_DEPTH_LESS_EQUAL | \ - DRW_STATE_DEPTH_EQUAL | DRW_STATE_DEPTH_GREATER | DRW_STATE_DEPTH_GREATER_EQUAL) -#define DRW_STATE_STENCIL_TEST_ENABLED \ - (DRW_STATE_STENCIL_ALWAYS | DRW_STATE_STENCIL_EQUAL | DRW_STATE_STENCIL_NEQUAL) -#define DRW_STATE_WRITE_STENCIL_ENABLED \ - (DRW_STATE_WRITE_STENCIL | DRW_STATE_WRITE_STENCIL_SHADOW_PASS | \ - DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) - typedef enum { DRW_ATTR_INT, DRW_ATTR_FLOAT, diff --git a/source/blender/draw/intern/draw_command.cc b/source/blender/draw/intern/draw_command.cc new file mode 100644 index 00000000000..7d5ea5c2048 --- /dev/null +++ b/source/blender/draw/intern/draw_command.cc @@ -0,0 +1,600 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. 
*/ + +/** \file + * \ingroup draw + */ + +#include "GPU_batch.h" +#include "GPU_capabilities.h" +#include "GPU_compute.h" +#include "GPU_debug.h" + +#include "draw_command.hh" +#include "draw_shader.h" +#include "draw_view.hh" + +#include <bitset> +#include <sstream> + +namespace blender::draw::command { + +/* -------------------------------------------------------------------- */ +/** \name Commands Execution + * \{ */ + +void ShaderBind::execute(RecordingState &state) const +{ + if (assign_if_different(state.shader, shader)) { + GPU_shader_bind(shader); + } +} + +void ResourceBind::execute() const +{ + if (slot == -1) { + return; + } + switch (type) { + case ResourceBind::Type::Sampler: + GPU_texture_bind_ex(is_reference ? *texture_ref : texture, sampler, slot, false); + break; + case ResourceBind::Type::Image: + GPU_texture_image_bind(is_reference ? *texture_ref : texture, slot); + break; + case ResourceBind::Type::UniformBuf: + GPU_uniformbuf_bind(is_reference ? *uniform_buf_ref : uniform_buf, slot); + break; + case ResourceBind::Type::StorageBuf: + GPU_storagebuf_bind(is_reference ? 
*storage_buf_ref : storage_buf, slot); + break; + } +} + +void PushConstant::execute(RecordingState &state) const +{ + if (location == -1) { + return; + } + switch (type) { + case PushConstant::Type::IntValue: + GPU_shader_uniform_vector_int(state.shader, location, comp_len, array_len, int4_value); + break; + case PushConstant::Type::IntReference: + GPU_shader_uniform_vector_int(state.shader, location, comp_len, array_len, int_ref); + break; + case PushConstant::Type::FloatValue: + GPU_shader_uniform_vector(state.shader, location, comp_len, array_len, float4_value); + break; + case PushConstant::Type::FloatReference: + GPU_shader_uniform_vector(state.shader, location, comp_len, array_len, float_ref); + break; + } +} + +void Draw::execute(RecordingState &state) const +{ + state.front_facing_set(handle.has_inverted_handedness()); + + if (GPU_shader_draw_parameters_support() == false) { + GPU_batch_resource_id_buf_set(batch, state.resource_id_buf); + } + + GPU_batch_set_shader(batch, state.shader); + GPU_batch_draw_advanced(batch, vertex_first, vertex_len, 0, instance_len); +} + +void DrawMulti::execute(RecordingState &state) const +{ + DrawMultiBuf::DrawCommandBuf &indirect_buf = multi_draw_buf->command_buf_; + DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_; + + uint group_index = this->group_first; + while (group_index != (uint)-1) { + const DrawGroup &group = groups[group_index]; + + if (group.vertex_len > 0) { + if (GPU_shader_draw_parameters_support() == false) { + GPU_batch_resource_id_buf_set(group.gpu_batch, state.resource_id_buf); + } + + GPU_batch_set_shader(group.gpu_batch, state.shader); + + constexpr intptr_t stride = sizeof(DrawCommand); + /* We have 2 indirect command reserved per draw group. */ + intptr_t offset = stride * group_index * 2; + + /* Draw negatively scaled geometry first. 
*/ + if (group.len - group.front_facing_len > 0) { + state.front_facing_set(true); + GPU_batch_draw_indirect(group.gpu_batch, indirect_buf, offset); + } + + if (group.front_facing_len > 0) { + state.front_facing_set(false); + GPU_batch_draw_indirect(group.gpu_batch, indirect_buf, offset + stride); + } + } + + group_index = group.next; + } +} + +void DrawIndirect::execute(RecordingState &state) const +{ + state.front_facing_set(handle.has_inverted_handedness()); + + GPU_batch_draw_indirect(batch, *indirect_buf, 0); +} + +void Dispatch::execute(RecordingState &state) const +{ + if (is_reference) { + GPU_compute_dispatch(state.shader, size_ref->x, size_ref->y, size_ref->z); + } + else { + GPU_compute_dispatch(state.shader, size.x, size.y, size.z); + } +} + +void DispatchIndirect::execute(RecordingState &state) const +{ + GPU_compute_dispatch_indirect(state.shader, *indirect_buf); +} + +void Barrier::execute() const +{ + GPU_memory_barrier(type); +} + +void Clear::execute() const +{ + GPUFrameBuffer *fb = GPU_framebuffer_active_get(); + GPU_framebuffer_clear(fb, (eGPUFrameBufferBits)clear_channels, color, depth, stencil); +} + +void StateSet::execute(RecordingState &recording_state) const +{ + /** + * Does not support locked state for the moment and never should. + * Better implement a less hacky selection! + */ + BLI_assert(DST.state_lock == 0); + + if (!assign_if_different(recording_state.pipeline_state, new_state)) { + return; + } + + /* Keep old API working. Keep the state tracking in sync. */ + /* TODO(fclem): Move at the end of a pass. */ + DST.state = new_state; + + GPU_state_set(to_write_mask(new_state), + to_blend(new_state), + to_face_cull_test(new_state), + to_depth_test(new_state), + to_stencil_test(new_state), + to_stencil_op(new_state), + to_provoking_vertex(new_state)); + + if (new_state & DRW_STATE_SHADOW_OFFSET) { + GPU_shadow_offset(true); + } + else { + GPU_shadow_offset(false); + } + + /* TODO: this should be part of shader state. 
*/ + if (new_state & DRW_STATE_CLIP_PLANES) { + GPU_clip_distances(recording_state.view_clip_plane_count); + } + else { + GPU_clip_distances(0); + } + + if (new_state & DRW_STATE_IN_FRONT_SELECT) { + /* XXX `GPU_depth_range` is not a perfect solution + * since very distant geometries can still be occluded. + * Also the depth test precision of these geometries is impaired. + * However, it solves the selection for the vast majority of cases. */ + GPU_depth_range(0.0f, 0.01f); + } + else { + GPU_depth_range(0.0f, 1.0f); + } + + if (new_state & DRW_STATE_PROGRAM_POINT_SIZE) { + GPU_program_point_size(true); + } + else { + GPU_program_point_size(false); + } +} + +void StencilSet::execute() const +{ + GPU_stencil_write_mask_set(write_mask); + GPU_stencil_compare_mask_set(compare_mask); + GPU_stencil_reference_set(reference); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Commands Serialization for debugging + * \{ */ + +std::string ShaderBind::serialize() const +{ + return std::string(".shader_bind(") + GPU_shader_get_name(shader) + ")"; +} + +std::string ResourceBind::serialize() const +{ + switch (type) { + case Type::Sampler: + return std::string(".bind_texture") + (is_reference ? "_ref" : "") + "(" + + std::to_string(slot) + + (sampler != GPU_SAMPLER_MAX ? ", sampler=" + std::to_string(sampler) : "") + ")"; + case Type::Image: + return std::string(".bind_image") + (is_reference ? "_ref" : "") + "(" + + std::to_string(slot) + ")"; + case Type::UniformBuf: + return std::string(".bind_uniform_buf") + (is_reference ? "_ref" : "") + "(" + + std::to_string(slot) + ")"; + case Type::StorageBuf: + return std::string(".bind_storage_buf") + (is_reference ? 
"_ref" : "") + "(" + + std::to_string(slot) + ")"; + default: + BLI_assert_unreachable(); + return ""; + } +} + +std::string PushConstant::serialize() const +{ + std::stringstream ss; + for (int i = 0; i < array_len; i++) { + switch (comp_len) { + case 1: + switch (type) { + case Type::IntValue: + ss << int1_value; + break; + case Type::IntReference: + ss << int_ref[i]; + break; + case Type::FloatValue: + ss << float1_value; + break; + case Type::FloatReference: + ss << float_ref[i]; + break; + } + break; + case 2: + switch (type) { + case Type::IntValue: + ss << int2_value; + break; + case Type::IntReference: + ss << int2_ref[i]; + break; + case Type::FloatValue: + ss << float2_value; + break; + case Type::FloatReference: + ss << float2_ref[i]; + break; + } + break; + case 3: + switch (type) { + case Type::IntValue: + ss << int3_value; + break; + case Type::IntReference: + ss << int3_ref[i]; + break; + case Type::FloatValue: + ss << float3_value; + break; + case Type::FloatReference: + ss << float3_ref[i]; + break; + } + break; + case 4: + switch (type) { + case Type::IntValue: + ss << int4_value; + break; + case Type::IntReference: + ss << int4_ref[i]; + break; + case Type::FloatValue: + ss << float4_value; + break; + case Type::FloatReference: + ss << float4_ref[i]; + break; + } + break; + case 16: + switch (type) { + case Type::IntValue: + case Type::IntReference: + BLI_assert_unreachable(); + break; + case Type::FloatValue: + ss << *reinterpret_cast<const float4x4 *>(&float4_value); + break; + case Type::FloatReference: + ss << *float4x4_ref; + break; + } + break; + } + if (i < array_len - 1) { + ss << ", "; + } + } + + return std::string(".push_constant(") + std::to_string(location) + ", data=" + ss.str() + ")"; +} + +std::string Draw::serialize() const +{ + std::string inst_len = (instance_len == (uint)-1) ? "from_batch" : std::to_string(instance_len); + std::string vert_len = (vertex_len == (uint)-1) ? 
"from_batch" : std::to_string(vertex_len); + std::string vert_first = (vertex_first == (uint)-1) ? "from_batch" : + std::to_string(vertex_first); + return std::string(".draw(inst_len=") + inst_len + ", vert_len=" + vert_len + + ", vert_first=" + vert_first + ", res_id=" + std::to_string(handle.resource_index()) + + ")"; +} + +std::string DrawMulti::serialize(std::string line_prefix) const +{ + DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_; + + MutableSpan<DrawPrototype> prototypes(multi_draw_buf->prototype_buf_.data(), + multi_draw_buf->prototype_count_); + + /* This emulates the GPU sorting but without the unstable draw order. */ + std::sort( + prototypes.begin(), prototypes.end(), [](const DrawPrototype &a, const DrawPrototype &b) { + return (a.group_id < b.group_id) || + (a.group_id == b.group_id && a.resource_handle > b.resource_handle); + }); + + /* Compute prefix sum to have correct offsets. */ + uint prefix_sum = 0u; + for (DrawGroup &group : groups) { + group.start = prefix_sum; + prefix_sum += group.front_proto_len + group.back_proto_len; + } + + std::stringstream ss; + + uint group_len = 0; + uint group_index = this->group_first; + while (group_index != (uint)-1) { + const DrawGroup &grp = groups[group_index]; + + ss << std::endl << line_prefix << " .group(id=" << group_index << ", len=" << grp.len << ")"; + + intptr_t offset = grp.start; + + if (grp.back_proto_len > 0) { + for (DrawPrototype &proto : prototypes.slice({offset, grp.back_proto_len})) { + BLI_assert(proto.group_id == group_index); + ResourceHandle handle(proto.resource_handle); + BLI_assert(handle.has_inverted_handedness()); + ss << std::endl + << line_prefix << " .proto(instance_len=" << std::to_string(proto.instance_len) + << ", resource_id=" << std::to_string(handle.resource_index()) << ", back_face)"; + } + offset += grp.back_proto_len; + } + + if (grp.front_proto_len > 0) { + for (DrawPrototype &proto : prototypes.slice({offset, grp.front_proto_len})) { + 
BLI_assert(proto.group_id == group_index); + ResourceHandle handle(proto.resource_handle); + BLI_assert(!handle.has_inverted_handedness()); + ss << std::endl + << line_prefix << " .proto(instance_len=" << std::to_string(proto.instance_len) + << ", resource_id=" << std::to_string(handle.resource_index()) << ", front_face)"; + } + } + + group_index = grp.next; + group_len++; + } + + ss << std::endl; + + return line_prefix + ".draw_multi(" + std::to_string(group_len) + ")" + ss.str(); +} + +std::string DrawIndirect::serialize() const +{ + return std::string(".draw_indirect()"); +} + +std::string Dispatch::serialize() const +{ + int3 sz = is_reference ? *size_ref : size; + return std::string(".dispatch") + (is_reference ? "_ref" : "") + "(" + std::to_string(sz.x) + + ", " + std::to_string(sz.y) + ", " + std::to_string(sz.z) + ")"; +} + +std::string DispatchIndirect::serialize() const +{ + return std::string(".dispatch_indirect()"); +} + +std::string Barrier::serialize() const +{ + /* TOOD(fclem): Better serialization... */ + return std::string(".barrier(") + std::to_string(type) + ")"; +} + +std::string Clear::serialize() const +{ + std::stringstream ss; + if (eGPUFrameBufferBits(clear_channels) & GPU_COLOR_BIT) { + ss << "color=" << color; + if (eGPUFrameBufferBits(clear_channels) & (GPU_DEPTH_BIT | GPU_STENCIL_BIT)) { + ss << ", "; + } + } + if (eGPUFrameBufferBits(clear_channels) & GPU_DEPTH_BIT) { + ss << "depth=" << depth; + if (eGPUFrameBufferBits(clear_channels) & GPU_STENCIL_BIT) { + ss << ", "; + } + } + if (eGPUFrameBufferBits(clear_channels) & GPU_STENCIL_BIT) { + ss << "stencil=0b" << std::bitset<8>(stencil) << ")"; + } + return std::string(".clear(") + ss.str() + ")"; +} + +std::string StateSet::serialize() const +{ + /* TOOD(fclem): Better serialization... 
*/ + return std::string(".state_set(") + std::to_string(new_state) + ")"; +} + +std::string StencilSet::serialize() const +{ + std::stringstream ss; + ss << ".stencil_set(write_mask=0b" << std::bitset<8>(write_mask) << ", compare_mask=0b" + << std::bitset<8>(compare_mask) << ", reference=0b" << std::bitset<8>(reference); + return ss.str(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Commands buffers binding / command / resource ID generation + * \{ */ + +void DrawCommandBuf::bind(RecordingState &state, + Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands) +{ + UNUSED_VARS(headers, commands); + + resource_id_count_ = 0; + + for (const Header &header : headers) { + if (header.type != Type::Draw) { + continue; + } + + Draw &cmd = commands[header.index].draw; + + int batch_vert_len, batch_vert_first, batch_base_index, batch_inst_len; + /* Now that GPUBatches are guaranteed to be finished, extract their parameters. */ + GPU_batch_draw_parameter_get( + cmd.batch, &batch_vert_len, &batch_vert_first, &batch_base_index, &batch_inst_len); + /* Instancing attributes are not supported using the new pipeline since we use the base + * instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */ + BLI_assert(batch_inst_len == 1); + + if (cmd.vertex_len == (uint)-1) { + cmd.vertex_len = batch_vert_len; + } + + if (cmd.handle.raw > 0) { + /* Save correct offset to start of resource_id buffer region for this draw. */ + uint instance_first = resource_id_count_; + resource_id_count_ += cmd.instance_len; + /* Ensure the buffer is big enough. */ + resource_id_buf_.get_or_resize(resource_id_count_ - 1); + + /* Copy the resource id for all instances. 
*/ + uint index = cmd.handle.resource_index(); + for (int i = instance_first; i < (instance_first + cmd.instance_len); i++) { + resource_id_buf_[i] = index; + } + } + } + + resource_id_buf_.push_update(); + + if (GPU_shader_draw_parameters_support() == false) { + state.resource_id_buf = resource_id_buf_; + } + else { + GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT); + } +} + +void DrawMultiBuf::bind(RecordingState &state, + Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + VisibilityBuf &visibility_buf) +{ + UNUSED_VARS(headers, commands); + + GPU_debug_group_begin("DrawMultiBuf.bind"); + + resource_id_count_ = 0u; + for (DrawGroup &group : MutableSpan<DrawGroup>(group_buf_.data(), group_count_)) { + /* Compute prefix sum of all instance of previous group. */ + group.start = resource_id_count_; + resource_id_count_ += group.len; + + int batch_inst_len; + /* Now that GPUBatches are guaranteed to be finished, extract their parameters. */ + GPU_batch_draw_parameter_get(group.gpu_batch, + &group.vertex_len, + &group.vertex_first, + &group.base_index, + &batch_inst_len); + + /* Instancing attributes are not supported using the new pipeline since we use the base + * instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */ + BLI_assert(batch_inst_len == 1); + UNUSED_VARS_NDEBUG(batch_inst_len); + + /* Now that we got the batch infos, we can set the counters to 0. */ + group.total_counter = group.front_facing_counter = group.back_facing_counter = 0; + } + + group_buf_.push_update(); + prototype_buf_.push_update(); + /* Allocate enough for the expansion pass. */ + resource_id_buf_.get_or_resize(resource_id_count_); + /* Two command per group. 
*/ + command_buf_.get_or_resize(group_count_ * 2); + + if (prototype_count_ > 0) { + GPUShader *shader = DRW_shader_draw_command_generate_get(); + GPU_shader_bind(shader); + GPU_shader_uniform_1i(shader, "prototype_len", prototype_count_); + GPU_storagebuf_bind(group_buf_, GPU_shader_get_ssbo(shader, "group_buf")); + GPU_storagebuf_bind(visibility_buf, GPU_shader_get_ssbo(shader, "visibility_buf")); + GPU_storagebuf_bind(prototype_buf_, GPU_shader_get_ssbo(shader, "prototype_buf")); + GPU_storagebuf_bind(command_buf_, GPU_shader_get_ssbo(shader, "command_buf")); + GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT); + GPU_compute_dispatch(shader, divide_ceil_u(prototype_count_, DRW_COMMAND_GROUP_SIZE), 1, 1); + if (GPU_shader_draw_parameters_support() == false) { + GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY); + state.resource_id_buf = resource_id_buf_; + } + else { + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + } + + GPU_debug_group_end(); +} + +/** \} */ + +}; // namespace blender::draw::command diff --git a/source/blender/draw/intern/draw_command.hh b/source/blender/draw/intern/draw_command.hh new file mode 100644 index 00000000000..e24a620bb73 --- /dev/null +++ b/source/blender/draw/intern/draw_command.hh @@ -0,0 +1,533 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * Commands stored inside draw passes. Converted into GPU commands upon pass submission. + * + * Draw calls (primitive rendering commands) are managed by either `DrawCommandBuf` or + * `DrawMultiBuf`. See implementation details at their definition. 
+ */ + +#include "BKE_global.h" +#include "BLI_map.hh" +#include "DRW_gpu_wrapper.hh" + +#include "draw_command_shared.hh" +#include "draw_handle.hh" +#include "draw_state.h" +#include "draw_view.hh" + +namespace blender::draw::command { + +class DrawCommandBuf; +class DrawMultiBuf; + +/* -------------------------------------------------------------------- */ +/** \name Recording State + * \{ */ + +/** + * Command recording state. + * Keep track of several states and avoid redundant state changes. + */ +struct RecordingState { + GPUShader *shader = nullptr; + bool front_facing = true; + bool inverted_view = false; + DRWState pipeline_state = DRW_STATE_NO_DRAW; + int view_clip_plane_count = 0; + /** Used for gl_BaseInstance workaround. */ + GPUStorageBuf *resource_id_buf = nullptr; + + void front_facing_set(bool facing) + { + /* Facing is inverted if view is not in expected handedness. */ + facing = this->inverted_view == facing; + /* Remove redundant changes. */ + if (assign_if_different(this->front_facing, facing)) { + GPU_front_facing(!facing); + } + } + + void cleanup() + { + if (front_facing == false) { + GPU_front_facing(false); + } + + if (G.debug & G_DEBUG_GPU) { + GPU_storagebuf_unbind_all(); + GPU_texture_image_unbind_all(); + GPU_texture_unbind_all(); + GPU_uniformbuf_unbind_all(); + } + } +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Regular Commands + * \{ */ + +enum class Type : uint8_t { + /** + * None Type commands are either uninitialized or are repurposed as data storage. + * They are skipped during submission. + */ + None = 0, + + /** Commands stored as Undetermined in regular command buffer. */ + Barrier, + Clear, + Dispatch, + DispatchIndirect, + Draw, + DrawIndirect, + PushConstant, + ResourceBind, + ShaderBind, + StateSet, + StencilSet, + + /** Special commands stored in separate buffers. 
*/ + SubPass, + DrawMulti, +}; + +/** + * The index of the group is implicit since it is known by the one who want to + * access it. This also allows to have an indexed object to split the command + * stream. + */ +struct Header { + /** Command type. */ + Type type; + /** Command index in command heap of this type. */ + uint index; +}; + +struct ShaderBind { + GPUShader *shader; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct ResourceBind { + eGPUSamplerState sampler; + int slot; + bool is_reference; + + enum class Type : uint8_t { + Sampler = 0, + Image, + UniformBuf, + StorageBuf, + } type; + + union { + /** TODO: Use draw::Texture|StorageBuffer|UniformBuffer as resources as they will give more + * debug info. */ + GPUUniformBuf *uniform_buf; + GPUUniformBuf **uniform_buf_ref; + GPUStorageBuf *storage_buf; + GPUStorageBuf **storage_buf_ref; + /** NOTE: Texture is used for both Sampler and Image binds. */ + GPUTexture *texture; + GPUTexture **texture_ref; + }; + + ResourceBind() = default; + + ResourceBind(int slot_, GPUUniformBuf *res) + : slot(slot_), is_reference(false), type(Type::UniformBuf), uniform_buf(res){}; + ResourceBind(int slot_, GPUUniformBuf **res) + : slot(slot_), is_reference(true), type(Type::UniformBuf), uniform_buf_ref(res){}; + ResourceBind(int slot_, GPUStorageBuf *res) + : slot(slot_), is_reference(false), type(Type::StorageBuf), storage_buf(res){}; + ResourceBind(int slot_, GPUStorageBuf **res) + : slot(slot_), is_reference(true), type(Type::StorageBuf), storage_buf_ref(res){}; + ResourceBind(int slot_, draw::Image *res) + : slot(slot_), is_reference(false), type(Type::Image), texture(draw::as_texture(res)){}; + ResourceBind(int slot_, draw::Image **res) + : slot(slot_), is_reference(true), type(Type::Image), texture_ref(draw::as_texture(res)){}; + ResourceBind(int slot_, GPUTexture *res, eGPUSamplerState state) + : sampler(state), slot(slot_), is_reference(false), type(Type::Sampler), 
texture(res){}; + ResourceBind(int slot_, GPUTexture **res, eGPUSamplerState state) + : sampler(state), slot(slot_), is_reference(true), type(Type::Sampler), texture_ref(res){}; + + void execute() const; + std::string serialize() const; +}; + +struct PushConstant { + int location; + uint8_t array_len; + uint8_t comp_len; + enum class Type : uint8_t { + IntValue = 0, + FloatValue, + IntReference, + FloatReference, + } type; + /** + * IMPORTANT: Data is at the end of the struct as it can span over the next commands. + * These next commands are not real commands but just memory to hold the data and are not + * referenced by any Command::Header. + * This is a hack to support float4x4 copy. + */ + union { + int int1_value; + int2 int2_value; + int3 int3_value; + int4 int4_value; + float float1_value; + float2 float2_value; + float3 float3_value; + float4 float4_value; + const int *int_ref; + const int2 *int2_ref; + const int3 *int3_ref; + const int4 *int4_ref; + const float *float_ref; + const float2 *float2_ref; + const float3 *float3_ref; + const float4 *float4_ref; + const float4x4 *float4x4_ref; + }; + + PushConstant() = default; + + PushConstant(int loc, const float &val) + : location(loc), array_len(1), comp_len(1), type(Type::FloatValue), float1_value(val){}; + PushConstant(int loc, const float2 &val) + : location(loc), array_len(1), comp_len(2), type(Type::FloatValue), float2_value(val){}; + PushConstant(int loc, const float3 &val) + : location(loc), array_len(1), comp_len(3), type(Type::FloatValue), float3_value(val){}; + PushConstant(int loc, const float4 &val) + : location(loc), array_len(1), comp_len(4), type(Type::FloatValue), float4_value(val){}; + + PushConstant(int loc, const int &val) + : location(loc), array_len(1), comp_len(1), type(Type::IntValue), int1_value(val){}; + PushConstant(int loc, const int2 &val) + : location(loc), array_len(1), comp_len(2), type(Type::IntValue), int2_value(val){}; + PushConstant(int loc, const int3 &val) + : 
location(loc), array_len(1), comp_len(3), type(Type::IntValue), int3_value(val){}; + PushConstant(int loc, const int4 &val) + : location(loc), array_len(1), comp_len(4), type(Type::IntValue), int4_value(val){}; + + PushConstant(int loc, const float *val, int arr) + : location(loc), array_len(arr), comp_len(1), type(Type::FloatReference), float_ref(val){}; + PushConstant(int loc, const float2 *val, int arr) + : location(loc), array_len(arr), comp_len(2), type(Type::FloatReference), float2_ref(val){}; + PushConstant(int loc, const float3 *val, int arr) + : location(loc), array_len(arr), comp_len(3), type(Type::FloatReference), float3_ref(val){}; + PushConstant(int loc, const float4 *val, int arr) + : location(loc), array_len(arr), comp_len(4), type(Type::FloatReference), float4_ref(val){}; + PushConstant(int loc, const float4x4 *val) + : location(loc), array_len(1), comp_len(16), type(Type::FloatReference), float4x4_ref(val){}; + + PushConstant(int loc, const int *val, int arr) + : location(loc), array_len(arr), comp_len(1), type(Type::IntReference), int_ref(val){}; + PushConstant(int loc, const int2 *val, int arr) + : location(loc), array_len(arr), comp_len(2), type(Type::IntReference), int2_ref(val){}; + PushConstant(int loc, const int3 *val, int arr) + : location(loc), array_len(arr), comp_len(3), type(Type::IntReference), int3_ref(val){}; + PushConstant(int loc, const int4 *val, int arr) + : location(loc), array_len(arr), comp_len(4), type(Type::IntReference), int4_ref(val){}; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Draw { + GPUBatch *batch; + uint instance_len; + uint vertex_len; + uint vertex_first; + ResourceHandle handle; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct DrawMulti { + GPUBatch *batch; + DrawMultiBuf *multi_draw_buf; + uint group_first; + uint uuid; + + void execute(RecordingState &state) const; + std::string serialize(std::string line_prefix) 
const; +}; + +struct DrawIndirect { + GPUBatch *batch; + GPUStorageBuf **indirect_buf; + ResourceHandle handle; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Dispatch { + bool is_reference; + union { + int3 size; + int3 *size_ref; + }; + + Dispatch() = default; + + Dispatch(int3 group_len) : is_reference(false), size(group_len){}; + Dispatch(int3 *group_len) : is_reference(true), size_ref(group_len){}; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct DispatchIndirect { + GPUStorageBuf **indirect_buf; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct Barrier { + eGPUBarrier type; + + void execute() const; + std::string serialize() const; +}; + +struct Clear { + uint8_t clear_channels; /* #eGPUFrameBufferBits. But want to save some bits. */ + uint8_t stencil; + float depth; + float4 color; + + void execute() const; + std::string serialize() const; +}; + +struct StateSet { + DRWState new_state; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + +struct StencilSet { + uint write_mask; + uint compare_mask; + uint reference; + + void execute() const; + std::string serialize() const; +}; + +union Undetermined { + ShaderBind shader_bind; + ResourceBind resource_bind; + PushConstant push_constant; + Draw draw; + DrawMulti draw_multi; + DrawIndirect draw_indirect; + Dispatch dispatch; + DispatchIndirect dispatch_indirect; + Barrier barrier; + Clear clear; + StateSet state_set; + StencilSet stencil_set; +}; + +/** Try to keep the command size as low as possible for performance. */ +BLI_STATIC_ASSERT(sizeof(Undetermined) <= 24, "One of the command type is too large.") + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw Commands + * + * A draw command buffer used to issue single draw commands without instance merging or any + * other optimizations. 
+ * + * It still uses a ResourceIdBuf to keep the same shader interface as multi draw commands. + * + * \{ */ + +class DrawCommandBuf { + friend Manager; + + private: + using ResourceIdBuf = StorageArrayBuffer<uint, 128, false>; + + /** Array of resource id. One per instance. Generated on GPU and send to GPU. */ + ResourceIdBuf resource_id_buf_; + /** Used items in the resource_id_buf_. Not it's allocated length. */ + uint resource_id_count_ = 0; + + public: + void clear(){}; + + void append_draw(Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + GPUBatch *batch, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) + { + vertex_first = vertex_first != -1 ? vertex_first : 0; + instance_len = instance_len != -1 ? instance_len : 1; + + int64_t index = commands.append_and_get_index({}); + headers.append({Type::Draw, static_cast<uint>(index)}); + commands[index].draw = {batch, instance_len, vertex_len, vertex_first, handle}; + } + + void bind(RecordingState &state, Vector<Header, 0> &headers, Vector<Undetermined, 0> &commands); +}; + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Multi Draw Commands + * + * For efficient rendering of large scene we strive to minimize the number of draw call and state + * changes. To this end, we group many rendering commands and sort them per render state using + * `DrawGroup` as a container. This is done automatically for any successive commands with the + * same state. + * + * A `DrawGroup` is the combination of a `GPUBatch` (VBO state) and a `command::DrawMulti` + * (Pipeline State). + * + * Inside each `DrawGroup` all instances of a same `GPUBatch` is merged into a single indirect + * command. + * + * To support this arbitrary reordering, we only need to know the offset of all the commands for a + * specific `DrawGroup`. This is done on CPU by doing a simple prefix sum. 
The result is pushed to + * GPU and used on CPU to issue the right command indirect. + * + * Each draw command is stored in an unsorted array of `DrawPrototype` and sent directly to the + * GPU. + * + * A command generation compute shader then go over each `DrawPrototype`. For each it adds it (or + * not depending on visibility) to the correct draw command using the offset of the `DrawGroup` + * computed on CPU. After that, it also outputs one resource ID for each instance inside a + * `DrawPrototype`. + * + * \{ */ + +class DrawMultiBuf { + friend Manager; + friend DrawMulti; + + private: + using DrawGroupBuf = StorageArrayBuffer<DrawGroup, 16>; + using DrawPrototypeBuf = StorageArrayBuffer<DrawPrototype, 16>; + using DrawCommandBuf = StorageArrayBuffer<DrawCommand, 16, true>; + using ResourceIdBuf = StorageArrayBuffer<uint, 128, true>; + + using DrawGroupKey = std::pair<uint, GPUBatch *>; + using DrawGroupMap = Map<DrawGroupKey, uint>; + /** Maps a DrawMulti command and a gpu batch to their unique DrawGroup command. */ + DrawGroupMap group_ids_; + + /** DrawGroup Command heap. Uploaded to GPU for sorting. */ + DrawGroupBuf group_buf_ = {"DrawGroupBuf"}; + /** Command Prototypes. Unsorted */ + DrawPrototypeBuf prototype_buf_ = {"DrawPrototypeBuf"}; + /** Command list generated by the sorting / compaction steps. Lives on GPU. */ + DrawCommandBuf command_buf_ = {"DrawCommandBuf"}; + /** Array of resource id. One per instance. Lives on GPU. */ + ResourceIdBuf resource_id_buf_ = {"ResourceIdBuf"}; + /** Give unique ID to each header so we can use that as hash key. */ + uint header_id_counter_ = 0; + /** Number of groups inside group_buf_. */ + uint group_count_ = 0; + /** Number of prototype command inside prototype_buf_. */ + uint prototype_count_ = 0; + /** Used items in the resource_id_buf_. Not it's allocated length. 
*/ + uint resource_id_count_ = 0; + + public: + void clear() + { + header_id_counter_ = 0; + group_count_ = 0; + prototype_count_ = 0; + group_ids_.clear(); + } + + void append_draw(Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + GPUBatch *batch, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) + { + /* Unsupported for now. Use PassSimple. */ + BLI_assert(vertex_first == 0 || vertex_first == -1); + BLI_assert(vertex_len == -1); + + instance_len = instance_len != -1 ? instance_len : 1; + + /* If there was some state changes since previous call, we have to create another command. */ + if (headers.is_empty() || headers.last().type != Type::DrawMulti) { + uint index = commands.append_and_get_index({}); + headers.append({Type::DrawMulti, index}); + commands[index].draw_multi = {batch, this, (uint)-1, header_id_counter_++}; + } + + DrawMulti &cmd = commands.last().draw_multi; + + uint &group_id = group_ids_.lookup_or_add(DrawGroupKey(cmd.uuid, batch), (uint)-1); + + bool inverted = handle.has_inverted_handedness(); + + if (group_id == (uint)-1) { + uint new_group_id = group_count_++; + + DrawGroup &group = group_buf_.get_or_resize(new_group_id); + group.next = cmd.group_first; + group.len = instance_len; + group.front_facing_len = inverted ? 0 : instance_len; + group.gpu_batch = batch; + group.front_proto_len = 0; + group.back_proto_len = 0; + /* For serialization only. */ + (inverted ? group.back_proto_len : group.front_proto_len)++; + /* Append to list. */ + cmd.group_first = new_group_id; + group_id = new_group_id; + } + else { + DrawGroup &group = group_buf_[group_id]; + group.len += instance_len; + group.front_facing_len += inverted ? 0 : instance_len; + /* For serialization only. */ + (inverted ? 
group.back_proto_len : group.front_proto_len)++; + } + + DrawPrototype &draw = prototype_buf_.get_or_resize(prototype_count_++); + draw.group_id = group_id; + draw.resource_handle = handle.raw; + draw.instance_len = instance_len; + } + + void bind(RecordingState &state, + Vector<Header, 0> &headers, + Vector<Undetermined, 0> &commands, + VisibilityBuf &visibility_buf); +}; + +/** \} */ + +}; // namespace blender::draw::command
\ No newline at end of file diff --git a/source/blender/draw/intern/draw_command_shared.hh b/source/blender/draw/intern/draw_command_shared.hh new file mode 100644 index 00000000000..22d1facfb09 --- /dev/null +++ b/source/blender/draw/intern/draw_command_shared.hh @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#ifndef GPU_SHADER +# include "BLI_span.hh" +# include "GPU_shader_shared_utils.h" + +namespace blender::draw::command { + +struct RecordingState; + +#endif + +/* -------------------------------------------------------------------- */ +/** \name Multi Draw + * \{ */ + +/** + * A DrawGroup allow to split the command stream into batch-able chunks of commands with + * the same render state. + */ +struct DrawGroup { + /** Index of next DrawGroup from the same header. */ + uint next; + + /** Index of the first instances after sorting. */ + uint start; + /** Total number of instances (including inverted facing). Needed to issue the draw call. */ + uint len; + /** Number of non inverted scaling instances in this Group. */ + uint front_facing_len; + + /** GPUBatch values to be copied to DrawCommand after sorting (if not overriden). */ + int vertex_len; + int vertex_first; + int base_index; + + /** Atomic counters used during command sorting. */ + uint total_counter; + +#ifndef GPU_SHADER + /* NOTE: Union just to make sure the struct has always the same size on all platform. */ + union { + struct { + /** For debug printing only. */ + uint front_proto_len; + uint back_proto_len; + /** Needed to create the correct draw call. */ + GPUBatch *gpu_batch; + }; + struct { +#endif + uint front_facing_counter; + uint back_facing_counter; + uint _pad0, _pad1; +#ifndef GPU_SHADER + }; + }; +#endif +}; +BLI_STATIC_ASSERT_ALIGN(DrawGroup, 16) + +/** + * Representation of a future draw call inside a DrawGroup. 
This #DrawPrototype is then + * converted into #DrawCommand on GPU after visibility and compaction. Multiple + * #DrawPrototype might get merged into the same final #DrawCommand. + */ +struct DrawPrototype { + /* Reference to parent DrawGroup to get the GPUBatch vertex / instance count. */ + uint group_id; + /* Resource handle associated with this call. Also reference visibility. */ + uint resource_handle; + /* Number of instances. */ + uint instance_len; + uint _pad0; +}; +BLI_STATIC_ASSERT_ALIGN(DrawPrototype, 16) + +/** \} */ + +#ifndef GPU_SHADER +}; // namespace blender::draw::command +#endif diff --git a/source/blender/draw/intern/draw_common_shader_shared.h b/source/blender/draw/intern/draw_common_shader_shared.h index c9819d9da87..57cb7880ce6 100644 --- a/source/blender/draw/intern/draw_common_shader_shared.h +++ b/source/blender/draw/intern/draw_common_shader_shared.h @@ -19,7 +19,7 @@ typedef struct GlobalsUboStorage GlobalsUboStorage; #define UBO_LAST_COLOR color_uv_shadow /* Used as ubo but colors can be directly referenced as well */ -/* NOTE: Also keep all color as vec4 and between #UBO_FIRST_COLOR and #UBO_LAST_COLOR. */ +/* \note Also keep all color as vec4 and between #UBO_FIRST_COLOR and #UBO_LAST_COLOR. 
*/ struct GlobalsUboStorage { /* UBOs data needs to be 16 byte aligned (size of vec4) */ float4 color_wire; diff --git a/source/blender/draw/intern/draw_debug.cc b/source/blender/draw/intern/draw_debug.cc index ab78db5d913..9cb79d73812 100644 --- a/source/blender/draw/intern/draw_debug.cc +++ b/source/blender/draw/intern/draw_debug.cc @@ -63,26 +63,26 @@ DebugDraw::DebugDraw() void DebugDraw::init() { - cpu_print_buf_.command.v_count = 0; - cpu_print_buf_.command.v_first = 0; - cpu_print_buf_.command.i_count = 1; - cpu_print_buf_.command.i_first = 0; - - cpu_draw_buf_.command.v_count = 0; - cpu_draw_buf_.command.v_first = 0; - cpu_draw_buf_.command.i_count = 1; - cpu_draw_buf_.command.i_first = 0; - - gpu_print_buf_.command.v_count = 0; - gpu_print_buf_.command.v_first = 0; - gpu_print_buf_.command.i_count = 1; - gpu_print_buf_.command.i_first = 0; + cpu_print_buf_.command.vertex_len = 0; + cpu_print_buf_.command.vertex_first = 0; + cpu_print_buf_.command.instance_len = 1; + cpu_print_buf_.command.instance_first_array = 0; + + cpu_draw_buf_.command.vertex_len = 0; + cpu_draw_buf_.command.vertex_first = 0; + cpu_draw_buf_.command.instance_len = 1; + cpu_draw_buf_.command.instance_first_array = 0; + + gpu_print_buf_.command.vertex_len = 0; + gpu_print_buf_.command.vertex_first = 0; + gpu_print_buf_.command.instance_len = 1; + gpu_print_buf_.command.instance_first_array = 0; gpu_print_buf_used = false; - gpu_draw_buf_.command.v_count = 0; - gpu_draw_buf_.command.v_first = 0; - gpu_draw_buf_.command.i_count = 1; - gpu_draw_buf_.command.i_first = 0; + gpu_draw_buf_.command.vertex_len = 0; + gpu_draw_buf_.command.vertex_first = 0; + gpu_draw_buf_.command.instance_len = 1; + gpu_draw_buf_.command.instance_first_array = 0; gpu_draw_buf_used = false; modelmat_reset(); @@ -323,11 +323,11 @@ template<> void DebugDraw::print_value<uint4>(const uint4 &value) void DebugDraw::draw_line(float3 v1, float3 v2, uint color) { DebugDrawBuf &buf = cpu_draw_buf_; - uint index = 
buf.command.v_count; + uint index = buf.command.vertex_len; if (index + 2 < DRW_DEBUG_DRAW_VERT_MAX) { buf.verts[index + 0] = vert_pack(model_mat_ * v1, color); buf.verts[index + 1] = vert_pack(model_mat_ * v2, color); - buf.command.v_count += 2; + buf.command.vertex_len += 2; } } @@ -356,7 +356,7 @@ DRWDebugVert DebugDraw::vert_pack(float3 pos, uint color) void DebugDraw::print_newline() { print_col_ = 0u; - print_row_ = ++cpu_print_buf_.command.i_first; + print_row_ = ++cpu_print_buf_.command.instance_first_array; } void DebugDraw::print_string_start(uint len) @@ -406,7 +406,7 @@ void DebugDraw::print_char4(uint data) break; } /* NOTE: Do not skip the header manually like in GPU. */ - uint cursor = cpu_print_buf_.command.v_count++; + uint cursor = cpu_print_buf_.command.vertex_len++; if (cursor < DRW_DEBUG_PRINT_MAX) { /* For future usage. (i.e: Color) */ uint flags = 0u; @@ -504,7 +504,7 @@ void DebugDraw::print_value_uint(uint value, void DebugDraw::display_lines() { - if (cpu_draw_buf_.command.v_count == 0 && gpu_draw_buf_used == false) { + if (cpu_draw_buf_.command.vertex_len == 0 && gpu_draw_buf_used == false) { return; } GPU_debug_group_begin("Lines"); @@ -541,7 +541,7 @@ void DebugDraw::display_lines() void DebugDraw::display_prints() { - if (cpu_print_buf_.command.v_count == 0 && gpu_print_buf_used == false) { + if (cpu_print_buf_.command.vertex_len == 0 && gpu_print_buf_used == false) { return; } GPU_debug_group_begin("Prints"); diff --git a/source/blender/draw/intern/draw_defines.h b/source/blender/draw/intern/draw_defines.h new file mode 100644 index 00000000000..3df7e47cffb --- /dev/null +++ b/source/blender/draw/intern/draw_defines.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. + */ + +/** \file + * \ingroup draw + * + * List of defines that are shared with the GPUShaderCreateInfos. We do this to avoid + * dragging larger headers into the createInfo pipeline which would cause problems. 
+ */ + +#pragma once + +#define DRW_VIEW_UBO_SLOT 0 + +#define DRW_RESOURCE_ID_SLOT 11 +#define DRW_OBJ_MAT_SLOT 10 +#define DRW_OBJ_INFOS_SLOT 9 +#define DRW_OBJ_ATTR_SLOT 8 + +#define DRW_DEBUG_PRINT_SLOT 15 +#define DRW_DEBUG_DRAW_SLOT 14 + +#define DRW_COMMAND_GROUP_SIZE 64 +#define DRW_FINALIZE_GROUP_SIZE 64 +/* Must be multiple of 32. Set to 32 for shader simplicity. */ +#define DRW_VISIBILITY_GROUP_SIZE 32 diff --git a/source/blender/draw/intern/draw_handle.hh b/source/blender/draw/intern/draw_handle.hh new file mode 100644 index 00000000000..5f96bfa5dcd --- /dev/null +++ b/source/blender/draw/intern/draw_handle.hh @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * A unique identifier for each object component. + * It is used to access each component data such as matrices and object attributes. + * It is valid only for the current draw, it is not persistent. + * + * The most significant bit is used to encode if the object needs to invert the front face winding + * because of its object matrix handedness. This is handy because this means sorting inside + * #DrawGroup command will put all inverted commands last. + * + * Default value of 0 points toward an non-cull-able object with unit bounding box centered at + * the origin. + */ + +#include "draw_shader_shared.h" + +struct Object; +struct DupliObject; + +namespace blender::draw { + +struct ResourceHandle { + uint raw; + + ResourceHandle() = default; + ResourceHandle(uint raw_) : raw(raw_){}; + ResourceHandle(uint index, bool inverted_handedness) + { + raw = index; + SET_FLAG_FROM_TEST(raw, inverted_handedness, 0x80000000u); + } + + bool has_inverted_handedness() const + { + return (raw & 0x80000000u) != 0; + } + + uint resource_index() const + { + return (raw & 0x7FFFFFFFu); + } +}; + +/* TODO(fclem): Move to somewhere more appropriated after cleaning up the header dependencies. 
*/ +struct ObjectRef { + Object *object; + /** Dupli object that corresponds to the current object. */ + DupliObject *dupli_object; + /** Object that created the dupli-list the current object is part of. */ + Object *dupli_parent; +}; + +}; // namespace blender::draw diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c index f44cd33fb2b..799d0544e34 100644 --- a/source/blender/draw/intern/draw_manager.c +++ b/source/blender/draw/intern/draw_manager.c @@ -1001,6 +1001,8 @@ static void drw_engines_init(void) static void drw_engines_cache_init(void) { + DRW_manager_begin_sync(); + DRW_ENABLED_ENGINE_ITER (DST.view_data_active, engine, data) { if (data->text_draw_cache) { DRW_text_cache_destroy(data->text_draw_cache); @@ -1072,6 +1074,8 @@ static void drw_engines_cache_finish(void) engine->cache_finish(data); } } + + DRW_manager_end_sync(); } static void drw_engines_draw_scene(void) diff --git a/source/blender/draw/intern/draw_manager.cc b/source/blender/draw/intern/draw_manager.cc new file mode 100644 index 00000000000..8fb2ffb39e8 --- /dev/null +++ b/source/blender/draw/intern/draw_manager.cc @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#include "BKE_global.h" +#include "GPU_compute.h" + +#include "draw_debug.hh" +#include "draw_defines.h" +#include "draw_manager.h" +#include "draw_manager.hh" +#include "draw_pass.hh" +#include "draw_shader.h" + +namespace blender::draw { + +Manager::~Manager() +{ + for (GPUTexture *texture : acquired_textures) { + /* Decrease refcount and free if 0. */ + GPU_texture_free(texture); + } +} + +void Manager::begin_sync() +{ + /* TODO: This means the reference is kept until further redraw or manager teardown. Instead, they + * should be released after each draw loop. But for now, mimics old DRW behavior. */ + for (GPUTexture *texture : acquired_textures) { + /* Decrease refcount and free if 0. 
*/ + GPU_texture_free(texture); + } + + acquired_textures.clear(); + +#ifdef DEBUG + /* Detect non-init data. */ + memset(matrix_buf.data(), 0xF0, resource_len_ * sizeof(*matrix_buf.data())); + memset(bounds_buf.data(), 0xF0, resource_len_ * sizeof(*bounds_buf.data())); + memset(infos_buf.data(), 0xF0, resource_len_ * sizeof(*infos_buf.data())); +#endif + resource_len_ = 0; + /* TODO(fclem): Resize buffers if too big, but with an hysteresis threshold. */ + + object_active = DST.draw_ctx.obact; + + /* Init the 0 resource. */ + resource_handle(float4x4::identity()); +} + +void Manager::end_sync() +{ + GPU_debug_group_begin("Manager.end_sync"); + + matrix_buf.push_update(); + bounds_buf.push_update(); + infos_buf.push_update(); + + debug_bind(); + + /* Dispatch compute to finalize the resources on GPU. Save a bit of CPU time. */ + uint thread_groups = divide_ceil_u(resource_len_, DRW_FINALIZE_GROUP_SIZE); + GPUShader *shader = DRW_shader_draw_resource_finalize_get(); + GPU_shader_bind(shader); + GPU_shader_uniform_1i(shader, "resource_len", resource_len_); + GPU_storagebuf_bind(matrix_buf, GPU_shader_get_ssbo(shader, "matrix_buf")); + GPU_storagebuf_bind(bounds_buf, GPU_shader_get_ssbo(shader, "bounds_buf")); + GPU_storagebuf_bind(infos_buf, GPU_shader_get_ssbo(shader, "infos_buf")); + GPU_compute_dispatch(shader, thread_groups, 1, 1); + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + + GPU_debug_group_end(); +} + +void Manager::debug_bind() +{ +#ifdef DEBUG + if (DST.debug == nullptr) { + return; + } + GPU_storagebuf_bind(drw_debug_gpu_draw_buf_get(), DRW_DEBUG_DRAW_SLOT); + GPU_storagebuf_bind(drw_debug_gpu_print_buf_get(), DRW_DEBUG_PRINT_SLOT); +# ifndef DISABLE_DEBUG_SHADER_PRINT_BARRIER + /* Add a barrier to allow multiple shader writing to the same buffer. 
*/ + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); +# endif +#endif +} + +void Manager::submit(PassSimple &pass, View &view) +{ + view.bind(); + + debug_bind(); + + command::RecordingState state; + state.inverted_view = view.is_inverted(); + + pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_); + + GPU_storagebuf_bind(matrix_buf, DRW_OBJ_MAT_SLOT); + GPU_storagebuf_bind(infos_buf, DRW_OBJ_INFOS_SLOT); + // GPU_storagebuf_bind(attribute_buf, DRW_OBJ_ATTR_SLOT); /* TODO */ + + pass.submit(state); + + state.cleanup(); +} + +void Manager::submit(PassMain &pass, View &view) +{ + view.bind(); + + debug_bind(); + + bool freeze_culling = (U.experimental.use_viewport_debug && DST.draw_ctx.v3d && + (DST.draw_ctx.v3d->debug_flag & V3D_DEBUG_FREEZE_CULLING) != 0); + + view.compute_visibility(bounds_buf, resource_len_, freeze_culling); + + command::RecordingState state; + state.inverted_view = view.is_inverted(); + + pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_, view.visibility_buf_); + + GPU_storagebuf_bind(matrix_buf, DRW_OBJ_MAT_SLOT); + GPU_storagebuf_bind(infos_buf, DRW_OBJ_INFOS_SLOT); + // GPU_storagebuf_bind(attribute_buf, DRW_OBJ_ATTR_SLOT); /* TODO */ + + pass.submit(state); + + state.cleanup(); +} + +void Manager::submit(PassSortable &pass, View &view) +{ + pass.sort(); + + this->submit(static_cast<PassMain &>(pass), view); +} + +void Manager::submit(PassSimple &pass) +{ + debug_bind(); + + command::RecordingState state; + + pass.draw_commands_buf_.bind(state, pass.headers_, pass.commands_); + + GPU_storagebuf_bind(matrix_buf, DRW_OBJ_MAT_SLOT); + GPU_storagebuf_bind(infos_buf, DRW_OBJ_INFOS_SLOT); + // GPU_storagebuf_bind(attribute_buf, DRW_OBJ_ATTR_SLOT); /* TODO */ + + pass.submit(state); + + state.cleanup(); +} + +Manager::SubmitDebugOutput Manager::submit_debug(PassSimple &pass, View &view) +{ + submit(pass, view); + + pass.draw_commands_buf_.resource_id_buf_.read(); + + Manager::SubmitDebugOutput output; + 
output.resource_id = {pass.draw_commands_buf_.resource_id_buf_.data(), + pass.draw_commands_buf_.resource_id_count_}; + /* There is no visibility data for PassSimple. */ + output.visibility = {(uint *)view.visibility_buf_.data(), 0}; + return output; +} + +Manager::SubmitDebugOutput Manager::submit_debug(PassMain &pass, View &view) +{ + submit(pass, view); + + GPU_finish(); + + pass.draw_commands_buf_.resource_id_buf_.read(); + view.visibility_buf_.read(); + + Manager::SubmitDebugOutput output; + output.resource_id = {pass.draw_commands_buf_.resource_id_buf_.data(), + pass.draw_commands_buf_.resource_id_count_}; + output.visibility = {(uint *)view.visibility_buf_.data(), divide_ceil_u(resource_len_, 32)}; + return output; +} + +Manager::DataDebugOutput Manager::data_debug() +{ + matrix_buf.read(); + bounds_buf.read(); + infos_buf.read(); + + Manager::DataDebugOutput output; + output.matrices = {matrix_buf.data(), resource_len_}; + output.bounds = {bounds_buf.data(), resource_len_}; + output.infos = {infos_buf.data(), resource_len_}; + return output; +} + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_manager.h b/source/blender/draw/intern/draw_manager.h index a29f2fa7507..83ebe1b3c3b 100644 --- a/source/blender/draw/intern/draw_manager.h +++ b/source/blender/draw/intern/draw_manager.h @@ -694,6 +694,9 @@ bool drw_engine_data_engines_data_validate(GPUViewport *viewport, void **engine_ void drw_engine_data_cache_release(GPUViewport *viewport); void drw_engine_data_free(GPUViewport *viewport); +void DRW_manager_begin_sync(void); +void DRW_manager_end_sync(void); + #ifdef __cplusplus } #endif diff --git a/source/blender/draw/intern/draw_manager.hh b/source/blender/draw/intern/draw_manager.hh new file mode 100644 index 00000000000..5f110b8bb6b --- /dev/null +++ b/source/blender/draw/intern/draw_manager.hh @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. 
*/ + +#pragma once + +/** \file + * \ingroup draw + * + * `draw::Manager` is the interface between scene data and viewport engines. + * + * It holds per component data (`ObjectInfo`, `ObjectMatrices`, ...) indexed per `ResourceHandle`. + * + * \note It is currently work in progress and should replace the old global draw manager. + */ + +#include "BLI_sys_types.h" + +#include "draw_resource.hh" +#include "draw_view.hh" + +#include <string> + +namespace blender::draw { + +/* Forward declarations. */ + +namespace detail { +template<typename T> class Pass; +} // namespace detail + +namespace command { +class DrawCommandBuf; +class DrawMultiBuf; +} // namespace command + +using PassSimple = detail::Pass<command::DrawCommandBuf>; +using PassMain = detail::Pass<command::DrawMultiBuf>; +class PassSortable; + +class Manager { + using ObjectMatricesBuf = StorageArrayBuffer<ObjectMatrices, 128>; + using ObjectBoundsBuf = StorageArrayBuffer<ObjectBounds, 128>; + using ObjectInfosBuf = StorageArrayBuffer<ObjectInfos, 128>; + + public: + struct SubmitDebugOutput { + /** Indexed by resource id. */ + Span<uint32_t> visibility; + /** Indexed by drawn instance. */ + Span<uint32_t> resource_id; + }; + + struct DataDebugOutput { + /** Indexed by resource id. */ + Span<ObjectMatrices> matrices; + /** Indexed by resource id. */ + Span<ObjectBounds> bounds; + /** Indexed by resource id. */ + Span<ObjectInfos> infos; + }; + + /** + * Buffers containing all object data. Referenced by resource index. + * Exposed as public members for shader access after sync. + */ + ObjectMatricesBuf matrix_buf; + ObjectBoundsBuf bounds_buf; + ObjectInfosBuf infos_buf; + + /** List of textures coming from Image data-blocks. They need to be refcounted in order to avoid + * beeing freed in another thread. 
*/ + Vector<GPUTexture *> acquired_textures; + + private: + uint resource_len_ = 0; + Object *object = nullptr; + + Object *object_active = nullptr; + + public: + Manager(){}; + ~Manager(); + + /** + * Create a new resource handle for the given object. Can be called multiple time with the + * same object **successively** without duplicating the data. + */ + ResourceHandle resource_handle(const ObjectRef ref); + /** + * Get resource id for a loose matrix. The draw-calls for this resource handle won't be culled + * and there won't be any associated object info / bounds. Assumes correct handedness / winding. + */ + ResourceHandle resource_handle(const float4x4 &model_matrix); + /** + * Get resource id for a loose matrix with bounds. The draw-calls for this resource handle will + * be culled bute there won't be any associated object info / bounds. Assumes correct handedness + * / winding. + */ + ResourceHandle resource_handle(const float4x4 &model_matrix, + const float3 &bounds_center, + const float3 &bounds_half_extent); + + /** + * Populate additional per resource data on demand. + */ + void extract_object_attributes(ResourceHandle handle, + Object &object, + Span<GPUMaterial *> materials); + + /** + * Submit a pass for drawing. All resource reference will be dereferenced and commands will be + * sent to GPU. + */ + void submit(PassSimple &pass, View &view); + void submit(PassMain &pass, View &view); + void submit(PassSortable &pass, View &view); + /** + * Variant without any view. Must not contain any shader using `draw_view` create info. + */ + void submit(PassSimple &pass); + + /** + * Submit a pass for drawing but read back all data buffers for inspection. + */ + SubmitDebugOutput submit_debug(PassSimple &pass, View &view); + SubmitDebugOutput submit_debug(PassMain &pass, View &view); + + /** + * Check data buffers of the draw manager. Only to be used after end_sync(). 
+ */ + DataDebugOutput data_debug(); + + /** + * Will acquire the texture using ref counting and release it after drawing. To be used for + * texture coming from blender Image. + */ + void acquire_texture(GPUTexture *texture) + { + GPU_texture_ref(texture); + acquired_textures.append(texture); + } + + /** TODO(fclem): The following should become private at some point. */ + void begin_sync(); + void end_sync(); + + void debug_bind(); +}; + +inline ResourceHandle Manager::resource_handle(const ObjectRef ref) +{ + bool is_active_object = (ref.dupli_object ? ref.dupli_parent : ref.object) == object_active; + matrix_buf.get_or_resize(resource_len_).sync(*ref.object); + bounds_buf.get_or_resize(resource_len_).sync(*ref.object); + infos_buf.get_or_resize(resource_len_).sync(ref, is_active_object); + return ResourceHandle(resource_len_++, (ref.object->transflag & OB_NEG_SCALE) != 0); +} + +inline ResourceHandle Manager::resource_handle(const float4x4 &model_matrix) +{ + matrix_buf.get_or_resize(resource_len_).sync(model_matrix); + bounds_buf.get_or_resize(resource_len_).sync(); + infos_buf.get_or_resize(resource_len_).sync(); + return ResourceHandle(resource_len_++, false); +} + +inline ResourceHandle Manager::resource_handle(const float4x4 &model_matrix, + const float3 &bounds_center, + const float3 &bounds_half_extent) +{ + matrix_buf.get_or_resize(resource_len_).sync(model_matrix); + bounds_buf.get_or_resize(resource_len_).sync(bounds_center, bounds_half_extent); + infos_buf.get_or_resize(resource_len_).sync(); + return ResourceHandle(resource_len_++, false); +} + +inline void Manager::extract_object_attributes(ResourceHandle handle, + Object &object, + Span<GPUMaterial *> materials) +{ + /* TODO */ + (void)handle; + (void)object; + (void)materials; +} + +} // namespace blender::draw + +/* TODO(@fclem): This is for testing. The manager should be passed to the engine through the + * callbacks. 
*/ +blender::draw::Manager *DRW_manager_get(); +blender::draw::ObjectRef DRW_object_ref_get(Object *object); diff --git a/source/blender/draw/intern/draw_pass.hh b/source/blender/draw/intern/draw_pass.hh new file mode 100644 index 00000000000..65faa9febbc --- /dev/null +++ b/source/blender/draw/intern/draw_pass.hh @@ -0,0 +1,1004 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * Passes record draw commands. Commands are executed only when a pass is submitted for execution. + * + * `PassMain`: + * Should be used on heavy load passes such as ones that may contain scene objects. Draw call + * submission is optimized for large number of draw calls. But has a significant overhead per + * #Pass. Use many #PassSub along with a main #Pass to reduce the overhead and allow groupings of + * commands. \note The draw call order inside a batch of multiple draw with the exact same state is + * not guaranteed and is not even deterministic. Use a PassSimple or PassSortable if ordering is + * needed. \note As of now, it is also quite limited in the type of draw command it can record + * (no custom vertex count, no custom first vertex). + * + * `PassSimple`: + * Does not have the overhead of #PassMain but does not have the culling and batching optimization. + * It should be used for passes that needs a few commands or that needs guaranteed draw call order. + * + * `Pass<T>::Sub`: + * A lightweight #Pass that lives inside a main #Pass. It can only be created from #Pass.sub() + * and is auto managed. This mean it can be created, filled and thrown away. A #PassSub reference + * is valid until the next #Pass.init() of the parent pass. Commands recorded inside a #PassSub are + * inserted inside the parent #Pass where the sub have been created durring submission. + * + * `PassSortable`: + * This is a sort of `PassMain` augmented with a per sub-pass sorting value. 
They can't directly + * contain draw command, everything needs to be inside sub-passes. Sub-passes are automatically + * sorted before submission. + * + * \note A pass can be recorded once and resubmitted any number of time. This can be a good + * optimization for passes that are always the same for each frame. The only thing to be aware of + * is the life time of external resources. If a pass contains draw-calls with non default + * ResourceHandle (not 0) or a reference to any non static resources (GPUBatch, PushConstant ref, + * ResourceBind ref) it will have to be re-recorded if any of these reference becomes invalid. + */ + +#include "BKE_image.h" +#include "BLI_vector.hh" +#include "DRW_gpu_wrapper.hh" +#include "GPU_debug.h" +#include "GPU_material.h" + +#include "draw_command.hh" +#include "draw_handle.hh" +#include "draw_manager.hh" +#include "draw_pass.hh" +#include "draw_shader_shared.h" +#include "draw_state.h" + +#include "intern/gpu_codegen.h" + +namespace blender::draw { + +using namespace blender::draw; +using namespace blender::draw::command; + +class Manager; + +/* -------------------------------------------------------------------- */ +/** \name Pass API + * \{ */ + +namespace detail { + +/** + * Special container that never moves allocated items and has fast indexing. + */ +template<typename T, + /** Numbers of element of type T to allocate together. */ + int64_t block_size = 16> +class SubPassVector { + private: + Vector<std::unique_ptr<Vector<T, block_size>>, 0> blocks_; + + public: + void clear() + { + blocks_.clear(); + } + + int64_t append_and_get_index(T &&elem) + { + /* Do not go over the inline size so that existing members never move. 
*/ + if (blocks_.is_empty() || blocks_.last()->size() == block_size) { + blocks_.append(std::make_unique<Vector<T, block_size>>()); + } + return blocks_.last()->append_and_get_index(std::move(elem)) + + (blocks_.size() - 1) * block_size; + } + + T &operator[](int64_t index) + { + return (*blocks_[index / block_size])[index % block_size]; + } + + const T &operator[](int64_t index) const + { + return (*blocks_[index / block_size])[index % block_size]; + } +}; + +/** + * Public API of a draw pass. + */ +template< + /** Type of command buffer used to create the draw calls. */ + typename DrawCommandBufType> +class PassBase { + friend Manager; + + /** Will use texture own sampler state. */ + static constexpr eGPUSamplerState sampler_auto = GPU_SAMPLER_MAX; + + protected: + /** Highest level of the command stream. Split command stream in different command types. */ + Vector<command::Header, 0> headers_; + /** Commands referenced by headers (which contains their types). */ + Vector<command::Undetermined, 0> commands_; + /* Reference to draw commands buffer. Either own or from parent pass. */ + DrawCommandBufType &draw_commands_buf_; + /* Reference to sub-pass commands buffer. Either own or from parent pass. */ + SubPassVector<PassBase<DrawCommandBufType>> &sub_passes_; + /** Currently bound shader. Used for interface queries. */ + GPUShader *shader_; + + public: + const char *debug_name; + + PassBase(const char *name, + DrawCommandBufType &draw_command_buf, + SubPassVector<PassBase<DrawCommandBufType>> &sub_passes, + GPUShader *shader = nullptr) + : draw_commands_buf_(draw_command_buf), + sub_passes_(sub_passes), + shader_(shader), + debug_name(name){}; + + /** + * Reset the pass command pool. + * \note Implemented in derived class. Not a virtual function to avoid indirection. Here only for + * API readability listing. + */ + void init(); + + /** + * Create a sub-pass inside this pass. 
+ */ + PassBase<DrawCommandBufType> &sub(const char *name); + + /** + * Changes the fixed function pipeline state. + * Starts as DRW_STATE_NO_DRAW at the start of a Pass submission. + * SubPass inherit previous pass state. + * + * IMPORTANT: This does not set the stencil mask/reference values. Add a call to state_stencil() + * to ensure correct behavior of stencil aware draws. + */ + void state_set(DRWState state); + + /** + * Clear the current frame-buffer. + */ + void clear_color(float4 color); + void clear_depth(float depth); + void clear_stencil(uint8_t stencil); + void clear_depth_stencil(float depth, uint8_t stencil); + void clear_color_depth_stencil(float4 color, float depth, uint8_t stencil); + + /** + * Reminders: + * - (compare_mask & reference) is what is tested against (compare_mask & stencil_value) + * stencil_value being the value stored in the stencil buffer. + * - (write-mask & reference) is what gets written if the test condition is fulfilled. + */ + void state_stencil(uint8_t write_mask, uint8_t reference, uint8_t compare_mask); + + /** + * Bind a shader. Any following bind() or push_constant() call will use its interface. + */ + void shader_set(GPUShader *shader); + + /** + * Bind a material shader along with its associated resources. Any following bind() or + * push_constant() call will use its interface. + * IMPORTANT: Assumes material is compiled and can be used (no compilation error). + */ + void material_set(Manager &manager, GPUMaterial *material); + + /** + * Record a draw call. + * \note Setting the count or first to -1 will use the values from the batch. + * \note An instance or vertex count of 0 will discard the draw call. It will not be recorded. + */ + void draw(GPUBatch *batch, + uint instance_len = -1, + uint vertex_len = -1, + uint vertex_first = -1, + ResourceHandle handle = {0}); + + /** + * Shorter version for the common case. + * \note Implemented in derived class. Not a virtual function to avoid indirection. 
+ */ + void draw(GPUBatch *batch, ResourceHandle handle); + + /** + * Record a procedural draw call. Geometry is **NOT** source from a GPUBatch. + * \note An instance or vertex count of 0 will discard the draw call. It will not be recorded. + */ + void draw_procedural(GPUPrimType primitive, + uint instance_len, + uint vertex_len, + uint vertex_first = -1, + ResourceHandle handle = {0}); + + /** + * Indirect variants. + * \note If needed, the resource id need to also be set accordingly in the DrawCommand. + */ + void draw_indirect(GPUBatch *batch, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle = {0}); + void draw_procedural_indirect(GPUPrimType primitive, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle = {0}); + + /** + * Record a compute dispatch call. + */ + void dispatch(int3 group_len); + void dispatch(int3 *group_len); + void dispatch(StorageBuffer<DispatchCommand> &indirect_buffer); + + /** + * Record a barrier call to synchronize arbitrary load/store operation between draw calls. + */ + void barrier(eGPUBarrier type); + + /** + * Bind a shader resource. + * + * Reference versions are to be used when the resource might be resize / realloc or even change + * between the time it is referenced and the time it is dereferenced for drawing. + * + * IMPORTANT: Will keep a reference to the data and dereference it upon drawing. Make sure data + * still alive until pass submission. + * + * \note Variations using slot will not query a shader interface and can be used before + * binding a shader. 
+ */ + void bind_image(const char *name, GPUTexture *image); + void bind_image(const char *name, GPUTexture **image); + void bind_image(int slot, GPUTexture *image); + void bind_image(int slot, GPUTexture **image); + void bind_texture(const char *name, GPUTexture *texture, eGPUSamplerState state = sampler_auto); + void bind_texture(const char *name, GPUTexture **texture, eGPUSamplerState state = sampler_auto); + void bind_texture(int slot, GPUTexture *texture, eGPUSamplerState state = sampler_auto); + void bind_texture(int slot, GPUTexture **texture, eGPUSamplerState state = sampler_auto); + void bind_ssbo(const char *name, GPUStorageBuf *buffer); + void bind_ssbo(const char *name, GPUStorageBuf **buffer); + void bind_ssbo(int slot, GPUStorageBuf *buffer); + void bind_ssbo(int slot, GPUStorageBuf **buffer); + void bind_ubo(const char *name, GPUUniformBuf *buffer); + void bind_ubo(const char *name, GPUUniformBuf **buffer); + void bind_ubo(int slot, GPUUniformBuf *buffer); + void bind_ubo(int slot, GPUUniformBuf **buffer); + + /** + * Update a shader constant. + * + * Reference versions are to be used when the resource might change between the time it is + * referenced and the time it is dereferenced for drawing. + * + * IMPORTANT: Will keep a reference to the data and dereference it upon drawing. Make sure data + * still alive until pass submission. + * + * \note bool reference version is expected to take bool1 reference which is aliased to int. 
+ */ + void push_constant(const char *name, const float &data); + void push_constant(const char *name, const float2 &data); + void push_constant(const char *name, const float3 &data); + void push_constant(const char *name, const float4 &data); + void push_constant(const char *name, const int &data); + void push_constant(const char *name, const int2 &data); + void push_constant(const char *name, const int3 &data); + void push_constant(const char *name, const int4 &data); + void push_constant(const char *name, const bool &data); + void push_constant(const char *name, const float4x4 &data); + void push_constant(const char *name, const float *data, int array_len = 1); + void push_constant(const char *name, const float2 *data, int array_len = 1); + void push_constant(const char *name, const float3 *data, int array_len = 1); + void push_constant(const char *name, const float4 *data, int array_len = 1); + void push_constant(const char *name, const int *data, int array_len = 1); + void push_constant(const char *name, const int2 *data, int array_len = 1); + void push_constant(const char *name, const int3 *data, int array_len = 1); + void push_constant(const char *name, const int4 *data, int array_len = 1); + void push_constant(const char *name, const float4x4 *data); + + /** + * Turn the pass into a string for inspection. + */ + std::string serialize(std::string line_prefix = "") const; + + friend std::ostream &operator<<(std::ostream &stream, const PassBase &pass) + { + return stream << pass.serialize(); + } + + protected: + /** + * Internal Helpers + */ + + int push_constant_offset(const char *name); + + void clear(eGPUFrameBufferBits planes, float4 color, float depth, uint8_t stencil); + + GPUBatch *procedural_batch_get(GPUPrimType primitive); + + /** + * Return a new command recorded with the given type. 
+ */ + command::Undetermined &create_command(command::Type type); + + void submit(command::RecordingState &state) const; +}; + +template<typename DrawCommandBufType> class Pass : public detail::PassBase<DrawCommandBufType> { + public: + using Sub = detail::PassBase<DrawCommandBufType>; + + private: + /** Sub-passes referenced by headers. */ + SubPassVector<detail::PassBase<DrawCommandBufType>> sub_passes_main_; + /** Draws are recorded as indirect draws for compatibility with the multi-draw pipeline. */ + DrawCommandBufType draw_commands_buf_main_; + + public: + Pass(const char *name) + : detail::PassBase<DrawCommandBufType>(name, draw_commands_buf_main_, sub_passes_main_){}; + + void init() + { + this->headers_.clear(); + this->commands_.clear(); + this->sub_passes_.clear(); + this->draw_commands_buf_.clear(); + } +}; // namespace blender::draw + +} // namespace detail + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Pass types + * \{ */ + +/** + * Normal pass type. No visibility or draw-call optimisation. + */ +// using PassSimple = detail::Pass<DrawCommandBuf>; + +/** + * Main pass type. + * Optimized for many draw calls and sub-pass. + * + * IMPORTANT: To be used only for passes containing lots of draw calls since it has a potentially + * high overhead due to batching and culling optimizations. + */ +// using PassMain = detail::Pass<DrawMultiBuf>; + +/** + * Special pass type for rendering transparent objects. + * The base level can only be composed of sub passes that will be ordered by a sorting value. + */ +class PassSortable : public PassMain { + friend Manager; + + private: + /** Sorting value associated with each sub pass. 
*/ + Vector<float> sorting_values_; + + bool sorted_ = false; + + public: + PassSortable(const char *name_) : PassMain(name_){}; + + void init() + { + sorting_values_.clear(); + sorted_ = false; + PassMain::init(); + } + + PassMain::Sub &sub(const char *name, float sorting_value) + { + int64_t index = sub_passes_.append_and_get_index( + PassBase(name, draw_commands_buf_, sub_passes_, shader_)); + headers_.append({Type::SubPass, static_cast<uint>(index)}); + sorting_values_.append(sorting_value); + return sub_passes_[index]; + } + + std::string serialize(std::string line_prefix = "") const + { + if (sorted_ == false) { + const_cast<PassSortable *>(this)->sort(); + } + return PassMain::serialize(line_prefix); + } + + protected: + void sort() + { + if (sorted_ == false) { + std::sort(headers_.begin(), headers_.end(), [&](Header &a, Header &b) { + BLI_assert(a.type == Type::SubPass && b.type == Type::SubPass); + float a_val = sorting_values_[a.index]; + float b_val = sorting_values_[b.index]; + return a_val < b_val || (a_val == b_val && a.index < b.index); + }); + sorted_ = true; + } + } +}; + +/** \} */ + +namespace detail { + +/* -------------------------------------------------------------------- */ +/** \name PassBase Implementation + * \{ */ + +template<class T> inline command::Undetermined &PassBase<T>::create_command(command::Type type) +{ + int64_t index = commands_.append_and_get_index({}); + headers_.append({type, static_cast<uint>(index)}); + return commands_[index]; +} + +template<class T> +inline void PassBase<T>::clear(eGPUFrameBufferBits planes, + float4 color, + float depth, + uint8_t stencil) +{ + create_command(command::Type::Clear).clear = {(uint8_t)planes, stencil, depth, color}; +} + +template<class T> inline GPUBatch *PassBase<T>::procedural_batch_get(GPUPrimType primitive) +{ + switch (primitive) { + case GPU_PRIM_POINTS: + return drw_cache_procedural_points_get(); + case GPU_PRIM_LINES: + return drw_cache_procedural_lines_get(); + case 
GPU_PRIM_TRIS: + return drw_cache_procedural_triangles_get(); + case GPU_PRIM_TRI_STRIP: + return drw_cache_procedural_triangle_strips_get(); + default: + /* Add new one as needed. */ + BLI_assert_unreachable(); + return nullptr; + } +} + +template<class T> inline PassBase<T> &PassBase<T>::sub(const char *name) +{ + int64_t index = sub_passes_.append_and_get_index( + PassBase(name, draw_commands_buf_, sub_passes_, shader_)); + headers_.append({command::Type::SubPass, static_cast<uint>(index)}); + return sub_passes_[index]; +} + +template<class T> void PassBase<T>::submit(command::RecordingState &state) const +{ + GPU_debug_group_begin(debug_name); + + for (const command::Header &header : headers_) { + switch (header.type) { + default: + case Type::None: + break; + case Type::SubPass: + sub_passes_[header.index].submit(state); + break; + case command::Type::ShaderBind: + commands_[header.index].shader_bind.execute(state); + break; + case command::Type::ResourceBind: + commands_[header.index].resource_bind.execute(); + break; + case command::Type::PushConstant: + commands_[header.index].push_constant.execute(state); + break; + case command::Type::Draw: + commands_[header.index].draw.execute(state); + break; + case command::Type::DrawMulti: + commands_[header.index].draw_multi.execute(state); + break; + case command::Type::DrawIndirect: + commands_[header.index].draw_indirect.execute(state); + break; + case command::Type::Dispatch: + commands_[header.index].dispatch.execute(state); + break; + case command::Type::DispatchIndirect: + commands_[header.index].dispatch_indirect.execute(state); + break; + case command::Type::Barrier: + commands_[header.index].barrier.execute(); + break; + case command::Type::Clear: + commands_[header.index].clear.execute(); + break; + case command::Type::StateSet: + commands_[header.index].state_set.execute(state); + break; + case command::Type::StencilSet: + commands_[header.index].stencil_set.execute(); + break; + } + } + + 
GPU_debug_group_end(); +} + +template<class T> std::string PassBase<T>::serialize(std::string line_prefix) const +{ + std::stringstream ss; + ss << line_prefix << "." << debug_name << std::endl; + line_prefix += " "; + for (const command::Header &header : headers_) { + switch (header.type) { + default: + case Type::None: + break; + case Type::SubPass: + ss << sub_passes_[header.index].serialize(line_prefix); + break; + case Type::ShaderBind: + ss << line_prefix << commands_[header.index].shader_bind.serialize() << std::endl; + break; + case Type::ResourceBind: + ss << line_prefix << commands_[header.index].resource_bind.serialize() << std::endl; + break; + case Type::PushConstant: + ss << line_prefix << commands_[header.index].push_constant.serialize() << std::endl; + break; + case Type::Draw: + ss << line_prefix << commands_[header.index].draw.serialize() << std::endl; + break; + case Type::DrawMulti: + ss << commands_[header.index].draw_multi.serialize(line_prefix); + break; + case Type::DrawIndirect: + ss << line_prefix << commands_[header.index].draw_indirect.serialize() << std::endl; + break; + case Type::Dispatch: + ss << line_prefix << commands_[header.index].dispatch.serialize() << std::endl; + break; + case Type::DispatchIndirect: + ss << line_prefix << commands_[header.index].dispatch_indirect.serialize() << std::endl; + break; + case Type::Barrier: + ss << line_prefix << commands_[header.index].barrier.serialize() << std::endl; + break; + case Type::Clear: + ss << line_prefix << commands_[header.index].clear.serialize() << std::endl; + break; + case Type::StateSet: + ss << line_prefix << commands_[header.index].state_set.serialize() << std::endl; + break; + case Type::StencilSet: + ss << line_prefix << commands_[header.index].stencil_set.serialize() << std::endl; + break; + } + } + return ss.str(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw calls + * \{ */ + +template<class T> +inline 
void PassBase<T>::draw( + GPUBatch *batch, uint instance_len, uint vertex_len, uint vertex_first, ResourceHandle handle) +{ + if (instance_len == 0 || vertex_len == 0) { + return; + } + BLI_assert(shader_); + draw_commands_buf_.append_draw( + headers_, commands_, batch, instance_len, vertex_len, vertex_first, handle); +} + +template<class T> inline void PassBase<T>::draw(GPUBatch *batch, ResourceHandle handle) +{ + this->draw(batch, -1, -1, -1, handle); +} + +template<class T> +inline void PassBase<T>::draw_procedural(GPUPrimType primitive, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) +{ + this->draw(procedural_batch_get(primitive), instance_len, vertex_len, vertex_first, handle); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Indirect draw calls + * \{ */ + +template<class T> +inline void PassBase<T>::draw_indirect(GPUBatch *batch, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle) +{ + BLI_assert(shader_); + create_command(Type::DrawIndirect).draw_indirect = {batch, &indirect_buffer, handle}; +} + +template<class T> +inline void PassBase<T>::draw_procedural_indirect( + GPUPrimType primitive, + StorageBuffer<DrawCommand, true> &indirect_buffer, + ResourceHandle handle) +{ + this->draw_indirect(procedural_batch_get(primitive), indirect_buffer, handle); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Compute Dispatch Implementation + * \{ */ + +template<class T> inline void PassBase<T>::dispatch(int3 group_len) +{ + BLI_assert(shader_); + create_command(Type::Dispatch).dispatch = {group_len}; +} + +template<class T> inline void PassBase<T>::dispatch(int3 *group_len) +{ + BLI_assert(shader_); + create_command(Type::Dispatch).dispatch = {group_len}; +} + +template<class T> +inline void PassBase<T>::dispatch(StorageBuffer<DispatchCommand> &indirect_buffer) +{ + BLI_assert(shader_); + 
create_command(Type::DispatchIndirect).dispatch_indirect = {&indirect_buffer}; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Clear Implementation + * \{ */ + +template<class T> inline void PassBase<T>::clear_color(float4 color) +{ + this->clear(GPU_COLOR_BIT, color, 0.0f, 0); +} + +template<class T> inline void PassBase<T>::clear_depth(float depth) +{ + this->clear(GPU_DEPTH_BIT, float4(0.0f), depth, 0); +} + +template<class T> inline void PassBase<T>::clear_stencil(uint8_t stencil) +{ + this->clear(GPU_STENCIL_BIT, float4(0.0f), 0.0f, stencil); +} + +template<class T> inline void PassBase<T>::clear_depth_stencil(float depth, uint8_t stencil) +{ + this->clear(GPU_DEPTH_BIT | GPU_STENCIL_BIT, float4(0.0f), depth, stencil); +} + +template<class T> +inline void PassBase<T>::clear_color_depth_stencil(float4 color, float depth, uint8_t stencil) +{ + this->clear(GPU_DEPTH_BIT | GPU_STENCIL_BIT | GPU_COLOR_BIT, color, depth, stencil); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Barrier Implementation + * \{ */ + +template<class T> inline void PassBase<T>::barrier(eGPUBarrier type) +{ + create_command(Type::Barrier).barrier = {type}; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name State Implementation + * \{ */ + +template<class T> inline void PassBase<T>::state_set(DRWState state) +{ + create_command(Type::StateSet).state_set = {state}; +} + +template<class T> +inline void PassBase<T>::state_stencil(uint8_t write_mask, uint8_t reference, uint8_t compare_mask) +{ + create_command(Type::StencilSet).stencil_set = {write_mask, reference, compare_mask}; +} + +template<class T> inline void PassBase<T>::shader_set(GPUShader *shader) +{ + shader_ = shader; + create_command(Type::ShaderBind).shader_bind = {shader}; +} + +template<class T> inline void PassBase<T>::material_set(Manager &manager, GPUMaterial 
*material) +{ + GPUPass *gpupass = GPU_material_get_pass(material); + shader_set(GPU_pass_shader_get(gpupass)); + + /* Bind all textures needed by the material. */ + ListBase textures = GPU_material_textures(material); + for (GPUMaterialTexture *tex : ListBaseWrapper<GPUMaterialTexture>(textures)) { + if (tex->ima) { + /* Image */ + ImageUser *iuser = tex->iuser_available ? &tex->iuser : nullptr; + if (tex->tiled_mapping_name[0]) { + GPUTexture *tiles = BKE_image_get_gpu_tiles(tex->ima, iuser, nullptr); + manager.acquire_texture(tiles); + bind_texture(tex->sampler_name, tiles, (eGPUSamplerState)tex->sampler_state); + + GPUTexture *tile_map = BKE_image_get_gpu_tilemap(tex->ima, iuser, nullptr); + manager.acquire_texture(tile_map); + bind_texture(tex->tiled_mapping_name, tile_map, (eGPUSamplerState)tex->sampler_state); + } + else { + GPUTexture *texture = BKE_image_get_gpu_texture(tex->ima, iuser, nullptr); + manager.acquire_texture(texture); + bind_texture(tex->sampler_name, texture, (eGPUSamplerState)tex->sampler_state); + } + } + else if (tex->colorband) { + /* Color Ramp */ + bind_texture(tex->sampler_name, *tex->colorband); + } + } + + GPUUniformBuf *ubo = GPU_material_uniform_buffer_get(material); + if (ubo != nullptr) { + bind_ubo(GPU_UBO_BLOCK_NAME, ubo); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Resource bind Implementation + * \{ */ + +template<class T> inline int PassBase<T>::push_constant_offset(const char *name) +{ + return GPU_shader_get_uniform(shader_, name); +} + +template<class T> inline void PassBase<T>::bind_ssbo(const char *name, GPUStorageBuf *buffer) +{ + this->bind_ssbo(GPU_shader_get_ssbo(shader_, name), buffer); +} + +template<class T> inline void PassBase<T>::bind_ubo(const char *name, GPUUniformBuf *buffer) +{ + this->bind_ubo(GPU_shader_get_uniform_block_binding(shader_, name), buffer); +} + +template<class T> +inline void PassBase<T>::bind_texture(const char *name, + 
GPUTexture *texture, + eGPUSamplerState state) +{ + this->bind_texture(GPU_shader_get_texture_binding(shader_, name), texture, state); +} + +template<class T> inline void PassBase<T>::bind_image(const char *name, GPUTexture *image) +{ + this->bind_texture(GPU_shader_get_texture_binding(shader_, name), image); +} + +template<class T> inline void PassBase<T>::bind_ssbo(int slot, GPUStorageBuf *buffer) +{ + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> inline void PassBase<T>::bind_ubo(int slot, GPUUniformBuf *buffer) +{ + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> +inline void PassBase<T>::bind_texture(int slot, GPUTexture *texture, eGPUSamplerState state) +{ + create_command(Type::ResourceBind).resource_bind = {slot, texture, state}; +} + +template<class T> inline void PassBase<T>::bind_image(int slot, GPUTexture *image) +{ + create_command(Type::ResourceBind).resource_bind = {slot, as_image(image)}; +} + +template<class T> inline void PassBase<T>::bind_ssbo(const char *name, GPUStorageBuf **buffer) +{ + this->bind_ssbo(GPU_shader_get_ssbo(shader_, name), buffer); +} + +template<class T> inline void PassBase<T>::bind_ubo(const char *name, GPUUniformBuf **buffer) +{ + this->bind_ubo(GPU_shader_get_uniform_block_binding(shader_, name), buffer); +} + +template<class T> +inline void PassBase<T>::bind_texture(const char *name, + GPUTexture **texture, + eGPUSamplerState state) +{ + this->bind_texture(GPU_shader_get_texture_binding(shader_, name), texture, state); +} + +template<class T> inline void PassBase<T>::bind_image(const char *name, GPUTexture **image) +{ + this->bind_image(GPU_shader_get_texture_binding(shader_, name), image); +} + +template<class T> inline void PassBase<T>::bind_ssbo(int slot, GPUStorageBuf **buffer) +{ + + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> inline void PassBase<T>::bind_ubo(int slot, GPUUniformBuf 
**buffer) +{ + create_command(Type::ResourceBind).resource_bind = {slot, buffer}; +} + +template<class T> +inline void PassBase<T>::bind_texture(int slot, GPUTexture **texture, eGPUSamplerState state) +{ + create_command(Type::ResourceBind).resource_bind = {slot, texture, state}; +} + +template<class T> inline void PassBase<T>::bind_image(int slot, GPUTexture **image) +{ + create_command(Type::ResourceBind).resource_bind = {slot, as_image(image)}; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Push Constant Implementation + * \{ */ + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float2 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float3 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float4 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const int &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const int2 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const int3 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void 
PassBase<T>::push_constant(const char *name, const int4 &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const bool &data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float2 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float3 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const float4 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int2 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int3 *data, int array_len) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> +inline void PassBase<T>::push_constant(const char *name, const int4 *data, int array_len) +{ + 
create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data, array_len}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float4x4 *data) +{ + create_command(Type::PushConstant).push_constant = {push_constant_offset(name), data}; +} + +template<class T> inline void PassBase<T>::push_constant(const char *name, const float4x4 &data) +{ + /* WORKAROUND: Push 3 consecutive commands to hold the 64 bytes of the float4x4. + * This assumes that all commands are always stored in flat array of memory. */ + Undetermined commands[3]; + + PushConstant &cmd = commands[0].push_constant; + cmd.location = push_constant_offset(name); + cmd.array_len = 1; + cmd.comp_len = 16; + cmd.type = PushConstant::Type::FloatValue; + /* Copy overrides the next 2 commands. We append them as Type::None to not evaluate them. */ + *reinterpret_cast<float4x4 *>(&cmd.float4_value) = data; + + create_command(Type::PushConstant) = commands[0]; + create_command(Type::None) = commands[1]; + create_command(Type::None) = commands[2]; +} + +/** \} */ + +} // namespace detail + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_resource.hh b/source/blender/draw/intern/draw_resource.hh new file mode 100644 index 00000000000..503833e8a6d --- /dev/null +++ b/source/blender/draw/intern/draw_resource.hh @@ -0,0 +1,199 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + * + * Component / Object level resources like object attributes, matrices, visibility etc... + * Each of them are reference by resource index (#ResourceHandle). 
+ */ + +#include "BKE_curve.h" +#include "BKE_duplilist.h" +#include "BKE_mesh.h" +#include "BKE_object.h" +#include "BKE_volume.h" +#include "BLI_hash.h" +#include "DNA_curve_types.h" +#include "DNA_layer_types.h" +#include "DNA_meta_types.h" +#include "DNA_object_types.h" + +#include "draw_handle.hh" +#include "draw_manager.hh" +#include "draw_shader_shared.h" + +/* -------------------------------------------------------------------- */ +/** \name ObjectMatrices + * \{ */ + +inline void ObjectMatrices::sync(const Object &object) +{ + model = object.obmat; + model_inverse = object.imat; +} + +inline void ObjectMatrices::sync(const float4x4 &model_matrix) +{ + model = model_matrix; + model_inverse = model_matrix.inverted(); +} + +inline std::ostream &operator<<(std::ostream &stream, const ObjectMatrices &matrices) +{ + stream << "ObjectMatrices(" << std::endl; + stream << "model=" << matrices.model << ", " << std::endl; + stream << "model_inverse=" << matrices.model_inverse << ")" << std::endl; + return stream; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name ObjectInfos + * \{ */ + +ENUM_OPERATORS(eObjectInfoFlag, OBJECT_NEGATIVE_SCALE) + +inline void ObjectInfos::sync() +{ + flag = eObjectInfoFlag::OBJECT_NO_INFO; +} + +inline void ObjectInfos::sync(const blender::draw::ObjectRef ref, bool is_active_object) +{ + color = ref.object->color; + index = ref.object->index; + SET_FLAG_FROM_TEST(flag, is_active_object, eObjectInfoFlag::OBJECT_ACTIVE); + SET_FLAG_FROM_TEST( + flag, ref.object->base_flag & BASE_SELECTED, eObjectInfoFlag::OBJECT_SELECTED); + SET_FLAG_FROM_TEST( + flag, ref.object->base_flag & BASE_FROM_DUPLI, eObjectInfoFlag::OBJECT_FROM_DUPLI); + SET_FLAG_FROM_TEST( + flag, ref.object->base_flag & BASE_FROM_SET, eObjectInfoFlag::OBJECT_FROM_SET); + SET_FLAG_FROM_TEST( + flag, ref.object->transflag & OB_NEG_SCALE, eObjectInfoFlag::OBJECT_NEGATIVE_SCALE); + + if (ref.dupli_object == nullptr) { + /* 
TODO(fclem): this is rather costly to do at draw time. Maybe we can + * put it in ob->runtime and make depsgraph ensure it is up to date. */ + random = BLI_hash_int_2d(BLI_hash_string(ref.object->id.name + 2), 0) * (1.0f / 0xFFFFFFFF); + } + else { + random = ref.dupli_object->random_id * (1.0f / 0xFFFFFFFF); + } + /* FIXME(review): this unconditionally overwrites the `random` value just computed in the if/else above — the default assignment should happen before that conditional, not after it. */ + random = 0.0f; + + if (ref.object->data == nullptr) { + orco_add = float3(0.0f); + orco_mul = float3(1.0f); + return; + } + + switch (GS(reinterpret_cast<ID *>(ref.object->data)->name)) { + case ID_VO: { + BoundBox &bbox = *BKE_volume_boundbox_get(ref.object); + orco_add = (float3(bbox.vec[6]) + float3(bbox.vec[0])) * 0.5f; /* Center. */ + orco_mul = float3(bbox.vec[6]) - float3(bbox.vec[0]); /* Size. */ + break; + } + case ID_ME: { + BKE_mesh_texspace_get(static_cast<Mesh *>(ref.object->data), orco_add, orco_mul); + break; + } + case ID_CU_LEGACY: { + Curve &cu = *static_cast<Curve *>(ref.object->data); + BKE_curve_texspace_ensure(&cu); + orco_add = cu.loc; + orco_mul = cu.size; + break; + } + case ID_MB: { + MetaBall &mb = *static_cast<MetaBall *>(ref.object->data); + orco_add = mb.loc; + orco_mul = mb.size; + break; + } + default: + orco_add = float3(0.0f); + orco_mul = float3(1.0f); + break; + } +} + +inline std::ostream &operator<<(std::ostream &stream, const ObjectInfos &infos) +{ + stream << "ObjectInfos("; + if (infos.flag == eObjectInfoFlag::OBJECT_NO_INFO) { + stream << "skipped)" << std::endl; + return stream; + } + stream << "orco_add=" << infos.orco_add << ", "; + stream << "orco_mul=" << infos.orco_mul << ", "; + stream << "color=" << infos.color << ", "; + stream << "index=" << infos.index << ", "; + stream << "random=" << infos.random << ", "; + stream << "flag=" << infos.flag << ")" << std::endl; + return stream; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name ObjectBounds + * \{ */ + +inline void ObjectBounds::sync() +{ + bounding_sphere.w = 
-1.0f; /* Disable test. */ +} + +inline void ObjectBounds::sync(Object &ob) +{ + const BoundBox *bbox = BKE_object_boundbox_get(&ob); + if (bbox == nullptr) { + bounding_sphere.w = -1.0f; /* Disable test. */ + return; + } + *reinterpret_cast<float3 *>(&bounding_corners[0]) = bbox->vec[0]; + *reinterpret_cast<float3 *>(&bounding_corners[1]) = bbox->vec[4]; + *reinterpret_cast<float3 *>(&bounding_corners[2]) = bbox->vec[3]; + *reinterpret_cast<float3 *>(&bounding_corners[3]) = bbox->vec[1]; + bounding_sphere.w = 0.0f; /* Enable test. */ +} + +inline void ObjectBounds::sync(const float3 ¢er, const float3 &size) +{ + *reinterpret_cast<float3 *>(&bounding_corners[0]) = center - size; + *reinterpret_cast<float3 *>(&bounding_corners[1]) = center + float3(+size.x, -size.y, -size.z); + *reinterpret_cast<float3 *>(&bounding_corners[2]) = center + float3(-size.x, +size.y, -size.z); + *reinterpret_cast<float3 *>(&bounding_corners[3]) = center + float3(-size.x, -size.y, +size.z); + bounding_sphere.w = 0.0; /* Enable test. 
*/ +} + +inline std::ostream &operator<<(std::ostream &stream, const ObjectBounds &bounds) +{ + stream << "ObjectBounds("; + if (bounds.bounding_sphere.w == -1.0f) { + stream << "skipped)" << std::endl; + return stream; + } + stream << std::endl; + stream << ".bounding_corners[0]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[0]) << std::endl; + stream << ".bounding_corners[1]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[1]) << std::endl; + stream << ".bounding_corners[2]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[2]) << std::endl; + stream << ".bounding_corners[3]" + << *reinterpret_cast<const float3 *>(&bounds.bounding_corners[3]) << std::endl; + stream << ".sphere=(pos=" << float3(bounds.bounding_sphere) + << ", rad=" << bounds.bounding_sphere.w << std::endl; + stream << ")" << std::endl; + return stream; +} + +/** \} */ diff --git a/source/blender/draw/intern/draw_shader.cc b/source/blender/draw/intern/draw_shader.cc index ecb30d54b64..960348b4a94 100644 --- a/source/blender/draw/intern/draw_shader.cc +++ b/source/blender/draw/intern/draw_shader.cc @@ -17,15 +17,15 @@ #include "draw_shader.h" extern "C" char datatoc_common_hair_lib_glsl[]; - extern "C" char datatoc_common_hair_refine_vert_glsl[]; -extern "C" char datatoc_common_hair_refine_comp_glsl[]; -extern "C" char datatoc_gpu_shader_3D_smooth_color_frag_glsl[]; static struct { struct GPUShader *hair_refine_sh[PART_REFINE_MAX_SHADER]; struct GPUShader *debug_print_display_sh; struct GPUShader *debug_draw_display_sh; + struct GPUShader *draw_visibility_compute_sh; + struct GPUShader *draw_resource_finalize_sh; + struct GPUShader *draw_command_generate_sh; } e_data = {{nullptr}}; /* -------------------------------------------------------------------- */ @@ -127,6 +127,31 @@ GPUShader *DRW_shader_debug_draw_display_get() return e_data.debug_draw_display_sh; } +GPUShader *DRW_shader_draw_visibility_compute_get() +{ + if 
(e_data.draw_visibility_compute_sh == nullptr) { + e_data.draw_visibility_compute_sh = GPU_shader_create_from_info_name( + "draw_visibility_compute"); + } + return e_data.draw_visibility_compute_sh; +} + +GPUShader *DRW_shader_draw_resource_finalize_get() +{ + if (e_data.draw_resource_finalize_sh == nullptr) { + e_data.draw_resource_finalize_sh = GPU_shader_create_from_info_name("draw_resource_finalize"); + } + return e_data.draw_resource_finalize_sh; +} + +GPUShader *DRW_shader_draw_command_generate_get() +{ + if (e_data.draw_command_generate_sh == nullptr) { + e_data.draw_command_generate_sh = GPU_shader_create_from_info_name("draw_command_generate"); + } + return e_data.draw_command_generate_sh; +} + /** \} */ void DRW_shaders_free() @@ -136,4 +161,7 @@ void DRW_shaders_free() } DRW_SHADER_FREE_SAFE(e_data.debug_print_display_sh); DRW_SHADER_FREE_SAFE(e_data.debug_draw_display_sh); + DRW_SHADER_FREE_SAFE(e_data.draw_visibility_compute_sh); + DRW_SHADER_FREE_SAFE(e_data.draw_resource_finalize_sh); + DRW_SHADER_FREE_SAFE(e_data.draw_command_generate_sh); } diff --git a/source/blender/draw/intern/draw_shader.h b/source/blender/draw/intern/draw_shader.h index dabb4b3327f..3b8c0425fa9 100644 --- a/source/blender/draw/intern/draw_shader.h +++ b/source/blender/draw/intern/draw_shader.h @@ -32,6 +32,9 @@ struct GPUShader *DRW_shader_curves_refine_get(CurvesEvalShader type, struct GPUShader *DRW_shader_debug_print_display_get(void); struct GPUShader *DRW_shader_debug_draw_display_get(void); +struct GPUShader *DRW_shader_draw_visibility_compute_get(void); +struct GPUShader *DRW_shader_draw_resource_finalize_get(void); +struct GPUShader *DRW_shader_draw_command_generate_get(void); void DRW_shaders_free(void); diff --git a/source/blender/draw/intern/draw_shader_shared.h b/source/blender/draw/intern/draw_shader_shared.h index 90a6475c42b..00d54311548 100644 --- a/source/blender/draw/intern/draw_shader_shared.h +++ b/source/blender/draw/intern/draw_shader_shared.h @@ -5,18 
+5,35 @@ # include "GPU_shader.h" # include "GPU_shader_shared_utils.h" +# include "draw_defines.h" typedef struct ViewInfos ViewInfos; typedef struct ObjectMatrices ObjectMatrices; typedef struct ObjectInfos ObjectInfos; +typedef struct ObjectBounds ObjectBounds; typedef struct VolumeInfos VolumeInfos; typedef struct CurvesInfos CurvesInfos; typedef struct DrawCommand DrawCommand; -typedef struct DrawCommandIndexed DrawCommandIndexed; typedef struct DispatchCommand DispatchCommand; typedef struct DRWDebugPrintBuffer DRWDebugPrintBuffer; typedef struct DRWDebugVert DRWDebugVert; typedef struct DRWDebugDrawBuffer DRWDebugDrawBuffer; + +# ifdef __cplusplus +/* C++ only forward declarations. */ +struct Object; + +namespace blender::draw { + +struct ObjectRef; + +} // namespace blender::draw + +# else /* __cplusplus */ +/* C only forward declarations. */ +typedef enum eObjectInfoFlag eObjectInfoFlag; + +# endif #endif #define DRW_SHADER_SHARED_H @@ -48,15 +65,18 @@ struct ViewInfos { float2 viewport_size_inverse; /** Frustum culling data. */ - /** NOTE: vec3 arrays are padded to vec4. */ + /** \note vec3 array padded to vec4. */ float4 frustum_corners[8]; float4 frustum_planes[6]; + float4 frustum_bound_sphere; /** For debugging purpose */ /* Mouse pixel. */ int2 mouse_pixel; - int2 _pad0; + /** True if facing needs to be inverted. 
*/ + bool1 is_inverted; + int _pad0; }; BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) @@ -74,23 +94,89 @@ BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) # define CameraTexCoFactors drw_view.viewcamtexcofac #endif +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Debug draw shapes + * \{ */ + struct ObjectMatrices { - float4x4 drw_modelMatrix; - float4x4 drw_modelMatrixInverse; + float4x4 model; + float4x4 model_inverse; + +#if !defined(GPU_SHADER) && defined(__cplusplus) + void sync(const Object &object); + void sync(const float4x4 &model_matrix); +#endif +}; +BLI_STATIC_ASSERT_ALIGN(ObjectMatrices, 16) + +enum eObjectInfoFlag { + OBJECT_SELECTED = (1u << 0u), + OBJECT_FROM_DUPLI = (1u << 1u), + OBJECT_FROM_SET = (1u << 2u), + OBJECT_ACTIVE = (1u << 3u), + OBJECT_NEGATIVE_SCALE = (1u << 4u), + /* Avoid skipped info to change culling. */ + OBJECT_NO_INFO = ~OBJECT_NEGATIVE_SCALE }; -BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) struct ObjectInfos { - float4 drw_OrcoTexCoFactors[2]; - float4 drw_ObjectColor; - float4 drw_Infos; +#if defined(GPU_SHADER) && !defined(DRAW_FINALIZE_SHADER) + /* TODO Rename to struct member for glsl too. */ + float4 orco_mul_bias[2]; + float4 color; + float4 infos; +#else + /** Uploaded as center + size. Converted to mul+bias to local coord. */ + float3 orco_add; + float _pad0; + float3 orco_mul; + float _pad1; + + float4 color; + uint index; + uint _pad2; + float random; + eObjectInfoFlag flag; +#endif + +#if !defined(GPU_SHADER) && defined(__cplusplus) + void sync(); + void sync(const blender::draw::ObjectRef ref, bool is_active_object); +#endif }; -BLI_STATIC_ASSERT_ALIGN(ViewInfos, 16) +BLI_STATIC_ASSERT_ALIGN(ObjectInfos, 16) + +struct ObjectBounds { + /** + * Uploaded as vertex (0, 4, 3, 1) of the bbox in local space, matching XYZ axis order. + * Then processed by GPU and stored as (0, 4-0, 3-0, 1-0) in world space for faster culling. 
+ */ + float4 bounding_corners[4]; + /** Bounding sphere derived from the bounding corner. Computed on GPU. */ + float4 bounding_sphere; + /** Radius of the inscribed sphere derived from the bounding corner. Computed on GPU. */ +#define _inner_sphere_radius bounding_corners[3].w + +#if !defined(GPU_SHADER) && defined(__cplusplus) + void sync(); + void sync(Object &ob); + void sync(const float3 ¢er, const float3 &size); +#endif +}; +BLI_STATIC_ASSERT_ALIGN(ObjectBounds, 16) + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Object attributes + * \{ */ struct VolumeInfos { - /* Object to grid-space. */ + /** Object to grid-space. */ float4x4 grids_xform[DRW_GRID_PER_VOLUME_MAX]; - /* NOTE: vec4 for alignment. Only float3 needed. */ + /** \note vec4 for alignment. Only float3 needed. */ float4 color_mul; float density_scale; float temperature_mul; @@ -100,38 +186,41 @@ struct VolumeInfos { BLI_STATIC_ASSERT_ALIGN(VolumeInfos, 16) struct CurvesInfos { - /* Per attribute scope, follows loading order. - * NOTE: uint as bool in GLSL is 4 bytes. - * NOTE: GLSL pad arrays of scalar to 16 bytes (std140). */ + /** Per attribute scope, follows loading order. + * \note uint as bool in GLSL is 4 bytes. + * \note GLSL pad arrays of scalar to 16 bytes (std140). */ uint4 is_point_attribute[DRW_ATTRIBUTE_PER_CURVES_MAX]; }; BLI_STATIC_ASSERT_ALIGN(CurvesInfos, 16) -#define OrcoTexCoFactors (drw_infos[resource_id].drw_OrcoTexCoFactors) -#define ObjectInfo (drw_infos[resource_id].drw_Infos) -#define ObjectColor (drw_infos[resource_id].drw_ObjectColor) +/** \} */ -/* Indirect commands structures. */ +/* -------------------------------------------------------------------- */ +/** \name Indirect commands structures. 
+ * \{ */ struct DrawCommand { - uint v_count; - uint i_count; - uint v_first; - uint i_first; -}; -BLI_STATIC_ASSERT_ALIGN(DrawCommand, 16) - -struct DrawCommandIndexed { - uint v_count; - uint i_count; - uint v_first; + /* TODO(fclem): Rename */ + uint vertex_len; + uint instance_len; + uint vertex_first; +#if defined(GPU_SHADER) uint base_index; - uint i_first; - uint _pad0; - uint _pad1; - uint _pad2; + /** \note base_index is i_first for non-indexed draw-calls. */ +# define _instance_first_array base_index +#else + union { + uint base_index; + /* Use this instead of instance_first_indexed for non indexed draw calls. */ + uint instance_first_array; + }; +#endif + + uint instance_first_indexed; + + uint _pad0, _pad1, _pad2; }; -BLI_STATIC_ASSERT_ALIGN(DrawCommandIndexed, 16) +BLI_STATIC_ASSERT_ALIGN(DrawCommand, 16) struct DispatchCommand { uint num_groups_x; @@ -141,13 +230,15 @@ struct DispatchCommand { }; BLI_STATIC_ASSERT_ALIGN(DispatchCommand, 16) +/** \} */ + /* -------------------------------------------------------------------- */ /** \name Debug print * \{ */ /* Take the header (DrawCommand) into account. */ #define DRW_DEBUG_PRINT_MAX (8 * 1024) - 4 -/* NOTE: Cannot be more than 255 (because of column encoding). */ +/** \note Cannot be more than 255 (because of column encoding). */ #define DRW_DEBUG_PRINT_WORD_WRAP_COLUMN 120u /* The debug print buffer is laid-out as the following struct. @@ -164,6 +255,9 @@ BLI_STATIC_ASSERT_ALIGN(DRWDebugPrintBuffer, 16) /* Reuse first instance as row index as we don't use instancing. Equivalent to * `DRWDebugPrintBuffer.command.i_first`. */ #define drw_debug_print_row_shared drw_debug_print_buf[3] +/** Offset to the first data. Equal to: sizeof(DrawCommand) / sizeof(uint). + * This is needed because we bind the whole buffer as a `uint` array. */ +#define drw_debug_print_offset 8 /** \} */ @@ -194,5 +288,8 @@ BLI_STATIC_ASSERT_ALIGN(DRWDebugPrintBuffer, 16) /* Equivalent to `DRWDebugDrawBuffer.command.v_count`. 
*/ #define drw_debug_draw_v_count drw_debug_verts_buf[0].pos0 +/** Offset to the first data. Equal to: sizeof(DrawCommand) / sizeof(DRWDebugVert). + * This is needed because we bind the whole buffer as a `DRWDebugVert` array. */ +#define drw_debug_draw_offset 2 /** \} */ diff --git a/source/blender/draw/intern/draw_state.h b/source/blender/draw/intern/draw_state.h new file mode 100644 index 00000000000..bf1e63e0852 --- /dev/null +++ b/source/blender/draw/intern/draw_state.h @@ -0,0 +1,225 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +/** \file + * \ingroup draw + * + * Internal Pipeline State tracking. It is higher level than GPU state as everything fits a single + * enum. + */ + +/** + * DRWState is a bit-mask that stores the current render state and the desired render state. Based + * on the differences the minimum state changes can be invoked to setup the desired render state. + * + * The Write Stencil, Stencil test, Depth test and Blend state options are mutual exclusive + * therefore they aren't ordered as a bit mask. + */ +typedef enum { + /** To be used for compute passes. */ + DRW_STATE_NO_DRAW = 0, + /** Write mask */ + DRW_STATE_WRITE_DEPTH = (1 << 0), + DRW_STATE_WRITE_COLOR = (1 << 1), + /* Write Stencil. These options are mutual exclusive and packed into 2 bits */ + DRW_STATE_WRITE_STENCIL = (1 << 2), + DRW_STATE_WRITE_STENCIL_SHADOW_PASS = (2 << 2), + DRW_STATE_WRITE_STENCIL_SHADOW_FAIL = (3 << 2), + /** Depth test. These options are mutual exclusive and packed into 3 bits */ + DRW_STATE_DEPTH_ALWAYS = (1 << 4), + DRW_STATE_DEPTH_LESS = (2 << 4), + DRW_STATE_DEPTH_LESS_EQUAL = (3 << 4), + DRW_STATE_DEPTH_EQUAL = (4 << 4), + DRW_STATE_DEPTH_GREATER = (5 << 4), + DRW_STATE_DEPTH_GREATER_EQUAL = (6 << 4), + /** Culling test */ + DRW_STATE_CULL_BACK = (1 << 7), + DRW_STATE_CULL_FRONT = (1 << 8), + /** Stencil test. 
These options are mutually exclusive and packed into 2 bits. */ + DRW_STATE_STENCIL_ALWAYS = (1 << 9), + DRW_STATE_STENCIL_EQUAL = (2 << 9), + DRW_STATE_STENCIL_NEQUAL = (3 << 9), + + /** Blend state. These options are mutual exclusive and packed into 4 bits */ + DRW_STATE_BLEND_ADD = (1 << 11), + /** Same as additive but let alpha accumulate without pre-multiply. */ + DRW_STATE_BLEND_ADD_FULL = (2 << 11), + /** Standard alpha blending. */ + DRW_STATE_BLEND_ALPHA = (3 << 11), + /** Use that if color is already pre-multiply by alpha. */ + DRW_STATE_BLEND_ALPHA_PREMUL = (4 << 11), + DRW_STATE_BLEND_BACKGROUND = (5 << 11), + DRW_STATE_BLEND_OIT = (6 << 11), + DRW_STATE_BLEND_MUL = (7 << 11), + DRW_STATE_BLEND_SUB = (8 << 11), + /** Use dual source blending. WARNING: Only one color buffer allowed. */ + DRW_STATE_BLEND_CUSTOM = (9 << 11), + DRW_STATE_LOGIC_INVERT = (10 << 11), + DRW_STATE_BLEND_ALPHA_UNDER_PREMUL = (11 << 11), + + DRW_STATE_IN_FRONT_SELECT = (1 << 27), + DRW_STATE_SHADOW_OFFSET = (1 << 28), + DRW_STATE_CLIP_PLANES = (1 << 29), + DRW_STATE_FIRST_VERTEX_CONVENTION = (1 << 30), + /** DO NOT USE. Assumed always enabled. Only used internally. 
*/ + DRW_STATE_PROGRAM_POINT_SIZE = (1u << 31), +} DRWState; + +ENUM_OPERATORS(DRWState, DRW_STATE_PROGRAM_POINT_SIZE); + +#define DRW_STATE_DEFAULT \ + (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_LESS_EQUAL) +#define DRW_STATE_BLEND_ENABLED \ + (DRW_STATE_BLEND_ADD | DRW_STATE_BLEND_ADD_FULL | DRW_STATE_BLEND_ALPHA | \ + DRW_STATE_BLEND_ALPHA_PREMUL | DRW_STATE_BLEND_BACKGROUND | DRW_STATE_BLEND_OIT | \ + DRW_STATE_BLEND_MUL | DRW_STATE_BLEND_SUB | DRW_STATE_BLEND_CUSTOM | DRW_STATE_LOGIC_INVERT) +#define DRW_STATE_RASTERIZER_ENABLED \ + (DRW_STATE_WRITE_DEPTH | DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL | \ + DRW_STATE_WRITE_STENCIL_SHADOW_PASS | DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) +#define DRW_STATE_DEPTH_TEST_ENABLED \ + (DRW_STATE_DEPTH_ALWAYS | DRW_STATE_DEPTH_LESS | DRW_STATE_DEPTH_LESS_EQUAL | \ + DRW_STATE_DEPTH_EQUAL | DRW_STATE_DEPTH_GREATER | DRW_STATE_DEPTH_GREATER_EQUAL) +#define DRW_STATE_STENCIL_TEST_ENABLED \ + (DRW_STATE_STENCIL_ALWAYS | DRW_STATE_STENCIL_EQUAL | DRW_STATE_STENCIL_NEQUAL) +#define DRW_STATE_WRITE_STENCIL_ENABLED \ + (DRW_STATE_WRITE_STENCIL | DRW_STATE_WRITE_STENCIL_SHADOW_PASS | \ + DRW_STATE_WRITE_STENCIL_SHADOW_FAIL) + +#ifdef __cplusplus +} +#endif + +#ifdef __cplusplus + +namespace blender::draw { + +/* -------------------------------------------------------------------- */ +/** \name DRWState to GPU state conversion + * \{ */ + +static inline eGPUWriteMask to_write_mask(DRWState state) +{ + eGPUWriteMask write_mask = GPU_WRITE_NONE; + if (state & DRW_STATE_WRITE_DEPTH) { + write_mask |= GPU_WRITE_DEPTH; + } + if (state & DRW_STATE_WRITE_COLOR) { + write_mask |= GPU_WRITE_COLOR; + } + if (state & DRW_STATE_WRITE_STENCIL_ENABLED) { + write_mask |= GPU_WRITE_STENCIL; + } + return write_mask; +} + +static inline eGPUFaceCullTest to_face_cull_test(DRWState state) +{ + switch (state & (DRW_STATE_CULL_BACK | DRW_STATE_CULL_FRONT)) { + case DRW_STATE_CULL_BACK: + return GPU_CULL_BACK; + case 
DRW_STATE_CULL_FRONT: + return GPU_CULL_FRONT; + default: + return GPU_CULL_NONE; + } +} + +static inline eGPUDepthTest to_depth_test(DRWState state) +{ + switch (state & DRW_STATE_DEPTH_TEST_ENABLED) { + case DRW_STATE_DEPTH_LESS: + return GPU_DEPTH_LESS; + case DRW_STATE_DEPTH_LESS_EQUAL: + return GPU_DEPTH_LESS_EQUAL; + case DRW_STATE_DEPTH_EQUAL: + return GPU_DEPTH_EQUAL; + case DRW_STATE_DEPTH_GREATER: + return GPU_DEPTH_GREATER; + case DRW_STATE_DEPTH_GREATER_EQUAL: + return GPU_DEPTH_GREATER_EQUAL; + case DRW_STATE_DEPTH_ALWAYS: + return GPU_DEPTH_ALWAYS; + default: + return GPU_DEPTH_NONE; + } +} + +static inline eGPUStencilOp to_stencil_op(DRWState state) +{ + switch (state & DRW_STATE_WRITE_STENCIL_ENABLED) { + case DRW_STATE_WRITE_STENCIL: + return GPU_STENCIL_OP_REPLACE; + case DRW_STATE_WRITE_STENCIL_SHADOW_PASS: + return GPU_STENCIL_OP_COUNT_DEPTH_PASS; + case DRW_STATE_WRITE_STENCIL_SHADOW_FAIL: + return GPU_STENCIL_OP_COUNT_DEPTH_FAIL; + default: + return GPU_STENCIL_OP_NONE; + } +} + +static inline eGPUStencilTest to_stencil_test(DRWState state) +{ + switch (state & DRW_STATE_STENCIL_TEST_ENABLED) { + case DRW_STATE_STENCIL_ALWAYS: + return GPU_STENCIL_ALWAYS; + case DRW_STATE_STENCIL_EQUAL: + return GPU_STENCIL_EQUAL; + case DRW_STATE_STENCIL_NEQUAL: + return GPU_STENCIL_NEQUAL; + default: + return GPU_STENCIL_NONE; + } +} + +static inline eGPUBlend to_blend(DRWState state) +{ + switch (state & DRW_STATE_BLEND_ENABLED) { + case DRW_STATE_BLEND_ADD: + return GPU_BLEND_ADDITIVE; + case DRW_STATE_BLEND_ADD_FULL: + return GPU_BLEND_ADDITIVE_PREMULT; + case DRW_STATE_BLEND_ALPHA: + return GPU_BLEND_ALPHA; + case DRW_STATE_BLEND_ALPHA_PREMUL: + return GPU_BLEND_ALPHA_PREMULT; + case DRW_STATE_BLEND_BACKGROUND: + return GPU_BLEND_BACKGROUND; + case DRW_STATE_BLEND_OIT: + return GPU_BLEND_OIT; + case DRW_STATE_BLEND_MUL: + return GPU_BLEND_MULTIPLY; + case DRW_STATE_BLEND_SUB: + return GPU_BLEND_SUBTRACT; + case DRW_STATE_BLEND_CUSTOM: + return 
GPU_BLEND_CUSTOM; + case DRW_STATE_LOGIC_INVERT: + return GPU_BLEND_INVERT; + case DRW_STATE_BLEND_ALPHA_UNDER_PREMUL: + return GPU_BLEND_ALPHA_UNDER_PREMUL; + default: + return GPU_BLEND_NONE; + } +} + +static inline eGPUProvokingVertex to_provoking_vertex(DRWState state) +{ + switch (state & DRW_STATE_FIRST_VERTEX_CONVENTION) { + case DRW_STATE_FIRST_VERTEX_CONVENTION: + return GPU_VERTEX_FIRST; + default: + return GPU_VERTEX_LAST; + } +} + +/** \} */ + +}; // namespace blender::draw + +#endif diff --git a/source/blender/draw/intern/draw_view.cc b/source/blender/draw/intern/draw_view.cc new file mode 100644 index 00000000000..326e8629e52 --- /dev/null +++ b/source/blender/draw/intern/draw_view.cc @@ -0,0 +1,332 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +/** \file + * \ingroup draw + */ + +#include "BLI_math_geom.h" +#include "GPU_compute.h" +#include "GPU_debug.h" + +#include "draw_debug.hh" +#include "draw_shader.h" +#include "draw_view.hh" + +namespace blender::draw { + +void View::sync(const float4x4 &view_mat, const float4x4 &win_mat) +{ + data_.viewmat = view_mat; + data_.viewinv = view_mat.inverted(); + data_.winmat = win_mat; + data_.wininv = win_mat.inverted(); + data_.persmat = data_.winmat * data_.viewmat; + data_.persinv = data_.persmat.inverted(); + /* Should not be used anymore. */ + data_.viewcamtexcofac = float4(1.0f, 1.0f, 0.0f, 0.0f); + + data_.is_inverted = (is_negative_m4(view_mat.ptr()) == is_negative_m4(win_mat.ptr())); + + update_view_vectors(); + + BoundBox &bound_box = *reinterpret_cast<BoundBox *>(&data_.frustum_corners); + BoundSphere &bound_sphere = *reinterpret_cast<BoundSphere *>(&data_.frustum_bound_sphere); + frustum_boundbox_calc(bound_box); + frustum_culling_planes_calc(); + frustum_culling_sphere_calc(bound_box, bound_sphere); + + dirty_ = true; +} + +void View::frustum_boundbox_calc(BoundBox &bbox) +{ + /* Extract the 8 corners from a Projection Matrix. 
*/ +#if 0 /* Equivalent to this but it has accuracy problems. */ + BKE_boundbox_init_from_minmax(&bbox, float3(-1.0f),float3(1.0f)); + for (int i = 0; i < 8; i++) { + mul_project_m4_v3(data_.wininv.ptr(), bbox.vec[i]); + } +#endif + + float left, right, bottom, top, near, far; + bool is_persp = data_.winmat[3][3] == 0.0f; + + projmat_dimensions(data_.winmat.ptr(), &left, &right, &bottom, &top, &near, &far); + + bbox.vec[0][2] = bbox.vec[3][2] = bbox.vec[7][2] = bbox.vec[4][2] = -near; + bbox.vec[0][0] = bbox.vec[3][0] = left; + bbox.vec[4][0] = bbox.vec[7][0] = right; + bbox.vec[0][1] = bbox.vec[4][1] = bottom; + bbox.vec[7][1] = bbox.vec[3][1] = top; + + /* Get the coordinates of the far plane. */ + if (is_persp) { + float sca_far = far / near; + left *= sca_far; + right *= sca_far; + bottom *= sca_far; + top *= sca_far; + } + + bbox.vec[1][2] = bbox.vec[2][2] = bbox.vec[6][2] = bbox.vec[5][2] = -far; + bbox.vec[1][0] = bbox.vec[2][0] = left; + bbox.vec[6][0] = bbox.vec[5][0] = right; + bbox.vec[1][1] = bbox.vec[5][1] = bottom; + bbox.vec[2][1] = bbox.vec[6][1] = top; + + /* Transform into world space. */ + for (int i = 0; i < 8; i++) { + mul_m4_v3(data_.viewinv.ptr(), bbox.vec[i]); + } +} + +void View::frustum_culling_planes_calc() +{ + planes_from_projmat(data_.persmat.ptr(), + data_.frustum_planes[0], + data_.frustum_planes[5], + data_.frustum_planes[1], + data_.frustum_planes[3], + data_.frustum_planes[4], + data_.frustum_planes[2]); + + /* Normalize. */ + for (int p = 0; p < 6; p++) { + data_.frustum_planes[p].w /= normalize_v3(data_.frustum_planes[p]); + } +} + +void View::frustum_culling_sphere_calc(const BoundBox &bbox, BoundSphere &bsphere) +{ + /* Extract Bounding Sphere */ + if (data_.winmat[3][3] != 0.0f) { + /* Orthographic */ + /* The most extreme points on the near and far plane. (normalized device coords). 
*/ + const float *nearpoint = bbox.vec[0]; + const float *farpoint = bbox.vec[6]; + + /* just use median point */ + mid_v3_v3v3(bsphere.center, farpoint, nearpoint); + bsphere.radius = len_v3v3(bsphere.center, farpoint); + } + else if (data_.winmat[2][0] == 0.0f && data_.winmat[2][1] == 0.0f) { + /* Perspective with symmetrical frustum. */ + + /* We obtain the center and radius of the circumscribed circle of the + * isosceles trapezoid composed by the diagonals of the near and far clipping plane */ + + /* center of each clipping plane */ + float mid_min[3], mid_max[3]; + mid_v3_v3v3(mid_min, bbox.vec[3], bbox.vec[4]); + mid_v3_v3v3(mid_max, bbox.vec[2], bbox.vec[5]); + + /* square length of the diagonals of each clipping plane */ + float a_sq = len_squared_v3v3(bbox.vec[3], bbox.vec[4]); + float b_sq = len_squared_v3v3(bbox.vec[2], bbox.vec[5]); + + /* distance squared between clipping planes */ + float h_sq = len_squared_v3v3(mid_min, mid_max); + + float fac = (4 * h_sq + b_sq - a_sq) / (8 * h_sq); + + /* The goal is to get the smallest sphere, + * not the sphere that passes through each corner */ + CLAMP(fac, 0.0f, 1.0f); + + interp_v3_v3v3(bsphere.center, mid_min, mid_max, fac); + + /* distance from the center to one of the points of the far plane (1, 2, 5, 6) */ + bsphere.radius = len_v3v3(bsphere.center, bbox.vec[1]); + } + else { + /* Perspective with asymmetrical frustum. */ + + /* We put the sphere center on the line that goes from origin + * to the center of the far clipping plane. 
*/ + + /* Detect which of the corner of the far clipping plane is the farthest to the origin */ + float nfar[4]; /* most extreme far point in NDC space */ + float farxy[2]; /* far-point projection onto the near plane */ + float farpoint[3] = {0.0f}; /* most extreme far point in camera coordinate */ + float nearpoint[3]; /* most extreme near point in camera coordinate */ + float farcenter[3] = {0.0f}; /* center of far clipping plane in camera coordinate */ + float F = -1.0f, N; /* square distance of far and near point to origin */ + float f, n; /* distance of far and near point to z axis. f is always > 0 but n can be < 0 */ + float e, s; /* far and near clipping distance (<0) */ + float c; /* slope of center line = distance of far clipping center + * to z axis / far clipping distance. */ + float z; /* projection of sphere center on z axis (<0) */ + + /* Find farthest corner and center of far clip plane. */ + float corner[3] = {1.0f, 1.0f, 1.0f}; /* in clip space */ + for (int i = 0; i < 4; i++) { + float point[3]; + mul_v3_project_m4_v3(point, data_.wininv.ptr(), corner); + float len = len_squared_v3(point); + if (len > F) { + copy_v3_v3(nfar, corner); + copy_v3_v3(farpoint, point); + F = len; + } + add_v3_v3(farcenter, point); + /* rotate by 90 degree to walk through the 4 points of the far clip plane */ + float tmp = corner[0]; + corner[0] = -corner[1]; + corner[1] = tmp; + } + + /* the far center is the average of the far clipping points */ + mul_v3_fl(farcenter, 0.25f); + /* the extreme near point is the opposite point on the near clipping plane */ + copy_v3_fl3(nfar, -nfar[0], -nfar[1], -1.0f); + mul_v3_project_m4_v3(nearpoint, data_.wininv.ptr(), nfar); + /* this is a frustum projection */ + N = len_squared_v3(nearpoint); + e = farpoint[2]; + s = nearpoint[2]; + /* distance to view Z axis */ + f = len_v2(farpoint); + /* get corresponding point on the near plane */ + mul_v2_v2fl(farxy, farpoint, s / e); + /* this formula preserve the sign of n */ + 
sub_v2_v2(nearpoint, farxy); + n = f * s / e - len_v2(nearpoint); + c = len_v2(farcenter) / e; + /* the big formula, it simplifies to (F-N)/(2(e-s)) for the symmetric case */ + z = (F - N) / (2.0f * (e - s + c * (f - n))); + + bsphere.center[0] = farcenter[0] * z / e; + bsphere.center[1] = farcenter[1] * z / e; + bsphere.center[2] = z; + + /* For XR, the view matrix may contain a scale factor. Then, transforming only the center + * into world space after calculating the radius will result in incorrect behavior. */ + mul_m4_v3(data_.viewinv.ptr(), bsphere.center); /* Transform to world space. */ + mul_m4_v3(data_.viewinv.ptr(), farpoint); + bsphere.radius = len_v3v3(bsphere.center, farpoint); + } +} + +void View::set_clip_planes(Span<float4> planes) +{ + BLI_assert(planes.size() <= ARRAY_SIZE(data_.clip_planes)); + int i = 0; + for (const auto &plane : planes) { + data_.clip_planes[i++] = plane; + } +} + +void View::update_viewport_size() +{ + float4 viewport; + GPU_viewport_size_get_f(viewport); + float2 viewport_size = float2(viewport.z, viewport.w); + if (assign_if_different(data_.viewport_size, viewport_size)) { + dirty_ = true; + } +} + +void View::update_view_vectors() +{ + bool is_persp = data_.winmat[3][3] == 0.0f; + + /* Near clip distance. */ + data_.viewvecs[0][3] = (is_persp) ? -data_.winmat[3][2] / (data_.winmat[2][2] - 1.0f) : + -(data_.winmat[3][2] + 1.0f) / data_.winmat[2][2]; + + /* Far clip distance. */ + data_.viewvecs[1][3] = (is_persp) ? -data_.winmat[3][2] / (data_.winmat[2][2] + 1.0f) : + -(data_.winmat[3][2] - 1.0f) / data_.winmat[2][2]; + + /* View vectors for the corners of the view frustum. 
+ * Can be used to recreate the world space position easily */ + float3 view_vecs[4] = { + {-1.0f, -1.0f, -1.0f}, + {1.0f, -1.0f, -1.0f}, + {-1.0f, 1.0f, -1.0f}, + {-1.0f, -1.0f, 1.0f}, + }; + + /* Convert the view vectors to view space */ + for (int i = 0; i < 4; i++) { + mul_project_m4_v3(data_.wininv.ptr(), view_vecs[i]); + /* Normalized trick see: + * http://www.derschmale.com/2014/01/26/reconstructing-positions-from-the-depth-buffer */ + if (is_persp) { + view_vecs[i].x /= view_vecs[i].z; + view_vecs[i].y /= view_vecs[i].z; + } + } + + /** + * If ortho : view_vecs[0] is the near-bottom-left corner of the frustum and + * view_vecs[1] is the vector going from the near-bottom-left corner to + * the far-top-right corner. + * If Persp : view_vecs[0].xy and view_vecs[1].xy are respectively the bottom-left corner + * when Z = 1, and top-left corner if Z = 1. + * view_vecs[0].z the near clip distance and view_vecs[1].z is the (signed) + * distance from the near plane to the far clip plane. + */ + copy_v3_v3(data_.viewvecs[0], view_vecs[0]); + + /* we need to store the differences */ + data_.viewvecs[1][0] = view_vecs[1][0] - view_vecs[0][0]; + data_.viewvecs[1][1] = view_vecs[2][1] - view_vecs[0][1]; + data_.viewvecs[1][2] = view_vecs[3][2] - view_vecs[0][2]; +} + +void View::bind() +{ + update_viewport_size(); + + if (dirty_) { + dirty_ = false; + data_.push_update(); + } + + GPU_uniformbuf_bind(data_, DRW_VIEW_UBO_SLOT); +} + +void View::compute_visibility(ObjectBoundsBuf &bounds, uint resource_len, bool debug_freeze) +{ + if (debug_freeze && frozen_ == false) { + data_freeze_ = static_cast<ViewInfos>(data_); + data_freeze_.push_update(); + } +#ifdef DEBUG + if (debug_freeze) { + drw_debug_matrix_as_bbox(data_freeze_.persinv, float4(0, 1, 0, 1)); + } +#endif + frozen_ = debug_freeze; + + GPU_debug_group_begin("View.compute_visibility"); + + /* TODO(fclem): Early out if visibility hasn't changed. */ + /* TODO(fclem): Resize to nearest pow2 to reduce fragmentation. 
*/ + visibility_buf_.resize(divide_ceil_u(resource_len, 128)); + + uint32_t data = 0xFFFFFFFFu; + GPU_storagebuf_clear(visibility_buf_, GPU_R32UI, GPU_DATA_UINT, &data); + + if (do_visibility_) { + GPUShader *shader = DRW_shader_draw_visibility_compute_get(); + GPU_shader_bind(shader); + GPU_shader_uniform_1i(shader, "resource_len", resource_len); + GPU_storagebuf_bind(bounds, GPU_shader_get_ssbo(shader, "bounds_buf")); + GPU_storagebuf_bind(visibility_buf_, GPU_shader_get_ssbo(shader, "visibility_buf")); + GPU_uniformbuf_bind((frozen_) ? data_freeze_ : data_, DRW_VIEW_UBO_SLOT); + GPU_compute_dispatch(shader, divide_ceil_u(resource_len, DRW_VISIBILITY_GROUP_SIZE), 1, 1); + GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE); + } + + if (frozen_) { + /* Bind back the non frozen data. */ + GPU_uniformbuf_bind(data_, DRW_VIEW_UBO_SLOT); + } + + GPU_debug_group_end(); +} + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_view.hh b/source/blender/draw/intern/draw_view.hh new file mode 100644 index 00000000000..82e74774a5a --- /dev/null +++ b/source/blender/draw/intern/draw_view.hh @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2022 Blender Foundation. */ + +#pragma once + +/** \file + * \ingroup draw + */ + +#include "DRW_gpu_wrapper.hh" +#include "DRW_render.h" + +#include "draw_shader_shared.h" + +namespace blender::draw { + +class Manager; + +/* TODO deduplicate. */ +using ObjectBoundsBuf = StorageArrayBuffer<ObjectBounds, 128>; +/** \note Using uint4 for declaration but bound as uint. */ +using VisibilityBuf = StorageArrayBuffer<uint4, 1, true>; + +class View { + friend Manager; + + private: + UniformBuffer<ViewInfos> data_; + /** Freezed version of data_ used for debugging culling. */ + UniformBuffer<ViewInfos> data_freeze_; + /** Result of the visibility computation. 1 bit per resource ID. 
*/ + VisibilityBuf visibility_buf_; + + const char *debug_name_; + + bool do_visibility_ = true; + bool dirty_ = true; + bool frozen_ = false; + + public: + View(const char *name) : visibility_buf_(name), debug_name_(name){}; + /* For compatibility with old system. Will be removed at some point. */ + View(const char *name, const DRWView *view) : visibility_buf_(name), debug_name_(name) + { + float4x4 view_mat, win_mat; + DRW_view_viewmat_get(view, view_mat.ptr(), false); + DRW_view_winmat_get(view, win_mat.ptr(), false); + this->sync(view_mat, win_mat); + } + + void set_clip_planes(Span<float4> planes); + + void sync(const float4x4 &view_mat, const float4x4 &win_mat); + + bool is_persp() const + { + return data_.winmat[3][3] == 0.0f; + } + + bool is_inverted() const + { + return data_.is_inverted; + } + + float far_clip() const + { + if (is_persp()) { + return -data_.winmat[3][2] / (data_.winmat[2][2] + 1.0f); + } + return -(data_.winmat[3][2] - 1.0f) / data_.winmat[2][2]; + } + + float near_clip() const + { + if (is_persp()) { + return -data_.winmat[3][2] / (data_.winmat[2][2] - 1.0f); + } + return -(data_.winmat[3][2] + 1.0f) / data_.winmat[2][2]; + } + + private: + /** Called from draw manager. 
*/ + void bind(); + void compute_visibility(ObjectBoundsBuf &bounds, uint resource_len, bool debug_freeze); + + void update_view_vectors(); + void update_viewport_size(); + + void frustum_boundbox_calc(BoundBox &bbox); + void frustum_culling_planes_calc(); + void frustum_culling_sphere_calc(const BoundBox &bbox, BoundSphere &bsphere); +}; + +} // namespace blender::draw diff --git a/source/blender/draw/intern/draw_view_data.cc b/source/blender/draw/intern/draw_view_data.cc index 55f1ab83b3a..58d826e0218 100644 --- a/source/blender/draw/intern/draw_view_data.cc +++ b/source/blender/draw/intern/draw_view_data.cc @@ -7,6 +7,7 @@ #include "BLI_vector.hh" +#include "GPU_capabilities.h" #include "GPU_viewport.h" #include "DRW_render.h" @@ -16,6 +17,7 @@ #include "draw_manager_text.h" #include "draw_manager.h" +#include "draw_manager.hh" #include "draw_view_data.h" using namespace blender; @@ -33,6 +35,22 @@ struct DRWViewData { Vector<ViewportEngineData> engines; Vector<ViewportEngineData *> enabled_engines; + + /** New per view/viewport manager. Null if not supported by current hardware. */ + draw::Manager *manager = nullptr; + + DRWViewData() + { + /* Only for GL >= 4.3 implementation for now. 
*/ + if (GPU_shader_storage_buffer_objects_support() && GPU_compute_shader_support()) { + manager = new draw::Manager(); + } + }; + + ~DRWViewData() + { + delete manager; + }; }; DRWViewData *DRW_view_data_create(ListBase *engine_types) @@ -237,3 +255,31 @@ ViewportEngineData *DRW_view_data_enabled_engine_iter_step(DRWEngineIterator *it ViewportEngineData *engine = iterator->engines[iterator->id++]; return engine; } + +draw::Manager *DRW_manager_get() +{ + BLI_assert(DST.view_data_active->manager); + return reinterpret_cast<draw::Manager *>(DST.view_data_active->manager); +} + +draw::ObjectRef DRW_object_ref_get(Object *object) +{ + BLI_assert(DST.view_data_active->manager); + return {object, DST.dupli_source, DST.dupli_parent}; +} + +void DRW_manager_begin_sync() +{ + if (DST.view_data_active->manager == nullptr) { + return; + } + reinterpret_cast<draw::Manager *>(DST.view_data_active->manager)->begin_sync(); +} + +void DRW_manager_end_sync() +{ + if (DST.view_data_active->manager == nullptr) { + return; + } + reinterpret_cast<draw::Manager *>(DST.view_data_active->manager)->end_sync(); +} diff --git a/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl b/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl index 5f795d3abdb..3287897e73c 100644 --- a/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl +++ b/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl @@ -17,8 +17,7 @@ const vec4 drw_debug_default_color = vec4(1.0, 0.0, 0.0, 1.0); uint drw_debug_start_draw(uint v_needed) { uint vertid = atomicAdd(drw_debug_draw_v_count, v_needed); - /* NOTE: Skip the header manually. 
*/ - vertid += 1; + vertid += drw_debug_draw_offset; return vertid; } diff --git a/source/blender/draw/intern/shaders/common_debug_print_lib.glsl b/source/blender/draw/intern/shaders/common_debug_print_lib.glsl index 0c7f32bd00d..89d1729b52d 100644 --- a/source/blender/draw/intern/shaders/common_debug_print_lib.glsl +++ b/source/blender/draw/intern/shaders/common_debug_print_lib.glsl @@ -71,8 +71,7 @@ void drw_print_char4(uint data) break; } uint cursor = atomicAdd(drw_debug_print_cursor, 1u); - /* NOTE: Skip the header manually. */ - cursor += 4; + cursor += drw_debug_print_offset; if (cursor < DRW_DEBUG_PRINT_MAX) { /* For future usage. (i.e: Color) */ uint flags = 0u; diff --git a/source/blender/draw/intern/shaders/common_intersect_lib.glsl b/source/blender/draw/intern/shaders/common_intersect_lib.glsl index 33378588553..83223f89277 100644 --- a/source/blender/draw/intern/shaders/common_intersect_lib.glsl +++ b/source/blender/draw/intern/shaders/common_intersect_lib.glsl @@ -70,6 +70,30 @@ IsectBox isect_data_setup(Box shape) return data; } +/* Construct box from 1 corner point + 3 side vectors. */ +IsectBox isect_data_setup(vec3 origin, vec3 side_x, vec3 side_y, vec3 side_z) +{ + IsectBox data; + data.corners[0] = origin; + data.corners[1] = origin + side_x; + data.corners[2] = origin + side_y + side_x; + data.corners[3] = origin + side_y; + data.corners[4] = data.corners[0] + side_z; + data.corners[5] = data.corners[1] + side_z; + data.corners[6] = data.corners[2] + side_z; + data.corners[7] = data.corners[3] + side_z; + + data.planes[0] = isect_plane_setup(data.corners[0], side_y, side_z); + data.planes[1] = isect_plane_setup(data.corners[0], side_x, side_y); + data.planes[2] = isect_plane_setup(data.corners[0], side_z, side_x); + /* Assumes that the box is actually a box! 
*/ + data.planes[3] = vec4(-data.planes[0].xyz, -dot(-data.planes[0].xyz, data.corners[6])); + data.planes[4] = vec4(-data.planes[1].xyz, -dot(-data.planes[1].xyz, data.corners[6])); + data.planes[5] = vec4(-data.planes[2].xyz, -dot(-data.planes[2].xyz, data.corners[6])); + + return data; +} + struct IsectFrustum { vec3 corners[8]; vec4 planes[6]; @@ -194,6 +218,50 @@ bool intersect_view(Box box) return intersects; } +bool intersect_view(IsectBox i_box) +{ + bool intersects = true; + + /* Do Box vertices vs Frustum planes. */ + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(drw_view.frustum_planes[p], vec4(i_box.corners[v], 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + if (!intersects) { + return intersects; + } + + for (int p = 0; p < 6; ++p) { + bool is_any_vertex_on_positive_side = false; + for (int v = 0; v < 8; ++v) { + float test = dot(i_box.planes[p], vec4(drw_view.frustum_corners[v].xyz, 1.0)); + if (test > 0.0) { + is_any_vertex_on_positive_side = true; + break; + } + } + bool all_vertex_on_negative_side = !is_any_vertex_on_positive_side; + if (all_vertex_on_negative_side) { + intersects = false; + break; + } + } + + return intersects; +} + bool intersect_view(Sphere sphere) { bool intersects = true; diff --git a/source/blender/draw/intern/shaders/common_view_lib.glsl b/source/blender/draw/intern/shaders/common_view_lib.glsl index 8ab2ef10e4c..6521476c3a7 100644 --- a/source/blender/draw/intern/shaders/common_view_lib.glsl +++ b/source/blender/draw/intern/shaders/common_view_lib.glsl @@ -155,7 +155,11 @@ uniform int drw_ResourceID; # define PASS_RESOURCE_ID # elif defined(GPU_VERTEX_SHADER) -# define resource_id gpu_InstanceIndex +# if defined(UNIFORM_RESOURCE_ID_NEW) +# define resource_id 
drw_ResourceID +# else +# define resource_id gpu_InstanceIndex +# endif # define PASS_RESOURCE_ID drw_ResourceID_iface.resource_index = resource_id; # elif defined(GPU_GEOMETRY_SHADER) @@ -203,8 +207,8 @@ flat in int resourceIDFrag; # ifndef DRW_SHADER_SHARED_H struct ObjectMatrices { - mat4 drw_modelMatrix; - mat4 drw_modelMatrixInverse; + mat4 model; + mat4 model_inverse; }; # endif /* DRW_SHADER_SHARED_H */ @@ -214,8 +218,8 @@ layout(std140) uniform modelBlock ObjectMatrices drw_matrices[DRW_RESOURCE_CHUNK_LEN]; }; -# define ModelMatrix (drw_matrices[resource_id].drw_modelMatrix) -# define ModelMatrixInverse (drw_matrices[resource_id].drw_modelMatrixInverse) +# define ModelMatrix (drw_matrices[resource_id].model) +# define ModelMatrixInverse (drw_matrices[resource_id].model_inverse) # endif /* USE_GPU_SHADER_CREATE_INFO */ #else /* GPU_INTEL */ diff --git a/source/blender/draw/intern/shaders/draw_command_generate_comp.glsl b/source/blender/draw/intern/shaders/draw_command_generate_comp.glsl new file mode 100644 index 00000000000..70842e5bb81 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_command_generate_comp.glsl @@ -0,0 +1,84 @@ + +/** + * Convert DrawPrototype into draw commands. + */ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +#define atomicAddAndGet(dst, val) (atomicAdd(dst, val) + val) + +/* This is only called by the last thread executed over the group's prototype draws. */ +void write_draw_call(DrawGroup group, uint group_id) +{ + DrawCommand cmd; + cmd.vertex_len = group.vertex_len; + cmd.vertex_first = group.vertex_first; + if (group.base_index != -1) { + cmd.base_index = group.base_index; + cmd.instance_first_indexed = group.start; + } + else { + cmd._instance_first_array = group.start; + } + /* Back-facing command. */ + cmd.instance_len = group_buf[group_id].back_facing_counter; + command_buf[group_id * 2 + 0] = cmd; + /* Front-facing command. 
*/ + cmd.instance_len = group_buf[group_id].front_facing_counter; + command_buf[group_id * 2 + 1] = cmd; + + /* Reset the counters for a next command gen dispatch. Avoids resending the whole data just + * for this purpose. Only the last thread will execute this so it is threadsafe. */ + group_buf[group_id].front_facing_counter = 0u; + group_buf[group_id].back_facing_counter = 0u; + group_buf[group_id].total_counter = 0u; +} + +void main() +{ + uint proto_id = gl_GlobalInvocationID.x; + if (proto_id >= prototype_len) { + return; + } + + DrawPrototype proto = prototype_buf[proto_id]; + uint group_id = proto.group_id; + bool is_inverted = (proto.resource_handle & 0x80000000u) != 0; + uint resource_index = (proto.resource_handle & 0x7FFFFFFFu); + + /* Visibility test result. */ + bool is_visible = ((visibility_buf[resource_index / 32u] & (1u << (resource_index % 32u)))) != 0; + + DrawGroup group = group_buf[group_id]; + + if (!is_visible) { + /* Skip the draw but still count towards the completion. 
*/ + if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) { + write_draw_call(group, group_id); + } + return; + } + + uint back_facing_len = group.len - group.front_facing_len; + uint front_facing_len = group.front_facing_len; + uint dst_index = group.start; + if (is_inverted) { + uint offset = atomicAdd(group_buf[group_id].back_facing_counter, proto.instance_len); + dst_index += offset; + if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) { + write_draw_call(group, group_id); + } + } + else { + uint offset = atomicAdd(group_buf[group_id].front_facing_counter, proto.instance_len); + dst_index += back_facing_len + offset; + if (atomicAddAndGet(group_buf[group_id].total_counter, proto.instance_len) == group.len) { + write_draw_call(group, group_id); + } + } + + for (uint i = dst_index; i < dst_index + proto.instance_len; i++) { + /* Fill resource_id buffer for each instance of this draw */ + resource_id_buf[i] = resource_index; + } +} diff --git a/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl b/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl index ab76df819d5..4061dda5d1c 100644 --- a/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl +++ b/source/blender/draw/intern/shaders/draw_debug_draw_display_vert.glsl @@ -6,7 +6,7 @@ void main() { /* Skip the first vertex containing header data. 
*/ - DRWDebugVert vert = drw_debug_verts_buf[gl_VertexID + 1]; + DRWDebugVert vert = drw_debug_verts_buf[gl_VertexID + 2]; vec3 pos = uintBitsToFloat(uvec3(vert.pos0, vert.pos1, vert.pos2)); vec4 col = vec4((uvec4(vert.color) >> uvec4(0, 8, 16, 24)) & 0xFFu) / 255.0; diff --git a/source/blender/draw/intern/shaders/draw_debug_info.hh b/source/blender/draw/intern/shaders/draw_debug_info.hh index 893a5e537d9..ce450bb1210 100644 --- a/source/blender/draw/intern/shaders/draw_debug_info.hh +++ b/source/blender/draw/intern/shaders/draw_debug_info.hh @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "draw_defines.h" #include "gpu_shader_create_info.hh" /* -------------------------------------------------------------------- */ @@ -10,7 +11,7 @@ GPU_SHADER_CREATE_INFO(draw_debug_print) .typedef_source("draw_shader_shared.h") - .storage_buf(7, Qualifier::READ_WRITE, "uint", "drw_debug_print_buf[]"); + .storage_buf(DRW_DEBUG_PRINT_SLOT, Qualifier::READ_WRITE, "uint", "drw_debug_print_buf[]"); GPU_SHADER_INTERFACE_INFO(draw_debug_print_display_iface, "").flat(Type::UINT, "char_index"); @@ -34,7 +35,10 @@ GPU_SHADER_CREATE_INFO(draw_debug_print_display) GPU_SHADER_CREATE_INFO(draw_debug_draw) .typedef_source("draw_shader_shared.h") - .storage_buf(6, Qualifier::READ_WRITE, "DRWDebugVert", "drw_debug_verts_buf[]"); + .storage_buf(DRW_DEBUG_DRAW_SLOT, + Qualifier::READ_WRITE, + "DRWDebugVert", + "drw_debug_verts_buf[]"); GPU_SHADER_INTERFACE_INFO(draw_debug_draw_display_iface, "interp").flat(Type::VEC4, "color"); diff --git a/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl b/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl index f67e9d3f9e0..cb379056e2b 100644 --- a/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl +++ b/source/blender/draw/intern/shaders/draw_debug_print_display_vert.glsl @@ -8,7 +8,7 @@ void main() { /* Skip first 4 chars containing header data. 
*/ - uint char_data = drw_debug_print_buf[gl_VertexID + 4]; + uint char_data = drw_debug_print_buf[gl_VertexID + 8]; char_index = (char_data & 0xFFu) - 0x20u; /* Discard invalid chars. */ diff --git a/source/blender/draw/intern/shaders/draw_object_infos_info.hh b/source/blender/draw/intern/shaders/draw_object_infos_info.hh index 8fd55ea351f..2ec40ab76e3 100644 --- a/source/blender/draw/intern/shaders/draw_object_infos_info.hh +++ b/source/blender/draw/intern/shaders/draw_object_infos_info.hh @@ -1,10 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "draw_defines.h" #include "gpu_shader_create_info.hh" GPU_SHADER_CREATE_INFO(draw_object_infos) .typedef_source("draw_shader_shared.h") .define("OBINFO_LIB") + .define("OrcoTexCoFactors", "(drw_infos[resource_id].orco_mul_bias)") + .define("ObjectInfo", "(drw_infos[resource_id].infos)") + .define("ObjectColor", "(drw_infos[resource_id].color)") .uniform_buf(1, "ObjectInfos", "drw_infos[DRW_RESOURCE_CHUNK_LEN]", Frequency::BATCH); GPU_SHADER_CREATE_INFO(draw_volume_infos) @@ -14,3 +18,11 @@ GPU_SHADER_CREATE_INFO(draw_volume_infos) GPU_SHADER_CREATE_INFO(draw_curves_infos) .typedef_source("draw_shader_shared.h") .uniform_buf(2, "CurvesInfos", "drw_curves", Frequency::BATCH); + +GPU_SHADER_CREATE_INFO(draw_object_infos_new) + .typedef_source("draw_shader_shared.h") + .define("OBINFO_LIB") + .define("OrcoTexCoFactors", "(drw_infos[resource_id].orco_mul_bias)") + .define("ObjectInfo", "(drw_infos[resource_id].infos)") + .define("ObjectColor", "(drw_infos[resource_id].color)") + .storage_buf(DRW_OBJ_INFOS_SLOT, Qualifier::READ, "ObjectInfos", "drw_infos[]");
\ No newline at end of file diff --git a/source/blender/draw/intern/shaders/draw_resource_finalize_comp.glsl b/source/blender/draw/intern/shaders/draw_resource_finalize_comp.glsl new file mode 100644 index 00000000000..d834435e54e --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_resource_finalize_comp.glsl @@ -0,0 +1,64 @@ + +/** + * Finish computation of a few draw resource after sync. + */ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + +void main() +{ + uint resource_id = gl_GlobalInvocationID.x; + if (resource_id >= resource_len) { + return; + } + + mat4 model_mat = matrix_buf[resource_id].model; + ObjectInfos infos = infos_buf[resource_id]; + ObjectBounds bounds = bounds_buf[resource_id]; + + if (bounds.bounding_sphere.w != -1.0) { + /* Convert corners to origin + sides in world space. */ + vec3 p0 = bounds.bounding_corners[0].xyz; + vec3 p01 = bounds.bounding_corners[1].xyz - p0; + vec3 p02 = bounds.bounding_corners[2].xyz - p0; + vec3 p03 = bounds.bounding_corners[3].xyz - p0; + /* Avoid flat box. */ + p01.x = max(p01.x, 1e-4); + p02.y = max(p02.y, 1e-4); + p03.z = max(p03.z, 1e-4); + vec3 diagonal = p01 + p02 + p03; + vec3 center = p0 + diagonal * 0.5; + float min_axis = min_v3(abs(diagonal)); + bounds_buf[resource_id].bounding_sphere.xyz = transform_point(model_mat, center); + /* We have to apply scaling to the diagonal. */ + bounds_buf[resource_id].bounding_sphere.w = length(transform_direction(model_mat, diagonal)) * + 0.5; + bounds_buf[resource_id]._inner_sphere_radius = min_axis; + bounds_buf[resource_id].bounding_corners[0].xyz = transform_point(model_mat, p0); + bounds_buf[resource_id].bounding_corners[1].xyz = transform_direction(model_mat, p01); + bounds_buf[resource_id].bounding_corners[2].xyz = transform_direction(model_mat, p02); + bounds_buf[resource_id].bounding_corners[3].xyz = transform_direction(model_mat, p03); + /* Always have correct handedness in the corners vectors. 
*/ + if (flag_test(infos.flag, OBJECT_NEGATIVE_SCALE)) { + bounds_buf[resource_id].bounding_corners[0].xyz += + bounds_buf[resource_id].bounding_corners[1].xyz; + bounds_buf[resource_id].bounding_corners[1].xyz = + -bounds_buf[resource_id].bounding_corners[1].xyz; + } + + /* TODO: Bypass test for very large objects (see T67319). */ + if (bounds_buf[resource_id].bounding_sphere.w > 1e12) { + bounds_buf[resource_id].bounding_sphere.w = -1.0; + } + } + + vec3 loc = infos.orco_add; /* Box center. */ + vec3 size = infos.orco_mul; /* Box half-extent. */ + /* This is what the original computation looks like. + * Simplify to a nice MADD in shading code. */ + // orco = (pos - loc) / size; + // orco = pos * (1.0 / size) + (-loc / size); + vec3 size_inv = safe_rcp(size); + infos_buf[resource_id].orco_add = -loc * size_inv; + infos_buf[resource_id].orco_mul = size_inv; +}
\ No newline at end of file diff --git a/source/blender/draw/intern/shaders/draw_view_info.hh b/source/blender/draw/intern/shaders/draw_view_info.hh index 0400521c53d..c522c607791 100644 --- a/source/blender/draw/intern/shaders/draw_view_info.hh +++ b/source/blender/draw/intern/shaders/draw_view_info.hh @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include "draw_defines.h" #include "gpu_shader_create_info.hh" /* -------------------------------------------------------------------- */ @@ -44,13 +45,13 @@ GPU_SHADER_CREATE_INFO(draw_resource_handle) * \{ */ GPU_SHADER_CREATE_INFO(draw_view) - .uniform_buf(0, "ViewInfos", "drw_view", Frequency::PASS) + .uniform_buf(DRW_VIEW_UBO_SLOT, "ViewInfos", "drw_view", Frequency::PASS) .typedef_source("draw_shader_shared.h"); GPU_SHADER_CREATE_INFO(draw_modelmat) .uniform_buf(8, "ObjectMatrices", "drw_matrices[DRW_RESOURCE_CHUNK_LEN]", Frequency::BATCH) - .define("ModelMatrix", "(drw_matrices[resource_id].drw_modelMatrix)") - .define("ModelMatrixInverse", "(drw_matrices[resource_id].drw_modelMatrixInverse)") + .define("ModelMatrix", "(drw_matrices[resource_id].model)") + .define("ModelMatrixInverse", "(drw_matrices[resource_id].model_inverse)") .additional_info("draw_view"); GPU_SHADER_CREATE_INFO(draw_modelmat_legacy) @@ -136,3 +137,77 @@ GPU_SHADER_CREATE_INFO(draw_gpencil) .additional_info("draw_modelmat", "draw_resource_id_uniform", "draw_object_infos"); /** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Internal Draw Manager usage + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_resource_finalize) + .do_static_compilation(true) + .typedef_source("draw_shader_shared.h") + .define("DRAW_FINALIZE_SHADER") + .local_group_size(DRW_FINALIZE_GROUP_SIZE) + .storage_buf(0, Qualifier::READ, "ObjectMatrices", "matrix_buf[]") + .storage_buf(1, Qualifier::READ_WRITE, "ObjectBounds", "bounds_buf[]") + .storage_buf(2, Qualifier::READ_WRITE, "ObjectInfos", "infos_buf[]") + 
.push_constant(Type::INT, "resource_len") + .compute_source("draw_resource_finalize_comp.glsl"); + +GPU_SHADER_CREATE_INFO(draw_visibility_compute) + .do_static_compilation(true) + .local_group_size(DRW_VISIBILITY_GROUP_SIZE) + .storage_buf(0, Qualifier::READ, "ObjectBounds", "bounds_buf[]") + .storage_buf(1, Qualifier::READ_WRITE, "uint", "visibility_buf[]") + .push_constant(Type::INT, "resource_len") + .compute_source("draw_visibility_comp.glsl") + .additional_info("draw_view"); + +GPU_SHADER_CREATE_INFO(draw_command_generate) + .do_static_compilation(true) + .typedef_source("draw_shader_shared.h") + .typedef_source("draw_command_shared.hh") + .local_group_size(DRW_COMMAND_GROUP_SIZE) + .storage_buf(0, Qualifier::READ_WRITE, "DrawGroup", "group_buf[]") + .storage_buf(1, Qualifier::READ, "uint", "visibility_buf[]") + .storage_buf(2, Qualifier::READ, "DrawPrototype", "prototype_buf[]") + .storage_buf(3, Qualifier::WRITE, "DrawCommand", "command_buf[]") + .storage_buf(DRW_RESOURCE_ID_SLOT, Qualifier::WRITE, "uint", "resource_id_buf[]") + .push_constant(Type::INT, "prototype_len") + .compute_source("draw_command_generate_comp.glsl"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw Resource ID + * New implementation using gl_BaseInstance and storage buffers. + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_resource_id_new) + .define("UNIFORM_RESOURCE_ID_NEW") + .storage_buf(DRW_RESOURCE_ID_SLOT, Qualifier::READ, "int", "resource_id_buf[]") + .define("drw_ResourceID", "resource_id_buf[gpu_BaseInstance + gl_InstanceID]"); + +/** + * Workaround the lack of gl_BaseInstance by binding the resource_id_buf as vertex buf. 
+ */ +GPU_SHADER_CREATE_INFO(draw_resource_id_fallback) + .define("UNIFORM_RESOURCE_ID_NEW") + .vertex_in(15, Type::INT, "drw_ResourceID"); + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Draw Object Resources + * \{ */ + +GPU_SHADER_CREATE_INFO(draw_modelmat_new) + .typedef_source("draw_shader_shared.h") + .storage_buf(DRW_OBJ_MAT_SLOT, Qualifier::READ, "ObjectMatrices", "drw_matrix_buf[]") + .define("drw_ModelMatrixInverse", "drw_matrix_buf[resource_id].model_inverse") + .define("drw_ModelMatrix", "drw_matrix_buf[resource_id].model") + /* TODO For compatibility with old shaders. To be removed. */ + .define("ModelMatrixInverse", "drw_ModelMatrixInverse") + .define("ModelMatrix", "drw_ModelMatrix") + .additional_info("draw_resource_id_new"); + +/** \} */ diff --git a/source/blender/draw/intern/shaders/draw_visibility_comp.glsl b/source/blender/draw/intern/shaders/draw_visibility_comp.glsl new file mode 100644 index 00000000000..7ec58c8f919 --- /dev/null +++ b/source/blender/draw/intern/shaders/draw_visibility_comp.glsl @@ -0,0 +1,46 @@ + +/** + * Compute visibility of each resource bounds for a given view. + */ +/* TODO(fclem): This could be augmented by a 2 pass occlusion culling system. 
*/ + +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(common_intersect_lib.glsl) + +shared uint shared_result; + +void mask_visibility_bit() +{ + uint bit = 1u << gl_LocalInvocationID.x; + atomicAnd(visibility_buf[gl_WorkGroupID.x], ~bit); +} + +void main() +{ + if (gl_GlobalInvocationID.x >= resource_len) { + return; + } + + ObjectBounds bounds = bounds_buf[gl_GlobalInvocationID.x]; + + if (bounds.bounding_sphere.w != -1.0) { + IsectBox box = isect_data_setup(bounds.bounding_corners[0].xyz, + bounds.bounding_corners[1].xyz, + bounds.bounding_corners[2].xyz, + bounds.bounding_corners[3].xyz); + Sphere bounding_sphere = Sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w); + Sphere inscribed_sphere = Sphere(bounds.bounding_sphere.xyz, bounds._inner_sphere_radius); + + if (intersect_view(inscribed_sphere) == true) { + /* Visible. */ + } + else if (intersect_view(bounding_sphere) == false) { + /* Not visible. */ + mask_visibility_bit(); + } + else if (intersect_view(box) == false) { + /* Not visible. */ + mask_visibility_bit(); + } + } +}
\ No newline at end of file diff --git a/source/blender/draw/tests/draw_pass_test.cc b/source/blender/draw/tests/draw_pass_test.cc new file mode 100644 index 00000000000..f8a006d096b --- /dev/null +++ b/source/blender/draw/tests/draw_pass_test.cc @@ -0,0 +1,441 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +#include "testing/testing.h" + +#include "draw_manager.hh" +#include "draw_pass.hh" +#include "draw_shader.h" +#include "draw_testing.hh" + +#include <bitset> + +namespace blender::draw { + +static void test_draw_pass_all_commands() +{ + Texture tex; + tex.ensure_2d(GPU_RGBA16, int2(1)); + + UniformBuffer<uint4> ubo; + ubo.push_update(); + + StorageBuffer<uint4> ssbo; + ssbo.push_update(); + + float alpha = 0.0f; + int3 dispatch_size(1); + + PassSimple pass = {"test.all_commands"}; + pass.init(); + pass.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_STENCIL); + pass.clear_color_depth_stencil(float4(0.25f, 0.5f, 100.0f, -2000.0f), 0.5f, 0xF0); + pass.state_stencil(0x80, 0x0F, 0x8F); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA)); + pass.bind_texture("image", tex); + pass.bind_texture("image", &tex); + pass.bind_image("missing_image", tex); /* Should not crash. */ + pass.bind_image("missing_image", &tex); /* Should not crash. */ + pass.bind_ubo("missing_ubo", ubo); /* Should not crash. */ + pass.bind_ubo("missing_ubo", &ubo); /* Should not crash. */ + pass.bind_ssbo("missing_ssbo", ssbo); /* Should not crash. */ + pass.bind_ssbo("missing_ssbo", &ssbo); /* Should not crash. */ + pass.push_constant("alpha", alpha); + pass.push_constant("alpha", &alpha); + pass.push_constant("ModelViewProjectionMatrix", float4x4::identity()); + pass.draw_procedural(GPU_PRIM_TRIS, 1, 3); + + /* Should not crash even if shader is not a compute. This is because we only serialize. */ + /* TODO(fclem): Use real compute shader. 
*/ + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA)); + pass.dispatch(dispatch_size); + pass.dispatch(&dispatch_size); + pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS); + + /* Change references. */ + alpha = 1.0f; + dispatch_size = int3(2); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.all_commands" << std::endl; + expected << " .state_set(6)" << std::endl; + expected << " .clear(color=(0.25, 0.5, 100, -2000), depth=0.5, stencil=0b11110000))" + << std::endl; + expected << " .stencil_set(write_mask=0b10000000, compare_mask=0b00001111, reference=0b10001111" + << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_modulate_alpha)" << std::endl; + expected << " .bind_texture(0)" << std::endl; + expected << " .bind_texture_ref(0)" << std::endl; + expected << " .bind_image(-1)" << std::endl; + expected << " .bind_image_ref(-1)" << std::endl; + expected << " .bind_uniform_buf(-1)" << std::endl; + expected << " .bind_uniform_buf_ref(-1)" << std::endl; + expected << " .bind_storage_buf(-1)" << std::endl; + expected << " .bind_storage_buf_ref(-1)" << std::endl; + expected << " .push_constant(2, data=0)" << std::endl; + expected << " .push_constant(2, data=1)" << std::endl; + expected << " .push_constant(0, data=(" << std::endl; + expected << "( 1.000000, 0.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 1.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 1.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ")" << std::endl; + expected << " .draw(inst_len=1, vert_len=3, vert_first=0, res_id=0)" << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_modulate_alpha)" << std::endl; + expected << " .dispatch(1, 1, 1)" << std::endl; + expected << " .dispatch_ref(2, 2, 2)" << std::endl; + expected << " .barrier(4)" << std::endl; + + 
EXPECT_EQ(result, expected.str()); + + DRW_shape_cache_free(); +} +DRAW_TEST(draw_pass_all_commands) + +static void test_draw_pass_sub_ordering() +{ + PassSimple pass = {"test.sub_ordering"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA)); + pass.push_constant("test_pass", 1); + + PassSimple::Sub &sub1 = pass.sub("Sub1"); + sub1.push_constant("test_sub1", 11); + + PassSimple::Sub &sub2 = pass.sub("Sub2"); + sub2.push_constant("test_sub2", 21); + + /* Will execute after both sub. */ + pass.push_constant("test_pass", 2); + + /* Will execute after sub1. */ + sub2.push_constant("test_sub2", 22); + + /* Will execute before sub2. */ + sub1.push_constant("test_sub1", 12); + + /* Will execute before end of pass. */ + sub2.push_constant("test_sub2", 23); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.sub_ordering" << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_modulate_alpha)" << std::endl; + expected << " .push_constant(-1, data=1)" << std::endl; + expected << " .Sub1" << std::endl; + expected << " .push_constant(-1, data=11)" << std::endl; + expected << " .push_constant(-1, data=12)" << std::endl; + expected << " .Sub2" << std::endl; + expected << " .push_constant(-1, data=21)" << std::endl; + expected << " .push_constant(-1, data=22)" << std::endl; + expected << " .push_constant(-1, data=23)" << std::endl; + expected << " .push_constant(-1, data=2)" << std::endl; + + EXPECT_EQ(result, expected.str()); +} +DRAW_TEST(draw_pass_sub_ordering) + +static void test_draw_pass_simple_draw() +{ + PassSimple pass = {"test.simple_draw"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA)); + /* Each draw procedural type uses a different batch. Groups are drawn in correct order. 
*/ + pass.draw_procedural(GPU_PRIM_TRIS, 1, 10, 1, {1}); + pass.draw_procedural(GPU_PRIM_POINTS, 4, 20, 2, {2}); + pass.draw_procedural(GPU_PRIM_TRIS, 2, 30, 3, {3}); + pass.draw_procedural(GPU_PRIM_POINTS, 5, 40, 4, ResourceHandle(4, true)); + pass.draw_procedural(GPU_PRIM_LINES, 1, 50, 5, {5}); + pass.draw_procedural(GPU_PRIM_POINTS, 6, 60, 6, {5}); + pass.draw_procedural(GPU_PRIM_TRIS, 3, 70, 7, {6}); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.simple_draw" << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_modulate_alpha)" << std::endl; + expected << " .draw(inst_len=1, vert_len=10, vert_first=1, res_id=1)" << std::endl; + expected << " .draw(inst_len=4, vert_len=20, vert_first=2, res_id=2)" << std::endl; + expected << " .draw(inst_len=2, vert_len=30, vert_first=3, res_id=3)" << std::endl; + expected << " .draw(inst_len=5, vert_len=40, vert_first=4, res_id=4)" << std::endl; + expected << " .draw(inst_len=1, vert_len=50, vert_first=5, res_id=5)" << std::endl; + expected << " .draw(inst_len=6, vert_len=60, vert_first=6, res_id=5)" << std::endl; + expected << " .draw(inst_len=3, vert_len=70, vert_first=7, res_id=6)" << std::endl; + + EXPECT_EQ(result, expected.str()); + + DRW_shape_cache_free(); +} +DRAW_TEST(draw_pass_simple_draw) + +static void test_draw_pass_multi_draw() +{ + PassMain pass = {"test.multi_draw"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA)); + /* Each draw procedural type uses a different batch. Groups are drawn in reverse order. 
*/ + pass.draw_procedural(GPU_PRIM_TRIS, 1, -1, -1, {1}); + pass.draw_procedural(GPU_PRIM_POINTS, 4, -1, -1, {2}); + pass.draw_procedural(GPU_PRIM_TRIS, 2, -1, -1, {3}); + pass.draw_procedural(GPU_PRIM_POINTS, 5, -1, -1, ResourceHandle(4, true)); + pass.draw_procedural(GPU_PRIM_LINES, 1, -1, -1, {5}); + pass.draw_procedural(GPU_PRIM_POINTS, 6, -1, -1, {5}); + pass.draw_procedural(GPU_PRIM_TRIS, 3, -1, -1, {6}); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.multi_draw" << std::endl; + expected << " .shader_bind(gpu_shader_3D_image_modulate_alpha)" << std::endl; + expected << " .draw_multi(3)" << std::endl; + expected << " .group(id=2, len=1)" << std::endl; + expected << " .proto(instance_len=1, resource_id=5, front_face)" << std::endl; + expected << " .group(id=1, len=15)" << std::endl; + expected << " .proto(instance_len=5, resource_id=4, back_face)" << std::endl; + expected << " .proto(instance_len=6, resource_id=5, front_face)" << std::endl; + expected << " .proto(instance_len=4, resource_id=2, front_face)" << std::endl; + expected << " .group(id=0, len=6)" << std::endl; + expected << " .proto(instance_len=3, resource_id=6, front_face)" << std::endl; + expected << " .proto(instance_len=2, resource_id=3, front_face)" << std::endl; + expected << " .proto(instance_len=1, resource_id=1, front_face)" << std::endl; + + EXPECT_EQ(result, expected.str()); + + DRW_shape_cache_free(); +} +DRAW_TEST(draw_pass_multi_draw) + +static void test_draw_pass_sortable() +{ + PassSortable pass = {"test.sortable"}; + pass.init(); + + pass.sub("Sub3", 3.0f); + pass.sub("Sub2", 2.0f); + pass.sub("Sub5", 4.0f); + pass.sub("Sub4", 3.0f); + pass.sub("Sub1", 1.0f); + + std::string result = pass.serialize(); + std::stringstream expected; + expected << ".test.sortable" << std::endl; + expected << " .Sub1" << std::endl; + expected << " .Sub2" << std::endl; + expected << " .Sub3" << std::endl; + expected << " .Sub4" << std::endl; + expected << " 
.Sub5" << std::endl; + + EXPECT_EQ(result, expected.str()); + + DRW_shape_cache_free(); +} +DRAW_TEST(draw_pass_sortable) + +static void test_draw_resource_id_gen() +{ + float4x4 win_mat; + orthographic_m4(win_mat.ptr(), -1, 1, -1, 1, -1, 1); + + View view("test_view"); + view.sync(float4x4::identity(), win_mat); + + Manager drw; + + float4x4 obmat_1 = float4x4::identity(); + float4x4 obmat_2 = float4x4::identity(); + obmat_1.apply_scale(-0.5f); + obmat_2.apply_scale(0.5f); + + drw.begin_sync(); + ResourceHandle handle1 = drw.resource_handle(obmat_1); + ResourceHandle handle2 = drw.resource_handle(obmat_1); + ResourceHandle handle3 = drw.resource_handle(obmat_2); + drw.resource_handle(obmat_2, float3(2), float3(1)); + drw.end_sync(); + + StringRefNull expected = "2 1 1 1 1 3 3 1 1 1 1 1 3 2 2 2 2 2 2 1 1 1 "; + + { + /* Computed on CPU. */ + PassSimple pass = {"test.resource_id"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA)); + pass.draw_procedural(GPU_PRIM_TRIS, 1, -1, -1, handle2); + pass.draw_procedural(GPU_PRIM_POINTS, 4, -1, -1, handle1); + pass.draw_procedural(GPU_PRIM_TRIS, 2, -1, -1, handle3); + pass.draw_procedural(GPU_PRIM_POINTS, 5, -1, -1, handle1); + pass.draw_procedural(GPU_PRIM_LINES, 1, -1, -1, handle3); + pass.draw_procedural(GPU_PRIM_POINTS, 6, -1, -1, handle2); + pass.draw_procedural(GPU_PRIM_TRIS, 3, -1, -1, handle1); + + Manager::SubmitDebugOutput debug = drw.submit_debug(pass, view); + + std::stringstream result; + for (auto val : debug.resource_id) { + result << val << " "; + } + + EXPECT_EQ(result.str(), expected); + } + { + /* Same thing with PassMain (computed on GPU) */ + PassSimple pass = {"test.resource_id"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA)); + pass.draw_procedural(GPU_PRIM_TRIS, 1, -1, -1, handle2); + pass.draw_procedural(GPU_PRIM_POINTS, 4, -1, -1, handle1); + pass.draw_procedural(GPU_PRIM_TRIS, 2, -1, -1, 
handle3); + pass.draw_procedural(GPU_PRIM_POINTS, 5, -1, -1, handle1); + pass.draw_procedural(GPU_PRIM_LINES, 1, -1, -1, handle3); + pass.draw_procedural(GPU_PRIM_POINTS, 6, -1, -1, handle2); + pass.draw_procedural(GPU_PRIM_TRIS, 3, -1, -1, handle1); + + Manager::SubmitDebugOutput debug = drw.submit_debug(pass, view); + + std::stringstream result; + for (auto val : debug.resource_id) { + result << val << " "; + } + + EXPECT_EQ(result.str(), expected); + } + + DRW_shape_cache_free(); + DRW_shaders_free(); +} +DRAW_TEST(draw_resource_id_gen) + +static void test_draw_visibility() +{ + float4x4 win_mat; + orthographic_m4(win_mat.ptr(), -1, 1, -1, 1, -1, 1); + + View view("test_view"); + view.sync(float4x4::identity(), win_mat); + + Manager drw; + + float4x4 obmat_1 = float4x4::identity(); + float4x4 obmat_2 = float4x4::identity(); + obmat_1.apply_scale(-0.5f); + obmat_2.apply_scale(0.5f); + + drw.begin_sync(); /* Default {0} always visible. */ + drw.resource_handle(obmat_1); /* No bounds, always visible. */ + drw.resource_handle(obmat_1, float3(3), float3(1)); /* Out of view. */ + drw.resource_handle(obmat_2, float3(0), float3(1)); /* Inside view. 
*/ + drw.end_sync(); + + PassMain pass = {"test.visibility"}; + pass.init(); + pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA)); + pass.draw_procedural(GPU_PRIM_TRIS, 1, -1); + + Manager::SubmitDebugOutput debug = drw.submit_debug(pass, view); + Vector<uint32_t> expected_visibility = {0}; + + std::stringstream result; + for (auto val : debug.visibility) { + result << std::bitset<32>(val); + } + + EXPECT_EQ(result.str(), "11111111111111111111111111111011"); + + DRW_shape_cache_free(); + DRW_shaders_free(); +} +DRAW_TEST(draw_visibility) + +static void test_draw_manager_sync() +{ + float4x4 obmat_1 = float4x4::identity(); + float4x4 obmat_2 = float4x4::identity(); + obmat_1.apply_scale(-0.5f); + obmat_2.apply_scale(0.5f); + + /* TODO find a way to create a minimum object to test resource handle creation on it. */ + Manager drw; + + drw.begin_sync(); + drw.resource_handle(obmat_1); + drw.resource_handle(obmat_2, float3(2), float3(1)); + drw.end_sync(); + + Manager::DataDebugOutput debug = drw.data_debug(); + + std::stringstream result; + for (const auto &val : debug.matrices) { + result << val; + } + for (const auto &val : debug.bounds) { + result << val; + } + for (const auto &val : debug.infos) { + result << val; + } + + std::stringstream expected; + expected << "ObjectMatrices(" << std::endl; + expected << "model=(" << std::endl; + expected << "( 1.000000, 0.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 1.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 1.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ", " << std::endl; + expected << "model_inverse=(" << std::endl; + expected << "( 1.000000, -0.000000, 0.000000, -0.000000)" << std::endl; + expected << "( -0.000000, 1.000000, -0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, -0.000000, 1.000000, -0.000000)" << 
std::endl; + expected << "( -0.000000, 0.000000, -0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ")" << std::endl; + expected << "ObjectMatrices(" << std::endl; + expected << "model=(" << std::endl; + expected << "( -0.500000, -0.000000, -0.000000, 0.000000)" << std::endl; + expected << "( -0.000000, -0.500000, -0.000000, 0.000000)" << std::endl; + expected << "( -0.000000, -0.000000, -0.500000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ", " << std::endl; + expected << "model_inverse=(" << std::endl; + expected << "( -2.000000, 0.000000, -0.000000, -0.000000)" << std::endl; + expected << "( 0.000000, -2.000000, 0.000000, 0.000000)" << std::endl; + expected << "( -0.000000, 0.000000, -2.000000, 0.000000)" << std::endl; + expected << "( -0.000000, -0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ")" << std::endl; + expected << "ObjectMatrices(" << std::endl; + expected << "model=(" << std::endl; + expected << "( 0.500000, 0.000000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.500000, 0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.500000, 0.000000)" << std::endl; + expected << "( 0.000000, 0.000000, 0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ", " << std::endl; + expected << "model_inverse=(" << std::endl; + expected << "( 2.000000, -0.000000, 0.000000, -0.000000)" << std::endl; + expected << "( -0.000000, 2.000000, -0.000000, 0.000000)" << std::endl; + expected << "( 0.000000, -0.000000, 2.000000, -0.000000)" << std::endl; + expected << "( -0.000000, 0.000000, -0.000000, 1.000000)" << std::endl; + expected << ")" << std::endl; + expected << ")" << std::endl; + expected << "ObjectBounds(skipped)" << std::endl; + expected << "ObjectBounds(skipped)" << std::endl; + expected << "ObjectBounds(" << std::endl; + 
expected << ".bounding_corners[0](0.5, 0.5, 0.5)" << std::endl; + expected << ".bounding_corners[1](1, 0, 0)" << std::endl; + expected << ".bounding_corners[2](0, 1, 0)" << std::endl; + expected << ".bounding_corners[3](0, 0, 1)" << std::endl; + expected << ".sphere=(pos=(1, 1, 1), rad=0.866025" << std::endl; + expected << ")" << std::endl; + expected << "ObjectInfos(skipped)" << std::endl; + expected << "ObjectInfos(skipped)" << std::endl; + expected << "ObjectInfos(skipped)" << std::endl; + + EXPECT_EQ(result.str(), expected.str()); + + DRW_shaders_free(); +} +DRAW_TEST(draw_manager_sync) + +} // namespace blender::draw
\ No newline at end of file diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 7ae9eae6d44..2f16d788b9d 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -27,6 +27,7 @@ set(INC # For *_info.hh includes. ../draw/engines/eevee_next + ../draw/intern # For node muting stuff. ../nodes diff --git a/source/blender/gpu/GPU_batch.h b/source/blender/gpu/GPU_batch.h index 8f524f72fa1..4935ced7f48 100644 --- a/source/blender/gpu/GPU_batch.h +++ b/source/blender/gpu/GPU_batch.h @@ -70,6 +70,8 @@ typedef struct GPUBatch { GPUVertBuf *inst[GPU_BATCH_INST_VBO_MAX_LEN]; /** NULL if element list not needed */ GPUIndexBuf *elem; + /** Resource ID attribute workaround. */ + GPUStorageBuf *resource_id_buf; /** Bookkeeping. */ eGPUBatchFlag flag; /** Type of geometry to draw. */ @@ -126,6 +128,11 @@ bool GPU_batch_vertbuf_has(GPUBatch *, GPUVertBuf *); #define GPU_batch_vertbuf_add(batch, verts) GPU_batch_vertbuf_add_ex(batch, verts, false) +/** + * Set resource id buffer to bind as instance attribute to workaround the lack of gl_BaseInstance. + */ +void GPU_batch_resource_id_buf_set(GPUBatch *batch, GPUStorageBuf *resource_id_buf); + void GPU_batch_set_shader(GPUBatch *batch, GPUShader *shader); /** * Bind program bound to IMM to the batch. 
diff --git a/source/blender/gpu/intern/gpu_batch.cc b/source/blender/gpu/intern/gpu_batch.cc index 9092ad5110c..c871004deac 100644 --- a/source/blender/gpu/intern/gpu_batch.cc +++ b/source/blender/gpu/intern/gpu_batch.cc @@ -200,6 +200,13 @@ bool GPU_batch_vertbuf_has(GPUBatch *batch, GPUVertBuf *verts) return false; } +void GPU_batch_resource_id_buf_set(GPUBatch *batch, GPUStorageBuf *resource_id_buf) +{ + BLI_assert(resource_id_buf); + batch->flag |= GPU_BATCH_DIRTY; + batch->resource_id_buf = resource_id_buf; +} + /** \} */ /* -------------------------------------------------------------------- */ diff --git a/source/blender/gpu/intern/gpu_shader_create_info.cc b/source/blender/gpu/intern/gpu_shader_create_info.cc index 110b77f1f52..a18fdcd32df 100644 --- a/source/blender/gpu/intern/gpu_shader_create_info.cc +++ b/source/blender/gpu/intern/gpu_shader_create_info.cc @@ -300,6 +300,11 @@ void gpu_shader_create_info_init() draw_modelmat = draw_modelmat_legacy; } + /* WORKAROUND: Replace the use of gpu_BaseInstance by an instance attribute. 
*/ + if (GPU_shader_draw_parameters_support() == false) { + draw_resource_id_new = draw_resource_id_fallback; + } + for (ShaderCreateInfo *info : g_create_infos->values()) { if (info->do_static_compilation_) { info->builtins_ |= gpu_shader_dependency_get_builtins(info->vertex_source_); diff --git a/source/blender/gpu/opengl/gl_vertex_array.cc b/source/blender/gpu/opengl/gl_vertex_array.cc index d836b73f5d8..6897ac9f4a2 100644 --- a/source/blender/gpu/opengl/gl_vertex_array.cc +++ b/source/blender/gpu/opengl/gl_vertex_array.cc @@ -11,6 +11,7 @@ #include "gl_batch.hh" #include "gl_context.hh" #include "gl_index_buffer.hh" +#include "gl_storage_buffer.hh" #include "gl_vertex_buffer.hh" #include "gl_vertex_array.hh" @@ -118,6 +119,18 @@ void GLVertArray::update_bindings(const GLuint vao, } } + if (batch->resource_id_buf) { + const ShaderInput *input = interface->attr_get("drw_ResourceID"); + if (input) { + dynamic_cast<GLStorageBuf *>(unwrap(batch->resource_id_buf))->bind_as(GL_ARRAY_BUFFER); + glEnableVertexAttribArray(input->location); + glVertexAttribDivisor(input->location, 1); + glVertexAttribIPointer( + input->location, 1, to_gl(GPU_COMP_I32), sizeof(uint32_t), (GLvoid *)nullptr); + attr_mask &= ~(1 << input->location); + } + } + if (attr_mask != 0 && GLContext::vertex_attrib_binding_support) { for (uint16_t mask = 1, a = 0; a < 16; a++, mask <<= 1) { if (attr_mask & mask) { diff --git a/source/blender/makesdna/DNA_userdef_types.h b/source/blender/makesdna/DNA_userdef_types.h index dc461502b10..39fb3690da4 100644 --- a/source/blender/makesdna/DNA_userdef_types.h +++ b/source/blender/makesdna/DNA_userdef_types.h @@ -640,8 +640,8 @@ typedef struct UserDef_Experimental { char use_cycles_debug; char show_asset_debug_info; char no_asset_indexing; + char use_viewport_debug; char SANITIZE_AFTER_HERE; - char _pad0; /* The following options are automatically sanitized (set to 0) * when the release cycle is not alpha. 
*/ char use_new_curves_tools; diff --git a/source/blender/makesdna/DNA_view3d_types.h b/source/blender/makesdna/DNA_view3d_types.h index 0d281032b7e..1ba057d9c40 100644 --- a/source/blender/makesdna/DNA_view3d_types.h +++ b/source/blender/makesdna/DNA_view3d_types.h @@ -296,7 +296,9 @@ typedef struct View3D { char _pad6[2]; int layact DNA_DEPRECATED; unsigned short local_collections_uuid; - short _pad7[3]; + short _pad7[2]; + + short debug_flag; /** Optional bool for 3d cursor to define center. */ short ob_center_cursor; @@ -489,6 +491,11 @@ enum { V3D_SHADING_COMPOSITOR = (1 << 15), }; +/** #View3D.debug_flag */ +enum { + V3D_DEBUG_FREEZE_CULLING = (1 << 0), +}; + #define V3D_USES_SCENE_LIGHTS(v3d) \ ((((v3d)->shading.type == OB_MATERIAL) && ((v3d)->shading.flag & V3D_SHADING_SCENE_LIGHTS)) || \ (((v3d)->shading.type == OB_RENDER) && \ diff --git a/source/blender/makesrna/intern/rna_space.c b/source/blender/makesrna/intern/rna_space.c index 9b08b6ef665..5f2e3c4d1a0 100644 --- a/source/blender/makesrna/intern/rna_space.c +++ b/source/blender/makesrna/intern/rna_space.c @@ -4736,6 +4736,13 @@ static void rna_def_space_view3d_overlay(BlenderRNA *brna) RNA_def_property_range(prop, 0.0f, 1.0f); RNA_def_property_ui_text(prop, "Opacity", "Vertex Paint mix factor"); RNA_def_property_update(prop, NC_SPACE | ND_SPACE_VIEW3D, "rna_GPencil_update"); + + /* Developer Debug overlay */ + + prop = RNA_def_property(srna, "use_debug_freeze_view_culling", PROP_BOOLEAN, PROP_NONE); + RNA_def_property_boolean_sdna(prop, NULL, "debug_flag", V3D_DEBUG_FREEZE_CULLING); + RNA_def_property_ui_text(prop, "Freeze Culling", "Freeze view culling bounds"); + RNA_def_property_update(prop, NC_SPACE | ND_SPACE_VIEW3D, NULL); } static void rna_def_space_view3d(BlenderRNA *brna) diff --git a/source/blender/makesrna/intern/rna_userdef.c b/source/blender/makesrna/intern/rna_userdef.c index 324c0bb9006..61d4edccb06 100644 --- a/source/blender/makesrna/intern/rna_userdef.c +++ 
b/source/blender/makesrna/intern/rna_userdef.c @@ -6372,6 +6372,14 @@ static void rna_def_userdef_experimental(BlenderRNA *brna) prop = RNA_def_property(srna, "enable_eevee_next", PROP_BOOLEAN, PROP_NONE); RNA_def_property_boolean_sdna(prop, NULL, "enable_eevee_next", 1); RNA_def_property_ui_text(prop, "EEVEE Next", "Enable the new EEVEE codebase, requires restart"); + + prop = RNA_def_property(srna, "use_viewport_debug", PROP_BOOLEAN, PROP_NONE); + RNA_def_property_boolean_sdna(prop, NULL, "use_viewport_debug", 1); + RNA_def_property_ui_text(prop, + "Viewport Debug", + "Enable viewport debugging options for developers in the overlays " + "pop-over"); + RNA_def_property_update(prop, 0, "rna_userdef_ui_update"); } static void rna_def_userdef_addon_collection(BlenderRNA *brna, PropertyRNA *cprop) |