From 1bc2e9a6fcdcf58f6aec93f11d5d76c8e9773de2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= Date: Wed, 24 Aug 2022 15:21:14 +0200 Subject: Add multidraw --- .../shaders/eevee_depth_of_field_reduce_comp.glsl | 8 +- source/blender/draw/intern/draw_command.cc | 54 +++---- source/blender/draw/intern/draw_command.hh | 168 +++++++++++++++++---- source/blender/draw/intern/draw_command_shared.hh | 59 +++----- source/blender/draw/intern/draw_debug.cc | 48 +++--- source/blender/draw/intern/draw_manager.cc | 8 +- source/blender/draw/intern/draw_manager.hh | 4 +- source/blender/draw/intern/draw_pass.hh | 8 +- source/blender/draw/intern/draw_shader_shared.h | 24 +-- 9 files changed, 242 insertions(+), 139 deletions(-) diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl index 80555367478..a6426cd06e4 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_reduce_comp.glsl @@ -133,9 +133,9 @@ void main() /* Issue a sprite for each field if any CoC matches. */ if (any(lessThan(do_scatter4 * sign(coc4), vec4(0.0)))) { /* Same value for all threads. Not an issue if we don't sync access to it. */ - scatter_fg_indirect_buf.v_count = 4u; + scatter_fg_indirect_buf.vertex_len = 4u; /* Issue 1 strip instance per sprite. */ - uint rect_id = atomicAdd(scatter_fg_indirect_buf.i_count, 1u); + uint rect_id = atomicAdd(scatter_fg_indirect_buf.instance_len, 1u); if (rect_id < dof_buf.scatter_max_rect) { vec4 coc4_fg = max(vec4(0.0), -coc4); @@ -166,9 +166,9 @@ void main() } if (any(greaterThan(do_scatter4 * sign(coc4), vec4(0.0)))) { /* Same value for all threads. Not an issue if we don't sync access to it. */ - scatter_bg_indirect_buf.v_count = 4u; + scatter_bg_indirect_buf.vertex_len = 4u; /* Issue 1 strip instance per sprite. 
*/ - uint rect_id = atomicAdd(scatter_bg_indirect_buf.i_count, 1u); + uint rect_id = atomicAdd(scatter_bg_indirect_buf.instance_len, 1u); if (rect_id < dof_buf.scatter_max_rect) { vec4 coc4_bg = max(vec4(0.0), coc4); vec4 bg_weights = dof_layer_weight(coc4_bg) * dof_sample_weight(coc4_bg) * do_scatter4; diff --git a/source/blender/draw/intern/draw_command.cc b/source/blender/draw/intern/draw_command.cc index edd6efe1f7b..fb1a8a8dc2a 100644 --- a/source/blender/draw/intern/draw_command.cc +++ b/source/blender/draw/intern/draw_command.cc @@ -111,23 +111,23 @@ void StateSet::execute(RecordingState &recording_state) const */ BLI_assert(DST.state_lock == 0); - if (!assign_if_different(recording_state.pipeline_state, state)) { + if (!assign_if_different(recording_state.pipeline_state, new_state)) { return; } /* Keep old API working. Keep the state tracking in sync. */ /* TODO(fclem): Move at the end of a pass. */ - DST.state = state; + DST.state = new_state; - GPU_state_set(to_write_mask(state), - to_blend(state), - to_face_cull_test(state), - to_depth_test(state), - to_stencil_test(state), - to_stencil_op(state), - to_provoking_vertex(state)); + GPU_state_set(to_write_mask(new_state), + to_blend(new_state), + to_face_cull_test(new_state), + to_depth_test(new_state), + to_stencil_test(new_state), + to_stencil_op(new_state), + to_provoking_vertex(new_state)); - if (state & DRW_STATE_SHADOW_OFFSET) { + if (new_state & DRW_STATE_SHADOW_OFFSET) { GPU_shadow_offset(true); } else { @@ -135,14 +135,14 @@ void StateSet::execute(RecordingState &recording_state) const } /* TODO: this should be part of shader state. 
*/ - if (state & DRW_STATE_CLIP_PLANES) { + if (new_state & DRW_STATE_CLIP_PLANES) { GPU_clip_distances(recording_state.view_clip_plane_count); } else { GPU_clip_distances(0); } - if (state & DRW_STATE_IN_FRONT_SELECT) { + if (new_state & DRW_STATE_IN_FRONT_SELECT) { /* XXX `GPU_depth_range` is not a perfect solution * since very distant geometries can still be occluded. * Also the depth test precision of these geometries is impaired. @@ -153,7 +153,7 @@ void StateSet::execute(RecordingState &recording_state) const GPU_depth_range(0.0f, 1.0f); } - if (state & DRW_STATE_PROGRAM_POINT_SIZE) { + if (new_state & DRW_STATE_PROGRAM_POINT_SIZE) { GPU_program_point_size(true); } else { @@ -168,32 +168,34 @@ void StencilSet::execute() const GPU_stencil_reference_set(reference); } -void MultiDraw::execute(RecordingState &state, - Span multi_draw_buf, - uint command_id) const +void DrawMulti::execute(RecordingState &state) const { - while (command_id != (uint)-1) { - const MultiDraw &cmd = multi_draw_buf[command_id]; + DrawMultiBuf::DrawCommandBuf &indirect_buf = multi_draw_buf->command_buf_; + DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_; - GPU_batch_set_shader(cmd.gpu_batch, state.shader); + uint group_index = this->group_first; + while (group_index != (uint)-1) { + const DrawGroup &grp = groups[group_index]; + + GPU_batch_set_shader(grp.gpu_batch, state.shader); constexpr intptr_t stride = sizeof(DrawCommand); - intptr_t offset = stride * cmd.command_start; + intptr_t offset = stride * grp.command_start; /* Draw negatively scaled geometry first. 
*/ - uint back_facing_len = cmd.command_len - cmd.front_facing_len; + uint back_facing_len = grp.command_len - grp.front_facing_len; if (back_facing_len > 0) { state.front_facing_set(false); - // GPU_batch_multi_draw_indirect(cmd.gpu_batch, offset, back_facing_len, stride); + GPU_batch_draw_indirect(grp.gpu_batch, indirect_buf, offset); offset += stride * back_facing_len; } - if (cmd.front_facing_len > 0) { + if (grp.front_facing_len > 0) { state.front_facing_set(true); - // GPU_batch_multi_draw_indirect(cmd.gpu_batch, offset, cmd.front_facing_len, stride); + GPU_batch_draw_indirect(grp.gpu_batch, indirect_buf, offset); } - command_id = cmd.next; + group_index = grp.next; } } @@ -380,7 +382,7 @@ std::string Clear::serialize() const std::string StateSet::serialize() const { /* TOOD(fclem): Better serialization... */ - return std::string(".state_set(") + std::to_string(state) + ")"; + return std::string(".state_set(") + std::to_string(new_state) + ")"; } std::string StencilSet::serialize() const diff --git a/source/blender/draw/intern/draw_command.hh b/source/blender/draw/intern/draw_command.hh index a816cfd53f9..ccfe29a8fca 100644 --- a/source/blender/draw/intern/draw_command.hh +++ b/source/blender/draw/intern/draw_command.hh @@ -9,6 +9,7 @@ * Passes record draw commands. */ +#include "BLI_map.hh" #include "DRW_gpu_wrapper.hh" #include "draw_command_shared.hh" @@ -18,6 +19,7 @@ namespace blender::draw::command { class DrawCommandBuf; +class DrawMultiBuf; /* -------------------------------------------------------------------- */ /** \name Recording State @@ -73,7 +75,7 @@ enum class Type : uint8_t { /** Special commands stored in separate buffers. 
*/ SubPass, - MultiDraw, + DrawMulti, }; /** @@ -233,6 +235,15 @@ struct Draw { std::string serialize() const; }; +struct DrawMulti { + DrawMultiBuf *multi_draw_buf; + uint group_first; + uint uuid; + + void execute(RecordingState &state) const; + std::string serialize() const; +}; + struct DrawIndirect { GPUBatch *batch; GPUStorageBuf **indirect_buf; @@ -283,9 +294,9 @@ struct Clear { }; struct StateSet { - DRWState state; + DRWState new_state; - void execute(RecordingState &recording_state) const; + void execute(RecordingState &state) const; std::string serialize() const; }; @@ -304,6 +315,7 @@ struct Undetermined { ResourceBind resource_bind; PushConstant push_constant; Draw draw; + DrawMulti draw_multi; DrawIndirect draw_indirect; Dispatch dispatch; DispatchIndirect dispatch_indirect; @@ -328,6 +340,9 @@ struct Undetermined { case command::Type::Draw: draw.execute(state); break; + case command::Type::DrawMulti: + draw_multi.execute(state); + break; case command::Type::DrawIndirect: draw_indirect.execute(state); break; @@ -405,8 +420,8 @@ class DrawCommandBuf { public: void clear(){}; - void append_draw(Vector &headers, - Vector &commands, + void append_draw(Vector
&headers, + Vector &commands, GPUBatch *batch, uint instance_len, uint vertex_len, @@ -417,15 +432,23 @@ class DrawCommandBuf { instance_len = instance_len != -1 ? instance_len : 1; int64_t index = commands.append_and_get_index({}); - headers.append({command::Type::Draw, static_cast(index)}); + headers.append({Type::Draw, static_cast(index)}); commands[index].draw = {batch, instance_len, vertex_len, vertex_first, handle}; } - void bind(ResourceIdBuf &resource_id_buf) + void bind(Vector
&headers, + Vector &commands, + ResourceIdBuf &resource_id_buf) { uint total_instance = 0; -#if 0 - for (DrawCommand &cmd : command_buf_) { + + for (const Header &header : headers) { + if (header.type != Type::Draw) { + continue; + } + + Draw &cmd = commands[header.command_index].draw; + int batch_vert_len, batch_inst_len; /* Now that GPUBatches are guaranteed to be finished, extract their parameters. */ GPU_batch_draw_parameter_get(cmd.batch, &batch_vert_len, &batch_inst_len); @@ -433,22 +456,22 @@ class DrawCommandBuf { * instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */ BLI_assert(batch_inst_len == 1); - cmd.v_count = max_ii(cmd.v_count, batch_vert_len); + cmd.vertex_len = max_ii(cmd.vertex_len, batch_vert_len); - if (cmd.resource_id > 0) { + if (cmd.handle.raw > 0) { /* Save correct offset to start of resource_id buffer region for this draw. */ - cmd.i_first = total_instance; - total_instance += cmd.i_count; + uint instance_first = total_instance; + total_instance += cmd.instance_len; /* Ensure the buffer is big enough. */ resource_id_buf.get_or_resize(total_instance - 1); /* Copy the resource id for all instances. */ - for (int i = cmd.i_first; i < (cmd.i_first + cmd.i_count); i++) { - resource_id_buf[i] = cmd.resource_id; + uint index = cmd.handle.resource_index(); + for (int i = instance_first; i < (instance_first + cmd.instance_len); i++) { + resource_id_buf[i] = index; } } } -#endif if (total_instance > 0) { resource_id_buf.push_update(); @@ -492,18 +515,111 @@ class DrawCommandBuf { * * \{ */ -struct MultiDrawBuf { - void clear(){}; +class DrawMultiBuf { + friend DrawMulti; + + private: + using DrawGroupBuf = StorageArrayBuffer; + using DrawPrototypeBuf = StorageArrayBuffer; + using DrawCommandBuf = StorageArrayBuffer; + + /** Key used to identify which DrawGroup to increment in the subgroup map. 
*/ + using DrawGroupKey = std::pair; + using DrawGroupMap = Map; + /** Maps a command group and a gpu batch to their unique multi_draw command. */ + DrawGroupMap group_ids_; + + /** DrawGroup Command heap. Uploaded to GPU for sorting. */ + DrawGroupBuf group_buf_; + /** Prototype commands. */ + DrawPrototypeBuf prototype_buf_; + /** Command list generated by the sorting / compaction steps. Lives on GPU. */ + DrawCommandBuf command_buf_; + /** Give unique ID to each header so we can use that as hash key. */ + uint header_id_counter_ = 0; + /** Number of groups inside group_buf_. */ + uint group_count_ = 0; + /** Number of prototypes inside prototype_buf_. */ + uint prototype_count_ = 0; + + public: + void clear() + { + header_id_counter_ = 0; + group_count_ = 0; prototype_count_ = 0; + } + + void append_draw(Vector
&headers, + Vector &commands, + GPUBatch *batch, + uint instance_len, + uint vertex_len, + uint vertex_first, + ResourceHandle handle) + { + /* Unsupported for now. Use PassSimple. */ + BLI_assert(vertex_first == 0); + + /* If there was some state changes since previous call, we have to create another command. */ + if (headers.last().type != Type::DrawMulti) { + uint index = commands.append_and_get_index({}); + headers.append({Type::DrawMulti, index}); + commands[index].draw_multi = {this, (uint)-1, header_id_counter_++}; + } + + DrawMulti &cmd = commands.last().draw_multi; + + uint group_id = group_ids_.lookup_default(DrawGroupKey(cmd.uuid, batch), (uint)-1); + + if (group_id == (uint)-1) { + uint new_group_id = group_count_++; - void append_draw(Vector &, - Vector &, - GPUBatch *, - uint, - uint, - uint, - ResourceHandle){}; + DrawGroup &group = group_buf_.get_or_resize(new_group_id); + group.next = cmd.group_first; + group.command_len = 1; + group.front_facing_len = !handle.has_inverted_handedness(); + group.gpu_batch = batch; + + /* Append to list. */ + cmd.group_first = new_group_id; + group_id = new_group_id; + } + else { + DrawGroup &group = group_buf_.get_or_resize(group_id); + group.command_len += 1; + group.front_facing_len += !handle.has_inverted_handedness(); + } + + DrawPrototype &draw = prototype_buf_.get_or_resize(prototype_count_++); + draw.group_id = group_id; + draw.resource_handle = handle.raw; + draw.instance_len = instance_len; + draw.vertex_len = vertex_len; + } - void bind(ResourceIdBuf &){}; + void bind(Vector
&, Vector &, ResourceIdBuf &) + { + /* Compute prefix sum for each multi draw command. */ + uint prefix_sum = 0u; + for (DrawGroup &group : group_buf_) { + group.command_start = prefix_sum; + prefix_sum += group.command_len; + + int batch_inst_len; + /* Now that GPUBatches are guaranteed to be finished, extract their parameters. */ + GPU_batch_draw_parameter_get(group.gpu_batch, &group.vertex_len, &batch_inst_len); + /* Tag group as using index draw (changes indirect drawcall structure). */ + if (group.gpu_batch->elem != nullptr) { + group.vertex_len = -group.vertex_len; + } + /* Instancing attributes are not supported using the new pipeline since we use the base + * instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */ + BLI_assert(batch_inst_len == 1); + UNUSED_VARS_NDEBUG(batch_inst_len); + } + + // GPU_compute_dispatch(resource_id_expand_shader, n, 1, 1); + } }; /** \} */ diff --git a/source/blender/draw/intern/draw_command_shared.hh b/source/blender/draw/intern/draw_command_shared.hh index 9344058038b..4b6c5d6fdf7 100644 --- a/source/blender/draw/intern/draw_command_shared.hh +++ b/source/blender/draw/intern/draw_command_shared.hh @@ -20,63 +20,50 @@ struct RecordingState; * \{ */ /** - * A Command::MultiDraw allow to split the command stream into batch-able chunks of commands with + * A DrawGroup allow to split the command stream into batch-able chunks of commands with * the same render state. */ -struct MultiDraw { - /** Index of next MultiDraw from the same Command::Header. */ +struct DrawGroup { + /** Index of next DrawGroup from the same header. */ uint next; /** Index of the first command after sorting. */ uint command_start; -#if defined(GPU_SHADER) && !defined(GPU_METAL) - /* No support for ushort. */ - uint cmd_len_packed; -# define _cmd_len (cmd_len_packed & 0xFFFFu) -# define _inverted_len (cmd_len_packed >> 16u) -#else - /** - * NOTE(fclem): We have to make room to be able to stick the GPUBatch pointer at the end. 
- */ - /** Number of commands. Needed to issue the draw call. */ - ushort command_len; + /** Total number of commands (including inverted facing). Needed to issue the draw call. */ + uint command_len; /** Number of non inverted scaling commands in this Group. */ - ushort front_facing_len; -#endif + uint front_facing_len; + + /** GPUBatch values to be copied to DrawCommand after sorting (if not overridden). */ - uint vertex_count; - uint instance_count; - uint base_vertex; /** NOTE: (uint)-1 if non indexed draw. */ + int vertex_len; /** NOTE: Negative if using indexed draw. */ + uint _pad0; + #ifdef GPU_SHADER - uint _pad0, _pad1; + uint _pad1, _pad2; #else /* NOTE: Union just to make sure the struct has always the same size on all platform. */ union { /** Needed to create the correct draw call. */ GPUBatch *gpu_batch; - uint _pad0[2]; + uint _pad1[2]; }; - - void execute(RecordingState &state, Span multi_draw_buf, uint command_id) const; #endif }; -BLI_STATIC_ASSERT(sizeof(MultiDraw) == 32, "MultiDraw might not have the same size on GPU and CPU") +BLI_STATIC_ASSERT(sizeof(DrawGroup) == 32, "DrawGroup might not have the same size on GPU and CPU") /** - * Representation of a future draw call inside a MultiDraw. This #DrawDescription is then converted - * into #DrawCommand on GPU after visibility and compaction. Multiple #DrawDescription might get - * merged into the same final #DrawCommand. + * Representation of a future draw call inside a DrawGroup. This #DrawPrototype is then + * converted into #DrawCommand on GPU after visibility and compaction. Multiple + * #DrawPrototype might get merged into the same final #DrawCommand. */ -struct DrawDescription { +struct DrawPrototype { + /* Reference to parent DrawGroup to get the GPUBatch vertex / instance count. */ + uint group_id; + /* Resource handle associated with this call. Also references visibility. */ + uint resource_handle; /* Override of GPUBatch values. (uint)-1 otherwise. 
*/ - uint vertex_first; - uint vertex_count; - uint instance_first; - uint instance_count; - /* Resource ID associated with this call. */ - uint resource_id; - /* Reference to parent MultiDraw to get the GPUBatch vertex / instance count. */ - uint multi_draw_id; + uint vertex_len; + uint instance_len; }; /** \} */ diff --git a/source/blender/draw/intern/draw_debug.cc b/source/blender/draw/intern/draw_debug.cc index ab78db5d913..9cb79d73812 100644 --- a/source/blender/draw/intern/draw_debug.cc +++ b/source/blender/draw/intern/draw_debug.cc @@ -63,26 +63,26 @@ DebugDraw::DebugDraw() void DebugDraw::init() { - cpu_print_buf_.command.v_count = 0; - cpu_print_buf_.command.v_first = 0; - cpu_print_buf_.command.i_count = 1; - cpu_print_buf_.command.i_first = 0; - - cpu_draw_buf_.command.v_count = 0; - cpu_draw_buf_.command.v_first = 0; - cpu_draw_buf_.command.i_count = 1; - cpu_draw_buf_.command.i_first = 0; - - gpu_print_buf_.command.v_count = 0; - gpu_print_buf_.command.v_first = 0; - gpu_print_buf_.command.i_count = 1; - gpu_print_buf_.command.i_first = 0; + cpu_print_buf_.command.vertex_len = 0; + cpu_print_buf_.command.vertex_first = 0; + cpu_print_buf_.command.instance_len = 1; + cpu_print_buf_.command.instance_first_array = 0; + + cpu_draw_buf_.command.vertex_len = 0; + cpu_draw_buf_.command.vertex_first = 0; + cpu_draw_buf_.command.instance_len = 1; + cpu_draw_buf_.command.instance_first_array = 0; + + gpu_print_buf_.command.vertex_len = 0; + gpu_print_buf_.command.vertex_first = 0; + gpu_print_buf_.command.instance_len = 1; + gpu_print_buf_.command.instance_first_array = 0; gpu_print_buf_used = false; - gpu_draw_buf_.command.v_count = 0; - gpu_draw_buf_.command.v_first = 0; - gpu_draw_buf_.command.i_count = 1; - gpu_draw_buf_.command.i_first = 0; + gpu_draw_buf_.command.vertex_len = 0; + gpu_draw_buf_.command.vertex_first = 0; + gpu_draw_buf_.command.instance_len = 1; + gpu_draw_buf_.command.instance_first_array = 0; gpu_draw_buf_used = false; modelmat_reset(); 
@@ -323,11 +323,11 @@ template<> void DebugDraw::print_value(const uint4 &value) void DebugDraw::draw_line(float3 v1, float3 v2, uint color) { DebugDrawBuf &buf = cpu_draw_buf_; - uint index = buf.command.v_count; + uint index = buf.command.vertex_len; if (index + 2 < DRW_DEBUG_DRAW_VERT_MAX) { buf.verts[index + 0] = vert_pack(model_mat_ * v1, color); buf.verts[index + 1] = vert_pack(model_mat_ * v2, color); - buf.command.v_count += 2; + buf.command.vertex_len += 2; } } @@ -356,7 +356,7 @@ DRWDebugVert DebugDraw::vert_pack(float3 pos, uint color) void DebugDraw::print_newline() { print_col_ = 0u; - print_row_ = ++cpu_print_buf_.command.i_first; + print_row_ = ++cpu_print_buf_.command.instance_first_array; } void DebugDraw::print_string_start(uint len) @@ -406,7 +406,7 @@ void DebugDraw::print_char4(uint data) break; } /* NOTE: Do not skip the header manually like in GPU. */ - uint cursor = cpu_print_buf_.command.v_count++; + uint cursor = cpu_print_buf_.command.vertex_len++; if (cursor < DRW_DEBUG_PRINT_MAX) { /* For future usage. 
(i.e: Color) */ uint flags = 0u; @@ -504,7 +504,7 @@ void DebugDraw::print_value_uint(uint value, void DebugDraw::display_lines() { - if (cpu_draw_buf_.command.v_count == 0 && gpu_draw_buf_used == false) { + if (cpu_draw_buf_.command.vertex_len == 0 && gpu_draw_buf_used == false) { return; } GPU_debug_group_begin("Lines"); @@ -541,7 +541,7 @@ void DebugDraw::display_lines() void DebugDraw::display_prints() { - if (cpu_print_buf_.command.v_count == 0 && gpu_print_buf_used == false) { + if (cpu_print_buf_.command.vertex_len == 0 && gpu_print_buf_used == false) { return; } GPU_debug_group_begin("Prints"); diff --git a/source/blender/draw/intern/draw_manager.cc b/source/blender/draw/intern/draw_manager.cc index 1ddeef1fe7f..c180730cc0f 100644 --- a/source/blender/draw/intern/draw_manager.cc +++ b/source/blender/draw/intern/draw_manager.cc @@ -48,13 +48,13 @@ void Manager::end_sync() GPU_compute_dispatch(shader, thread_groups, 1, 1); } -void Manager::submit(const PassSimple &pass) +void Manager::submit(PassSimple &pass) { command::RecordingState state; pass.submit(state); } -void Manager::submit(const PassMain &pass, View &view) +void Manager::submit(PassMain &pass, View &view) { view.bind(); @@ -66,9 +66,9 @@ void Manager::submit(const PassMain &pass, View &view) command::RecordingState state; - pass.draw_commands_buf_.bind(resource_id_buf); + pass.draw_commands_buf_.bind(pass.headers_, pass.commands_, resource_id_buf); - // GPU_storagebuf_bind(resource_id_buf, DRW_COMMAND_SLOT); + GPU_storagebuf_bind(resource_id_buf, DRW_COMMAND_SLOT); pass.submit(state); } diff --git a/source/blender/draw/intern/draw_manager.hh b/source/blender/draw/intern/draw_manager.hh index 3499a038eaa..b8f53eacbf8 100644 --- a/source/blender/draw/intern/draw_manager.hh +++ b/source/blender/draw/intern/draw_manager.hh @@ -62,8 +62,8 @@ class Manager { * Submit a pass for drawing. All resource reference will be dereferenced and commands will be * sent to GPU. 
*/ - void submit(const PassSimple &pass); - void submit(const PassMain &pass, View &view); + void submit(PassSimple &pass); + void submit(PassMain &pass, View &view); private: /** diff --git a/source/blender/draw/intern/draw_pass.hh b/source/blender/draw/intern/draw_pass.hh index 266ad0b3681..86889cd679b 100644 --- a/source/blender/draw/intern/draw_pass.hh +++ b/source/blender/draw/intern/draw_pass.hh @@ -311,7 +311,7 @@ using PassSimple = detail::Pass; * IMPORTANT: To be used only for passes containing lots of draw calls since it has a potentially * high overhead due to batching and culling optimizations. */ -using PassMain = detail::Pass; +using PassMain = detail::Pass; /** \} */ @@ -372,9 +372,6 @@ template void PassBase::submit(command::RecordingState &state) const case Type::SubPass: sub_passes_[header.command_index].submit(state); break; - case Type::MultiDraw: - /* TODO */ - break; default: commands_[header.command_index].execute(header.type, state); break; @@ -396,9 +393,6 @@ template std::string PassBase::serialize(std::string line_prefix) co case Type::SubPass: ss << sub_passes_[header.command_index].serialize(line_prefix); break; - case Type::MultiDraw: - /* TODO */ - break; default: ss << line_prefix << commands_[header.command_index].serialize(header.type) << std::endl; break; diff --git a/source/blender/draw/intern/draw_shader_shared.h b/source/blender/draw/intern/draw_shader_shared.h index 27ef8e6efe8..257a43f8266 100644 --- a/source/blender/draw/intern/draw_shader_shared.h +++ b/source/blender/draw/intern/draw_shader_shared.h @@ -191,20 +191,24 @@ BLI_STATIC_ASSERT_ALIGN(CurvesInfos, 16) struct DrawCommand { /* TODO(fclem): Rename */ - uint v_count; - uint i_count; - uint v_first; + uint vertex_len; + uint instance_len; + uint vertex_first; +#if defined(GPU_SHADER) uint base_index; /* NOTE: base_index is i_first for non-indexed draw-calls. 
*/ -#define _instance_first_array base_index - uint i_first; /* TODO(fclem): Rename to instance_first_indexed */ +# define _instance_first_array base_index +#else + union { + uint base_index; + /* Use this instead of instance_first_indexed for non indexed draw calls. */ + uint instance_first_array; + }; +#endif - /** Number of instances requested by the engine for this draw. */ - uint engine_instance_count; - /** Access to object / component resources (matrices, object infos, object attributes). */ - uint resource_id; + uint instance_first_indexed; - uint _pad0; + uint _pad0, _pad1, _pad2; }; BLI_STATIC_ASSERT_ALIGN(DrawCommand, 16) -- cgit v1.2.3