Add serialize for multidraw

author: Clément Foucault <foucault.clem@gmail.com> 2022-08-24 22:15:38 +0300
committer: Clément Foucault <foucault.clem@gmail.com> 2022-08-24 22:29:07 +0300
commit: e8fddb326ea7731d1e3b9af70f5372ac932c5a38 (patch)
tree: c226b2d51dc7465e3aaa2934de95e6fa94c74568
parent: e88a53c798b41d7ee4ea165a7d33911565151cfb (diff)
5 files changed, 192 insertions, 64 deletions
diff --git a/source/blender/draw/intern/draw_command.cc b/source/blender/draw/intern/draw_command.cc
index fb1a8a8dc2a..453898f4e2d 100644
--- a/source/blender/draw/intern/draw_command.cc
+++ b/source/blender/draw/intern/draw_command.cc
@@ -70,6 +70,39 @@ void Draw::execute(RecordingState &state) const
   GPU_batch_draw_advanced(batch, vertex_first, vertex_len, 0, instance_len);
 }
 
+void DrawMulti::execute(RecordingState &state) const
+{
+  DrawMultiBuf::DrawCommandBuf &indirect_buf = multi_draw_buf->command_buf_;
+  DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_;
+
+  uint group_index = this->group_first;
+  while (group_index != (uint)-1) {
+    const DrawGroup &grp = groups[group_index];
+
+    /** IMPORTANT: We cannot use grp.gpu_batch here since it has been overridden by the atomic
+     * counters. Use the DrawMulti.batch instead. */
+
+    GPU_batch_set_shader(batch, state.shader);
+
+    constexpr intptr_t stride = sizeof(DrawCommand);
+    /* We have 2 indirect commands reserved per draw group. */
+    intptr_t offset = stride * group_index * 2;
+
+    /* Draw negatively scaled geometry first. */
+    if (grp.len - grp.front_facing_len > 0) {
+      state.front_facing_set(false);
+      GPU_batch_draw_indirect(batch, indirect_buf, offset);
+    }
+
+    if (grp.front_facing_len > 0) {
+      state.front_facing_set(true);
+      GPU_batch_draw_indirect(batch, indirect_buf, offset + stride);
+    }
+
+    group_index = grp.next;
+  }
+}
+
 void DrawIndirect::execute(RecordingState &state) const
 {
   state.front_facing_set(handle.has_inverted_handedness());
@@ -168,37 +201,6 @@ void StencilSet::execute() const
   GPU_stencil_reference_set(reference);
 }
 
-void DrawMulti::execute(RecordingState &state) const
-{
-  DrawMultiBuf::DrawCommandBuf &indirect_buf = multi_draw_buf->command_buf_;
-  DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_;
-
-  uint group_index = this->group_first;
-  while (group_index != (uint)-1) {
-    const DrawGroup &grp = groups[group_index];
-
-    GPU_batch_set_shader(grp.gpu_batch, state.shader);
-
-    constexpr intptr_t stride = sizeof(DrawCommand);
-    intptr_t offset = stride * grp.command_start;
-
-    /* Draw negatively scaled geometry first. */
-    uint back_facing_len = grp.command_len - grp.front_facing_len;
-    if (back_facing_len > 0) {
-      state.front_facing_set(false);
-      GPU_batch_draw_indirect(grp.gpu_batch, indirect_buf, offset);
-      offset += stride * back_facing_len;
-    }
-
-    if (grp.front_facing_len > 0) {
-      state.front_facing_set(true);
-      GPU_batch_draw_indirect(grp.gpu_batch, indirect_buf, offset);
-    }
-
-    group_index = grp.next;
-  }
-}
-
 /** \} */
 
 /* -------------------------------------------------------------------- */
@@ -335,6 +337,70 @@ std::string Draw::serialize() const
          ")";
 }
 
+std::string DrawMulti::serialize(std::string line_prefix) const
+{
+  DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_;
+
+  MutableSpan<DrawPrototype> prototypes(multi_draw_buf->prototype_buf_.data(),
+                                        multi_draw_buf->prototype_count_);
+
+  /* This emulates the GPU sorting but without the unstable draw order. */
+  std::sort(
+      prototypes.begin(), prototypes.end(), [](const DrawPrototype &a, const DrawPrototype &b) {
+        return (a.group_id < b.group_id) ||
+               (a.group_id == b.group_id && a.resource_handle > b.resource_handle);
+      });
+
+  /* Compute prefix sum to have correct offsets. */
+  uint prefix_sum = 0u;
+  for (DrawGroup &group : groups) {
+    group.start = prefix_sum;
+    prefix_sum += group.front_proto_len + group.back_proto_len;
+  }
+
+  std::stringstream ss;
+
+  uint group_len = 0;
+  uint group_index = this->group_first;
+  while (group_index != (uint)-1) {
+    const DrawGroup &grp = groups[group_index];
+
+    ss << std::endl << line_prefix << "  .group(id=" << group_index << ", len=" << grp.len << ")";
+
+    intptr_t offset = grp.start;
+
+    if (grp.back_proto_len > 0) {
+      for (DrawPrototype &proto : prototypes.slice({offset, grp.back_proto_len})) {
+        // BLI_assert(proto.group_id == group_index);
+        ResourceHandle handle(proto.resource_handle);
+        // BLI_assert(handle.has_inverted_handedness());
+        ss << std::endl
+           << line_prefix << "    .proto(instance_len=" << std::to_string(proto.instance_len)
+           << ", resource_id=" << std::to_string(handle.resource_index()) << ", back_face)";
+      }
+      offset += grp.back_proto_len;
+    }
+
+    if (grp.front_proto_len > 0) {
+      for (DrawPrototype &proto : prototypes.slice({offset, grp.front_proto_len})) {
+        // BLI_assert(proto.group_id == group_index);
+        ResourceHandle handle(proto.resource_handle);
+        // BLI_assert(!handle.has_inverted_handedness());
+        ss << std::endl
+           << line_prefix << "    .proto(instance_len=" << std::to_string(proto.instance_len)
+           << ", resource_id=" << std::to_string(handle.resource_index()) << ", front_face)";
+      }
+    }
+
+    group_index = grp.next;
+    group_len++;
+  }
+
+  ss << std::endl;
+
+  return line_prefix + ".draw_multi(" + std::to_string(group_len) + ")" + ss.str();
+}
+
 std::string DrawIndirect::serialize() const
 {
   return std::string(".draw_indirect()");
diff --git a/source/blender/draw/intern/draw_command.hh b/source/blender/draw/intern/draw_command.hh
index 123a8fa4bcd..39c66d2ad6f 100644
--- a/source/blender/draw/intern/draw_command.hh
+++ b/source/blender/draw/intern/draw_command.hh
@@ -236,12 +236,13 @@ struct Draw {
 };
 
 struct DrawMulti {
+  GPUBatch *batch;
   DrawMultiBuf *multi_draw_buf;
   uint group_first;
   uint uuid;
 
   void execute(RecordingState &state) const;
-  std::string serialize() const;
+  std::string serialize(std::string line_prefix) const;
 };
 
 struct DrawIndirect {
@@ -460,7 +461,7 @@ class DrawMultiBuf {
   uint header_id_counter_ = 0;
   /** Number of groups inside group_buf_. */
   uint group_count_ = 0;
-  /** Number of groups inside group_buf_. */
+  /** Number of prototype commands inside prototype_buf_. */
   uint prototype_count_ = 0;
 
  public:
@@ -480,52 +481,59 @@ class DrawMultiBuf {
                    ResourceHandle handle)
   {
     /* Unsupported for now. Use PassSimple. */
-    BLI_assert(vertex_first == 0);
+    BLI_assert(vertex_first == 0 || vertex_first == -1);
+    BLI_assert(vertex_len == -1);
 
     /* If there was some state changes since previous call, we have to create another command. */
     if (headers.last().type != Type::DrawMulti) {
       uint index = commands.append_and_get_index({});
       headers.append({Type::DrawMulti, index});
-      commands[index].draw_multi = {this, (uint)-1, header_id_counter_++};
+      commands[index].draw_multi = {batch, this, (uint)-1, header_id_counter_++};
     }
 
     DrawMulti &cmd = commands.last().draw_multi;
 
-    uint group_id = group_ids_.lookup_default(DrawGroupKey(cmd.uuid, batch), (uint)-1);
+    uint &group_id = group_ids_.lookup_or_add(DrawGroupKey(cmd.uuid, batch), (uint)-1);
+
+    bool inverted = handle.has_inverted_handedness();
 
     if (group_id == (uint)-1) {
       uint new_group_id = group_count_++;
 
       DrawGroup &group = group_buf_.get_or_resize(new_group_id);
       group.next = cmd.group_first;
-      group.command_len = 1;
-      group.front_facing_len = !handle.has_inverted_handedness();
+      group.len = instance_len;
+      group.front_facing_len = inverted ? 0 : instance_len;
       group.gpu_batch = batch;
-
+      group.front_proto_len = 0;
+      group.back_proto_len = 0;
+      /* For serialization only. */
+      (inverted ? group.back_proto_len : group.front_proto_len)++;
       /* Append to list. */
       cmd.group_first = new_group_id;
       group_id = new_group_id;
     }
     else {
-      DrawGroup &group = group_buf_.get_or_resize(group_id);
-      group.command_len += 1;
-      group.front_facing_len += !handle.has_inverted_handedness();
+      DrawGroup &group = group_buf_[group_id];
+      group.len += instance_len;
+      group.front_facing_len += inverted ? 0 : instance_len;
+      /* For serialization only. */
+      (inverted ? group.back_proto_len : group.front_proto_len)++;
     }
 
     DrawPrototype &draw = prototype_buf_.get_or_resize(prototype_count_++);
     draw.group_id = group_id;
     draw.resource_handle = handle.raw;
     draw.instance_len = instance_len;
-    draw.vertex_len = vertex_len;
   }
 
-  void bind(Vector<Header> &, Vector<Undetermined> &, ResourceIdBuf &)
+  void bind(Vector<Header> &, Vector<Undetermined> &, ResourceIdBuf &resource_id_buf)
   {
-    /* Compute prefix sum for each multi draw command. */
     uint prefix_sum = 0u;
     for (DrawGroup &group : group_buf_) {
-      group.command_start = prefix_sum;
-      prefix_sum += group.command_len;
+      /* Compute prefix sum of all instances of the previous groups. */
+      group.start = prefix_sum;
+      prefix_sum += group.len;
 
       int batch_inst_len;
       /* Now that GPUBatches are guaranteed to be finished, extract their parameters. */
@@ -538,8 +546,15 @@ class DrawMultiBuf {
        * instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */
       BLI_assert(batch_inst_len == 1);
       UNUSED_VARS_NDEBUG(batch_inst_len);
+
+      /* Now that we have the batch info, we can set the counters to 0. */
+      group.total_counter = group.front_facing_counter = group.back_facing_counter = 0;
     }
 
+    group_buf_.push_update();
+    /* Allocate enough for the expansion pass. */
+    resource_id_buf.get_or_resize(prefix_sum);
+
     // GPU_compute_dispatch(resource_id_expand_shader, n, 1, 1);
   }
 };
diff --git a/source/blender/draw/intern/draw_command_shared.hh b/source/blender/draw/intern/draw_command_shared.hh
index 4b6c5d6fdf7..f73e38cb8cc 100644
--- a/source/blender/draw/intern/draw_command_shared.hh
+++ b/source/blender/draw/intern/draw_command_shared.hh
@@ -27,25 +27,33 @@ struct DrawGroup {
   /** Index of next DrawGroup from the same header. */
   uint next;
 
-  /** Index of the first command after sorting. */
-  uint command_start;
-  /** Total number of commands (including inverted facing). Needed to issue the draw call. */
-  uint command_len;
-  /** Number of non inverted scaling commands in this Group. */
+  /** Index of the first instance after sorting. */
+  uint start;
+  /** Total number of instances (including inverted facing). Needed to issue the draw call. */
+  uint len;
+  /** Number of non inverted scaling instances in this Group. */
   uint front_facing_len;
 
-  /** GPUBatch values to be copied to DrawCommand after sorting (if not overriden). */
-  int vertex_len; /** NOTE: Negative if using indexed draw. */
-  uint _pad0;
-
-#ifdef GPU_SHADER
-  uint _pad1 _pad2;
-#else
+#ifndef GPU_SHADER
   /* NOTE: Union just to make sure the struct has always the same size on all platform. */
   union {
-    /** Needed to create the correct draw call. */
-    GPUBatch *gpu_batch;
-    uint _pad1[2];
+    struct {
+      /** Needed to create the correct draw call. Deleted before upload. */
+      GPUBatch *gpu_batch;
+      /** For debugging only */
+      uint front_proto_len;
+      uint back_proto_len;
+    };
+    struct {
+#endif
+      /** GPUBatch values to be copied to DrawCommand after sorting (if not overridden). */
+      int vertex_len; /** NOTE: Negative if using indexed draw. */
+      /** Atomic counters used during command sorting. */
+      uint total_counter;
+      uint front_facing_counter;
+      uint back_facing_counter;
+#ifndef GPU_SHADER
+    };
   };
 #endif
 };
@@ -61,9 +69,9 @@ struct DrawPrototype {
   uint group_id;
   /* Resource handle associated with this call. Also reference visibility. */
   uint resource_handle;
-  /* Override of GPUBatch values. (uint)-1 otherwise. */
-  uint vertex_len;
+  /* Number of instances. */
   uint instance_len;
+  uint _pad0;
 };
 
 /** \} */
diff --git a/source/blender/draw/intern/draw_pass.hh b/source/blender/draw/intern/draw_pass.hh
index 5b9cc352521..e13c600e3a5 100644
--- a/source/blender/draw/intern/draw_pass.hh
+++ b/source/blender/draw/intern/draw_pass.hh
@@ -442,6 +442,9 @@ template<class T> std::string PassBase<T>::serialize(std::string line_prefix) co
       case Type::Draw:
         ss << line_prefix << commands_[header.index].draw.serialize() << std::endl;
         break;
+      case Type::DrawMulti:
+        ss << commands_[header.index].draw_multi.serialize(line_prefix);
+        break;
       case Type::DrawIndirect:
         ss << line_prefix << commands_[header.index].draw_indirect.serialize() << std::endl;
         break;
diff --git a/source/blender/draw/tests/draw_pass_test.cc b/source/blender/draw/tests/draw_pass_test.cc
index e56a09d2757..30512159584 100644
--- a/source/blender/draw/tests/draw_pass_test.cc
+++ b/source/blender/draw/tests/draw_pass_test.cc
@@ -132,4 +132,40 @@ static void test_draw_pass_sub_ordering()
 }
 DRAW_TEST(draw_pass_sub_ordering)
 
+static void test_draw_pass_multi_draw()
+{
+  PassMain pass = {"test.multi_draw"};
+  pass.init();
+  pass.shader_set(GPU_shader_get_builtin_shader(GPU_SHADER_3D_IMAGE_MODULATE_ALPHA));
+  /* Each draw procedural type uses a different batch. Groups are drawn in reverse order. */
+  pass.draw_procedural(GPU_PRIM_TRIS, 1, -1, -1, {1});
+  pass.draw_procedural(GPU_PRIM_POINTS, 4, -1, -1, {2});
+  pass.draw_procedural(GPU_PRIM_TRIS, 2, -1, -1, {3});
+  pass.draw_procedural(GPU_PRIM_POINTS, 5, -1, -1, ResourceHandle(4, true));
+  pass.draw_procedural(GPU_PRIM_LINES, 1, -1, -1, {5});
+  pass.draw_procedural(GPU_PRIM_POINTS, 6, -1, -1, {5});
+  pass.draw_procedural(GPU_PRIM_TRIS, 3, -1, -1, {6});
+
+  std::string result = pass.serialize();
+  std::stringstream expected;
+  expected << ".test.multi_draw" << std::endl;
+  expected << "  .shader_bind(gpu_shader_3D_image_modulate_alpha)" << std::endl;
+  expected << "  .draw_multi(3)" << std::endl;
+  expected << "    .group(id=2, len=1)" << std::endl;
+  expected << "      .proto(instance_len=1, resource_id=5, front_face)" << std::endl;
+  expected << "    .group(id=1, len=15)" << std::endl;
+  expected << "      .proto(instance_len=5, resource_id=4, back_face)" << std::endl;
+  expected << "      .proto(instance_len=6, resource_id=5, front_face)" << std::endl;
+  expected << "      .proto(instance_len=4, resource_id=2, front_face)" << std::endl;
+  expected << "    .group(id=0, len=6)" << std::endl;
+  expected << "      .proto(instance_len=3, resource_id=6, front_face)" << std::endl;
+  expected << "      .proto(instance_len=2, resource_id=3, front_face)" << std::endl;
+  expected << "      .proto(instance_len=1, resource_id=1, front_face)" << std::endl;
+
+  EXPECT_EQ(result, expected.str());
+
+  DRW_shape_cache_free();
+}
+DRAW_TEST(draw_pass_multi_draw)
+
 }  // namespace blender::draw
 \ No newline at end of file
author	Clément Foucault <foucault.clem@gmail.com>	2022-08-24 22:15:38 +0300
committer	Clément Foucault <foucault.clem@gmail.com>	2022-08-24 22:29:07 +0300
commit	e8fddb326ea7731d1e3b9af70f5372ac932c5a38 (patch)
tree	c226b2d51dc7465e3aaa2934de95e6fa94c74568
parent	e88a53c798b41d7ee4ea165a7d33911565151cfb (diff)