1 files changed, 284 insertions, 0 deletions
diff --git a/source/blender/gpu/metal/mtl_drawlist.mm b/source/blender/gpu/metal/mtl_drawlist.mm
new file mode 100644
index 00000000000..99194d2b72c
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_drawlist.mm
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Implementation of the Multi Draw Indirect draw-list interface using Metal.
+ * Metal has no host-side multi-draw, so queued draws are issued one by one with a single batch bind.
+ */
+
+#include "BLI_assert.h"
+
+#include "GPU_batch.h"
+#include "mtl_common.hh"
+#include "mtl_drawlist.hh"
+#include "mtl_primitive.hh"
+
+using namespace blender::gpu;
+
+namespace blender::gpu {
+
+/* Indirect draw call structure for reference. */
+/* MTLDrawPrimitivesIndirectArguments --
+ * https://developer.apple.com/documentation/metal/mtldrawprimitivesindirectarguments?language=objc
+ */
+/* struct MTLDrawPrimitivesIndirectArguments {
+ * uint32_t vertexCount;
+ * uint32_t instanceCount;
+ * uint32_t vertexStart;
+ * uint32_t baseInstance;
+};*/
+
+/* MTLDrawIndexedPrimitivesIndirectArguments --
+ * https://developer.apple.com/documentation/metal/mtldrawindexedprimitivesindirectarguments?language=objc
+ */
+/* struct MTLDrawIndexedPrimitivesIndirectArguments {
+ * uint32_t indexCount;
+ * uint32_t instanceCount;
+ * uint32_t indexStart;
+ * uint32_t baseVertex;
+ * uint32_t baseInstance;
+};*/
+
+#define MDI_ENABLED (buffer_size_ != 0)       /* Command storage has a non-zero size. */
+#define MDI_DISABLED (buffer_size_ == 0)      /* No command storage: append() draws immediately. */
+#define MDI_INDEXED (base_index_ != UINT_MAX) /* UINT_MAX marks a cached batch without indices. */
+
+/**
+ * Create a draw list with host storage for `length` indexed draw commands.
+ *
+ * \param length: Number of command slots to reserve; expected to be positive (debug-asserted).
+ */
+MTLDrawList::MTLDrawList(int length)
+{
+  BLI_assert(length > 0);
+
+  /* Every slot is sized for the indexed argument layout, so either command kind fits. */
+  buffer_size_ = length * sizeof(MTLDrawIndexedPrimitivesIndirectArguments);
+  data_ = (void *)MEM_mallocN(buffer_size_, __func__);
+
+  /* Nothing has been recorded yet and no batch is being tracked. */
+  batch_ = nullptr;
+  base_index_ = 0;
+  command_len_ = 0;
+  command_offset_ = 0;
+  data_size_ = 0;
+}
+
+/** Release the host-side command storage owned by the list, if it was allocated. */
+MTLDrawList::~MTLDrawList()
+{
+  if (data_ == nullptr) {
+    /* Nothing was allocated, so there is nothing to free. */
+    return;
+  }
+  MEM_freeN(data_);
+  data_ = nullptr;
+}
+
+/**
+ * Prepare the list for recording: make sure host-side command storage exists and reset the
+ * recording state. Called lazily from #append() when `data_` is null.
+ */
+void MTLDrawList::init()
+{
+  /* An active context is expected; this is only verified in debug builds. */
+  MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+  BLI_assert(ctx);
+  BLI_assert(MDI_ENABLED);
+  UNUSED_VARS_NDEBUG(ctx);
+
+  /* Allocate the command storage when it is missing. Without this, the non-null check below
+   * could never hold for a caller that invokes this function because `data_` is null. */
+  if (data_ == nullptr) {
+    data_ = (void *)MEM_mallocN(buffer_size_, __func__);
+  }
+  BLI_assert(data_);
+
+  /* Start from an empty command list with no batch tracked. */
+  batch_ = nullptr;
+  command_len_ = 0;
+  command_offset_ = 0;
+}
+
+/**
+ * Queue one instanced draw of `gpu_batch`, or draw it immediately when it cannot be queued.
+ *
+ * Commands for the same batch are accumulated in `data_` and issued together by #submit(), so
+ * the batch only needs to be bound once. Appending a different batch flushes what was queued.
+ *
+ * \param gpu_batch: Batch to draw. Its shader is dereferenced unconditionally, so it must be set.
+ * \param i_first: First instance, recorded as `baseInstance`.
+ * \param i_count: Number of instances, recorded as `instanceCount`.
+ */
+void MTLDrawList::append(GPUBatch *gpu_batch, int i_first, int i_count)
+{
+  /* Batches using SSBO vertex fetch or needing topology emulation cannot be recorded, and
+   * nothing can be recorded when there is no command storage: those are drawn right away. */
+  MTLShader *shader = static_cast<MTLShader *>(unwrap(gpu_batch->shader));
+  bool requires_ssbo = (shader->get_uses_ssbo_vertex_fetch());
+  bool requires_emulation = mtl_needs_topology_emulation(gpu_batch->prim_type);
+  if (MDI_DISABLED || requires_ssbo || requires_emulation) {
+    /* Issue anything already queued first, so draws are executed in the order they were
+     * appended. This is a no-op when the queue is empty. */
+    this->submit();
+    GPU_batch_draw_advanced(gpu_batch, 0, 0, i_first, i_count);
+    return;
+  }
+
+  if (data_ == nullptr) {
+    this->init();
+  }
+  if (data_ == nullptr) {
+    /* No storage to record into (nothing can be queued either): draw directly rather than
+     * writing a command through a null pointer. */
+    GPU_batch_draw_advanced(gpu_batch, 0, 0, i_first, i_count);
+    return;
+  }
+
+  MTLBatch *mtl_batch = static_cast<MTLBatch *>(gpu_batch);
+  BLI_assert(mtl_batch);
+  if (mtl_batch != batch_) {
+    /* Commands recorded so far belong to the previous batch: issue them before switching. */
+    this->submit();
+
+    /* Begin new batch. */
+    batch_ = mtl_batch;
+
+    /* Cache the draw range of the new batch. `base_index_` doubles as the "is indexed" flag:
+     * it is set to UINT_MAX when the batch has no index buffer. */
+    MTLIndexBuf *el = batch_->elem_();
+    base_index_ = el ? el->index_base_ : UINT_MAX;
+    v_first_ = el ? el->index_start_ : 0;
+    v_count_ = el ? el->index_len_ : batch_->verts_(0)->vertex_len;
+  }
+
+  if (v_count_ == 0) {
+    /* Nothing to draw. */
+    return;
+  }
+
+  /* Write the command into the next free slot, using the layout matching the batch kind. */
+  void *slot = (char *)data_ + command_offset_;
+  size_t command_size = 0;
+  if (MDI_INDEXED) {
+    MTLDrawIndexedPrimitivesIndirectArguments *cmd =
+        static_cast<MTLDrawIndexedPrimitivesIndirectArguments *>(slot);
+    cmd->indexStart = v_first_;
+    cmd->indexCount = v_count_;
+    cmd->instanceCount = i_count;
+    cmd->baseVertex = base_index_;
+    cmd->baseInstance = i_first;
+    command_size = sizeof(MTLDrawIndexedPrimitivesIndirectArguments);
+  }
+  else {
+    MTLDrawPrimitivesIndirectArguments *cmd = static_cast<MTLDrawPrimitivesIndirectArguments *>(
+        slot);
+    cmd->vertexStart = v_first_;
+    cmd->vertexCount = v_count_;
+    cmd->instanceCount = i_count;
+    cmd->baseInstance = i_first;
+    command_size = sizeof(MTLDrawPrimitivesIndirectArguments);
+  }
+
+  command_offset_ += command_size;
+  command_len_++;
+
+  /* Flush now if the storage cannot hold one more command of this size. */
+  if (command_offset_ + command_size > buffer_size_) {
+    this->submit();
+  }
+}
+
+/**
+ * Issue every queued draw command of the current batch, then reset the list for new recording.
+ *
+ * Metal does not support MDI from the host side, but we still benefit from only executing the
+ * batch bind a single time, rather than per-draw.
+ * NOTE(Metal): Consider using #MTLIndirectCommandBuffer to achieve similar behavior.
+ *
+ * Does nothing when no command is queued. Otherwise the queued commands are always discarded on
+ * return, including when they could not be issued.
+ */
+void MTLDrawList::submit()
+{
+  if (command_len_ == 0) {
+    return;
+  }
+
+  /* Something's wrong if we get here without MDI support. */
+  BLI_assert(MDI_ENABLED);
+  BLI_assert(data_);
+
+  /* Verify context. */
+  MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+  BLI_assert(ctx);
+
+  /* Forget the queued commands and the batch they belong to. This has to run on the failure
+   * paths as well: #append() only checks for remaining space after this function was given the
+   * chance to rewind `command_offset_`, so stale state would let it write past `data_`. */
+  auto reset_recording = [this]() {
+    command_len_ = 0;
+    command_offset_ = 0;
+    /* Avoid keeping reference to the batch. */
+    batch_ = nullptr;
+  };
+
+  /* Batches using SSBO vertex fetch are drawn immediately by #append() and are never expected
+   * in the queue. */
+  MTLShader *shader = static_cast<MTLShader *>(unwrap(batch_->shader));
+  if (shader->get_uses_ssbo_vertex_fetch()) {
+    BLI_assert(false);
+    reset_recording();
+    return;
+  }
+
+  /* Bind Batch to setup render pipeline state. */
+  id<MTLRenderCommandEncoder> rec = batch_->bind(0, 0, 0, 0);
+  if (!rec) {
+    BLI_assert_msg(false, "A RenderCommandEncoder should always be available!\n");
+    reset_recording();
+    return;
+  }
+
+  /* Common properties. */
+  MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(batch_->prim_type);
+
+  /* Metal doesn't support host-side MDI -- singular indirect draw calls are supported, but
+   * multi-draw is not -- so the recorded commands are replayed one by one.
+   * TODO(Metal): Consider using #IndirectCommandBuffers to provide similar behavior. */
+  if (MDI_INDEXED) {
+    MTLDrawIndexedPrimitivesIndirectArguments *cmd =
+        (MTLDrawIndexedPrimitivesIndirectArguments *)data_;
+    MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(
+        reinterpret_cast<IndexBuf *>(batch_->elem));
+    BLI_assert(mtl_elem);
+    MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_);
+    /* Byte offset of the first index inside the index buffer. */
+    uint32_t index_size = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 2 : 4;
+    uint32_t v_first_ofs = (mtl_elem->index_start_ * index_size);
+    /* All queued commands share one batch, so the first command's count is used for all. */
+    uint32_t index_count = cmd->indexCount;
+
+    /* Fetch index buffer. May return an index buffer of a differing format,
+     * if index buffer optimization is used. In these cases, mtl_prim_type and
+     * index_count get updated with the new properties. */
+    GPUPrimType final_prim_type = batch_->prim_type;
+    id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count);
+    BLI_assert(index_buffer != nil);
+
+    /* Final primitive type. */
+    mtl_prim_type = gpu_prim_type_to_metal(final_prim_type);
+
+    if (index_buffer != nil) {
+
+      /* Set depth stencil state (requires knowledge of primitive type). */
+      ctx->ensure_depth_stencil_state(mtl_prim_type);
+
+      for (int i = 0; i < command_len_; i++, cmd++) {
+        [rec drawIndexedPrimitives:mtl_prim_type
+                        indexCount:index_count
+                         indexType:index_type
+                       indexBuffer:index_buffer
+                 indexBufferOffset:v_first_ofs
+                     instanceCount:cmd->instanceCount
+                        baseVertex:cmd->baseVertex
+                      baseInstance:cmd->baseInstance];
+        ctx->main_command_buffer.register_draw_counters(cmd->indexCount * cmd->instanceCount);
+      }
+    }
+    else {
+      BLI_assert_msg(false, "Index buffer does not have backing Metal buffer");
+    }
+  }
+  else {
+    MTLDrawPrimitivesIndirectArguments *cmd = (MTLDrawPrimitivesIndirectArguments *)data_;
+
+    /* Verify if topology emulation is required. */
+    if (mtl_needs_topology_emulation(batch_->prim_type)) {
+      BLI_assert_msg(false, "topology emulation cases should use fallback.");
+    }
+    else {
+
+      /* Set depth stencil state (requires knowledge of primitive type). */
+      ctx->ensure_depth_stencil_state(mtl_prim_type);
+
+      for (int i = 0; i < command_len_; i++, cmd++) {
+        [rec drawPrimitives:mtl_prim_type
+                vertexStart:cmd->vertexStart
+                vertexCount:cmd->vertexCount
+              instanceCount:cmd->instanceCount
+               baseInstance:cmd->baseInstance];
+        ctx->main_command_buffer.register_draw_counters(cmd->vertexCount * cmd->instanceCount);
+      }
+    }
+  }
+
+  /* Unbind batch. */
+  batch_->unbind();
+
+  /* Reset command offsets and drop the batch reference. */
+  reset_recording();
+}
+
+}  // namespace blender::gpu