Diffstat (limited to 'source/blender/gpu/metal')
-rw-r--r--  source/blender/gpu/metal/mtl_backend.mm              |    3
-rw-r--r--  source/blender/gpu/metal/mtl_batch.hh                |  115
-rw-r--r--  source/blender/gpu/metal/mtl_batch.mm                |  998
-rw-r--r--  source/blender/gpu/metal/mtl_context.mm              |   10
-rw-r--r--  source/blender/gpu/metal/mtl_drawlist.hh             |   58
-rw-r--r--  source/blender/gpu/metal/mtl_drawlist.mm             |  284
-rw-r--r--  source/blender/gpu/metal/mtl_immediate.mm            |    3
-rw-r--r--  source/blender/gpu/metal/mtl_pso_descriptor_state.hh |   13
-rw-r--r--  source/blender/gpu/metal/mtl_shader_generator.hh     |    2
-rw-r--r--  source/blender/gpu/metal/mtl_shader_interface.mm     |   10
-rw-r--r--  source/blender/gpu/metal/mtl_texture.hh              |    4
-rw-r--r--  source/blender/gpu/metal/mtl_texture.mm              |   97
-rw-r--r--  source/blender/gpu/metal/mtl_texture_util.mm         |    2
13 files changed, 1533 insertions, 66 deletions
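
The bulk of the diff below is the full MTLBatch implementation, and its central optimization is a small fixed-size cache (GPU_VAO_STATIC_LEN entries) that remembers the vertex descriptor built for each shader interface and is invalidated in O(1) by bumping a generation counter when the batch is flagged dirty. For orientation only, here is a minimal, self-contained C++ sketch of that scheme; DescCache, Entry, and the simplified fields are illustrative stand-ins, not the Blender API.

#include <array>
#include <cstdint>
#include <cstdio>

/* Illustrative stand-in for MTLBatch::MTLVertexDescriptorCache (not Blender API). */
struct Entry {
  const void *interface = nullptr; /* Shader interface the descriptor was built for. */
  uint32_t life_index = 0;         /* Generation this entry belongs to. */
  /* The real entry also stores the vertex descriptor, attribute mask and buffer ids. */
};

class DescCache {
  std::array<Entry, 64> slots_{}; /* GPU_VAO_STATIC_LEN equivalent. */
  uint32_t life_index_ = 0;       /* Current generation. */

 public:
  /* O(1) whole-cache invalidation: bump the generation instead of clearing slots. */
  void invalidate()
  {
    life_index_++;
  }

  Entry *find(const void *interface)
  {
    for (Entry &e : slots_) {
      if (e.interface == interface && e.life_index == life_index_) {
        return &e;
      }
    }
    return nullptr;
  }

  bool insert(const void *interface)
  {
    for (Entry &e : slots_) {
      /* Empty slots and stale (previous-generation) slots are both reusable. */
      if (e.interface == nullptr || e.life_index != life_index_) {
        e = {interface, life_index_};
        return true;
      }
    }
    return false; /* Cache full: caller rebuilds bindings on every draw. */
  }
};

int main()
{
  DescCache cache;
  int iface = 0; /* Stand-in for a ShaderInterface address. */
  cache.insert(&iface);
  std::printf("hit: %d\n", cache.find(&iface) != nullptr); /* 1 */
  cache.invalidate();                                      /* Batch marked dirty. */
  std::printf("hit after invalidate: %d\n", cache.find(&iface) != nullptr); /* 0 */
  return 0;
}

Reusing any slot whose generation is stale means invalidation never has to touch the individual entries; this mirrors how the find() and insert() methods in mtl_batch.mm compare cache_life_index against the batch's current value.
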
diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm index 2ca1fd3f3d0..240951c1ebd 100644 --- a/source/blender/gpu/metal/mtl_backend.mm +++ b/source/blender/gpu/metal/mtl_backend.mm @@ -47,13 +47,11 @@ Context *MTLBackend::context_alloc(void *ghost_window, void *ghost_context) Batch *MTLBackend::batch_alloc() { - /* TODO(Metal): Full MTLBatch implementation. */ return new MTLBatch(); }; DrawList *MTLBackend::drawlist_alloc(int list_length) { - /* TODO(Metal): Full MTLDrawList implementation. */ return new MTLDrawList(list_length); }; @@ -420,6 +418,7 @@ void MTLBackend::capabilities_init(MTLContext *ctx) GCaps.depth_blitting_workaround = false; GCaps.use_main_context_workaround = false; GCaps.broken_amd_driver = false; + GCaps.clear_viewport_workaround = true; /* Metal related workarounds. */ /* Minimum per-vertex stride is 4 bytes in Metal. diff --git a/source/blender/gpu/metal/mtl_batch.hh b/source/blender/gpu/metal/mtl_batch.hh index 236367bf5a4..9e179e662b5 100644 --- a/source/blender/gpu/metal/mtl_batch.hh +++ b/source/blender/gpu/metal/mtl_batch.hh @@ -10,31 +10,126 @@ #pragma once #include "MEM_guardedalloc.h" - #include "gpu_batch_private.hh" +#include "mtl_index_buffer.hh" +#include "mtl_primitive.hh" +#include "mtl_shader.hh" +#include "mtl_vertex_buffer.hh" + +namespace blender::gpu { + +class MTLContext; +class MTLShaderInterface; + +#define GPU_VAO_STATIC_LEN 64 -namespace blender { -namespace gpu { +struct VertexBufferID { + uint32_t id : 16; + uint32_t is_instance : 15; + uint32_t used : 1; +}; -/* Pass-through MTLBatch. TODO(Metal): Implement. */ class MTLBatch : public Batch { + + /* Vertex Bind-state Caching for a given shader interface used with the Batch. */ + struct VertexDescriptorShaderInterfacePair { + MTLVertexDescriptor vertex_descriptor{}; + const ShaderInterface *interface = nullptr; + uint16_t attr_mask{}; + int num_buffers{}; + VertexBufferID bufferIds[GPU_BATCH_VBO_MAX_LEN] = {}; + /* Cache life index compares a cache entry with the active MTLBatch state. + * This is initially set to the cache life index of MTLBatch. If the batch has been modified, + * this index is incremented to cheaply invalidate existing cache entries. */ + uint32_t cache_life_index = 0; + }; + + class MTLVertexDescriptorCache { + + private: + MTLBatch *batch_; + + VertexDescriptorShaderInterfacePair cache_[GPU_VAO_STATIC_LEN] = {}; + MTLContext *cache_context_ = nullptr; + uint32_t cache_life_index_ = 0; + + public: + MTLVertexDescriptorCache(MTLBatch *batch) : batch_(batch){}; + VertexDescriptorShaderInterfacePair *find(const ShaderInterface *interface); + bool insert(VertexDescriptorShaderInterfacePair &data); + + private: + void vertex_descriptor_cache_init(MTLContext *ctx); + void vertex_descriptor_cache_clear(); + void vertex_descriptor_cache_ensure(); + }; + + private: + MTLShader *active_shader_ = nullptr; + bool shader_in_use_ = false; + MTLVertexDescriptorCache vao_cache = {this}; + + /* Topology emulation. */ + gpu::MTLBuffer *emulated_topology_buffer_ = nullptr; + GPUPrimType emulated_topology_type_; + uint32_t topology_buffer_input_v_count_ = 0; + uint32_t topology_buffer_output_v_count_ = 0; + public: - void draw(int v_first, int v_count, int i_first, int i_count) override - { - } + MTLBatch(){}; + ~MTLBatch(){}; + void draw(int v_first, int v_count, int i_first, int i_count) override; void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override { + /* TODO(Metal): Support indirect draw commands. 
*/ } - void multi_draw_indirect(GPUStorageBuf *indirect_buf, int count, intptr_t offset, intptr_t stride) override { + /* TODO(Metal): Support indirect draw commands. */ + } + + /* Returns an initialized RenderComandEncoder for drawing if all is good. + * Otherwise, nil. */ + id<MTLRenderCommandEncoder> bind(uint v_first, uint v_count, uint i_first, uint i_count); + void unbind(); + + /* Convenience getters. */ + MTLIndexBuf *elem_() const + { + return static_cast<MTLIndexBuf *>(unwrap(elem)); + } + MTLVertBuf *verts_(const int index) const + { + return static_cast<MTLVertBuf *>(unwrap(verts[index])); } + MTLVertBuf *inst_(const int index) const + { + return static_cast<MTLVertBuf *>(unwrap(inst[index])); + } + MTLShader *active_shader_get() const + { + return active_shader_; + } + + private: + void shader_bind(); + void draw_advanced(int v_first, int v_count, int i_first, int i_count); + int prepare_vertex_binding(MTLVertBuf *verts, + MTLRenderPipelineStateDescriptor &desc, + const MTLShaderInterface *interface, + uint16_t &attr_mask, + bool instanced); + + id<MTLBuffer> get_emulated_toplogy_buffer(GPUPrimType &in_out_prim_type, uint32_t &v_count); + + void prepare_vertex_descriptor_and_bindings( + MTLVertBuf **buffers, int &num_buffers, int v_first, int v_count, int i_first, int i_count); + MEM_CXX_CLASS_ALLOC_FUNCS("MTLBatch"); }; -} // namespace gpu -} // namespace blender +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_batch.mm b/source/blender/gpu/metal/mtl_batch.mm new file mode 100644 index 00000000000..988fb9b793b --- /dev/null +++ b/source/blender/gpu/metal/mtl_batch.mm @@ -0,0 +1,998 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * Metal implementation of GPUBatch. + */ + +#include "BLI_assert.h" +#include "BLI_span.hh" + +#include "BKE_global.h" + +#include "GPU_common.h" +#include "gpu_batch_private.hh" +#include "gpu_shader_private.hh" + +#include "mtl_batch.hh" +#include "mtl_context.hh" +#include "mtl_debug.hh" +#include "mtl_index_buffer.hh" +#include "mtl_shader.hh" +#include "mtl_vertex_buffer.hh" + +#include <string> + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Creation & Deletion + * \{ */ +void MTLBatch::draw(int v_first, int v_count, int i_first, int i_count) +{ + if (this->flag & GPU_BATCH_INVALID) { + this->shader_in_use_ = false; + } + this->draw_advanced(v_first, v_count, i_first, i_count); +} + +void MTLBatch::shader_bind() +{ + if (active_shader_ && active_shader_->is_valid()) { + active_shader_->bind(); + shader_in_use_ = true; + } +} + +void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_init(MTLContext *ctx) +{ + BLI_assert(ctx != nullptr); + this->vertex_descriptor_cache_clear(); + cache_context_ = ctx; +} + +void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_clear() +{ + cache_life_index_++; + cache_context_ = nullptr; +} + +void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_ensure() +{ + if (this->cache_context_ != nullptr) { + + /* Invalidate vertex descriptor bindings cache if batch has changed. */ + if (batch_->flag & GPU_BATCH_DIRTY) { + batch_->flag &= ~GPU_BATCH_DIRTY; + this->vertex_descriptor_cache_clear(); + } + } + + /* Initialize cache if not ready. 
*/ + if (cache_context_ == nullptr) { + this->vertex_descriptor_cache_init(MTLContext::get()); + } +} + +MTLBatch::VertexDescriptorShaderInterfacePair *MTLBatch::MTLVertexDescriptorCache::find( + const ShaderInterface *interface) +{ + this->vertex_descriptor_cache_ensure(); + for (int i = 0; i < GPU_VAO_STATIC_LEN; ++i) { + if (cache_[i].interface == interface && cache_[i].cache_life_index == cache_life_index_) { + return &cache_[i]; + } + } + return nullptr; +} + +bool MTLBatch::MTLVertexDescriptorCache::insert( + MTLBatch::VertexDescriptorShaderInterfacePair &data) +{ + vertex_descriptor_cache_ensure(); + for (int i = 0; i < GPU_VAO_STATIC_LEN; ++i) { + if (cache_[i].interface == nullptr || cache_[i].cache_life_index != cache_life_index_) { + cache_[i] = data; + cache_[i].cache_life_index = cache_life_index_; + return true; + } + } + return false; +} + +int MTLBatch::prepare_vertex_binding(MTLVertBuf *verts, + MTLRenderPipelineStateDescriptor &desc, + const MTLShaderInterface *interface, + uint16_t &attr_mask, + bool instanced) +{ + + const GPUVertFormat *format = &verts->format; + /* Whether the current vertex buffer has been added to the buffer layout descriptor. */ + bool buffer_added = false; + /* Per-vertex stride of current vertex buffer. */ + int buffer_stride = format->stride; + /* Buffer binding index of the vertex buffer once added to the buffer layout descriptor. */ + int buffer_index = -1; + int attribute_offset = 0; + + if (!active_shader_->get_uses_ssbo_vertex_fetch()) { + BLI_assert( + buffer_stride >= 4 && + "In Metal, Vertex buffer stride should be 4. SSBO Vertex fetch is not affected by this"); + } + + /* Iterate over GPUVertBuf vertex format and find attributes matching those in the active + * shader's interface. */ + for (uint32_t a_idx = 0; a_idx < format->attr_len; a_idx++) { + const GPUVertAttr *a = &format->attrs[a_idx]; + + if (format->deinterleaved) { + attribute_offset += ((a_idx == 0) ? 0 : format->attrs[a_idx - 1].size) * verts->vertex_len; + buffer_stride = a->size; + } + else { + attribute_offset = a->offset; + } + + /* Find attribute with the matching name. Attributes may have multiple compatible + * name aliases. */ + for (uint32_t n_idx = 0; n_idx < a->name_len; n_idx++) { + const char *name = GPU_vertformat_attr_name_get(format, a, n_idx); + const ShaderInput *input = interface->attr_get(name); + + if (input == nullptr || input->location == -1) { + /* Vertex/instance buffers provided have attribute data for attributes which are not needed + * by this particular shader. This shader only needs binding information for the attributes + * has in the shader interface. */ + MTL_LOG_WARNING( + "MTLBatch: Could not find attribute with name '%s' (defined in active vertex format) " + "in the shader interface for shader '%s'\n", + name, + interface->get_name()); + continue; + } + + /* Fetch metal attribute information. */ + const MTLShaderInputAttribute &mtl_attr = interface->get_attribute(input->location); + BLI_assert(mtl_attr.location >= 0); + /* Verify that the attribute location from the shader interface + * matches the attribute location returned. */ + BLI_assert(mtl_attr.location == input->location); + + /* Check if attribute is already present in the given slot. */ + if ((~attr_mask) & (1 << mtl_attr.location)) { + MTL_LOG_INFO( + " -- [Batch] Skipping attribute with input location %d (As one is already bound)\n", + mtl_attr.location); + } + else { + + /* Update attribute used-slot mask. 
*/ + attr_mask &= ~(1 << mtl_attr.location); + + /* Add buffer layout entry in descriptor if it has not yet been added + * for current vertex buffer. */ + if (!buffer_added) { + buffer_index = desc.vertex_descriptor.num_vert_buffers; + desc.vertex_descriptor.buffer_layouts[buffer_index].step_function = + (instanced) ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex; + desc.vertex_descriptor.buffer_layouts[buffer_index].step_rate = 1; + desc.vertex_descriptor.buffer_layouts[buffer_index].stride = buffer_stride; + desc.vertex_descriptor.num_vert_buffers++; + buffer_added = true; + + MTL_LOG_INFO(" -- [Batch] Adding source %s buffer (Index: %d, Stride: %d)\n", + (instanced) ? "instance" : "vertex", + buffer_index, + buffer_stride); + } + else { + /* Ensure stride is correct for de-interleaved attributes. */ + desc.vertex_descriptor.buffer_layouts[buffer_index].stride = buffer_stride; + } + + /* Handle Matrix/Array vertex attribute types. + * Metal does not natively support these as attribute types, so we handle these cases + * by stacking together compatible types (e.g. 4xVec4 for Mat4) and combining + * the data in the shader. + * The generated Metal shader will contain a generated input binding, which reads + * in individual attributes and merges them into the desired type after vertex + * assembly. e.g. a Mat4 (Float4x4) will generate 4 Float4 attributes. */ + if (a->comp_len == 16 || a->comp_len == 12 || a->comp_len == 8) { + BLI_assert_msg( + a->comp_len == 16, + "only mat4 attributes currently supported -- Not ready to handle other long " + "component length attributes yet"); + + /* SSBO Vertex Fetch Attribute safety checks. */ + if (active_shader_->get_uses_ssbo_vertex_fetch()) { + /* When using SSBO vertex fetch, we do not need to expose split attributes, + * A matrix can be read directly as a whole block of contiguous data. */ + MTLSSBOAttribute ssbo_attr(mtl_attr.index, + buffer_index, + attribute_offset, + buffer_stride, + GPU_SHADER_ATTR_TYPE_MAT4, + instanced); + active_shader_->ssbo_vertex_fetch_bind_attribute(ssbo_attr); + desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] = + ssbo_attr; + desc.vertex_descriptor.num_ssbo_attributes++; + } + else { + + /* Handle Mat4 attributes. */ + if (a->comp_len == 16) { + /* Debug safety checks. */ + BLI_assert_msg(mtl_attr.matrix_element_count == 4, + "mat4 type expected but there are fewer components"); + BLI_assert_msg(mtl_attr.size == 16, "Expecting subtype 'vec4' with 16 bytes"); + BLI_assert_msg( + mtl_attr.format == MTLVertexFormatFloat4, + "Per-attribute vertex format MUST be float4 for an input type of 'mat4'"); + + /* We have found the 'ROOT' attribute. A mat4 contains 4 consecutive float4 attribute + * locations we must map to. */ + for (int i = 0; i < a->comp_len / 4; i++) { + desc.vertex_descriptor.attributes[mtl_attr.location + i].format = + MTLVertexFormatFloat4; + /* Data is consecutive in the buffer for the whole matrix, each float4 will shift + * the offset by 16 bytes. */ + desc.vertex_descriptor.attributes[mtl_attr.location + i].offset = + attribute_offset + i * 16; + /* All source data for a matrix is in the same singular buffer. */ + desc.vertex_descriptor.attributes[mtl_attr.location + i].buffer_index = + buffer_index; + + /* Update total attribute account. 
*/ + desc.vertex_descriptor.num_attributes = max_ii( + mtl_attr.location + i + 1, desc.vertex_descriptor.num_attributes); + MTL_LOG_INFO("-- Sub-Attrib Location: %d, offset: %d, buffer index: %d\n", + mtl_attr.location + i, + attribute_offset + i * 16, + buffer_index); + } + MTL_LOG_INFO( + "Float4x4 attribute type added for '%s' at attribute locations: %d to %d\n", + name, + mtl_attr.location, + mtl_attr.location + 3); + } + + /* Ensure we are not exceeding the attribute limit. */ + BLI_assert(desc.vertex_descriptor.num_attributes <= MTL_MAX_VERTEX_INPUT_ATTRIBUTES); + } + } + else { + + /* Handle Any required format conversions. + * NOTE(Metal): If there is a mis-match between the format of an attribute + * in the shader interface, and the specified format in the VertexBuffer VertexFormat, + * we need to perform a format conversion. + * + * The Metal API can perform certain conversions internally during vertex assembly: + * - Type Normalization e.g short2 to float2 between 0.0 to 1.0. + * - Type Truncation e.g. Float4 to Float2. + * - Type expansion e,g, Float3 to Float4 (Following 0,0,0,1 for assignment to empty + * elements). + * + * Certain conversion cannot be performed however, and in these cases, we need to + * instruct the shader to generate a specialized version with a conversion routine upon + * attribute read. + * - This handles cases such as conversion between types e.g. Integer to float without + * normalization. + * + * For more information on the supported and unsupported conversions, see: + * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc + */ + MTLVertexFormat converted_format; + bool can_use_internal_conversion = mtl_convert_vertex_format( + mtl_attr.format, + (GPUVertCompType)a->comp_type, + a->comp_len, + (GPUVertFetchMode)a->fetch_mode, + &converted_format); + bool is_floating_point_format = (a->comp_type == GPU_COMP_F32); + + if (can_use_internal_conversion) { + desc.vertex_descriptor.attributes[mtl_attr.location].format = converted_format; + desc.vertex_descriptor.attributes[mtl_attr.location].format_conversion_mode = + is_floating_point_format ? (GPUVertFetchMode)GPU_FETCH_FLOAT : + (GPUVertFetchMode)GPU_FETCH_INT; + BLI_assert(converted_format != MTLVertexFormatInvalid); + } + else { + /* The internal implicit conversion is not supported. + * In this case, we need to handle conversion inside the shader. + * This is handled using `format_conversion_mode`. + * `format_conversion_mode` is assigned the blender-specified fetch mode (GPU_FETCH_*). + * This then controls how a given attribute is interpreted. The data will be read + * as specified and then converted appropriately to the correct form. + * + * e.g. if `GPU_FETCH_INT_TO_FLOAT` is specified, the specialized read-routine + * in the shader will read the data as an int, and cast this to floating point + * representation. (Rather than reading the source data as float). + * + * NOTE: Even if full conversion is not supported, we may still partially perform an + * implicit conversion where possible, such as vector truncation or expansion. */ + MTLVertexFormat converted_format; + bool can_convert = mtl_vertex_format_resize( + mtl_attr.format, a->comp_len, &converted_format); + desc.vertex_descriptor.attributes[mtl_attr.location].format = can_convert ? 
+ converted_format : + mtl_attr.format; + desc.vertex_descriptor.attributes[mtl_attr.location].format_conversion_mode = + (GPUVertFetchMode)a->fetch_mode; + BLI_assert(desc.vertex_descriptor.attributes[mtl_attr.location].format != + MTLVertexFormatInvalid); + } + desc.vertex_descriptor.attributes[mtl_attr.location].offset = attribute_offset; + desc.vertex_descriptor.attributes[mtl_attr.location].buffer_index = buffer_index; + desc.vertex_descriptor.num_attributes = ((mtl_attr.location + 1) > + desc.vertex_descriptor.num_attributes) ? + (mtl_attr.location + 1) : + desc.vertex_descriptor.num_attributes; + + /* SSBO Vertex Fetch attribute bind. */ + if (active_shader_->get_uses_ssbo_vertex_fetch()) { + BLI_assert_msg(desc.vertex_descriptor.attributes[mtl_attr.location].format == + mtl_attr.format, + "SSBO Vertex Fetch does not support attribute conversion."); + + MTLSSBOAttribute ssbo_attr( + mtl_attr.index, + buffer_index, + attribute_offset, + buffer_stride, + MTLShader::ssbo_vertex_type_to_attr_type( + desc.vertex_descriptor.attributes[mtl_attr.location].format), + instanced); + + active_shader_->ssbo_vertex_fetch_bind_attribute(ssbo_attr); + desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] = + ssbo_attr; + desc.vertex_descriptor.num_ssbo_attributes++; + } + + /* NOTE: We are setting num_attributes to be up to the maximum found index, because of + * this, it is possible that we may skip over certain attributes if they were not in the + * source GPUVertFormat. */ + MTL_LOG_INFO( + " -- Batch Attribute(%d): ORIG Shader Format: %d, ORIG Vert format: %d, Vert " + "components: %d, Fetch Mode %d --> FINAL FORMAT: %d\n", + mtl_attr.location, + (int)mtl_attr.format, + (int)a->comp_type, + (int)a->comp_len, + (int)a->fetch_mode, + (int)desc.vertex_descriptor.attributes[mtl_attr.location].format); + + MTL_LOG_INFO( + " -- [Batch] matching %s attribute '%s' (Attribute Index: %d, Buffer index: %d, " + "offset: %d)\n", + (instanced) ? "instance" : "vertex", + name, + mtl_attr.location, + buffer_index, + attribute_offset); + } + } + } + } + if (buffer_added) { + return buffer_index; + } + return -1; +} + +id<MTLRenderCommandEncoder> MTLBatch::bind(uint v_first, uint v_count, uint i_first, uint i_count) +{ + /* Setup draw call and render pipeline state here. Called by every draw, but setup here so that + * MTLDrawList only needs to perform setup a single time. */ + BLI_assert(this); + + /* Fetch Metal device. */ + MTLContext *ctx = MTLContext::get(); + if (!ctx) { + BLI_assert_msg(false, "No context available for rendering."); + return nil; + } + + /* Verify Shader. */ + active_shader_ = (shader) ? static_cast<MTLShader *>(unwrap(shader)) : nullptr; + + if (active_shader_ == nullptr || !active_shader_->is_valid()) { + /* Skip drawing if there is no valid Metal shader. + * This will occur if the path through which the shader is prepared + * is invalid (e.g. Python without create-info), or, the source shader uses a geometry pass. */ + BLI_assert_msg(false, "No valid Metal shader!"); + return nil; + } + + /* Check if using SSBO Fetch Mode. + * This is an alternative drawing mode to geometry shaders, wherein vertex buffers + * are bound as readable (random-access) GPU buffers and certain descriptor properties + * are passed using Shader uniforms. */ + bool uses_ssbo_fetch = active_shader_->get_uses_ssbo_vertex_fetch(); + + /* Prepare Vertex Descriptor and extract VertexBuffers to bind. 
*/ + MTLVertBuf *buffers[GPU_BATCH_VBO_MAX_LEN] = {nullptr}; + int num_buffers = 0; + + /* Ensure Index Buffer is ready. */ + MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem)); + if (mtl_elem != NULL) { + mtl_elem->upload_data(); + } + + /* Populate vertex descriptor with attribute binding information. + * The vertex descriptor and buffer layout descriptors describe + * how vertex data from bound vertex buffers maps to the + * shader's input. + * A unique vertex descriptor will result in a new PipelineStateObject + * being generated for the currently bound shader. */ + prepare_vertex_descriptor_and_bindings(buffers, num_buffers, v_first, v_count, i_first, i_count); + + /* Prepare Vertex Buffers - Run before RenderCommandEncoder in case BlitCommandEncoder buffer + * data operations are required. */ + for (int i = 0; i < num_buffers; i++) { + MTLVertBuf *buf_at_index = buffers[i]; + if (buf_at_index == NULL) { + BLI_assert_msg( + false, + "Total buffer count does not match highest buffer index, could be gaps in bindings"); + continue; + } + + MTLVertBuf *mtlvbo = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(buf_at_index)); + mtlvbo->bind(); + } + + /* Ensure render pass is active and fetch active RenderCommandEncoder. */ + id<MTLRenderCommandEncoder> rec = ctx->ensure_begin_render_pass(); + + /* Fetch RenderPassState to enable resource binding for active pass. */ + MTLRenderPassState &rps = ctx->main_command_buffer.get_render_pass_state(); + + /* Debug Check: Ensure Frame-buffer instance is not dirty. */ + BLI_assert(!ctx->main_command_buffer.get_active_framebuffer()->get_dirty()); + + /* Bind Shader. */ + this->shader_bind(); + + /* GPU debug markers. */ + if (G.debug & G_DEBUG_GPU) { + [rec pushDebugGroup:[NSString stringWithFormat:@"batch_bind%@(shader: %s)", + this->elem ? @"(indexed)" : @"", + active_shader_->get_interface()->get_name()]]; + [rec insertDebugSignpost:[NSString + stringWithFormat:@"batch_bind%@(shader: %s)", + this->elem ? @"(indexed)" : @"", + active_shader_->get_interface()->get_name()]]; + } + + /* Ensure Context Render Pipeline State is fully setup and ready to execute the draw. */ + MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type); + if (!ctx->ensure_render_pipeline_state(mtl_prim_type)) { + printf("FAILED TO ENSURE RENDER PIPELINE STATE"); + BLI_assert(false); + + if (G.debug & G_DEBUG_GPU) { + [rec popDebugGroup]; + } + return nil; + } + + /*** Bind Vertex Buffers and Index Buffers **/ + + /* SSBO Vertex Fetch Buffer bindings. */ + if (uses_ssbo_fetch) { + + /* SSBO Vertex Fetch - Bind Index Buffer to appropriate slot -- if used. */ + id<MTLBuffer> idx_buffer = nil; + GPUPrimType final_prim_type = this->prim_type; + + if (mtl_elem != nullptr) { + + /* Fetch index buffer. This function can situationally return an optimized + * index buffer of a different primitive type. If this is the case, `final_prim_type` + * and `v_count` will be updated with the new format. + * NOTE: For indexed rendering, v_count represents the number of indices. */ + idx_buffer = mtl_elem->get_index_buffer(final_prim_type, v_count); + BLI_assert(idx_buffer != nil); + + /* Update uniforms for SSBO-vertex-fetch-mode indexed rendering to flag usage. */ + int &uniform_ssbo_index_mode_u16 = active_shader_->uni_ssbo_uses_index_mode_u16; + BLI_assert(uniform_ssbo_index_mode_u16 != -1); + int uses_index_mode_u16 = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 
1 : 0; + active_shader_->uniform_int(uniform_ssbo_index_mode_u16, 1, 1, &uses_index_mode_u16); + } + else { + idx_buffer = ctx->get_null_buffer(); + } + rps.bind_vertex_buffer(idx_buffer, 0, MTL_SSBO_VERTEX_FETCH_IBO_INDEX); + + /* Ensure all attributes are set */ + active_shader_->ssbo_vertex_fetch_bind_attributes_end(rec); + + /* Bind NULL Buffers for unused vertex data slots. */ + id<MTLBuffer> null_buffer = ctx->get_null_buffer(); + BLI_assert(null_buffer != nil); + for (int i = num_buffers; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) { + if (rps.cached_vertex_buffer_bindings[i].metal_buffer == nil) { + rps.bind_vertex_buffer(null_buffer, 0, i); + } + } + + /* Flag whether Indexed rendering is used or not. */ + int &uniform_ssbo_use_indexed = active_shader_->uni_ssbo_uses_indexed_rendering; + BLI_assert(uniform_ssbo_use_indexed != -1); + int uses_indexed_rendering = (mtl_elem != NULL) ? 1 : 0; + active_shader_->uniform_int(uniform_ssbo_use_indexed, 1, 1, &uses_indexed_rendering); + + /* Set SSBO-fetch-mode status uniforms. */ + BLI_assert(active_shader_->uni_ssbo_input_prim_type_loc != -1); + BLI_assert(active_shader_->uni_ssbo_input_vert_count_loc != -1); + GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_shader_)), + active_shader_->uni_ssbo_input_prim_type_loc, + 1, + 1, + (const int *)(&final_prim_type)); + GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_shader_)), + active_shader_->uni_ssbo_input_vert_count_loc, + 1, + 1, + (const int *)(&v_count)); + } + + /* Bind Vertex Buffers. */ + for (int i = 0; i < num_buffers; i++) { + MTLVertBuf *buf_at_index = buffers[i]; + if (buf_at_index == NULL) { + BLI_assert_msg( + false, + "Total buffer count does not match highest buffer index, could be gaps in bindings"); + continue; + } + /* Buffer handle. */ + MTLVertBuf *mtlvbo = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(buf_at_index)); + mtlvbo->flag_used(); + + /* Fetch buffer from MTLVertexBuffer and bind. */ + id<MTLBuffer> mtl_buffer = mtlvbo->get_metal_buffer(); + + BLI_assert(mtl_buffer != nil); + rps.bind_vertex_buffer(mtl_buffer, 0, i); + } + + if (G.debug & G_DEBUG_GPU) { + [rec popDebugGroup]; + } + + /* Return Render Command Encoder used with setup. */ + return rec; +} + +void MTLBatch::unbind() +{ +} + +void MTLBatch::prepare_vertex_descriptor_and_bindings( + MTLVertBuf **buffers, int &num_buffers, int v_first, int v_count, int i_first, int i_count) +{ + + /* Here we populate the MTLContext vertex descriptor and resolve which buffers need to be bound. + */ + MTLStateManager *state_manager = static_cast<MTLStateManager *>( + MTLContext::get()->state_manager); + MTLRenderPipelineStateDescriptor &desc = state_manager->get_pipeline_descriptor(); + const MTLShaderInterface *interface = active_shader_->get_interface(); + uint16_t attr_mask = interface->get_enabled_attribute_mask(); + + /* Reset vertex descriptor to default state. */ + desc.reset_vertex_descriptor(); + + /* Fetch Vertex and Instance Buffers. */ + Span<MTLVertBuf *> mtl_verts(reinterpret_cast<MTLVertBuf **>(this->verts), + GPU_BATCH_VBO_MAX_LEN); + Span<MTLVertBuf *> mtl_inst(reinterpret_cast<MTLVertBuf **>(this->inst), + GPU_BATCH_INST_VBO_MAX_LEN); + + /* SSBO Vertex fetch also passes vertex descriptor information into the shader. */ + if (active_shader_->get_uses_ssbo_vertex_fetch()) { + active_shader_->ssbo_vertex_fetch_bind_attributes_begin(); + } + + /* Resolve Metal vertex buffer bindings. 
*/ + /* Vertex Descriptors + * ------------------ + * Vertex Descriptors are required to generate a pipeline state, based on the current Batch's + * buffer bindings. These bindings are a unique matching, depending on what input attributes a + * batch has in its buffers, and those which are supported by the shader interface. + + * We iterate through the buffers and resolve which attributes satisfy the requirements of the + * currently bound shader. We cache this data, for a given Batch<->ShderInterface pairing in a + * VAO cache to avoid the need to recalculate this data. */ + bool buffer_is_instanced[GPU_BATCH_VBO_MAX_LEN] = {false}; + + VertexDescriptorShaderInterfacePair *descriptor = this->vao_cache.find(interface); + if (descriptor) { + desc.vertex_descriptor = descriptor->vertex_descriptor; + attr_mask = descriptor->attr_mask; + num_buffers = descriptor->num_buffers; + + for (int bid = 0; bid < GPU_BATCH_VBO_MAX_LEN; ++bid) { + if (descriptor->bufferIds[bid].used) { + if (descriptor->bufferIds[bid].is_instance) { + buffers[bid] = mtl_inst[descriptor->bufferIds[bid].id]; + buffer_is_instanced[bid] = true; + } + else { + buffers[bid] = mtl_verts[descriptor->bufferIds[bid].id]; + buffer_is_instanced[bid] = false; + } + } + } + + /* Use cached ssbo attribute binding data. */ + if (active_shader_->get_uses_ssbo_vertex_fetch()) { + BLI_assert(desc.vertex_descriptor.uses_ssbo_vertex_fetch); + for (int attr_id = 0; attr_id < desc.vertex_descriptor.num_ssbo_attributes; attr_id++) { + active_shader_->ssbo_vertex_fetch_bind_attribute( + desc.vertex_descriptor.ssbo_attributes[attr_id]); + } + } + } + else { + VertexDescriptorShaderInterfacePair pair{}; + pair.interface = interface; + + for (int i = 0; i < GPU_BATCH_VBO_MAX_LEN; ++i) { + pair.bufferIds[i].id = -1; + pair.bufferIds[i].is_instance = 0; + pair.bufferIds[i].used = 0; + } + /* NOTE: Attribute extraction order from buffer is the reverse of the OpenGL as we flag once an + * attribute is found, rather than pre-setting the mask. */ + /* Extract Instance attributes (These take highest priority). */ + for (int v = 0; v < GPU_BATCH_INST_VBO_MAX_LEN; v++) { + if (mtl_inst[v]) { + MTL_LOG_INFO(" -- [Batch] Checking bindings for bound instance buffer %p\n", mtl_inst[v]); + int buffer_ind = this->prepare_vertex_binding( + mtl_inst[v], desc, interface, attr_mask, true); + if (buffer_ind >= 0) { + buffers[buffer_ind] = mtl_inst[v]; + buffer_is_instanced[buffer_ind] = true; + + pair.bufferIds[buffer_ind].id = v; + pair.bufferIds[buffer_ind].used = 1; + pair.bufferIds[buffer_ind].is_instance = 1; + num_buffers = ((buffer_ind + 1) > num_buffers) ? (buffer_ind + 1) : num_buffers; + } + } + } + + /* Extract Vertex attributes (First-bound vertex buffer takes priority). */ + for (int v = 0; v < GPU_BATCH_VBO_MAX_LEN; v++) { + if (mtl_verts[v] != NULL) { + MTL_LOG_INFO(" -- [Batch] Checking bindings for bound vertex buffer %p\n", mtl_verts[v]); + int buffer_ind = this->prepare_vertex_binding( + mtl_verts[v], desc, interface, attr_mask, false); + if (buffer_ind >= 0) { + buffers[buffer_ind] = mtl_verts[v]; + buffer_is_instanced[buffer_ind] = false; + + pair.bufferIds[buffer_ind].id = v; + pair.bufferIds[buffer_ind].used = 1; + pair.bufferIds[buffer_ind].is_instance = 0; + num_buffers = ((buffer_ind + 1) > num_buffers) ? 
(buffer_ind + 1) : num_buffers; + } + } + } + + /* Add to VertexDescriptor cache */ + desc.vertex_descriptor.uses_ssbo_vertex_fetch = active_shader_->get_uses_ssbo_vertex_fetch(); + pair.attr_mask = attr_mask; + pair.vertex_descriptor = desc.vertex_descriptor; + pair.num_buffers = num_buffers; + if (!this->vao_cache.insert(pair)) { + printf( + "[Performance Warning] cache is full (Size: %d), vertex descriptor will not be cached\n", + GPU_VAO_STATIC_LEN); + } + } + +/* DEBUG: verify if our attribute bindings have been fully provided as expected. */ +#if MTL_DEBUG_SHADER_ATTRIBUTES == 1 + if (attr_mask != 0) { + for (uint16_t mask = 1, a = 0; a < 16; a++, mask <<= 1) { + if (attr_mask & mask) { + /* Fallback for setting default attributes, for missed slots. Attributes flagged with + * 'MTLVertexFormatInvalid' in the vertex descriptor are bound to a NULL buffer during PSO + * creation. */ + MTL_LOG_WARNING("MTLBatch: Missing expected attribute '%s' at index '%d' for shader: %s\n", + this->active_shader->interface->attributes[a].name, + a, + interface->name); + /* Ensure any assigned attribute has not been given an invalid format. This should not + * occur and may be the result of an unsupported attribute type conversion. */ + BLI_assert(desc.attributes[a].format == MTLVertexFormatInvalid); + } + } + } +#endif +} + +void MTLBatch::draw_advanced(int v_first, int v_count, int i_first, int i_count) +{ + +#if TRUST_NO_ONE + BLI_assert(v_count > 0 && i_count > 0); +#endif + + /* Setup RenderPipelineState for batch. */ + MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get()); + id<MTLRenderCommandEncoder> rec = this->bind(v_first, v_count, i_first, i_count); + if (rec == nil) { + return; + } + + /* Fetch IndexBuffer and resolve primitive type. */ + MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem)); + MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type); + + /* Render using SSBO Vertex Fetch. */ + if (active_shader_->get_uses_ssbo_vertex_fetch()) { + + /* Submit draw call with modified vertex count, which reflects vertices per primitive defined + * in the USE_SSBO_VERTEX_FETCH pragma. */ + int num_input_primitives = gpu_get_prim_count_from_type(v_count, this->prim_type); + int output_num_verts = num_input_primitives * + active_shader_->get_ssbo_vertex_fetch_output_num_verts(); + BLI_assert_msg( + mtl_vertex_count_fits_primitive_type( + output_num_verts, active_shader_->get_ssbo_vertex_fetch_output_prim_type()), + "Output Vertex count is not compatible with the requested output vertex primitive type"); + [rec drawPrimitives:active_shader_->get_ssbo_vertex_fetch_output_prim_type() + vertexStart:0 + vertexCount:output_num_verts + instanceCount:i_count + baseInstance:i_first]; + ctx->main_command_buffer.register_draw_counters(output_num_verts * i_count); + } + /* Perform regular draw. */ + else if (mtl_elem == NULL) { + + /* Primitive Type toplogy emulation. */ + if (mtl_needs_topology_emulation(this->prim_type)) { + + /* Generate index buffer for primitive types requiring emulation. */ + GPUPrimType emulated_prim_type = this->prim_type; + uint32_t emulated_v_count = v_count; + id<MTLBuffer> generated_index_buffer = this->get_emulated_toplogy_buffer(emulated_prim_type, + emulated_v_count); + BLI_assert(generated_index_buffer != nil); + + MTLPrimitiveType emulated_mtl_prim_type = gpu_prim_type_to_metal(emulated_prim_type); + + /* Temp: Disable culling for emulated primitive types. 
+ * TODO(Metal): Support face winding in topology buffer. */ + [rec setCullMode:MTLCullModeNone]; + + if (generated_index_buffer != nil) { + BLI_assert(emulated_mtl_prim_type == MTLPrimitiveTypeTriangle || + emulated_mtl_prim_type == MTLPrimitiveTypeLine); + if (emulated_mtl_prim_type == MTLPrimitiveTypeTriangle) { + BLI_assert(emulated_v_count % 3 == 0); + } + if (emulated_mtl_prim_type == MTLPrimitiveTypeLine) { + BLI_assert(emulated_v_count % 2 == 0); + } + + /* Set depth stencil state (requires knowledge of primitive type). */ + ctx->ensure_depth_stencil_state(emulated_mtl_prim_type); + + [rec drawIndexedPrimitives:emulated_mtl_prim_type + indexCount:emulated_v_count + indexType:MTLIndexTypeUInt32 + indexBuffer:generated_index_buffer + indexBufferOffset:0 + instanceCount:i_count + baseVertex:v_first + baseInstance:i_first]; + } + else { + printf("[Note] Cannot draw batch -- Emulated Topology mode: %u not yet supported\n", + this->prim_type); + } + } + else { + /* Set depth stencil state (requires knowledge of primitive type). */ + ctx->ensure_depth_stencil_state(mtl_prim_type); + + /* Issue draw call. */ + [rec drawPrimitives:mtl_prim_type + vertexStart:v_first + vertexCount:v_count + instanceCount:i_count + baseInstance:i_first]; + } + ctx->main_command_buffer.register_draw_counters(v_count * i_count); + } + /* Perform indexed draw. */ + else { + + MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_); + uint32_t base_index = mtl_elem->index_base_; + uint32_t index_size = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 2 : 4; + uint32_t v_first_ofs = ((v_first + mtl_elem->index_start_) * index_size); + BLI_assert_msg((v_first_ofs % index_size) == 0, + "Index offset is not 2/4-byte aligned as per METAL spec"); + + /* Fetch index buffer. May return an index buffer of a differing format, + * if index buffer optimization is used. In these cases, final_prim_type and + * index_count get updated with the new properties. */ + GPUPrimType final_prim_type = this->prim_type; + uint index_count = v_count; + + id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count); + mtl_prim_type = gpu_prim_type_to_metal(final_prim_type); + BLI_assert(index_buffer != nil); + + if (index_buffer != nil) { + + /* Set depth stencil state (requires knowledge of primitive type). */ + ctx->ensure_depth_stencil_state(mtl_prim_type); + + /* Issue draw call. */ + [rec drawIndexedPrimitives:mtl_prim_type + indexCount:index_count + indexType:index_type + indexBuffer:index_buffer + indexBufferOffset:v_first_ofs + instanceCount:i_count + baseVertex:base_index + baseInstance:i_first]; + ctx->main_command_buffer.register_draw_counters(index_count * i_count); + } + else { + BLI_assert_msg(false, "Index buffer does not have backing Metal buffer"); + } + } + + /* End of draw. */ + this->unbind(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Topology emulation and optimization + * \{ */ + +id<MTLBuffer> MTLBatch::get_emulated_toplogy_buffer(GPUPrimType &in_out_prim_type, + uint32_t &in_out_v_count) +{ + + BLI_assert(in_out_v_count > 0); + /* Determine emulated primitive types. 
*/
+  GPUPrimType input_prim_type = in_out_prim_type;
+  uint32_t v_count = in_out_v_count;
+  GPUPrimType output_prim_type;
+  switch (input_prim_type) {
+    case GPU_PRIM_POINTS:
+    case GPU_PRIM_LINES:
+    case GPU_PRIM_TRIS:
+      BLI_assert_msg(false, "Optimal primitive types should not reach here.");
+      return nil;
+      break;
+    case GPU_PRIM_LINES_ADJ:
+    case GPU_PRIM_TRIS_ADJ:
+      BLI_assert_msg(false, "Adjacency primitive types should not reach here.");
+      return nil;
+      break;
+    case GPU_PRIM_LINE_STRIP:
+    case GPU_PRIM_LINE_LOOP:
+    case GPU_PRIM_LINE_STRIP_ADJ:
+      output_prim_type = GPU_PRIM_LINES;
+      break;
+    case GPU_PRIM_TRI_STRIP:
+    case GPU_PRIM_TRI_FAN:
+      output_prim_type = GPU_PRIM_TRIS;
+      break;
+    default:
+      BLI_assert_msg(false, "Invalid primitive type.");
+      return nil;
+  }
+
+  /* Check if topology buffer exists and is valid. */
+  if (this->emulated_topology_buffer_ != nullptr &&
+      (emulated_topology_type_ != input_prim_type || topology_buffer_input_v_count_ != v_count)) {
+
+    /* Release existing topology buffer. */
+    emulated_topology_buffer_->free();
+    emulated_topology_buffer_ = nullptr;
+  }
+
+  /* Generate new topology index buffer. */
+  if (this->emulated_topology_buffer_ == nullptr) {
+    /* Calculate IB len. */
+    uint32_t output_prim_count = 0;
+    switch (input_prim_type) {
+      case GPU_PRIM_LINE_STRIP:
+      case GPU_PRIM_LINE_STRIP_ADJ:
+        output_prim_count = v_count - 1;
+        break;
+      case GPU_PRIM_LINE_LOOP:
+        output_prim_count = v_count;
+        break;
+      case GPU_PRIM_TRI_STRIP:
+      case GPU_PRIM_TRI_FAN:
+        output_prim_count = v_count - 2;
+        break;
+      default:
+        BLI_assert_msg(false, "Cannot generate optimized topology buffer for other types.");
+        break;
+    }
+    uint32_t output_IB_elems = output_prim_count * ((output_prim_type == GPU_PRIM_TRIS) ? 3 : 2);
+
+    /* Allocate buffer. */
+    uint32_t buffer_bytes = output_IB_elems * 4;
+    BLI_assert(buffer_bytes > 0);
+    this->emulated_topology_buffer_ = MTLContext::get_global_memory_manager().allocate(
+        buffer_bytes, true);
+
+    /* Populate. */
+    uint32_t *data = (uint32_t *)this->emulated_topology_buffer_->get_host_ptr();
+    BLI_assert(data != nullptr);
+
+    /* TODO(Metal): Support inverse winding modes. */
+    bool winding_clockwise = false;
+    UNUSED_VARS(winding_clockwise);
+
+    switch (input_prim_type) {
+      /* Line Loop. Two indices per output line. */
+      case GPU_PRIM_LINE_LOOP: {
+        int line = 0;
+        for (line = 0; line < output_prim_count - 1; line++) {
+          data[line * 2 + 0] = line + 0;
+          data[line * 2 + 1] = line + 1;
+        }
+        /* Closing line. */
+        data[line * 2 + 0] = line + 0;
+        data[line * 2 + 1] = 0;
+      } break;
+
+      /* Triangle Fan. */
+      case GPU_PRIM_TRI_FAN: {
+        for (int triangle = 0; triangle < output_prim_count; triangle++) {
+          data[triangle * 3 + 0] = 0; /* Always 0. */
+          data[triangle * 3 + 1] = triangle + 1;
+          data[triangle * 3 + 2] = triangle + 2;
+        }
+      } break;
+
+      default:
+        BLI_assert_msg(false, "Other primitive types do not require emulation.");
+        return nil;
+    }
+
+    /* Flush. */
+    this->emulated_topology_buffer_->flush();
+    /* Assign members relating to current cached IB. */
+    topology_buffer_input_v_count_ = v_count;
+    topology_buffer_output_v_count_ = output_IB_elems;
+    emulated_topology_type_ = input_prim_type;
+  }
+
+  /* Return. */
+  in_out_v_count = topology_buffer_output_v_count_;
+  in_out_prim_type = output_prim_type;
+  return (emulated_topology_buffer_) ? emulated_topology_buffer_->get_metal_buffer() : nil;
+}
+
+/** \} */
+
+}  // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm
index ef66a1f2111..50576379f0d 100644
--- a/source/blender/gpu/metal/mtl_context.mm
+++ b/source/blender/gpu/metal/mtl_context.mm
@@ -995,19 +995,21 @@ bool MTLContext::ensure_uniform_buffer_bindings(
 
     if (ubo.buffer_index >= 0) {
 
-      const uint32_t buffer_index = ubo.buffer_index;
+      /* Uniform Buffer index offset by 1 as the first shader buffer binding slot is reserved for
+       * the uniform PushConstantBlock. */
+      const uint32_t buffer_index = ubo.buffer_index + 1;
       int ubo_offset = 0;
       id<MTLBuffer> ubo_buffer = nil;
       int ubo_size = 0;
 
       bool bind_dummy_buffer = false;
-      if (this->pipeline_state.ubo_bindings[buffer_index].bound) {
+      if (this->pipeline_state.ubo_bindings[ubo_index].bound) {
 
         /* Fetch UBO global-binding properties from slot. */
         ubo_offset = 0;
-        ubo_buffer = this->pipeline_state.ubo_bindings[buffer_index].ubo->get_metal_buffer(
+        ubo_buffer = this->pipeline_state.ubo_bindings[ubo_index].ubo->get_metal_buffer(
             &ubo_offset);
-        ubo_size = this->pipeline_state.ubo_bindings[buffer_index].ubo->get_size();
+        ubo_size = this->pipeline_state.ubo_bindings[ubo_index].ubo->get_size();
 
         /* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid
          * allocating zero buffers. */
diff --git a/source/blender/gpu/metal/mtl_drawlist.hh b/source/blender/gpu/metal/mtl_drawlist.hh
index ed99c76faa7..47055f3d7f4 100644
--- a/source/blender/gpu/metal/mtl_drawlist.hh
+++ b/source/blender/gpu/metal/mtl_drawlist.hh
@@ -9,34 +9,50 @@
 
 #pragma once
 
-#pragma once
-
+#include "BLI_sys_types.h"
+#include "GPU_batch.h"
+#include "MEM_guardedalloc.h"
 #include "gpu_drawlist_private.hh"
 
-namespace blender {
-namespace gpu {
+#include "mtl_batch.hh"
+#include "mtl_context.hh"
+
+namespace blender::gpu {
 
 /**
- * TODO(Metal): MTLDrawList Implementation. Included as temporary stub.
- */
+ * Implementation of Multi Draw Indirect using Metal.
+ **/
 class MTLDrawList : public DrawList {
+
+ private:
+  /** Batch for which we are recording commands. */
+  MTLBatch *batch_;
+  /** Mapped memory bounds. */
+  void *data_;
+  /** Length of the mapped buffer (in bytes). */
+  size_t data_size_;
+  /** Current offset inside the mapped buffer (in bytes). */
+  size_t command_offset_;
+  /** Current number of commands recorded inside the mapped buffer. */
+  uint32_t command_len_;
+  /** Is UINT_MAX if not drawing indexed geometry. Cached to avoid dereferencing the batch. */
+  uint32_t base_index_;
+  /** Cached to avoid dereferencing the batch. */
+  uint32_t v_first_, v_count_;
+  /** Length of the whole buffer (in bytes). */
+  uint32_t buffer_size_;
+
 public:
-  MTLDrawList(int length)
-  {
-  }
-  ~MTLDrawList()
-  {
-  }
-
-  void append(GPUBatch *batch, int i_first, int i_count) override
-  {
-  }
-  void submit() override
-  {
-  }
+  MTLDrawList(int length);
+  ~MTLDrawList();
+
+  void append(GPUBatch *batch, int i_first, int i_count) override;
+  void submit() override;
+
+ private:
+  void init();
 
   MEM_CXX_CLASS_ALLOC_FUNCS("MTLDrawList");
 };
 
-} // namespace gpu
-} // namespace blender
+} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_drawlist.mm b/source/blender/gpu/metal/mtl_drawlist.mm
new file mode 100644
index 00000000000..99194d2b72c
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_drawlist.mm
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Implementation of Multi Draw Indirect using Metal.
+ * Fallback if the needed extensions are not supported. + */ + +#include "BLI_assert.h" + +#include "GPU_batch.h" +#include "mtl_common.hh" +#include "mtl_drawlist.hh" +#include "mtl_primitive.hh" + +using namespace blender::gpu; + +namespace blender::gpu { + +/* Indirect draw call structure for reference. */ +/* MTLDrawPrimitivesIndirectArguments -- + * https://developer.apple.com/documentation/metal/mtldrawprimitivesindirectarguments?language=objc + */ +/* struct MTLDrawPrimitivesIndirectArguments { + * uint32_t vertexCount; + * uint32_t instanceCount; + * uint32_t vertexStart; + * uint32_t baseInstance; +};*/ + +/* MTLDrawIndexedPrimitivesIndirectArguments -- + * https://developer.apple.com/documentation/metal/mtldrawindexedprimitivesindirectarguments?language=objc + */ +/* struct MTLDrawIndexedPrimitivesIndirectArguments { + * uint32_t indexCount; + * uint32_t instanceCount; + * uint32_t indexStart; + * uint32_t baseVertex; + * uint32_t baseInstance; +};*/ + +#define MDI_ENABLED (buffer_size_ != 0) +#define MDI_DISABLED (buffer_size_ == 0) +#define MDI_INDEXED (base_index_ != UINT_MAX) + +MTLDrawList::MTLDrawList(int length) +{ + BLI_assert(length > 0); + batch_ = nullptr; + command_len_ = 0; + base_index_ = 0; + command_offset_ = 0; + data_size_ = 0; + buffer_size_ = sizeof(MTLDrawIndexedPrimitivesIndirectArguments) * length; + data_ = (void *)MEM_mallocN(buffer_size_, __func__); +} + +MTLDrawList::~MTLDrawList() +{ + if (data_) { + MEM_freeN(data_); + data_ = nullptr; + } +} + +void MTLDrawList::init() +{ + MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get()); + BLI_assert(ctx); + BLI_assert(MDI_ENABLED); + BLI_assert(data_ == nullptr); + UNUSED_VARS_NDEBUG(ctx); + + batch_ = nullptr; + command_len_ = 0; + BLI_assert(data_); + + command_offset_ = 0; +} + +void MTLDrawList::append(GPUBatch *gpu_batch, int i_first, int i_count) +{ + /* Fallback when MultiDrawIndirect is not supported/enabled. */ + MTLShader *shader = static_cast<MTLShader *>(unwrap(gpu_batch->shader)); + bool requires_ssbo = (shader->get_uses_ssbo_vertex_fetch()); + bool requires_emulation = mtl_needs_topology_emulation(gpu_batch->prim_type); + if (MDI_DISABLED || requires_ssbo || requires_emulation) { + GPU_batch_draw_advanced(gpu_batch, 0, 0, i_first, i_count); + return; + } + + if (data_ == nullptr) { + this->init(); + } + BLI_assert(data_); + + MTLBatch *mtl_batch = static_cast<MTLBatch *>(gpu_batch); + BLI_assert(mtl_batch); + if (mtl_batch != batch_) { + /* Submit existing calls. */ + this->submit(); + + /* Begin new batch. */ + batch_ = mtl_batch; + + /* Cached for faster access. */ + MTLIndexBuf *el = batch_->elem_(); + base_index_ = el ? el->index_base_ : UINT_MAX; + v_first_ = el ? el->index_start_ : 0; + v_count_ = el ? el->index_len_ : batch_->verts_(0)->vertex_len; + } + + if (v_count_ == 0) { + /* Nothing to draw. */ + return; + } + + if (MDI_INDEXED) { + MTLDrawIndexedPrimitivesIndirectArguments *cmd = + reinterpret_cast<MTLDrawIndexedPrimitivesIndirectArguments *>((char *)data_ + + command_offset_); + cmd->indexStart = v_first_; + cmd->indexCount = v_count_; + cmd->instanceCount = i_count; + cmd->baseVertex = base_index_; + cmd->baseInstance = i_first; + } + else { + MTLDrawPrimitivesIndirectArguments *cmd = + reinterpret_cast<MTLDrawPrimitivesIndirectArguments *>((char *)data_ + command_offset_); + cmd->vertexStart = v_first_; + cmd->vertexCount = v_count_; + cmd->instanceCount = i_count; + cmd->baseInstance = i_first; + } + + size_t command_size = MDI_INDEXED ? 
sizeof(MTLDrawIndexedPrimitivesIndirectArguments) : + sizeof(MTLDrawPrimitivesIndirectArguments); + + command_offset_ += command_size; + command_len_++; + + /* Check if we can fit at least one other command. */ + if (command_offset_ + command_size > buffer_size_) { + this->submit(); + } + + return; +} + +void MTLDrawList::submit() +{ + /* Metal does not support MDI from the host side, but we still benefit from only executing the + * batch bind a single time, rather than per-draw. + * NOTE(Metal): Consider using #MTLIndirectCommandBuffer to achieve similar behavior. */ + if (command_len_ == 0) { + return; + } + + /* Something's wrong if we get here without MDI support. */ + BLI_assert(MDI_ENABLED); + BLI_assert(data_); + + /* Host-side MDI Currently unsupported on Metal. */ + bool can_use_MDI = false; + + /* Verify context. */ + MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get()); + BLI_assert(ctx); + + /* Execute indirect draw calls. */ + MTLShader *shader = static_cast<MTLShader *>(unwrap(batch_->shader)); + bool SSBO_MODE = (shader->get_uses_ssbo_vertex_fetch()); + if (SSBO_MODE) { + can_use_MDI = false; + BLI_assert(false); + return; + } + + /* Heuristic to determine whether using indirect drawing is more efficient. */ + size_t command_size = MDI_INDEXED ? sizeof(MTLDrawIndexedPrimitivesIndirectArguments) : + sizeof(MTLDrawPrimitivesIndirectArguments); + const bool is_finishing_a_buffer = (command_offset_ + command_size > buffer_size_); + can_use_MDI = can_use_MDI && (is_finishing_a_buffer || command_len_ > 2); + + /* Bind Batch to setup render pipeline state. */ + id<MTLRenderCommandEncoder> rec = batch_->bind(0, 0, 0, 0); + if (!rec) { + BLI_assert_msg(false, "A RenderCommandEncoder should always be available!\n"); + return; + } + + /* Common properties. */ + MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(batch_->prim_type); + + /* Execute multi-draw indirect. */ + if (can_use_MDI && false) { + /* Metal Doesn't support MDI -- Singular Indirect draw calls are supported, + * but Multi-draw is not. + * TODO(Metal): Consider using #IndirectCommandBuffers to provide similar + * behavior. */ + } + else { + + /* Execute draws manually. */ + if (MDI_INDEXED) { + MTLDrawIndexedPrimitivesIndirectArguments *cmd = + (MTLDrawIndexedPrimitivesIndirectArguments *)data_; + MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>( + reinterpret_cast<IndexBuf *>(batch_->elem)); + BLI_assert(mtl_elem); + MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_); + uint32_t index_size = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 2 : 4; + uint32_t v_first_ofs = (mtl_elem->index_start_ * index_size); + uint32_t index_count = cmd->indexCount; + + /* Fetch index buffer. May return an index buffer of a differing format, + * if index buffer optimization is used. In these cases, mtl_prim_type and + * index_count get updated with the new properties. */ + GPUPrimType final_prim_type = batch_->prim_type; + id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count); + BLI_assert(index_buffer != nil); + + /* Final primitive type. */ + mtl_prim_type = gpu_prim_type_to_metal(final_prim_type); + + if (index_buffer != nil) { + + /* Set depth stencil state (requires knowledge of primitive type). 
*/ + ctx->ensure_depth_stencil_state(mtl_prim_type); + + for (int i = 0; i < command_len_; i++, cmd++) { + [rec drawIndexedPrimitives:mtl_prim_type + indexCount:index_count + indexType:index_type + indexBuffer:index_buffer + indexBufferOffset:v_first_ofs + instanceCount:cmd->instanceCount + baseVertex:cmd->baseVertex + baseInstance:cmd->baseInstance]; + ctx->main_command_buffer.register_draw_counters(cmd->indexCount * cmd->instanceCount); + } + } + else { + BLI_assert_msg(false, "Index buffer does not have backing Metal buffer"); + } + } + else { + MTLDrawPrimitivesIndirectArguments *cmd = (MTLDrawPrimitivesIndirectArguments *)data_; + + /* Verify if topology emulation is required. */ + if (mtl_needs_topology_emulation(batch_->prim_type)) { + BLI_assert_msg(false, "topology emulation cases should use fallback."); + } + else { + + /* Set depth stencil state (requires knowledge of primitive type). */ + ctx->ensure_depth_stencil_state(mtl_prim_type); + + for (int i = 0; i < command_len_; i++, cmd++) { + [rec drawPrimitives:mtl_prim_type + vertexStart:cmd->vertexStart + vertexCount:cmd->vertexCount + instanceCount:cmd->instanceCount + baseInstance:cmd->baseInstance]; + ctx->main_command_buffer.register_draw_counters(cmd->vertexCount * cmd->instanceCount); + } + } + } + } + + /* Unbind batch. */ + batch_->unbind(); + + /* Reset command offsets. */ + command_len_ = 0; + command_offset_ = 0; + + /* Avoid keeping reference to the batch. */ + batch_ = nullptr; +} + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_immediate.mm b/source/blender/gpu/metal/mtl_immediate.mm index 4b63a3b1ce2..ee48bdd6ee1 100644 --- a/source/blender/gpu/metal/mtl_immediate.mm +++ b/source/blender/gpu/metal/mtl_immediate.mm @@ -99,6 +99,9 @@ void MTLImmediate::end() MTLRenderPipelineStateDescriptor &desc = state_manager->get_pipeline_descriptor(); const MTLShaderInterface *interface = active_mtl_shader->get_interface(); + /* Reset vertex descriptor to default state. */ + desc.reset_vertex_descriptor(); + desc.vertex_descriptor.num_attributes = interface->get_total_attributes(); desc.vertex_descriptor.num_vert_buffers = 1; diff --git a/source/blender/gpu/metal/mtl_pso_descriptor_state.hh b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh index 198d309874b..04ceb5bdf03 100644 --- a/source/blender/gpu/metal/mtl_pso_descriptor_state.hh +++ b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh @@ -243,6 +243,19 @@ struct MTLRenderPipelineStateDescriptor { return hash; } + + /* Reset the Vertex Descriptor to default. */ + void reset_vertex_descriptor() + { + vertex_descriptor.num_attributes = 0; + vertex_descriptor.num_vert_buffers = 0; + for (int i = 0; i < GPU_VERT_ATTR_MAX_LEN; i++) { + vertex_descriptor.attributes[i].format = MTLVertexFormatInvalid; + vertex_descriptor.attributes[i].offset = 0; + } + vertex_descriptor.uses_ssbo_vertex_fetch = false; + vertex_descriptor.num_ssbo_attributes = 0; + } }; } // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_shader_generator.hh b/source/blender/gpu/metal/mtl_shader_generator.hh index 43890ca0170..63e2e6d5924 100644 --- a/source/blender/gpu/metal/mtl_shader_generator.hh +++ b/source/blender/gpu/metal/mtl_shader_generator.hh @@ -497,7 +497,7 @@ inline std::string get_stage_class_name(ShaderStage stage) inline bool is_builtin_type(std::string type) { /* Add Types as needed. */ - /* TODO(Metal): Consider replacing this with a switch and constexpr hash and switch. 
+ /* TODO(Metal): Consider replacing this with a switch and `constexpr` hash and switch. * Though most efficient and maintainable approach to be determined. */ static std::map<std::string, eMTLDataType> glsl_builtin_types = { {"float", MTL_DATATYPE_FLOAT}, diff --git a/source/blender/gpu/metal/mtl_shader_interface.mm b/source/blender/gpu/metal/mtl_shader_interface.mm index 3703d5b5684..97a82345761 100644 --- a/source/blender/gpu/metal/mtl_shader_interface.mm +++ b/source/blender/gpu/metal/mtl_shader_interface.mm @@ -117,9 +117,7 @@ uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset, MTLShaderUniformBlock &uni_block = ubos_[total_uniform_blocks_]; uni_block.name_offset = name_offset; - /* We offset the buffer binding index by one, as the first slot is reserved for push constant - * data. */ - uni_block.buffer_index = buffer_index + 1; + uni_block.buffer_index = buffer_index; uni_block.size = size; uni_block.current_offset = 0; uni_block.stage_mask = ShaderStage::BOTH; @@ -297,8 +295,10 @@ void MTLShaderInterface::prepare_common_shader_inputs() current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ubo.name_offset)); /* Location refers to the index in the ubos_ array. */ current_input->location = ubo_index; - /* Final binding location refers to the buffer binding index within the shader (Relative to - * MTL_uniform_buffer_base_index). */ + /* Binding location refers to the UBO bind slot in + * #MTLContextGlobalShaderPipelineState::ubo_bindings. The buffer bind index [[buffer(N)]] + * within the shader will apply an offset for bound vertex buffers and the default uniform + * PushConstantBlock. */ current_input->binding = shd_ubo.buffer_index; current_input++; } diff --git a/source/blender/gpu/metal/mtl_texture.hh b/source/blender/gpu/metal/mtl_texture.hh index ebc9eb2e00e..28b55306707 100644 --- a/source/blender/gpu/metal/mtl_texture.hh +++ b/source/blender/gpu/metal/mtl_texture.hh @@ -51,9 +51,9 @@ struct TextureUpdateRoutineSpecialisation { uint64_t hash() const { blender::DefaultHash<std::string> string_hasher; - return uint64_t(string_hasher( + return (uint64_t)string_hasher( this->input_data_type + this->output_data_type + - std::to_string((this->component_count_input << 8) + this->component_count_output))); + std::to_string((this->component_count_input << 8) + this->component_count_output)); } }; diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm index 32029db6fd9..29dcc8d32ee 100644 --- a/source/blender/gpu/metal/mtl_texture.mm +++ b/source/blender/gpu/metal/mtl_texture.mm @@ -337,20 +337,6 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst, GPU_batch_draw(quad); - /* TMP draw with IMM TODO(Metal): Remove this once GPUBatch is supported. */ - GPUVertFormat *imm_format = immVertexFormat(); - uint pos = GPU_vertformat_attr_add(imm_format, "pos", GPU_COMP_F32, 2, GPU_FETCH_FLOAT); - - immBindShader(shader); - immBegin(GPU_PRIM_TRI_STRIP, 4); - immVertex2f(pos, 1, 0); - immVertex2f(pos, 0, 0); - immVertex2f(pos, 1, 1); - immVertex2f(pos, 0, 1); - immEnd(); - immUnbindProgram(); - /**********************/ - /* restoring old pipeline state. */ GPU_depth_mask(depth_write_prev); GPU_stencil_write_mask_set(stencil_mask_prev); @@ -1472,10 +1458,82 @@ bool gpu::MTLTexture::init_internal() bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo) { - /* Not a valid vertex buffer format, though verifying texture is not set as such - * as this is not supported on Apple Silicon. 
*/ - BLI_assert_msg(this->format_ != GPU_DEPTH24_STENCIL8, - "Apple silicon does not support GPU_DEPTH24_S8"); + if (this->format_ == GPU_DEPTH24_STENCIL8) { + /* Apple Silicon requires GPU_DEPTH32F_STENCIL8 instead of GPU_DEPTH24_STENCIL8. */ + this->format_ = GPU_DEPTH32F_STENCIL8; + } + + MTLPixelFormat mtl_format = gpu_texture_format_to_metal(this->format_); + mtl_max_mips_ = 1; + mipmaps_ = 0; + this->mip_range_set(0, 0); + + /* Create texture from GPUVertBuf's buffer. */ + MTLVertBuf *mtl_vbo = static_cast<MTLVertBuf *>(unwrap(vbo)); + mtl_vbo->bind(); + mtl_vbo->flag_used(); + + /* Get Metal Buffer. */ + id<MTLBuffer> source_buffer = mtl_vbo->get_metal_buffer(); + BLI_assert(source_buffer); + + /* Verify size. */ + if (w_ <= 0) { + MTL_LOG_WARNING("Allocating texture buffer of width 0!\n"); + w_ = 1; + } + + /* Verify Texture and vertex buffer alignment. */ + int bytes_per_pixel = get_mtl_format_bytesize(mtl_format); + int bytes_per_row = bytes_per_pixel * w_; + + MTLContext *mtl_ctx = MTLContext::get(); + uint32_t align_requirement = static_cast<uint32_t>( + [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]); + + /* Verify per-vertex size aligns with texture size. */ + const GPUVertFormat *format = GPU_vertbuf_get_format(vbo); + BLI_assert(bytes_per_pixel == format->stride && + "Pixel format stride MUST match the texture format stride -- These being different " + "is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex"); + UNUSED_VARS_NDEBUG(format); + + /* Create texture descriptor. */ + BLI_assert(type_ == GPU_TEXTURE_BUFFER); + texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; + texture_descriptor_.pixelFormat = mtl_format; + texture_descriptor_.textureType = MTLTextureTypeTextureBuffer; + texture_descriptor_.width = w_; + texture_descriptor_.height = 1; + texture_descriptor_.depth = 1; + texture_descriptor_.arrayLength = 1; + texture_descriptor_.mipmapLevelCount = mtl_max_mips_; + texture_descriptor_.usage = + MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite | + MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. */ + texture_descriptor_.storageMode = [source_buffer storageMode]; + texture_descriptor_.sampleCount = 1; + texture_descriptor_.cpuCacheMode = [source_buffer cpuCacheMode]; + texture_descriptor_.hazardTrackingMode = [source_buffer hazardTrackingMode]; + + texture_ = [source_buffer + newTextureWithDescriptor:texture_descriptor_ + offset:0 + bytesPerRow:ceil_to_multiple_u(bytes_per_row, align_requirement)]; + aligned_w_ = bytes_per_row / bytes_per_pixel; + + BLI_assert(texture_); + texture_.label = [NSString stringWithUTF8String:this->get_name()]; + is_baked_ = true; + is_dirty_ = false; + resource_mode_ = MTL_TEXTURE_MODE_VBO; + + /* Track Status. */ + vert_buffer_ = mtl_vbo; + vert_buffer_mtl_ = source_buffer; + /* Cleanup. */ + [texture_descriptor_ release]; + texture_descriptor_ = nullptr; return true; } @@ -1522,7 +1580,6 @@ bool gpu::MTLTexture::texture_is_baked() /* Prepare texture parameters after initialization, but before baking. */ void gpu::MTLTexture::prepare_internal() { - /* Derive implicit usage flags for Depth/Stencil attachments. */ if (format_flag_ & GPU_FORMAT_DEPTH || format_flag_ & GPU_FORMAT_STENCIL) { gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_ATTACHMENT; @@ -1687,7 +1744,7 @@ void gpu::MTLTexture::ensure_baked() /* Determine Resource Mode. */ resource_mode_ = MTL_TEXTURE_MODE_DEFAULT; - /* Create texture. */ + /* Standard texture allocation. 
*/
   texture_ = [ctx->device newTextureWithDescriptor:texture_descriptor_];
   [texture_descriptor_ release];
diff --git a/source/blender/gpu/metal/mtl_texture_util.mm b/source/blender/gpu/metal/mtl_texture_util.mm
index 5ed7659f260..33a62e2e3ef 100644
--- a/source/blender/gpu/metal/mtl_texture_util.mm
+++ b/source/blender/gpu/metal/mtl_texture_util.mm
@@ -34,7 +34,7 @@ MTLPixelFormat gpu_texture_format_to_metal(eGPUTextureFormat tex_format)
 {
   switch (tex_format) {
-    /* Formats texture & renderbuffer. */
+    /* Formats texture & render-buffer. */
     case GPU_RGBA8UI:
       return MTLPixelFormatRGBA8Uint;
     case GPU_RGBA8I:
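
As a closing aid: the MTLDrawList added above reduces to one pattern. append() packs a record shaped like MTLDrawPrimitivesIndirectArguments into a CPU-side buffer (flushing when full), and submit() binds the batch's pipeline state once and then replays each record as an individual draw, because Metal exposes no host-side multi-draw-indirect. Below is a minimal, self-contained C++ sketch of that append/flush pattern; DrawArgs and DrawList are simplified stand-ins, not the real MTLDrawList API.

#include <cstdint>
#include <cstdio>
#include <vector>

/* Field order mirrors MTLDrawPrimitivesIndirectArguments. */
struct DrawArgs {
  uint32_t vertexCount;
  uint32_t instanceCount;
  uint32_t vertexStart;
  uint32_t baseInstance;
};

struct DrawList {
  std::vector<DrawArgs> commands;
  size_t capacity = 16; /* Stands in for buffer_size_ / sizeof(DrawArgs). */

  void append(uint32_t v_first, uint32_t v_count, uint32_t i_first, uint32_t i_count)
  {
    commands.push_back({v_count, i_count, v_first, i_first});
    /* Flush once the buffer cannot take another command. */
    if (commands.size() >= capacity) {
      submit();
    }
  }

  void submit()
  {
    if (commands.empty()) {
      return;
    }
    /* The real code binds the batch's render pipeline state once here, then
     * replays each recorded command as a separate draw call. */
    for (const DrawArgs &cmd : commands) {
      std::printf("draw %u verts from %u, %u instances from base %u\n",
                  cmd.vertexCount,
                  cmd.vertexStart,
                  cmd.instanceCount,
                  cmd.baseInstance);
    }
    commands.clear();
  }
};

int main()
{
  DrawList list;
  list.append(0, 6, 0, 1); /* Two draws batched under a single bind. */
  list.append(6, 6, 0, 4);
  list.submit();
  return 0;
}

The win is a single batch bind per flush rather than per draw. The real implementation additionally special-cases indexed geometry (MTLDrawIndexedPrimitivesIndirectArguments) and falls back to GPU_batch_draw_advanced for SSBO vertex fetch and emulated topologies.
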