Diffstat (limited to 'source/blender/gpu/metal')
25 files changed, 4011 insertions, 179 deletions
diff --git a/source/blender/gpu/metal/mtl_backend.hh b/source/blender/gpu/metal/mtl_backend.hh index 214a5d738a9..082fab24ba4 100644 --- a/source/blender/gpu/metal/mtl_backend.hh +++ b/source/blender/gpu/metal/mtl_backend.hh @@ -63,7 +63,7 @@ class MTLBackend : public GPUBackend { /* MTL Allocators need to be implemented in separate .mm files, due to allocation of Objective-C * objects. */ - Context *context_alloc(void *ghost_window) override; + Context *context_alloc(void *ghost_window, void *ghost_context) override; Batch *batch_alloc() override; DrawList *drawlist_alloc(int list_length) override; FrameBuffer *framebuffer_alloc(const char *name) override; diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm index 3cd7794f6c9..240951c1ebd 100644 --- a/source/blender/gpu/metal/mtl_backend.mm +++ b/source/blender/gpu/metal/mtl_backend.mm @@ -8,12 +8,16 @@ #include "gpu_backend.hh" #include "mtl_backend.hh" +#include "mtl_batch.hh" #include "mtl_context.hh" +#include "mtl_drawlist.hh" #include "mtl_framebuffer.hh" +#include "mtl_immediate.hh" #include "mtl_index_buffer.hh" #include "mtl_query.hh" #include "mtl_shader.hh" #include "mtl_uniform_buffer.hh" +#include "mtl_vertex_buffer.hh" #include "gpu_capabilities_private.hh" #include "gpu_platform_private.hh" @@ -36,21 +40,19 @@ void MTLBackend::samplers_update(){ /* Placeholder -- Handled in MTLContext. */ }; -Context *MTLBackend::context_alloc(void *ghost_window) +Context *MTLBackend::context_alloc(void *ghost_window, void *ghost_context) { - return new MTLContext(ghost_window); + return new MTLContext(ghost_window, ghost_context); }; Batch *MTLBackend::batch_alloc() { - /* TODO(Metal): Implement MTLBatch. */ - return nullptr; + return new MTLBatch(); }; DrawList *MTLBackend::drawlist_alloc(int list_length) { - /* TODO(Metal): Implement MTLDrawList. */ - return nullptr; + return new MTLDrawList(list_length); }; FrameBuffer *MTLBackend::framebuffer_alloc(const char *name) @@ -94,8 +96,7 @@ StorageBuf *MTLBackend::storagebuf_alloc(int size, GPUUsageType usage, const cha VertBuf *MTLBackend::vertbuf_alloc() { - /* TODO(Metal): Implement MTLVertBuf. */ - return nullptr; + return new MTLVertBuf(); } void MTLBackend::render_begin() @@ -417,6 +418,7 @@ void MTLBackend::capabilities_init(MTLContext *ctx) GCaps.depth_blitting_workaround = false; GCaps.use_main_context_workaround = false; GCaps.broken_amd_driver = false; + GCaps.clear_viewport_workaround = true; /* Metal related workarounds. */ /* Minimum per-vertex stride is 4 bytes in Metal. diff --git a/source/blender/gpu/metal/mtl_batch.hh b/source/blender/gpu/metal/mtl_batch.hh new file mode 100644 index 00000000000..9e179e662b5 --- /dev/null +++ b/source/blender/gpu/metal/mtl_batch.hh @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * GPU geometry batch + * Contains VAOs + VBOs + Shader representing a drawable entity. + */ + +#pragma once + +#include "MEM_guardedalloc.h" +#include "gpu_batch_private.hh" +#include "mtl_index_buffer.hh" +#include "mtl_primitive.hh" +#include "mtl_shader.hh" +#include "mtl_vertex_buffer.hh" + +namespace blender::gpu { + +class MTLContext; +class MTLShaderInterface; + +#define GPU_VAO_STATIC_LEN 64 + +struct VertexBufferID { + uint32_t id : 16; + uint32_t is_instance : 15; + uint32_t used : 1; +}; + +class MTLBatch : public Batch { + + /* Vertex Bind-state Caching for a given shader interface used with the Batch. 
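+ * Each cached entry pairs a ShaderInterface with the vertex descriptor and buffer-binding
+ * list resolved for it, so repeated draws with an unchanged batch/shader pairing skip
+ * attribute re-resolution. Entries are invalidated in bulk by bumping the batch's cache
+ * life index whenever the batch is flagged dirty.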
*/ + struct VertexDescriptorShaderInterfacePair { + MTLVertexDescriptor vertex_descriptor{}; + const ShaderInterface *interface = nullptr; + uint16_t attr_mask{}; + int num_buffers{}; + VertexBufferID bufferIds[GPU_BATCH_VBO_MAX_LEN] = {}; + /* Cache life index compares a cache entry with the active MTLBatch state. + * This is initially set to the cache life index of MTLBatch. If the batch has been modified, + * this index is incremented to cheaply invalidate existing cache entries. */ + uint32_t cache_life_index = 0; + }; + + class MTLVertexDescriptorCache { + + private: + MTLBatch *batch_; + + VertexDescriptorShaderInterfacePair cache_[GPU_VAO_STATIC_LEN] = {}; + MTLContext *cache_context_ = nullptr; + uint32_t cache_life_index_ = 0; + + public: + MTLVertexDescriptorCache(MTLBatch *batch) : batch_(batch){}; + VertexDescriptorShaderInterfacePair *find(const ShaderInterface *interface); + bool insert(VertexDescriptorShaderInterfacePair &data); + + private: + void vertex_descriptor_cache_init(MTLContext *ctx); + void vertex_descriptor_cache_clear(); + void vertex_descriptor_cache_ensure(); + }; + + private: + MTLShader *active_shader_ = nullptr; + bool shader_in_use_ = false; + MTLVertexDescriptorCache vao_cache = {this}; + + /* Topology emulation. */ + gpu::MTLBuffer *emulated_topology_buffer_ = nullptr; + GPUPrimType emulated_topology_type_; + uint32_t topology_buffer_input_v_count_ = 0; + uint32_t topology_buffer_output_v_count_ = 0; + + public: + MTLBatch(){}; + ~MTLBatch(){}; + + void draw(int v_first, int v_count, int i_first, int i_count) override; + void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override + { + /* TODO(Metal): Support indirect draw commands. */ + } + void multi_draw_indirect(GPUStorageBuf *indirect_buf, + int count, + intptr_t offset, + intptr_t stride) override + { + /* TODO(Metal): Support indirect draw commands. */ + } + + /* Returns an initialized RenderComandEncoder for drawing if all is good. + * Otherwise, nil. */ + id<MTLRenderCommandEncoder> bind(uint v_first, uint v_count, uint i_first, uint i_count); + void unbind(); + + /* Convenience getters. */ + MTLIndexBuf *elem_() const + { + return static_cast<MTLIndexBuf *>(unwrap(elem)); + } + MTLVertBuf *verts_(const int index) const + { + return static_cast<MTLVertBuf *>(unwrap(verts[index])); + } + MTLVertBuf *inst_(const int index) const + { + return static_cast<MTLVertBuf *>(unwrap(inst[index])); + } + MTLShader *active_shader_get() const + { + return active_shader_; + } + + private: + void shader_bind(); + void draw_advanced(int v_first, int v_count, int i_first, int i_count); + int prepare_vertex_binding(MTLVertBuf *verts, + MTLRenderPipelineStateDescriptor &desc, + const MTLShaderInterface *interface, + uint16_t &attr_mask, + bool instanced); + + id<MTLBuffer> get_emulated_toplogy_buffer(GPUPrimType &in_out_prim_type, uint32_t &v_count); + + void prepare_vertex_descriptor_and_bindings( + MTLVertBuf **buffers, int &num_buffers, int v_first, int v_count, int i_first, int i_count); + + MEM_CXX_CLASS_ALLOC_FUNCS("MTLBatch"); +}; + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_batch.mm b/source/blender/gpu/metal/mtl_batch.mm new file mode 100644 index 00000000000..988fb9b793b --- /dev/null +++ b/source/blender/gpu/metal/mtl_batch.mm @@ -0,0 +1,998 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * Metal implementation of GPUBatch. 
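+ * Resolves a per-shader-interface vertex descriptor (cached in a small VAO-style cache),
+ * binds vertex/index buffers, then encodes draws onto the active render command encoder.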
+ */ + +#include "BLI_assert.h" +#include "BLI_span.hh" + +#include "BKE_global.h" + +#include "GPU_common.h" +#include "gpu_batch_private.hh" +#include "gpu_shader_private.hh" + +#include "mtl_batch.hh" +#include "mtl_context.hh" +#include "mtl_debug.hh" +#include "mtl_index_buffer.hh" +#include "mtl_shader.hh" +#include "mtl_vertex_buffer.hh" + +#include <string> + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Creation & Deletion + * \{ */ +void MTLBatch::draw(int v_first, int v_count, int i_first, int i_count) +{ + if (this->flag & GPU_BATCH_INVALID) { + this->shader_in_use_ = false; + } + this->draw_advanced(v_first, v_count, i_first, i_count); +} + +void MTLBatch::shader_bind() +{ + if (active_shader_ && active_shader_->is_valid()) { + active_shader_->bind(); + shader_in_use_ = true; + } +} + +void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_init(MTLContext *ctx) +{ + BLI_assert(ctx != nullptr); + this->vertex_descriptor_cache_clear(); + cache_context_ = ctx; +} + +void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_clear() +{ + cache_life_index_++; + cache_context_ = nullptr; +} + +void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_ensure() +{ + if (this->cache_context_ != nullptr) { + + /* Invalidate vertex descriptor bindings cache if batch has changed. */ + if (batch_->flag & GPU_BATCH_DIRTY) { + batch_->flag &= ~GPU_BATCH_DIRTY; + this->vertex_descriptor_cache_clear(); + } + } + + /* Initialize cache if not ready. */ + if (cache_context_ == nullptr) { + this->vertex_descriptor_cache_init(MTLContext::get()); + } +} + +MTLBatch::VertexDescriptorShaderInterfacePair *MTLBatch::MTLVertexDescriptorCache::find( + const ShaderInterface *interface) +{ + this->vertex_descriptor_cache_ensure(); + for (int i = 0; i < GPU_VAO_STATIC_LEN; ++i) { + if (cache_[i].interface == interface && cache_[i].cache_life_index == cache_life_index_) { + return &cache_[i]; + } + } + return nullptr; +} + +bool MTLBatch::MTLVertexDescriptorCache::insert( + MTLBatch::VertexDescriptorShaderInterfacePair &data) +{ + vertex_descriptor_cache_ensure(); + for (int i = 0; i < GPU_VAO_STATIC_LEN; ++i) { + if (cache_[i].interface == nullptr || cache_[i].cache_life_index != cache_life_index_) { + cache_[i] = data; + cache_[i].cache_life_index = cache_life_index_; + return true; + } + } + return false; +} + +int MTLBatch::prepare_vertex_binding(MTLVertBuf *verts, + MTLRenderPipelineStateDescriptor &desc, + const MTLShaderInterface *interface, + uint16_t &attr_mask, + bool instanced) +{ + + const GPUVertFormat *format = &verts->format; + /* Whether the current vertex buffer has been added to the buffer layout descriptor. */ + bool buffer_added = false; + /* Per-vertex stride of current vertex buffer. */ + int buffer_stride = format->stride; + /* Buffer binding index of the vertex buffer once added to the buffer layout descriptor. */ + int buffer_index = -1; + int attribute_offset = 0; + + if (!active_shader_->get_uses_ssbo_vertex_fetch()) { + BLI_assert( + buffer_stride >= 4 && + "In Metal, Vertex buffer stride should be 4. SSBO Vertex fetch is not affected by this"); + } + + /* Iterate over GPUVertBuf vertex format and find attributes matching those in the active + * shader's interface. */ + for (uint32_t a_idx = 0; a_idx < format->attr_len; a_idx++) { + const GPUVertAttr *a = &format->attrs[a_idx]; + + if (format->deinterleaved) { + attribute_offset += ((a_idx == 0) ? 
0 : format->attrs[a_idx - 1].size) * verts->vertex_len; + buffer_stride = a->size; + } + else { + attribute_offset = a->offset; + } + + /* Find attribute with the matching name. Attributes may have multiple compatible + * name aliases. */ + for (uint32_t n_idx = 0; n_idx < a->name_len; n_idx++) { + const char *name = GPU_vertformat_attr_name_get(format, a, n_idx); + const ShaderInput *input = interface->attr_get(name); + + if (input == nullptr || input->location == -1) { + /* Vertex/instance buffers provided have attribute data for attributes which are not needed + * by this particular shader. This shader only needs binding information for the attributes + * has in the shader interface. */ + MTL_LOG_WARNING( + "MTLBatch: Could not find attribute with name '%s' (defined in active vertex format) " + "in the shader interface for shader '%s'\n", + name, + interface->get_name()); + continue; + } + + /* Fetch metal attribute information. */ + const MTLShaderInputAttribute &mtl_attr = interface->get_attribute(input->location); + BLI_assert(mtl_attr.location >= 0); + /* Verify that the attribute location from the shader interface + * matches the attribute location returned. */ + BLI_assert(mtl_attr.location == input->location); + + /* Check if attribute is already present in the given slot. */ + if ((~attr_mask) & (1 << mtl_attr.location)) { + MTL_LOG_INFO( + " -- [Batch] Skipping attribute with input location %d (As one is already bound)\n", + mtl_attr.location); + } + else { + + /* Update attribute used-slot mask. */ + attr_mask &= ~(1 << mtl_attr.location); + + /* Add buffer layout entry in descriptor if it has not yet been added + * for current vertex buffer. */ + if (!buffer_added) { + buffer_index = desc.vertex_descriptor.num_vert_buffers; + desc.vertex_descriptor.buffer_layouts[buffer_index].step_function = + (instanced) ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex; + desc.vertex_descriptor.buffer_layouts[buffer_index].step_rate = 1; + desc.vertex_descriptor.buffer_layouts[buffer_index].stride = buffer_stride; + desc.vertex_descriptor.num_vert_buffers++; + buffer_added = true; + + MTL_LOG_INFO(" -- [Batch] Adding source %s buffer (Index: %d, Stride: %d)\n", + (instanced) ? "instance" : "vertex", + buffer_index, + buffer_stride); + } + else { + /* Ensure stride is correct for de-interleaved attributes. */ + desc.vertex_descriptor.buffer_layouts[buffer_index].stride = buffer_stride; + } + + /* Handle Matrix/Array vertex attribute types. + * Metal does not natively support these as attribute types, so we handle these cases + * by stacking together compatible types (e.g. 4xVec4 for Mat4) and combining + * the data in the shader. + * The generated Metal shader will contain a generated input binding, which reads + * in individual attributes and merges them into the desired type after vertex + * assembly. e.g. a Mat4 (Float4x4) will generate 4 Float4 attributes. */ + if (a->comp_len == 16 || a->comp_len == 12 || a->comp_len == 8) { + BLI_assert_msg( + a->comp_len == 16, + "only mat4 attributes currently supported -- Not ready to handle other long " + "component length attributes yet"); + + /* SSBO Vertex Fetch Attribute safety checks. */ + if (active_shader_->get_uses_ssbo_vertex_fetch()) { + /* When using SSBO vertex fetch, we do not need to expose split attributes, + * A matrix can be read directly as a whole block of contiguous data. 
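+ * e.g. a mat4 occupies 64 contiguous bytes (4 x float4), so a single
+ * GPU_SHADER_ATTR_TYPE_MAT4 binding replaces the four split float4 attribute
+ * locations used in the non-SSBO path below.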
*/ + MTLSSBOAttribute ssbo_attr(mtl_attr.index, + buffer_index, + attribute_offset, + buffer_stride, + GPU_SHADER_ATTR_TYPE_MAT4, + instanced); + active_shader_->ssbo_vertex_fetch_bind_attribute(ssbo_attr); + desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] = + ssbo_attr; + desc.vertex_descriptor.num_ssbo_attributes++; + } + else { + + /* Handle Mat4 attributes. */ + if (a->comp_len == 16) { + /* Debug safety checks. */ + BLI_assert_msg(mtl_attr.matrix_element_count == 4, + "mat4 type expected but there are fewer components"); + BLI_assert_msg(mtl_attr.size == 16, "Expecting subtype 'vec4' with 16 bytes"); + BLI_assert_msg( + mtl_attr.format == MTLVertexFormatFloat4, + "Per-attribute vertex format MUST be float4 for an input type of 'mat4'"); + + /* We have found the 'ROOT' attribute. A mat4 contains 4 consecutive float4 attribute + * locations we must map to. */ + for (int i = 0; i < a->comp_len / 4; i++) { + desc.vertex_descriptor.attributes[mtl_attr.location + i].format = + MTLVertexFormatFloat4; + /* Data is consecutive in the buffer for the whole matrix, each float4 will shift + * the offset by 16 bytes. */ + desc.vertex_descriptor.attributes[mtl_attr.location + i].offset = + attribute_offset + i * 16; + /* All source data for a matrix is in the same singular buffer. */ + desc.vertex_descriptor.attributes[mtl_attr.location + i].buffer_index = + buffer_index; + + /* Update total attribute account. */ + desc.vertex_descriptor.num_attributes = max_ii( + mtl_attr.location + i + 1, desc.vertex_descriptor.num_attributes); + MTL_LOG_INFO("-- Sub-Attrib Location: %d, offset: %d, buffer index: %d\n", + mtl_attr.location + i, + attribute_offset + i * 16, + buffer_index); + } + MTL_LOG_INFO( + "Float4x4 attribute type added for '%s' at attribute locations: %d to %d\n", + name, + mtl_attr.location, + mtl_attr.location + 3); + } + + /* Ensure we are not exceeding the attribute limit. */ + BLI_assert(desc.vertex_descriptor.num_attributes <= MTL_MAX_VERTEX_INPUT_ATTRIBUTES); + } + } + else { + + /* Handle Any required format conversions. + * NOTE(Metal): If there is a mis-match between the format of an attribute + * in the shader interface, and the specified format in the VertexBuffer VertexFormat, + * we need to perform a format conversion. + * + * The Metal API can perform certain conversions internally during vertex assembly: + * - Type Normalization e.g short2 to float2 between 0.0 to 1.0. + * - Type Truncation e.g. Float4 to Float2. + * - Type expansion e,g, Float3 to Float4 (Following 0,0,0,1 for assignment to empty + * elements). + * + * Certain conversion cannot be performed however, and in these cases, we need to + * instruct the shader to generate a specialized version with a conversion routine upon + * attribute read. + * - This handles cases such as conversion between types e.g. Integer to float without + * normalization. 
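+ * e.g. integer source data fetched with GPU_FETCH_INT_TO_FLOAT has no matching
+ * native MTLVertexFormat conversion, so the specialized read routine performs the
+ * cast in-shader.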
+ * + * For more information on the supported and unsupported conversions, see: + * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc + */ + MTLVertexFormat converted_format; + bool can_use_internal_conversion = mtl_convert_vertex_format( + mtl_attr.format, + (GPUVertCompType)a->comp_type, + a->comp_len, + (GPUVertFetchMode)a->fetch_mode, + &converted_format); + bool is_floating_point_format = (a->comp_type == GPU_COMP_F32); + + if (can_use_internal_conversion) { + desc.vertex_descriptor.attributes[mtl_attr.location].format = converted_format; + desc.vertex_descriptor.attributes[mtl_attr.location].format_conversion_mode = + is_floating_point_format ? (GPUVertFetchMode)GPU_FETCH_FLOAT : + (GPUVertFetchMode)GPU_FETCH_INT; + BLI_assert(converted_format != MTLVertexFormatInvalid); + } + else { + /* The internal implicit conversion is not supported. + * In this case, we need to handle conversion inside the shader. + * This is handled using `format_conversion_mode`. + * `format_conversion_mode` is assigned the blender-specified fetch mode (GPU_FETCH_*). + * This then controls how a given attribute is interpreted. The data will be read + * as specified and then converted appropriately to the correct form. + * + * e.g. if `GPU_FETCH_INT_TO_FLOAT` is specified, the specialized read-routine + * in the shader will read the data as an int, and cast this to floating point + * representation. (Rather than reading the source data as float). + * + * NOTE: Even if full conversion is not supported, we may still partially perform an + * implicit conversion where possible, such as vector truncation or expansion. */ + MTLVertexFormat converted_format; + bool can_convert = mtl_vertex_format_resize( + mtl_attr.format, a->comp_len, &converted_format); + desc.vertex_descriptor.attributes[mtl_attr.location].format = can_convert ? + converted_format : + mtl_attr.format; + desc.vertex_descriptor.attributes[mtl_attr.location].format_conversion_mode = + (GPUVertFetchMode)a->fetch_mode; + BLI_assert(desc.vertex_descriptor.attributes[mtl_attr.location].format != + MTLVertexFormatInvalid); + } + desc.vertex_descriptor.attributes[mtl_attr.location].offset = attribute_offset; + desc.vertex_descriptor.attributes[mtl_attr.location].buffer_index = buffer_index; + desc.vertex_descriptor.num_attributes = ((mtl_attr.location + 1) > + desc.vertex_descriptor.num_attributes) ? + (mtl_attr.location + 1) : + desc.vertex_descriptor.num_attributes; + + /* SSBO Vertex Fetch attribute bind. */ + if (active_shader_->get_uses_ssbo_vertex_fetch()) { + BLI_assert_msg(desc.vertex_descriptor.attributes[mtl_attr.location].format == + mtl_attr.format, + "SSBO Vertex Fetch does not support attribute conversion."); + + MTLSSBOAttribute ssbo_attr( + mtl_attr.index, + buffer_index, + attribute_offset, + buffer_stride, + MTLShader::ssbo_vertex_type_to_attr_type( + desc.vertex_descriptor.attributes[mtl_attr.location].format), + instanced); + + active_shader_->ssbo_vertex_fetch_bind_attribute(ssbo_attr); + desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] = + ssbo_attr; + desc.vertex_descriptor.num_ssbo_attributes++; + } + + /* NOTE: We are setting num_attributes to be up to the maximum found index, because of + * this, it is possible that we may skip over certain attributes if they were not in the + * source GPUVertFormat. 
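+ * Skipped slots remain flagged as MTLVertexFormatInvalid and are bound to a NULL
+ * buffer during PSO creation.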
*/ + MTL_LOG_INFO( + " -- Batch Attribute(%d): ORIG Shader Format: %d, ORIG Vert format: %d, Vert " + "components: %d, Fetch Mode %d --> FINAL FORMAT: %d\n", + mtl_attr.location, + (int)mtl_attr.format, + (int)a->comp_type, + (int)a->comp_len, + (int)a->fetch_mode, + (int)desc.vertex_descriptor.attributes[mtl_attr.location].format); + + MTL_LOG_INFO( + " -- [Batch] matching %s attribute '%s' (Attribute Index: %d, Buffer index: %d, " + "offset: %d)\n", + (instanced) ? "instance" : "vertex", + name, + mtl_attr.location, + buffer_index, + attribute_offset); + } + } + } + } + if (buffer_added) { + return buffer_index; + } + return -1; +} + +id<MTLRenderCommandEncoder> MTLBatch::bind(uint v_first, uint v_count, uint i_first, uint i_count) +{ + /* Setup draw call and render pipeline state here. Called by every draw, but setup here so that + * MTLDrawList only needs to perform setup a single time. */ + BLI_assert(this); + + /* Fetch Metal device. */ + MTLContext *ctx = MTLContext::get(); + if (!ctx) { + BLI_assert_msg(false, "No context available for rendering."); + return nil; + } + + /* Verify Shader. */ + active_shader_ = (shader) ? static_cast<MTLShader *>(unwrap(shader)) : nullptr; + + if (active_shader_ == nullptr || !active_shader_->is_valid()) { + /* Skip drawing if there is no valid Metal shader. + * This will occur if the path through which the shader is prepared + * is invalid (e.g. Python without create-info), or, the source shader uses a geometry pass. */ + BLI_assert_msg(false, "No valid Metal shader!"); + return nil; + } + + /* Check if using SSBO Fetch Mode. + * This is an alternative drawing mode to geometry shaders, wherein vertex buffers + * are bound as readable (random-access) GPU buffers and certain descriptor properties + * are passed using Shader uniforms. */ + bool uses_ssbo_fetch = active_shader_->get_uses_ssbo_vertex_fetch(); + + /* Prepare Vertex Descriptor and extract VertexBuffers to bind. */ + MTLVertBuf *buffers[GPU_BATCH_VBO_MAX_LEN] = {nullptr}; + int num_buffers = 0; + + /* Ensure Index Buffer is ready. */ + MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem)); + if (mtl_elem != NULL) { + mtl_elem->upload_data(); + } + + /* Populate vertex descriptor with attribute binding information. + * The vertex descriptor and buffer layout descriptors describe + * how vertex data from bound vertex buffers maps to the + * shader's input. + * A unique vertex descriptor will result in a new PipelineStateObject + * being generated for the currently bound shader. */ + prepare_vertex_descriptor_and_bindings(buffers, num_buffers, v_first, v_count, i_first, i_count); + + /* Prepare Vertex Buffers - Run before RenderCommandEncoder in case BlitCommandEncoder buffer + * data operations are required. */ + for (int i = 0; i < num_buffers; i++) { + MTLVertBuf *buf_at_index = buffers[i]; + if (buf_at_index == NULL) { + BLI_assert_msg( + false, + "Total buffer count does not match highest buffer index, could be gaps in bindings"); + continue; + } + + MTLVertBuf *mtlvbo = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(buf_at_index)); + mtlvbo->bind(); + } + + /* Ensure render pass is active and fetch active RenderCommandEncoder. */ + id<MTLRenderCommandEncoder> rec = ctx->ensure_begin_render_pass(); + + /* Fetch RenderPassState to enable resource binding for active pass. */ + MTLRenderPassState &rps = ctx->main_command_buffer.get_render_pass_state(); + + /* Debug Check: Ensure Frame-buffer instance is not dirty. 
*/ + BLI_assert(!ctx->main_command_buffer.get_active_framebuffer()->get_dirty()); + + /* Bind Shader. */ + this->shader_bind(); + + /* GPU debug markers. */ + if (G.debug & G_DEBUG_GPU) { + [rec pushDebugGroup:[NSString stringWithFormat:@"batch_bind%@(shader: %s)", + this->elem ? @"(indexed)" : @"", + active_shader_->get_interface()->get_name()]]; + [rec insertDebugSignpost:[NSString + stringWithFormat:@"batch_bind%@(shader: %s)", + this->elem ? @"(indexed)" : @"", + active_shader_->get_interface()->get_name()]]; + } + + /* Ensure Context Render Pipeline State is fully setup and ready to execute the draw. */ + MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type); + if (!ctx->ensure_render_pipeline_state(mtl_prim_type)) { + printf("FAILED TO ENSURE RENDER PIPELINE STATE"); + BLI_assert(false); + + if (G.debug & G_DEBUG_GPU) { + [rec popDebugGroup]; + } + return nil; + } + + /*** Bind Vertex Buffers and Index Buffers **/ + + /* SSBO Vertex Fetch Buffer bindings. */ + if (uses_ssbo_fetch) { + + /* SSBO Vertex Fetch - Bind Index Buffer to appropriate slot -- if used. */ + id<MTLBuffer> idx_buffer = nil; + GPUPrimType final_prim_type = this->prim_type; + + if (mtl_elem != nullptr) { + + /* Fetch index buffer. This function can situationally return an optimized + * index buffer of a different primitive type. If this is the case, `final_prim_type` + * and `v_count` will be updated with the new format. + * NOTE: For indexed rendering, v_count represents the number of indices. */ + idx_buffer = mtl_elem->get_index_buffer(final_prim_type, v_count); + BLI_assert(idx_buffer != nil); + + /* Update uniforms for SSBO-vertex-fetch-mode indexed rendering to flag usage. */ + int &uniform_ssbo_index_mode_u16 = active_shader_->uni_ssbo_uses_index_mode_u16; + BLI_assert(uniform_ssbo_index_mode_u16 != -1); + int uses_index_mode_u16 = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 1 : 0; + active_shader_->uniform_int(uniform_ssbo_index_mode_u16, 1, 1, &uses_index_mode_u16); + } + else { + idx_buffer = ctx->get_null_buffer(); + } + rps.bind_vertex_buffer(idx_buffer, 0, MTL_SSBO_VERTEX_FETCH_IBO_INDEX); + + /* Ensure all attributes are set */ + active_shader_->ssbo_vertex_fetch_bind_attributes_end(rec); + + /* Bind NULL Buffers for unused vertex data slots. */ + id<MTLBuffer> null_buffer = ctx->get_null_buffer(); + BLI_assert(null_buffer != nil); + for (int i = num_buffers; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) { + if (rps.cached_vertex_buffer_bindings[i].metal_buffer == nil) { + rps.bind_vertex_buffer(null_buffer, 0, i); + } + } + + /* Flag whether Indexed rendering is used or not. */ + int &uniform_ssbo_use_indexed = active_shader_->uni_ssbo_uses_indexed_rendering; + BLI_assert(uniform_ssbo_use_indexed != -1); + int uses_indexed_rendering = (mtl_elem != NULL) ? 1 : 0; + active_shader_->uniform_int(uniform_ssbo_use_indexed, 1, 1, &uses_indexed_rendering); + + /* Set SSBO-fetch-mode status uniforms. */ + BLI_assert(active_shader_->uni_ssbo_input_prim_type_loc != -1); + BLI_assert(active_shader_->uni_ssbo_input_vert_count_loc != -1); + GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_shader_)), + active_shader_->uni_ssbo_input_prim_type_loc, + 1, + 1, + (const int *)(&final_prim_type)); + GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_shader_)), + active_shader_->uni_ssbo_input_vert_count_loc, + 1, + 1, + (const int *)(&v_count)); + } + + /* Bind Vertex Buffers. 
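+ * NOTE: Buffer data was prepared before the render pass began (see the bind()
+ * preamble above); here each MTLBuffer handle is only attached to its encoder slot
+ * and flagged as in-use.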
*/ + for (int i = 0; i < num_buffers; i++) { + MTLVertBuf *buf_at_index = buffers[i]; + if (buf_at_index == NULL) { + BLI_assert_msg( + false, + "Total buffer count does not match highest buffer index, could be gaps in bindings"); + continue; + } + /* Buffer handle. */ + MTLVertBuf *mtlvbo = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(buf_at_index)); + mtlvbo->flag_used(); + + /* Fetch buffer from MTLVertexBuffer and bind. */ + id<MTLBuffer> mtl_buffer = mtlvbo->get_metal_buffer(); + + BLI_assert(mtl_buffer != nil); + rps.bind_vertex_buffer(mtl_buffer, 0, i); + } + + if (G.debug & G_DEBUG_GPU) { + [rec popDebugGroup]; + } + + /* Return Render Command Encoder used with setup. */ + return rec; +} + +void MTLBatch::unbind() +{ +} + +void MTLBatch::prepare_vertex_descriptor_and_bindings( + MTLVertBuf **buffers, int &num_buffers, int v_first, int v_count, int i_first, int i_count) +{ + + /* Here we populate the MTLContext vertex descriptor and resolve which buffers need to be bound. + */ + MTLStateManager *state_manager = static_cast<MTLStateManager *>( + MTLContext::get()->state_manager); + MTLRenderPipelineStateDescriptor &desc = state_manager->get_pipeline_descriptor(); + const MTLShaderInterface *interface = active_shader_->get_interface(); + uint16_t attr_mask = interface->get_enabled_attribute_mask(); + + /* Reset vertex descriptor to default state. */ + desc.reset_vertex_descriptor(); + + /* Fetch Vertex and Instance Buffers. */ + Span<MTLVertBuf *> mtl_verts(reinterpret_cast<MTLVertBuf **>(this->verts), + GPU_BATCH_VBO_MAX_LEN); + Span<MTLVertBuf *> mtl_inst(reinterpret_cast<MTLVertBuf **>(this->inst), + GPU_BATCH_INST_VBO_MAX_LEN); + + /* SSBO Vertex fetch also passes vertex descriptor information into the shader. */ + if (active_shader_->get_uses_ssbo_vertex_fetch()) { + active_shader_->ssbo_vertex_fetch_bind_attributes_begin(); + } + + /* Resolve Metal vertex buffer bindings. */ + /* Vertex Descriptors + * ------------------ + * Vertex Descriptors are required to generate a pipeline state, based on the current Batch's + * buffer bindings. These bindings are a unique matching, depending on what input attributes a + * batch has in its buffers, and those which are supported by the shader interface. + + * We iterate through the buffers and resolve which attributes satisfy the requirements of the + * currently bound shader. We cache this data, for a given Batch<->ShderInterface pairing in a + * VAO cache to avoid the need to recalculate this data. */ + bool buffer_is_instanced[GPU_BATCH_VBO_MAX_LEN] = {false}; + + VertexDescriptorShaderInterfacePair *descriptor = this->vao_cache.find(interface); + if (descriptor) { + desc.vertex_descriptor = descriptor->vertex_descriptor; + attr_mask = descriptor->attr_mask; + num_buffers = descriptor->num_buffers; + + for (int bid = 0; bid < GPU_BATCH_VBO_MAX_LEN; ++bid) { + if (descriptor->bufferIds[bid].used) { + if (descriptor->bufferIds[bid].is_instance) { + buffers[bid] = mtl_inst[descriptor->bufferIds[bid].id]; + buffer_is_instanced[bid] = true; + } + else { + buffers[bid] = mtl_verts[descriptor->bufferIds[bid].id]; + buffer_is_instanced[bid] = false; + } + } + } + + /* Use cached ssbo attribute binding data. 
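+ * Replaying the cached MTLSSBOAttribute bindings keeps the shader's vertex-fetch
+ * state consistent with the cached vertex descriptor without re-walking the
+ * vertex formats.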
*/ + if (active_shader_->get_uses_ssbo_vertex_fetch()) { + BLI_assert(desc.vertex_descriptor.uses_ssbo_vertex_fetch); + for (int attr_id = 0; attr_id < desc.vertex_descriptor.num_ssbo_attributes; attr_id++) { + active_shader_->ssbo_vertex_fetch_bind_attribute( + desc.vertex_descriptor.ssbo_attributes[attr_id]); + } + } + } + else { + VertexDescriptorShaderInterfacePair pair{}; + pair.interface = interface; + + for (int i = 0; i < GPU_BATCH_VBO_MAX_LEN; ++i) { + pair.bufferIds[i].id = -1; + pair.bufferIds[i].is_instance = 0; + pair.bufferIds[i].used = 0; + } + /* NOTE: Attribute extraction order from buffer is the reverse of the OpenGL as we flag once an + * attribute is found, rather than pre-setting the mask. */ + /* Extract Instance attributes (These take highest priority). */ + for (int v = 0; v < GPU_BATCH_INST_VBO_MAX_LEN; v++) { + if (mtl_inst[v]) { + MTL_LOG_INFO(" -- [Batch] Checking bindings for bound instance buffer %p\n", mtl_inst[v]); + int buffer_ind = this->prepare_vertex_binding( + mtl_inst[v], desc, interface, attr_mask, true); + if (buffer_ind >= 0) { + buffers[buffer_ind] = mtl_inst[v]; + buffer_is_instanced[buffer_ind] = true; + + pair.bufferIds[buffer_ind].id = v; + pair.bufferIds[buffer_ind].used = 1; + pair.bufferIds[buffer_ind].is_instance = 1; + num_buffers = ((buffer_ind + 1) > num_buffers) ? (buffer_ind + 1) : num_buffers; + } + } + } + + /* Extract Vertex attributes (First-bound vertex buffer takes priority). */ + for (int v = 0; v < GPU_BATCH_VBO_MAX_LEN; v++) { + if (mtl_verts[v] != NULL) { + MTL_LOG_INFO(" -- [Batch] Checking bindings for bound vertex buffer %p\n", mtl_verts[v]); + int buffer_ind = this->prepare_vertex_binding( + mtl_verts[v], desc, interface, attr_mask, false); + if (buffer_ind >= 0) { + buffers[buffer_ind] = mtl_verts[v]; + buffer_is_instanced[buffer_ind] = false; + + pair.bufferIds[buffer_ind].id = v; + pair.bufferIds[buffer_ind].used = 1; + pair.bufferIds[buffer_ind].is_instance = 0; + num_buffers = ((buffer_ind + 1) > num_buffers) ? (buffer_ind + 1) : num_buffers; + } + } + } + + /* Add to VertexDescriptor cache */ + desc.vertex_descriptor.uses_ssbo_vertex_fetch = active_shader_->get_uses_ssbo_vertex_fetch(); + pair.attr_mask = attr_mask; + pair.vertex_descriptor = desc.vertex_descriptor; + pair.num_buffers = num_buffers; + if (!this->vao_cache.insert(pair)) { + printf( + "[Performance Warning] cache is full (Size: %d), vertex descriptor will not be cached\n", + GPU_VAO_STATIC_LEN); + } + } + +/* DEBUG: verify if our attribute bindings have been fully provided as expected. */ +#if MTL_DEBUG_SHADER_ATTRIBUTES == 1 + if (attr_mask != 0) { + for (uint16_t mask = 1, a = 0; a < 16; a++, mask <<= 1) { + if (attr_mask & mask) { + /* Fallback for setting default attributes, for missed slots. Attributes flagged with + * 'MTLVertexFormatInvalid' in the vertex descriptor are bound to a NULL buffer during PSO + * creation. */ + MTL_LOG_WARNING("MTLBatch: Missing expected attribute '%s' at index '%d' for shader: %s\n", + this->active_shader->interface->attributes[a].name, + a, + interface->name); + /* Ensure any assigned attribute has not been given an invalid format. This should not + * occur and may be the result of an unsupported attribute type conversion. */ + BLI_assert(desc.attributes[a].format == MTLVertexFormatInvalid); + } + } + } +#endif +} + +void MTLBatch::draw_advanced(int v_first, int v_count, int i_first, int i_count) +{ + +#if TRUST_NO_ONE + BLI_assert(v_count > 0 && i_count > 0); +#endif + + /* Setup RenderPipelineState for batch. 
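+ * bind() returns nil if no valid context, shader or render pipeline state is
+ * available; the draw is skipped in that case.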
*/ + MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get()); + id<MTLRenderCommandEncoder> rec = this->bind(v_first, v_count, i_first, i_count); + if (rec == nil) { + return; + } + + /* Fetch IndexBuffer and resolve primitive type. */ + MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem)); + MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type); + + /* Render using SSBO Vertex Fetch. */ + if (active_shader_->get_uses_ssbo_vertex_fetch()) { + + /* Submit draw call with modified vertex count, which reflects vertices per primitive defined + * in the USE_SSBO_VERTEX_FETCH pragma. */ + int num_input_primitives = gpu_get_prim_count_from_type(v_count, this->prim_type); + int output_num_verts = num_input_primitives * + active_shader_->get_ssbo_vertex_fetch_output_num_verts(); + BLI_assert_msg( + mtl_vertex_count_fits_primitive_type( + output_num_verts, active_shader_->get_ssbo_vertex_fetch_output_prim_type()), + "Output Vertex count is not compatible with the requested output vertex primitive type"); + [rec drawPrimitives:active_shader_->get_ssbo_vertex_fetch_output_prim_type() + vertexStart:0 + vertexCount:output_num_verts + instanceCount:i_count + baseInstance:i_first]; + ctx->main_command_buffer.register_draw_counters(output_num_verts * i_count); + } + /* Perform regular draw. */ + else if (mtl_elem == NULL) { + + /* Primitive Type toplogy emulation. */ + if (mtl_needs_topology_emulation(this->prim_type)) { + + /* Generate index buffer for primitive types requiring emulation. */ + GPUPrimType emulated_prim_type = this->prim_type; + uint32_t emulated_v_count = v_count; + id<MTLBuffer> generated_index_buffer = this->get_emulated_toplogy_buffer(emulated_prim_type, + emulated_v_count); + BLI_assert(generated_index_buffer != nil); + + MTLPrimitiveType emulated_mtl_prim_type = gpu_prim_type_to_metal(emulated_prim_type); + + /* Temp: Disable culling for emulated primitive types. + * TODO(Metal): Support face winding in topology buffer. */ + [rec setCullMode:MTLCullModeNone]; + + if (generated_index_buffer != nil) { + BLI_assert(emulated_mtl_prim_type == MTLPrimitiveTypeTriangle || + emulated_mtl_prim_type == MTLPrimitiveTypeLine); + if (emulated_mtl_prim_type == MTLPrimitiveTypeTriangle) { + BLI_assert(emulated_v_count % 3 == 0); + } + if (emulated_mtl_prim_type == MTLPrimitiveTypeLine) { + BLI_assert(emulated_v_count % 2 == 0); + } + + /* Set depth stencil state (requires knowledge of primitive type). */ + ctx->ensure_depth_stencil_state(emulated_mtl_prim_type); + + [rec drawIndexedPrimitives:emulated_mtl_prim_type + indexCount:emulated_v_count + indexType:MTLIndexTypeUInt32 + indexBuffer:generated_index_buffer + indexBufferOffset:0 + instanceCount:i_count + baseVertex:v_first + baseInstance:i_first]; + } + else { + printf("[Note] Cannot draw batch -- Emulated Topology mode: %u not yet supported\n", + this->prim_type); + } + } + else { + /* Set depth stencil state (requires knowledge of primitive type). */ + ctx->ensure_depth_stencil_state(mtl_prim_type); + + /* Issue draw call. */ + [rec drawPrimitives:mtl_prim_type + vertexStart:v_first + vertexCount:v_count + instanceCount:i_count + baseInstance:i_first]; + } + ctx->main_command_buffer.register_draw_counters(v_count * i_count); + } + /* Perform indexed draw. 
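+ * NOTE: v_first is folded into the index-buffer byte offset (v_first_ofs), while
+ * index_base_ is passed as the baseVertex.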
*/ + else { + + MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_); + uint32_t base_index = mtl_elem->index_base_; + uint32_t index_size = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 2 : 4; + uint32_t v_first_ofs = ((v_first + mtl_elem->index_start_) * index_size); + BLI_assert_msg((v_first_ofs % index_size) == 0, + "Index offset is not 2/4-byte aligned as per METAL spec"); + + /* Fetch index buffer. May return an index buffer of a differing format, + * if index buffer optimization is used. In these cases, final_prim_type and + * index_count get updated with the new properties. */ + GPUPrimType final_prim_type = this->prim_type; + uint index_count = v_count; + + id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count); + mtl_prim_type = gpu_prim_type_to_metal(final_prim_type); + BLI_assert(index_buffer != nil); + + if (index_buffer != nil) { + + /* Set depth stencil state (requires knowledge of primitive type). */ + ctx->ensure_depth_stencil_state(mtl_prim_type); + + /* Issue draw call. */ + [rec drawIndexedPrimitives:mtl_prim_type + indexCount:index_count + indexType:index_type + indexBuffer:index_buffer + indexBufferOffset:v_first_ofs + instanceCount:i_count + baseVertex:base_index + baseInstance:i_first]; + ctx->main_command_buffer.register_draw_counters(index_count * i_count); + } + else { + BLI_assert_msg(false, "Index buffer does not have backing Metal buffer"); + } + } + + /* End of draw. */ + this->unbind(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Topology emulation and optimization + * \{ */ + +id<MTLBuffer> MTLBatch::get_emulated_toplogy_buffer(GPUPrimType &in_out_prim_type, + uint32_t &in_out_v_count) +{ + + BLI_assert(in_out_v_count > 0); + /* Determine emulated primitive types. */ + GPUPrimType input_prim_type = in_out_prim_type; + uint32_t v_count = in_out_v_count; + GPUPrimType output_prim_type; + switch (input_prim_type) { + case GPU_PRIM_POINTS: + case GPU_PRIM_LINES: + case GPU_PRIM_TRIS: + BLI_assert_msg(false, "Optimal primitive types should not reach here."); + return nil; + break; + case GPU_PRIM_LINES_ADJ: + case GPU_PRIM_TRIS_ADJ: + BLI_assert_msg(false, "Adjacency primitive types should not reach here."); + return nil; + break; + case GPU_PRIM_LINE_STRIP: + case GPU_PRIM_LINE_LOOP: + case GPU_PRIM_LINE_STRIP_ADJ: + output_prim_type = GPU_PRIM_LINES; + break; + case GPU_PRIM_TRI_STRIP: + case GPU_PRIM_TRI_FAN: + output_prim_type = GPU_PRIM_TRIS; + break; + default: + BLI_assert_msg(false, "Invalid primitive type."); + return nil; + } + + /* Check if topology buffer exists and is valid. */ + if (this->emulated_topology_buffer_ != nullptr && + (emulated_topology_type_ != input_prim_type || topology_buffer_input_v_count_ != v_count)) { + + /* Release existing topology buffer. */ + emulated_topology_buffer_->free(); + emulated_topology_buffer_ = nullptr; + } + + /* Generate new topology index buffer. */ + if (this->emulated_topology_buffer_ == nullptr) { + /* Calculate IB len. 
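+ * e.g. a line strip with v_count = 5 emits 4 lines (8 indices); a triangle fan
+ * with v_count = 6 emits 4 triangles (12 indices).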
*/ + uint32_t output_prim_count = 0; + switch (input_prim_type) { + case GPU_PRIM_LINE_STRIP: + case GPU_PRIM_LINE_STRIP_ADJ: + output_prim_count = v_count - 1; + break; + case GPU_PRIM_LINE_LOOP: + output_prim_count = v_count; + break; + case GPU_PRIM_TRI_STRIP: + case GPU_PRIM_TRI_FAN: + output_prim_count = v_count - 2; + break; + default: + BLI_assert_msg(false, "Cannot generate optimized topology buffer for other types."); + break; + } + uint32_t output_IB_elems = output_prim_count * ((output_prim_type == GPU_PRIM_TRIS) ? 3 : 2); + + /* Allocate buffer. */ + uint32_t buffer_bytes = output_IB_elems * 4; + BLI_assert(buffer_bytes > 0); + this->emulated_topology_buffer_ = MTLContext::get_global_memory_manager().allocate( + buffer_bytes, true); + + /* Populate. */ + uint32_t *data = (uint32_t *)this->emulated_topology_buffer_->get_host_ptr(); + BLI_assert(data != nullptr); + + /* TODO(Metal): Support inverse winding modes. */ + bool winding_clockwise = false; + UNUSED_VARS(winding_clockwise); + + switch (input_prim_type) { + /* Line Loop. */ + case GPU_PRIM_LINE_LOOP: { + int line = 0; + for (line = 0; line < output_prim_count - 1; line++) { + data[line * 3 + 0] = line + 0; + data[line * 3 + 1] = line + 1; + } + /* Closing line. */ + data[line * 2 + 0] = line + 0; + data[line * 2 + 1] = 0; + } break; + + /* Triangle Fan. */ + case GPU_PRIM_TRI_FAN: { + for (int triangle = 0; triangle < output_prim_count; triangle++) { + data[triangle * 3 + 0] = 0; /* Always 0 */ + data[triangle * 3 + 1] = triangle + 1; + data[triangle * 3 + 2] = triangle + 2; + } + } break; + + default: + BLI_assert_msg(false, "Other primitive types do not require emulation."); + return nil; + } + + /* Flush. */ + this->emulated_topology_buffer_->flush(); + /* Assign members relating to current cached IB. */ + topology_buffer_input_v_count_ = v_count; + topology_buffer_output_v_count_ = output_IB_elems; + emulated_topology_type_ = input_prim_type; + } + + /* Return. */ + in_out_v_count = topology_buffer_output_v_count_; + in_out_prim_type = output_prim_type; + return (emulated_topology_buffer_) ? emulated_topology_buffer_->get_metal_buffer() : nil; +} + +/** \} */ + +} // blender::gpu diff --git a/source/blender/gpu/metal/mtl_command_buffer.mm b/source/blender/gpu/metal/mtl_command_buffer.mm index 0e13e8d4690..a9cabbb111f 100644 --- a/source/blender/gpu/metal/mtl_command_buffer.mm +++ b/source/blender/gpu/metal/mtl_command_buffer.mm @@ -54,6 +54,7 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin() MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init]; desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus; desc.retainedReferences = YES; + BLI_assert(context_.queue != nil); active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc]; } else { @@ -498,7 +499,7 @@ bool MTLCommandBufferManager::insert_memory_barrier(eGPUBarrier barrier_bits, /* Rendering. */ case MTL_RENDER_COMMAND_ENCODER: { /* Currently flagging both stages -- can use bits above to filter on stage type -- - * though full barrier is safe for now*/ + * though full barrier is safe for now. */ MTLRenderStages before_stage_flags = 0; MTLRenderStages after_stage_flags = 0; if (before_stages & GPU_BARRIER_STAGE_VERTEX && @@ -611,40 +612,187 @@ void MTLRenderPassState::bind_vertex_sampler(MTLSamplerBinding &sampler_binding, bool use_argument_buffer_for_samplers, uint slot) { - /* TODO(Metal): Implement RenderCommandEncoder vertex sampler binding utility. 
This will be - * implemented alongside MTLShader. */ + /* Range check. */ + const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface(); + BLI_assert(slot >= 0); + BLI_assert(slot <= shader_interface->get_max_texture_index()); + BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS); + UNUSED_VARS_NDEBUG(shader_interface); + + /* If sampler state has not changed for the given slot, we do not need to fetch. */ + if (this->cached_vertex_sampler_state_bindings[slot].sampler_state == nil || + !(this->cached_vertex_sampler_state_bindings[slot].binding_state == sampler_binding.state) || + use_argument_buffer_for_samplers) { + + id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ? + ctx.get_default_sampler_state() : + ctx.get_sampler_from_state(sampler_binding.state); + if (!use_argument_buffer_for_samplers) { + /* Update binding and cached state. */ + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + BLI_assert(rec != nil); + [rec setVertexSamplerState:sampler_state atIndex:slot]; + this->cached_vertex_sampler_state_bindings[slot].binding_state = sampler_binding.state; + this->cached_vertex_sampler_state_bindings[slot].sampler_state = sampler_state; + } + + /* Flag last binding type. */ + this->cached_vertex_sampler_state_bindings[slot].is_arg_buffer_binding = + use_argument_buffer_for_samplers; + + /* Always assign to argument buffer samplers binding array - Efficiently ensures the value in + * the samplers array is always up to date. */ + ctx.samplers_.mtl_sampler[slot] = sampler_state; + ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state; + } } void MTLRenderPassState::bind_fragment_sampler(MTLSamplerBinding &sampler_binding, bool use_argument_buffer_for_samplers, uint slot) { - /* TODO(Metal): Implement RenderCommandEncoder fragment sampler binding utility. This will be - * implemented alongside MTLShader. */ + /* Range check. */ + const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface(); + BLI_assert(slot >= 0); + BLI_assert(slot <= shader_interface->get_max_texture_index()); + BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS); + UNUSED_VARS_NDEBUG(shader_interface); + + /* If sampler state has not changed for the given slot, we do not need to fetch*/ + if (this->cached_fragment_sampler_state_bindings[slot].sampler_state == nil || + !(this->cached_fragment_sampler_state_bindings[slot].binding_state == + sampler_binding.state) || + use_argument_buffer_for_samplers) { + + id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ? + ctx.get_default_sampler_state() : + ctx.get_sampler_from_state(sampler_binding.state); + if (!use_argument_buffer_for_samplers) { + /* Update binding and cached state. */ + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + BLI_assert(rec != nil); + [rec setFragmentSamplerState:sampler_state atIndex:slot]; + this->cached_fragment_sampler_state_bindings[slot].binding_state = sampler_binding.state; + this->cached_fragment_sampler_state_bindings[slot].sampler_state = sampler_state; + } + + /* Flag last binding type */ + this->cached_fragment_sampler_state_bindings[slot].is_arg_buffer_binding = + use_argument_buffer_for_samplers; + + /* Always assign to argument buffer samplers binding array - Efficiently ensures the value in + * the samplers array is always up to date. 
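+ * This mirror allows a subsequent argument-buffer bind to consume the complete
+ * sampler set in one step, rather than re-binding each slot individually.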
*/ + ctx.samplers_.mtl_sampler[slot] = sampler_state; + ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state; + } } void MTLRenderPassState::bind_vertex_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index) { - /* TODO(Metal): Implement RenderCommandEncoder vertex buffer binding utility. This will be - * implemented alongside the full MTLMemoryManager. */ + BLI_assert(index >= 0); + BLI_assert(buffer_offset >= 0); + BLI_assert(buffer != nil); + + BufferBindingCached ¤t_vert_ubo_binding = this->cached_vertex_buffer_bindings[index]; + if (current_vert_ubo_binding.offset != buffer_offset || + current_vert_ubo_binding.metal_buffer != buffer || current_vert_ubo_binding.is_bytes) { + + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + BLI_assert(rec != nil); + + if (current_vert_ubo_binding.metal_buffer == buffer) { + /* If buffer is the same, but offset has changed. */ + [rec setVertexBufferOffset:buffer_offset atIndex:index]; + } + else { + /* Bind Vertex Buffer. */ + [rec setVertexBuffer:buffer offset:buffer_offset atIndex:index]; + } + + /* Update Bind-state cache. */ + this->cached_vertex_buffer_bindings[index].is_bytes = false; + this->cached_vertex_buffer_bindings[index].metal_buffer = buffer; + this->cached_vertex_buffer_bindings[index].offset = buffer_offset; + } } void MTLRenderPassState::bind_fragment_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index) { - /* TODO(Metal): Implement RenderCommandEncoder fragment buffer binding utility. This will be - * implemented alongside the full MTLMemoryManager. */ + BLI_assert(index >= 0); + BLI_assert(buffer_offset >= 0); + BLI_assert(buffer != nil); + + BufferBindingCached ¤t_frag_ubo_binding = this->cached_fragment_buffer_bindings[index]; + if (current_frag_ubo_binding.offset != buffer_offset || + current_frag_ubo_binding.metal_buffer != buffer || current_frag_ubo_binding.is_bytes) { + + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + BLI_assert(rec != nil); + + if (current_frag_ubo_binding.metal_buffer == buffer) { + /* If buffer is the same, but offset has changed. */ + [rec setFragmentBufferOffset:buffer_offset atIndex:index]; + } + else { + /* Bind Fragment Buffer */ + [rec setFragmentBuffer:buffer offset:buffer_offset atIndex:index]; + } + + /* Update Bind-state cache */ + this->cached_fragment_buffer_bindings[index].is_bytes = false; + this->cached_fragment_buffer_bindings[index].metal_buffer = buffer; + this->cached_fragment_buffer_bindings[index].offset = buffer_offset; + } } void MTLRenderPassState::bind_vertex_bytes(void *bytes, uint length, uint index) { - /* TODO(Metal): Implement RenderCommandEncoder vertex bytes binding utility. This will be - * implemented alongside the full MTLMemoryManager. */ + /* Bytes always updated as source data may have changed. */ + BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + BLI_assert(length > 0); + BLI_assert(bytes != nullptr); + + if (length < MTL_MAX_SET_BYTES_SIZE) { + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + [rec setVertexBytes:bytes length:length atIndex:index]; + } + else { + /* We have run over the setBytes limit, bind buffer instead. 
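+ * setBytes is capped at MTL_MAX_SET_BYTES_SIZE (4 KB); larger uploads are copied
+ * into a 256-byte-aligned scratch-buffer range and bound as a regular buffer.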
*/ + MTLTemporaryBuffer range = + ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256); + memcpy(range.data, bytes, length); + this->bind_vertex_buffer(range.metal_buffer, range.buffer_offset, index); + } + + /* Update Bind-state cache */ + this->cached_vertex_buffer_bindings[index].is_bytes = true; + this->cached_vertex_buffer_bindings[index].metal_buffer = nil; + this->cached_vertex_buffer_bindings[index].offset = -1; } void MTLRenderPassState::bind_fragment_bytes(void *bytes, uint length, uint index) { - /* TODO(Metal): Implement RenderCommandEncoder fragment bytes binding utility. This will be - * implemented alongside the full MTLMemoryManager. */ + /* Bytes always updated as source data may have changed. */ + BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + BLI_assert(length > 0); + BLI_assert(bytes != nullptr); + + if (length < MTL_MAX_SET_BYTES_SIZE) { + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + [rec setFragmentBytes:bytes length:length atIndex:index]; + } + else { + /* We have run over the setBytes limit, bind buffer instead. */ + MTLTemporaryBuffer range = + ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256); + memcpy(range.data, bytes, length); + this->bind_fragment_buffer(range.metal_buffer, range.buffer_offset, index); + } + + /* Update Bind-state cache. */ + this->cached_fragment_buffer_bindings[index].is_bytes = true; + this->cached_fragment_buffer_bindings[index].metal_buffer = nil; + this->cached_fragment_buffer_bindings[index].offset = -1; } /** \} */ diff --git a/source/blender/gpu/metal/mtl_common.hh b/source/blender/gpu/metal/mtl_common.hh index b6f9c0050a9..5c322efa3f9 100644 --- a/source/blender/gpu/metal/mtl_common.hh +++ b/source/blender/gpu/metal/mtl_common.hh @@ -3,7 +3,9 @@ #ifndef __MTL_COMMON #define __MTL_COMMON -// -- Renderer Options -- +/** -- Renderer Options -- */ +/* Number of frames over which rolling averages are taken. */ +#define MTL_FRAME_AVERAGE_COUNT 5 #define MTL_MAX_DRAWABLES 3 #define MTL_MAX_SET_BYTES_SIZE 4096 #define MTL_FORCE_WAIT_IDLE 0 diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh index e996193e722..6229afcef79 100644 --- a/source/blender/gpu/metal/mtl_context.hh +++ b/source/blender/gpu/metal/mtl_context.hh @@ -12,6 +12,10 @@ #include "GPU_common_types.h" #include "GPU_context.h" +#include "intern/GHOST_Context.h" +#include "intern/GHOST_ContextCGL.h" +#include "intern/GHOST_Window.h" + #include "mtl_backend.hh" #include "mtl_capabilities.hh" #include "mtl_common.hh" @@ -248,7 +252,7 @@ struct MTLContextTextureUtils { /* Depth texture updates are not directly supported with Blit operations, similarly, we cannot * use a compute shader to write to depth, so we must instead render to a depth target. * These processes use vertex/fragment shaders to render texture data from an intermediate - * source, in order to prime the depth buffer*/ + * source, in order to prime the depth buffer. 
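+ * One update shader is cached per DepthTextureUpdateRoutineSpecialisation in the
+ * map below.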
*/ blender::Map<DepthTextureUpdateRoutineSpecialisation, GPUShader *> depth_2d_update_shaders; GPUShader *fullscreen_blit_shader = nullptr; @@ -348,7 +352,7 @@ struct MTLSamplerArray { { uint32_t hash = this->num_samplers; for (int i = 0; i < this->num_samplers; i++) { - hash ^= (uint32_t)this->mtl_sampler_flags[i] << (i % 3); + hash ^= uint32_t(this->mtl_sampler_flags[i]) << (i % 3); } return hash; } @@ -570,12 +574,44 @@ class MTLCommandBufferManager { class MTLContext : public Context { friend class MTLBackend; + friend class MTLRenderPassState; + + public: + /* Swap-chain and latency management. */ + static std::atomic<int> max_drawables_in_flight; + static std::atomic<int64_t> avg_drawable_latency_us; + static int64_t frame_latency[MTL_FRAME_AVERAGE_COUNT]; + + public: + /* Shaders and Pipeline state. */ + MTLContextGlobalShaderPipelineState pipeline_state; + + /* Metal API Resource Handles. */ + id<MTLCommandQueue> queue = nil; + id<MTLDevice> device = nil; + +#ifndef NDEBUG + /* Label for Context debug name assignment. */ + NSString *label = nil; +#endif + + /* Memory Management. */ + MTLScratchBufferManager memory_manager; + static MTLBufferPool global_memory_manager; + + /* CommandBuffer managers. */ + MTLCommandBufferManager main_command_buffer; private: - /* Null buffers for empty/uninitialized bindings. - * Null attribute buffer follows default attribute format of OpenGL Back-end. */ - id<MTLBuffer> null_buffer_; /* All zero's. */ - id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */ + /* Parent Context. */ + GHOST_ContextCGL *ghost_context_; + + /* Render Passes and Frame-buffers. */ + id<MTLTexture> default_fbo_mtltexture_ = nil; + gpu::MTLTexture *default_fbo_gputexture_ = nullptr; + + /* Depth-stencil state cache. */ + blender::Map<MTLContextDepthStencilState, id<MTLDepthStencilState>> depth_stencil_state_cache; /* Compute and specialization caches. */ MTLContextTextureUtils texture_utils_; @@ -601,23 +637,20 @@ class MTLContext : public Context { gpu::MTLBuffer *visibility_buffer_ = nullptr; bool visibility_is_dirty_ = false; - public: - /* Shaders and Pipeline state. */ - MTLContextGlobalShaderPipelineState pipeline_state; - - /* Metal API Resource Handles. */ - id<MTLCommandQueue> queue = nil; - id<MTLDevice> device = nil; - - /* Memory Management */ - MTLScratchBufferManager memory_manager; - static MTLBufferPool global_memory_manager; + /* Null buffers for empty/uninitialized bindings. + * Null attribute buffer follows default attribute format of OpenGL Backend. */ + id<MTLBuffer> null_buffer_; /* All zero's. */ + id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */ - /* CommandBuffer managers. */ - MTLCommandBufferManager main_command_buffer; + /** Dummy Resources */ + /* Maximum of 32 texture types. Though most combinations invalid. */ + gpu::MTLTexture *dummy_textures_[GPU_TEXTURE_BUFFER] = {nullptr}; + GPUVertFormat dummy_vertformat_; + GPUVertBuf *dummy_verts_ = nullptr; + public: /* GPUContext interface. */ - MTLContext(void *ghost_window); + MTLContext(void *ghost_window, void *ghost_context); ~MTLContext(); static void check_error(const char *info); @@ -673,6 +706,35 @@ class MTLContext : public Context { void pipeline_state_init(); MTLShader *get_active_shader(); + /* These functions ensure that the current RenderCommandEncoder has + * the correct global state assigned. This should be called prior + * to every draw call, to ensure that all state is applied and up + * to date. 
We handle:
+   *
+   * - Buffer bindings (Vertex buffers, Uniforms, UBOs, transform feedback)
+   * - Texture bindings
+   * - Sampler bindings (+ argument buffer bindings)
+   * - Dynamic Render pipeline state (on encoder)
+   * - Baking Pipeline State Objects (PSOs) for current shader, based
+   *   on final pipeline state.
+   *
+   * `ensure_render_pipeline_state` will return false if the state is
+   * invalid and cannot be applied. This should cancel a draw call. */
+  bool ensure_render_pipeline_state(MTLPrimitiveType prim_type);
+  bool ensure_uniform_buffer_bindings(
+      id<MTLRenderCommandEncoder> rec,
+      const MTLShaderInterface *shader_interface,
+      const MTLRenderPipelineStateInstance *pipeline_state_instance);
+  void ensure_texture_bindings(id<MTLRenderCommandEncoder> rec,
+                               MTLShaderInterface *shader_interface,
+                               const MTLRenderPipelineStateInstance *pipeline_state_instance);
+  void ensure_depth_stencil_state(MTLPrimitiveType prim_type);
+
+  id<MTLBuffer> get_null_buffer();
+  id<MTLBuffer> get_null_attribute_buffer();
+  gpu::MTLTexture *get_dummy_texture(eGPUTextureType type);
+  void free_dummy_resources();
+
   /* State assignment. */
   void set_viewport(int origin_x, int origin_y, int width, int height);
   void set_scissor(int scissor_x, int scissor_y, int scissor_width, int scissor_height);
@@ -720,9 +782,37 @@ class MTLContext : public Context {
   {
     return MTLContext::global_memory_manager;
   }
-  /* Uniform Buffer Bindings to command encoders. */
-  id<MTLBuffer> get_null_buffer();
-  id<MTLBuffer> get_null_attribute_buffer();
+
+  /* Swap-chain and latency management. */
+  static void latency_resolve_average(int64_t frame_latency_us)
+  {
+    int64_t avg = 0;
+    int64_t frame_c = 0;
+    for (int i = MTL_FRAME_AVERAGE_COUNT - 1; i > 0; i--) {
+      MTLContext::frame_latency[i] = MTLContext::frame_latency[i - 1];
+      avg += MTLContext::frame_latency[i];
+      frame_c += (MTLContext::frame_latency[i] > 0) ? 1 : 0;
+    }
+    MTLContext::frame_latency[0] = frame_latency_us;
+    avg += MTLContext::frame_latency[0];
+    if (frame_c > 0) {
+      avg /= frame_c;
+    }
+    else {
+      avg = 0;
+    }
+    MTLContext::avg_drawable_latency_us = avg;
+  }
+
+ private:
+  void set_ghost_context(GHOST_ContextHandle ghostCtxHandle);
+  void set_ghost_window(GHOST_WindowHandle ghostWinHandle);
 };
 
+/* GHOST Context callback and present. */
+void present(MTLRenderPassDescriptor *blit_descriptor,
+             id<MTLRenderPipelineState> blit_pso,
+             id<MTLTexture> swapchain_texture,
+             id<CAMetalDrawable> drawable);
+
 }  // namespace blender::gpu
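Taken together, the `ensure_*` entry points declared above define the per-draw validation flow. A hedged sketch of how a draw routine might drive them; the parameter values are illustrative, and the real call sites live in MTLBatch and MTLImmediate:

/* Per-draw validation sketch, assuming an active MTLContext *ctx and a vertex count
 * vertex_len. A false return from ensure_render_pipeline_state() means the state could
 * not be applied and the draw must be skipped, per the contract documented above. */
MTLPrimitiveType prim = MTLPrimitiveTypeTriangle;
id<MTLRenderCommandEncoder> enc = ctx->ensure_begin_render_pass();
if (ctx->ensure_render_pipeline_state(prim)) {
  /* Depth-stencil state is encoder-level and depends on the primitive type. */
  ctx->ensure_depth_stencil_state(prim);
  [enc drawPrimitives:prim vertexStart:0 vertexCount:vertex_len];
}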
diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm
index a66645e5fb5..50576379f0d 100644
--- a/source/blender/gpu/metal/mtl_context.mm
+++ b/source/blender/gpu/metal/mtl_context.mm
@@ -5,13 +5,29 @@
  */
 #include "mtl_context.hh"
 #include "mtl_debug.hh"
+#include "mtl_framebuffer.hh"
+#include "mtl_immediate.hh"
+#include "mtl_memory.hh"
+#include "mtl_primitive.hh"
 #include "mtl_shader.hh"
 #include "mtl_shader_interface.hh"
 #include "mtl_state.hh"
+#include "mtl_uniform_buffer.hh"
 
 #include "DNA_userdef_types.h"
 
 #include "GPU_capabilities.h"
+#include "GPU_matrix.h"
+#include "GPU_shader.h"
+#include "GPU_texture.h"
+#include "GPU_uniform_buffer.h"
+#include "GPU_vertex_buffer.h"
+#include "intern/gpu_matrix_private.h"
+
+#include "PIL_time.h"
+
+#include <fstream>
+#include <string>
 
 using namespace blender;
 using namespace blender::gpu;
@@ -21,21 +37,118 @@ namespace blender::gpu {
 
 /* Global memory manager. */
 MTLBufferPool MTLContext::global_memory_manager;
 
+/* Swap-chain and latency management. */
+std::atomic<int> MTLContext::max_drawables_in_flight = 0;
+std::atomic<int64_t> MTLContext::avg_drawable_latency_us = 0;
+int64_t MTLContext::frame_latency[MTL_FRAME_AVERAGE_COUNT] = {0};
+
+/* -------------------------------------------------------------------- */
+/** \name GHOST Context interaction.
+ * \{ */
+
+void MTLContext::set_ghost_context(GHOST_ContextHandle ghostCtxHandle)
+{
+  GHOST_Context *ghost_ctx = reinterpret_cast<GHOST_Context *>(ghostCtxHandle);
+  BLI_assert(ghost_ctx != nullptr);
+
+  /* Release old MTLTexture handle. */
+  if (default_fbo_mtltexture_) {
+    [default_fbo_mtltexture_ release];
+    default_fbo_mtltexture_ = nil;
+  }
+
+  /* Release Framebuffer attachments. */
+  MTLFrameBuffer *mtl_front_left = static_cast<MTLFrameBuffer *>(this->front_left);
+  MTLFrameBuffer *mtl_back_left = static_cast<MTLFrameBuffer *>(this->back_left);
+  mtl_front_left->remove_all_attachments();
+  mtl_back_left->remove_all_attachments();
+
+  GHOST_ContextCGL *ghost_cgl_ctx = dynamic_cast<GHOST_ContextCGL *>(ghost_ctx);
+  if (ghost_cgl_ctx != NULL) {
+    default_fbo_mtltexture_ = ghost_cgl_ctx->metalOverlayTexture();
+
+    MTL_LOG_INFO(
+        "Binding GHOST context CGL %p to GPU context %p. (Device: %p, queue: %p, texture: %p)\n",
+        ghost_cgl_ctx,
+        this,
+        this->device,
+        this->queue,
+        default_fbo_gputexture_);
+
+    /* Check if the GHOST Context provides a default framebuffer: */
+    if (default_fbo_mtltexture_) {
+
+      /* Release old GPUTexture handle. */
+      if (default_fbo_gputexture_) {
+        GPU_texture_free(wrap(static_cast<Texture *>(default_fbo_gputexture_)));
+        default_fbo_gputexture_ = nullptr;
+      }
+
+      /* Retain handle. */
+      [default_fbo_mtltexture_ retain];
+
+      /*** Create front and back-buffers ***/
+      /* Create gpu::MTLTexture objects. */
+      default_fbo_gputexture_ = new gpu::MTLTexture(
+          "MTL_BACKBUFFER", GPU_RGBA16F, GPU_TEXTURE_2D, default_fbo_mtltexture_);
+
+      /* Update frame-buffers with new texture attachments. */
+      mtl_front_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+      mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+#ifndef NDEBUG
+      this->label = default_fbo_mtltexture_.label;
+#endif
+    }
+    else {
+
+      /* Add default texture for cases where no other framebuffer is bound. */
+      if (!default_fbo_gputexture_) {
+        default_fbo_gputexture_ = static_cast<gpu::MTLTexture *>(
+            unwrap(GPU_texture_create_2d(__func__, 16, 16, 1, GPU_RGBA16F, nullptr)));
+      }
+      mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+
+      MTL_LOG_INFO(
+          "-- Bound context %p for GPU context: %p is offscreen and does not have a default "
+          "framebuffer\n",
+          ghost_cgl_ctx,
+          this);
+#ifndef NDEBUG
+      this->label = @"Offscreen Metal Context";
+#endif
+    }
+  }
+  else {
+    MTL_LOG_INFO(
+        "[ERROR] Failed to bind GHOST context to MTLContext -- GHOST_ContextCGL is null "
+        "(GhostContext: %p, GhostContext_CGL: %p)\n",
+        ghost_ctx,
+        ghost_cgl_ctx);
+    BLI_assert(false);
+  }
+}
+
+void MTLContext::set_ghost_window(GHOST_WindowHandle ghostWinHandle)
+{
+  GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghostWinHandle);
+  this->set_ghost_context((GHOST_ContextHandle)(ghostWin ?
ghostWin->getContext() : NULL));
+}
+
+/** \} */
+
 /* -------------------------------------------------------------------- */
 /** \name MTLContext
  * \{ */
 
 /* Placeholder functions */
-MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command_buffer(*this)
+MTLContext::MTLContext(void *ghost_window, void *ghost_context)
+    : memory_manager(*this), main_command_buffer(*this)
 {
   /* Init debug. */
   debug::mtl_debug_init();
 
-  /* Device creation.
-   * TODO(Metal): This is a temporary initialization path to enable testing of features
-   * and shader compilation tests. Future functionality should fetch the existing device
-   * from GHOST_ContextCGL.mm. Plumbing to be updated in future. */
-  this->device = MTLCreateSystemDefaultDevice();
+  /* Initialize Render-pass and Frame-buffer State. */
+  this->back_left = nullptr;
 
   /* Initialize command buffer state. */
   this->main_command_buffer.prepare();
@@ -47,10 +160,35 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command
   is_inside_frame_ = false;
   current_frame_index_ = 0;
 
-  /* Prepare null data buffer */
+  /* Prepare null data buffer. */
   null_buffer_ = nil;
   null_attribute_buffer_ = nil;
 
+  /* Zero-initialize MTL textures. */
+  default_fbo_mtltexture_ = nil;
+  default_fbo_gputexture_ = nullptr;
+
+  /** Fetch the GHOST context and retrieve the Metal device/queue. */
+  ghost_window_ = ghost_window;
+  if (ghost_window_ && ghost_context == NULL) {
+    /* NOTE(Metal): Fetch ghost_context from ghost_window if it is not provided.
+     * Regardless of whether windowed or not, we need access to the GhostContext
+     * for presentation, and device/queue access. */
+    GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghost_window_);
+    ghost_context = (ghostWin ? ghostWin->getContext() : NULL);
+  }
+  BLI_assert(ghost_context);
+  this->ghost_context_ = static_cast<GHOST_ContextCGL *>(ghost_context);
+  this->queue = (id<MTLCommandQueue>)this->ghost_context_->metalCommandQueue();
+  this->device = (id<MTLDevice>)this->ghost_context_->metalDevice();
+  BLI_assert(this->queue);
+  BLI_assert(this->device);
+  [this->queue retain];
+  [this->device retain];
+
+  /* Register present callback. */
+  this->ghost_context_->metalRegisterPresentCallback(&present);
+
   /* Create FrameBuffer handles. */
   MTLFrameBuffer *mtl_front_left = new MTLFrameBuffer(this, "front_left");
   MTLFrameBuffer *mtl_back_left = new MTLFrameBuffer(this, "back_left");
@@ -66,6 +204,7 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command
   /* Initialize Metal modules. */
   this->memory_manager.init();
   this->state_manager = new MTLStateManager(this);
+  this->imm = new MTLImmediate(this);
 
   /* Ensure global memory manager is initialized. */
   MTLContext::global_memory_manager.init(this->device);
@@ -99,9 +238,29 @@ MTLContext::~MTLContext()
       this->end_frame();
     }
   }
+
+  /* Release memory manager. */
+  this->get_scratchbuffer_manager().free();
+
   /* Release update/blit shaders. */
   this->get_texture_utils().cleanup();
 
+  /* Detach resource references. */
+  GPU_texture_unbind_all();
+
+  /* Unbind UBOs. */
+  for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
+    if (this->pipeline_state.ubo_bindings[i].bound &&
+        this->pipeline_state.ubo_bindings[i].ubo != nullptr) {
+      GPUUniformBuf *ubo = wrap(
+          static_cast<UniformBuf *>(this->pipeline_state.ubo_bindings[i].ubo));
+      GPU_uniformbuf_unbind(ubo);
+    }
+  }
+
+  /* Release dummy resources. */
+  this->free_dummy_resources();
+
+  /* Release Sampler States.
*/
   for (int i = 0; i < GPU_SAMPLER_MAX; i++) {
     if (sampler_state_cache_[i] != nil) {
@@ -109,12 +268,28 @@ MTLContext::~MTLContext()
       sampler_state_cache_[i] = nil;
     }
   }
+
+  /* Empty cached sampler argument buffers. */
+  for (auto entry : cached_sampler_buffers_.values()) {
+    entry->free();
+  }
+  cached_sampler_buffers_.clear();
+
+  /* Free null buffers. */
   if (null_buffer_) {
     [null_buffer_ release];
   }
   if (null_attribute_buffer_) {
     [null_attribute_buffer_ release];
   }
+
+  /* Free Metal objects. */
+  if (this->queue) {
+    [this->queue release];
+  }
+  if (this->device) {
+    [this->device release];
+  }
 }
 
 void MTLContext::begin_frame()
@@ -146,20 +321,49 @@ void MTLContext::check_error(const char *info)
 
 void MTLContext::activate()
 {
-  /* TODO(Metal): Implement. */
+  /* Make sure no other context is already bound to this thread. */
+  BLI_assert(is_active_ == false);
+  is_active_ = true;
+  thread_ = pthread_self();
+
+  /* Re-apply ghost window/context for resizing. */
+  if (ghost_window_) {
+    this->set_ghost_window((GHOST_WindowHandle)ghost_window_);
+  }
+  else if (ghost_context_) {
+    this->set_ghost_context((GHOST_ContextHandle)ghost_context_);
+  }
+
+  /* Reset UBO bind state. */
+  for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
+    if (this->pipeline_state.ubo_bindings[i].bound &&
+        this->pipeline_state.ubo_bindings[i].ubo != nullptr) {
+      this->pipeline_state.ubo_bindings[i].bound = false;
+      this->pipeline_state.ubo_bindings[i].ubo = nullptr;
+    }
+  }
+
+  /* Ensure imm active. */
+  immActivate();
 }
+
 void MTLContext::deactivate()
 {
-  /* TODO(Metal): Implement. */
+  BLI_assert(this->is_active_on_thread());
+  /* Flush context on deactivate. */
+  this->flush();
+  is_active_ = false;
+  immDeactivate();
 }
 
 void MTLContext::flush()
 {
-  /* TODO(Metal): Implement. */
+  this->main_command_buffer.submit(false);
}
+
 void MTLContext::finish()
 {
-  /* TODO(Metal): Implement. */
+  this->main_command_buffer.submit(true);
 }
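/* Note: the pair above follows the usual split -- flush() submits the accumulated command
 * buffer without blocking, while finish() submits and waits for completion. From the GPU
 * module front-end this is reached via the generic entry points:
 *
 *   GPU_flush();   -- kick pending work, keep recording
 *   GPU_finish();  -- submit and wait until the GPU has executed prior work
 */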
 
 void MTLContext::memory_statistics_get(int *total_mem, int *free_mem)
@@ -200,9 +404,8 @@ id<MTLRenderCommandEncoder> MTLContext::ensure_begin_render_pass()
 
   /* Ensure command buffer workload submissions are optimal --
    * Though do not split a batch mid-IMM recording. */
-  /* TODO(Metal): Add IMM Check once MTLImmediate has been implemented. */
-  if (this->main_command_buffer.do_break_submission()/*&&
-      !((MTLImmediate *)(this->imm))->imm_is_recording()*/) {
+  if (this->main_command_buffer.do_break_submission() &&
+      !((MTLImmediate *)(this->imm))->imm_is_recording()) {
     this->flush();
   }
@@ -293,6 +496,72 @@ id<MTLBuffer> MTLContext::get_null_attribute_buffer()
   return null_attribute_buffer_;
 }
 
+gpu::MTLTexture *MTLContext::get_dummy_texture(eGPUTextureType type)
+{
+  /* Decrement 1 from texture type as they start from 1 and go to 32 (inclusive).
+   * Remap to 0..31. */
+  gpu::MTLTexture *dummy_tex = dummy_textures_[type - 1];
+  if (dummy_tex != nullptr) {
+    return dummy_tex;
+  }
+  else {
+    GPUTexture *tex = nullptr;
+    switch (type) {
+      case GPU_TEXTURE_1D:
+        tex = GPU_texture_create_1d("Dummy 1D", 128, 1, GPU_RGBA8, nullptr);
+        break;
+      case GPU_TEXTURE_1D_ARRAY:
+        tex = GPU_texture_create_1d_array("Dummy 1DArray", 128, 1, 1, GPU_RGBA8, nullptr);
+        break;
+      case GPU_TEXTURE_2D:
+        tex = GPU_texture_create_2d("Dummy 2D", 128, 128, 1, GPU_RGBA8, nullptr);
+        break;
+      case GPU_TEXTURE_2D_ARRAY:
+        tex = GPU_texture_create_2d_array("Dummy 2DArray", 128, 128, 1, 1, GPU_RGBA8, nullptr);
+        break;
+      case GPU_TEXTURE_3D:
+        tex = GPU_texture_create_3d(
+            "Dummy 3D", 128, 128, 1, 1, GPU_RGBA8, GPU_DATA_UBYTE, nullptr);
+        break;
+      case GPU_TEXTURE_CUBE:
+        tex = GPU_texture_create_cube("Dummy Cube", 128, 1, GPU_RGBA8, nullptr);
+        break;
+      case GPU_TEXTURE_CUBE_ARRAY:
+        tex = GPU_texture_create_cube_array("Dummy CubeArray", 128, 1, 1, GPU_RGBA8, nullptr);
+        break;
+      case GPU_TEXTURE_BUFFER:
+        if (!dummy_verts_) {
+          GPU_vertformat_clear(&dummy_vertformat_);
+          GPU_vertformat_attr_add(&dummy_vertformat_, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+          dummy_verts_ = GPU_vertbuf_create_with_format_ex(&dummy_vertformat_, GPU_USAGE_STATIC);
+          GPU_vertbuf_data_alloc(dummy_verts_, 64);
+        }
+        tex = GPU_texture_create_from_vertbuf("Dummy TextureBuffer", dummy_verts_);
+        break;
+      default:
+        BLI_assert_msg(false, "Unrecognised texture type");
+        return nullptr;
+    }
+    gpu::MTLTexture *metal_tex = static_cast<gpu::MTLTexture *>(reinterpret_cast<Texture *>(tex));
+    dummy_textures_[type - 1] = metal_tex;
+    return metal_tex;
+  }
+  return nullptr;
+}
+
+void MTLContext::free_dummy_resources()
+{
+  for (int tex = 0; tex < GPU_TEXTURE_BUFFER; tex++) {
+    if (dummy_textures_[tex]) {
+      GPU_texture_free(
+          reinterpret_cast<GPUTexture *>(static_cast<Texture *>(dummy_textures_[tex])));
+      dummy_textures_[tex] = nullptr;
+    }
+  }
+  if (dummy_verts_) {
+    GPU_vertbuf_discard(dummy_verts_);
+  }
+}
+
 /** \} */
 
 /* -------------------------------------------------------------------- */
@@ -439,6 +708,757 @@ void MTLContext::set_scissor_enabled(bool scissor_enabled)
 /** \} */
 
 /* -------------------------------------------------------------------- */
+/** \name Command Encoder and pipeline state
+ * These utilities ensure that all of the globally bound resources and state have been
+ * correctly encoded within the current RenderCommandEncoder. This involves managing
+ * buffer bindings, texture bindings, depth stencil state and dynamic pipeline state.
+ *
+ * We will also trigger compilation of new PSOs where the input state has changed
+ * and a new PSO is required.
+ * All of this setup is required in order to perform a valid draw call.
+ * \{ */
+
+bool MTLContext::ensure_render_pipeline_state(MTLPrimitiveType mtl_prim_type)
+{
+  BLI_assert(this->pipeline_state.initialised);
+
+  /* Check if an active shader is bound. */
+  if (!this->pipeline_state.active_shader) {
+    MTL_LOG_WARNING("No Metal shader for bound GL shader\n");
+    return false;
+  }
+
+  /* Also ensure active shader is valid. */
+  if (!this->pipeline_state.active_shader->is_valid()) {
+    MTL_LOG_WARNING(
+        "Bound active shader is not valid (Missing/invalid implementation for Metal).\n");
+    return false;
+  }
+
+  /* Apply global state. */
+  this->state_manager->apply_state();
+
+  /* Main command buffer tracks the current state of the render pass, based on bound
+   * MTLFrameBuffer.
*/
+  MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  /* Debug Check: Ensure Framebuffer instance is not dirty. */
+  BLI_assert(!this->main_command_buffer.get_active_framebuffer()->get_dirty());
+
+  /* Fetch shader interface. */
+  MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
+  if (shader_interface == nullptr) {
+    MTL_LOG_WARNING("Bound active shader does not have a valid shader interface!\n");
+    return false;
+  }
+
+  /* Fetch shader and bake valid PipelineStateObject (PSO) based on current
+   * shader and state combination. This PSO represents the final GPU-executable
+   * permutation of the shader. */
+  MTLRenderPipelineStateInstance *pipeline_state_instance =
+      this->pipeline_state.active_shader->bake_current_pipeline_state(
+          this, mtl_prim_type_to_topology_class(mtl_prim_type));
+  if (!pipeline_state_instance) {
+    MTL_LOG_ERROR("Failed to bake Metal pipeline state for shader: %s\n",
+                  shader_interface->get_name());
+    return false;
+  }
+
+  bool result = false;
+  if (pipeline_state_instance->pso) {
+
+    /* Fetch render command encoder. A render pass should already be active.
+     * This will be NULL if invalid. */
+    id<MTLRenderCommandEncoder> rec =
+        this->main_command_buffer.get_active_render_command_encoder();
+    BLI_assert(rec);
+    if (rec == nil) {
+      MTL_LOG_ERROR("ensure_render_pipeline_state called while render pass is not active.\n");
+      return false;
+    }
+
+    /* Bind Render Pipeline State. */
+    BLI_assert(pipeline_state_instance->pso);
+    if (rps.bound_pso != pipeline_state_instance->pso) {
+      [rec setRenderPipelineState:pipeline_state_instance->pso];
+      rps.bound_pso = pipeline_state_instance->pso;
+    }
+
+    /** Ensure resource bindings. */
+    /* Texture Bindings. */
+    /* We will iterate through all texture bindings on the context and determine if any of the
+     * active slots match those in our shader interface. If so, textures will be bound. */
+    if (shader_interface->get_total_textures() > 0) {
+      this->ensure_texture_bindings(rec, shader_interface, pipeline_state_instance);
+    }
+
+    /* Transform feedback buffer binding. */
+    /* TODO(Metal): Include this code once MTLVertBuf is merged. We bind the vertex buffer to
+     * which transform feedback data will be written. */
+    // GPUVertBuf *tf_vbo =
+    //     this->pipeline_state.active_shader->get_transform_feedback_active_buffer();
+    // if (tf_vbo != nullptr && pipeline_state_instance->transform_feedback_buffer_index >= 0) {
+
+    //   /* Ensure primitive type is either GPU_LINES, GPU_TRIANGLES or GPU_POINT */
+    //   BLI_assert(mtl_prim_type == MTLPrimitiveTypeLine ||
+    //              mtl_prim_type == MTLPrimitiveTypeTriangle ||
+    //              mtl_prim_type == MTLPrimitiveTypePoint);
+
+    //   /* Fetch active transform feedback buffer from vertbuf */
+    //   MTLVertBuf *tf_vbo_mtl = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(tf_vbo));
+    //   int tf_buffer_offset = 0;
+    //   id<MTLBuffer> tf_buffer_mtl = tf_vbo_mtl->get_metal_buffer(&tf_buffer_offset);
+
+    //   if (tf_buffer_mtl != nil && tf_buffer_offset >= 0) {
+    //     [rec setVertexBuffer:tf_buffer_mtl
+    //                   offset:tf_buffer_offset
+    //                  atIndex:pipeline_state_instance->transform_feedback_buffer_index];
+    //     printf("Successfully bound VBO: %p for transform feedback (MTL Buffer: %p)\n",
+    //            tf_vbo_mtl,
+    //            tf_buffer_mtl);
+    //   }
+    // }
+
+    /* Matrix Bindings. */
+    /* This is now called upon shader bind. We may need to re-evaluate this though,
+     * as it was done here to ensure uniform changes between draws were tracked.
+     * NOTE(Metal): We may be able to remove this. */
+    GPU_matrix_bind(reinterpret_cast<struct GPUShader *>(
+        static_cast<Shader *>(this->pipeline_state.active_shader)));
+
+    /* Bind uniforms. */
+    this->ensure_uniform_buffer_bindings(rec, shader_interface, pipeline_state_instance);
+
+    /* Bind Null attribute buffer, if needed. */
+    if (pipeline_state_instance->null_attribute_buffer_index >= 0) {
+      if (G.debug & G_DEBUG_GPU) {
+        MTL_LOG_INFO("Binding null attribute buffer at index: %d\n",
+                     pipeline_state_instance->null_attribute_buffer_index);
+      }
+      rps.bind_vertex_buffer(this->get_null_attribute_buffer(),
+                             0,
+                             pipeline_state_instance->null_attribute_buffer_index);
+    }
+
+    /** Dynamic Per-draw Render State on RenderCommandEncoder. */
+    /* State: Viewport. */
+    if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_VIEWPORT_FLAG) {
+      MTLViewport viewport;
+      viewport.originX = (double)this->pipeline_state.viewport_offset_x;
+      viewport.originY = (double)this->pipeline_state.viewport_offset_y;
+      viewport.width = (double)this->pipeline_state.viewport_width;
+      viewport.height = (double)this->pipeline_state.viewport_height;
+      viewport.znear = this->pipeline_state.depth_stencil_state.depth_range_near;
+      viewport.zfar = this->pipeline_state.depth_stencil_state.depth_range_far;
+      [rec setViewport:viewport];
+
+      this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+                                          ~MTL_PIPELINE_STATE_VIEWPORT_FLAG);
+    }
+
+    /* State: Scissor. */
+    if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_SCISSOR_FLAG) {
+
+      /* Get FrameBuffer associated with active RenderCommandEncoder. */
+      MTLFrameBuffer *render_fb = this->main_command_buffer.get_active_framebuffer();
+
+      MTLScissorRect scissor;
+      if (this->pipeline_state.scissor_enabled) {
+        scissor.x = this->pipeline_state.scissor_x;
+        scissor.y = this->pipeline_state.scissor_y;
+        scissor.width = this->pipeline_state.scissor_width;
+        scissor.height = this->pipeline_state.scissor_height;
+
+        /* Some scissor assignments exceed the bounds of the viewport due to implicitly added
+         * padding to the width/height - Clamp width/height. */
+        BLI_assert(scissor.x >= 0 && scissor.x < render_fb->get_width());
+        BLI_assert(scissor.y >= 0 && scissor.y < render_fb->get_height());
+        scissor.width = min_ii(scissor.width, render_fb->get_width() - scissor.x);
+        scissor.height = min_ii(scissor.height, render_fb->get_height() - scissor.y);
+        BLI_assert(scissor.width > 0 && (scissor.x + scissor.width <= render_fb->get_width()));
+        BLI_assert(scissor.height > 0 && (scissor.height <= render_fb->get_height()));
+      }
+      else {
+        /* Scissor is disabled, reset to default size as scissor state may have been previously
+         * assigned on this encoder. */
+        scissor.x = 0;
+        scissor.y = 0;
+        scissor.width = render_fb->get_width();
+        scissor.height = render_fb->get_height();
+      }
+
+      /* Scissor state can still be flagged as changed if it is toggled on and off, without
+       * parameters changing between draws. */
+      if (memcmp(&scissor, &rps.last_scissor_rect, sizeof(MTLScissorRect))) {
+        [rec setScissorRect:scissor];
+        rps.last_scissor_rect = scissor;
+      }
+      this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+                                          ~MTL_PIPELINE_STATE_SCISSOR_FLAG);
+    }
+
+    /* State: Face winding. */
+    if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_FRONT_FACING_FLAG) {
+      /* We need to invert the face winding in Metal, to account for the inverted-Y coordinate
+       * system. */
+      MTLWinding winding = (this->pipeline_state.front_face == GPU_CLOCKWISE) ?
+                               MTLWindingClockwise :
+                               MTLWindingCounterClockwise;
+      [rec setFrontFacingWinding:winding];
+      this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+                                          ~MTL_PIPELINE_STATE_FRONT_FACING_FLAG);
+    }
+
+    /* State: cull-mode. */
+    if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_CULLMODE_FLAG) {
+
+      MTLCullMode mode = MTLCullModeNone;
+      if (this->pipeline_state.culling_enabled) {
+        switch (this->pipeline_state.cull_mode) {
+          case GPU_CULL_NONE:
+            mode = MTLCullModeNone;
+            break;
+          case GPU_CULL_FRONT:
+            mode = MTLCullModeFront;
+            break;
+          case GPU_CULL_BACK:
+            mode = MTLCullModeBack;
+            break;
+          default:
+            BLI_assert_unreachable();
+            break;
+        }
+      }
+      [rec setCullMode:mode];
+      this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+                                          ~MTL_PIPELINE_STATE_CULLMODE_FLAG);
+    }
+
+    /* Pipeline state is now good. */
+    result = true;
+  }
+  return result;
+}
+
+/* Bind uniform buffers to an active render command encoder using the rendering state of the
+ * current context (active shader, bound UBOs). */
+bool MTLContext::ensure_uniform_buffer_bindings(
+    id<MTLRenderCommandEncoder> rec,
+    const MTLShaderInterface *shader_interface,
+    const MTLRenderPipelineStateInstance *pipeline_state_instance)
+{
+  /* Fetch Render Pass state. */
+  MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  /* Shader-owned push constant block for uniforms. */
+  bool active_shader_changed = (rps.last_bound_shader_state.shader_ !=
+                                    this->pipeline_state.active_shader ||
+                                rps.last_bound_shader_state.shader_ == nullptr ||
+                                rps.last_bound_shader_state.pso_index_ !=
+                                    pipeline_state_instance->shader_pso_index);
+
+  const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
+  if (push_constant_block.size > 0) {
+
+    /* Fetch uniform buffer base binding index from pipeline_state_instance -- the buffer index
+     * will be offset by the number of bound VBOs. */
+    uint32_t block_size = push_constant_block.size;
+    uint32_t buffer_index = pipeline_state_instance->base_uniform_buffer_index +
+                            push_constant_block.buffer_index;
+
+    /* Only need to rebind block if push constants have been modified -- or if no data is bound
+     * for the current RenderCommandEncoder. */
+    if (this->pipeline_state.active_shader->get_push_constant_is_dirty() ||
+        active_shader_changed || !rps.cached_vertex_buffer_bindings[buffer_index].is_bytes ||
+        !rps.cached_fragment_buffer_bindings[buffer_index].is_bytes || true) {
+
+      /* Bind push constant data. */
+      BLI_assert(this->pipeline_state.active_shader->get_push_constant_data() != nullptr);
+      rps.bind_vertex_bytes(
+          this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index);
+      rps.bind_fragment_bytes(
+          this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index);
+
+      /* Only need to rebind block if it has been modified. */
+      this->pipeline_state.active_shader->push_constant_bindstate_mark_dirty(false);
+    }
+  }
+  rps.last_bound_shader_state.set(this->pipeline_state.active_shader,
+                                  pipeline_state_instance->shader_pso_index);
+
+  /* Bind Global GPUUniformBuffers. */
+  /* Iterate through expected UBOs in the shader interface, and check if the globally bound ones
+   * match. This is used to support the gpu_uniformbuffer module, where the uniform data is
+   * global, and not owned by the shader instance. */
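/* For orientation: the "global" UBOs matched below are created and bound by the high level
 * through Blender's gpu_uniformbuffer module. A hedged sketch of that caller-side flow, which
 * populates pipeline_state.ubo_bindings (slot value illustrative):
 *
 *   GPUUniformBuf *ubo = GPU_uniformbuf_create_ex(sizeof(data), &data, "my_ubo");
 *   GPU_uniformbuf_bind(ubo, slot);  -- records the binding on the context
 *   ...draw...                       -- the loop below resolves it against the interface
 *   GPU_uniformbuf_unbind(ubo);
 */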
+  for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) {
+    const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index);
+
+    if (ubo.buffer_index >= 0) {
+
+      /* Uniform Buffer index offset by 1 as the first shader buffer binding slot is reserved
+       * for the uniform PushConstantBlock. */
+      const uint32_t buffer_index = ubo.buffer_index + 1;
+      int ubo_offset = 0;
+      id<MTLBuffer> ubo_buffer = nil;
+      int ubo_size = 0;
+
+      bool bind_dummy_buffer = false;
+      if (this->pipeline_state.ubo_bindings[ubo_index].bound) {
+
+        /* Fetch UBO global-binding properties from slot. */
+        ubo_offset = 0;
+        ubo_buffer = this->pipeline_state.ubo_bindings[ubo_index].ubo->get_metal_buffer(
+            &ubo_offset);
+        ubo_size = this->pipeline_state.ubo_bindings[ubo_index].ubo->get_size();
+
+        /* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid
+         * allocating zero buffers. */
+        if (ubo_buffer == nil) {
+          bind_dummy_buffer = true;
+        }
+        else {
+          BLI_assert(ubo_buffer != nil);
+          BLI_assert(ubo_size > 0);
+
+          if (pipeline_state_instance->reflection_data_available) {
+            /* NOTE: While the vertex and fragment stages have different UBOs, the indices in
+             * each case will be the same for the same UBO.
+             * We also determine the expected size and then ensure a buffer of the correct size
+             * exists in one of the vertex/fragment shader binding tables. This path is used
+             * to verify that the size of the bound UBO matches what is expected in the shader. */
+            uint32_t expected_size =
+                (buffer_index <
+                 pipeline_state_instance->buffer_bindings_reflection_data_vert.size()) ?
+                    pipeline_state_instance->buffer_bindings_reflection_data_vert[buffer_index]
+                        .size :
+                    0;
+            if (expected_size == 0) {
+              expected_size =
+                  (buffer_index <
+                   pipeline_state_instance->buffer_bindings_reflection_data_frag.size()) ?
+                      pipeline_state_instance->buffer_bindings_reflection_data_frag[buffer_index]
+                          .size :
+                      0;
+            }
+            BLI_assert_msg(
+                expected_size > 0,
+                "Shader interface expects UBO, but shader reflection data reports that it "
+                "is not present");
+
+            /* If the UBO size is smaller than the size expected by the shader, we need to bind
+             * the dummy buffer, which will be big enough, to avoid an OOB error. */
+            if (ubo_size < expected_size) {
+              MTL_LOG_INFO(
+                  "[Error][UBO] UBO (UBO Name: %s) bound at index: %d with size %d (Expected "
+                  "size %d) (Shader Name: %s) is too small -- binding NULL buffer. This is "
+                  "likely an over-binding, which is not used, but we need this to avoid "
+                  "validation issues\n",
+                  shader_interface->get_name_at_offset(ubo.name_offset),
+                  buffer_index,
+                  ubo_size,
+                  expected_size,
+                  shader_interface->get_name());
+              bind_dummy_buffer = true;
+            }
+          }
+        }
+      }
+      else {
+        MTL_LOG_INFO(
+            "[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- "
+            "but nothing was bound -- binding dummy buffer\n",
+            shader_interface->get_name(),
+            shader_interface->get_name_at_offset(ubo.name_offset),
+            buffer_index);
+        bind_dummy_buffer = true;
+      }
+
+      if (bind_dummy_buffer) {
+        /* Perform Dummy binding. */
+        ubo_offset = 0;
+        ubo_buffer = this->get_null_buffer();
+        ubo_size = [ubo_buffer length];
+      }
+
+      if (ubo_buffer != nil) {
+
+        uint32_t buffer_bind_index = pipeline_state_instance->base_uniform_buffer_index +
+                                     buffer_index;
+
+        /* Bind Vertex UBO. */
+        if (bool(ubo.stage_mask & ShaderStage::VERTEX)) {
+          BLI_assert(buffer_bind_index >= 0 &&
+                     buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+          rps.bind_vertex_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
+        }
+
+        /* Bind Fragment UBO. */
+        if (bool(ubo.stage_mask & ShaderStage::FRAGMENT)) {
+          BLI_assert(buffer_bind_index >= 0 &&
+                     buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+          rps.bind_fragment_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
+        }
+      }
+      else {
+        MTL_LOG_WARNING(
+            "[UBO] Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer "
+            "is NULL!\n",
+            shader_interface->get_name(),
+            shader_interface->get_name_at_offset(ubo.name_offset),
+            buffer_index);
+      }
+    }
+  }
+  return true;
+}
+
+/* Ensure texture bindings are correct and up to date for the current draw call. */
+void MTLContext::ensure_texture_bindings(
+    id<MTLRenderCommandEncoder> rec,
+    MTLShaderInterface *shader_interface,
+    const MTLRenderPipelineStateInstance *pipeline_state_instance)
+{
+  BLI_assert(shader_interface != nil);
+  BLI_assert(rec != nil);
+
+  /* Fetch Render Pass state. */
+  MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  @autoreleasepool {
+    int vertex_arg_buffer_bind_index = -1;
+    int fragment_arg_buffer_bind_index = -1;
+
+    /* Argument buffers are used for samplers, when the limit of 16 is exceeded. */
+    bool use_argument_buffer_for_samplers = shader_interface->get_use_argument_buffer_for_samplers(
+        &vertex_arg_buffer_bind_index, &fragment_arg_buffer_bind_index);
+
+    /* Loop through expected textures in shader interface and resolve bindings with currently
+     * bound textures. */
+    for (const uint t : IndexRange(shader_interface->get_max_texture_index() + 1)) {
+      /* Ensure the bound texture is compatible with the shader interface. If the
+       * shader does not expect a texture to be bound for the current slot, we skip
+       * binding.
+       * NOTE: Global texture bindings may be left over from prior draw calls. */
+      const MTLShaderTexture &shader_texture_info = shader_interface->get_texture(t);
+      if (!shader_texture_info.used) {
+        /* Skip unused binding points if explicit indices are specified. */
+        continue;
+      }
+
+      int slot = shader_texture_info.slot_index;
+      if (slot >= 0 && slot < GPU_max_textures()) {
+        bool bind_dummy_texture = true;
+        if (this->pipeline_state.texture_bindings[slot].used) {
+          gpu::MTLTexture *bound_texture =
+              this->pipeline_state.texture_bindings[slot].texture_resource;
+          MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot];
+          BLI_assert(bound_texture);
+          BLI_assert(bound_sampler.used);
+
+          if (shader_texture_info.type == bound_texture->type_) {
+            /* Bind texture and sampler if the bound texture matches the type expected by the
+             * shader. */
+            id<MTLTexture> tex = bound_texture->get_metal_handle();
+
+            if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) {
+              rps.bind_vertex_texture(tex, slot);
+              rps.bind_vertex_sampler(bound_sampler, use_argument_buffer_for_samplers, slot);
+            }
+
+            if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) {
+              rps.bind_fragment_texture(tex, slot);
+              rps.bind_fragment_sampler(bound_sampler, use_argument_buffer_for_samplers, slot);
+            }
+
+            /* Texture state resolved, no need to bind dummy texture. */
+            bind_dummy_texture = false;
+          }
+          else {
+            /* Texture type for bound texture (e.g. Texture2DArray) does not match what was
+             * expected in the shader interface.
This is a problem and we will need to bind + * a dummy texture to ensure correct API usage. */ + MTL_LOG_WARNING( + "(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong " + "texture target type. (Expecting type %d, actual type %d) (binding " + "name:'%s')(texture name:'%s')\n", + shader_interface->get_name(), + bound_texture, + slot, + shader_texture_info.type, + bound_texture->type_, + shader_interface->get_name_at_offset(shader_texture_info.name_offset), + bound_texture->get_name()); + } + } + else { + MTL_LOG_WARNING( + "Shader '%s' expected texture to be bound to slot %d -- No texture was " + "bound. (name:'%s')\n", + shader_interface->get_name(), + slot, + shader_interface->get_name_at_offset(shader_texture_info.name_offset)); + } + + /* Bind Dummy texture -- will temporarily resolve validation issues while incorrect formats + * are provided -- as certain configurations may not need any binding. These issues should + * be fixed in the high-level, if problems crop up. */ + if (bind_dummy_texture) { + if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) { + rps.bind_vertex_texture( + get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot); + + /* Bind default sampler state. */ + MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE}; + rps.bind_vertex_sampler(default_binding, use_argument_buffer_for_samplers, slot); + } + if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) { + rps.bind_fragment_texture( + get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot); + + /* Bind default sampler state. */ + MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE}; + rps.bind_fragment_sampler(default_binding, use_argument_buffer_for_samplers, slot); + } + } + } + else { + MTL_LOG_WARNING( + "Shader %p expected texture to be bound to slot %d -- Slot exceeds the " + "hardware/API limit of '%d'. (name:'%s')\n", + this->pipeline_state.active_shader, + slot, + GPU_max_textures(), + shader_interface->get_name_at_offset(shader_texture_info.name_offset)); + } + } + + /* Construct and Bind argument buffer. + * NOTE(Metal): Samplers use an argument buffer when the limit of 16 samplers is exceeded. */ + if (use_argument_buffer_for_samplers) { +#ifndef NDEBUG + /* Debug check to validate each expected texture in the shader interface has a valid + * sampler object bound to the context. We will need all of these to be valid + * when constructing the sampler argument buffer. */ + for (const uint i : IndexRange(shader_interface->get_max_texture_index() + 1)) { + const MTLShaderTexture &texture = shader_interface->get_texture(i); + if (texture.used) { + BLI_assert(this->samplers_.mtl_sampler[i] != nil); + } + } +#endif + + /* Check to ensure the buffer binding index for the argument buffer has been assigned. + * This PSO property will be set if we expect to use argument buffers, and the shader + * uses any amount of textures. */ + BLI_assert(vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0); + if (vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0) { + /* Offset binding index to be relative to the start of static uniform buffer binding slots. + * The first N slots, prior to `pipeline_state_instance->base_uniform_buffer_index` are + * used by vertex and index buffer bindings, and the number of buffers present will vary + * between PSOs. 
*/
+        int arg_buffer_idx = (pipeline_state_instance->base_uniform_buffer_index +
+                              vertex_arg_buffer_bind_index);
+        assert(arg_buffer_idx < 32);
+        id<MTLArgumentEncoder> argument_encoder = shader_interface->find_argument_encoder(
+            arg_buffer_idx);
+        if (argument_encoder == nil) {
+          argument_encoder = [pipeline_state_instance->vert
+              newArgumentEncoderWithBufferIndex:arg_buffer_idx];
+          shader_interface->insert_argument_encoder(arg_buffer_idx, argument_encoder);
+        }
+
+        /* Generate or Fetch argument buffer sampler configuration.
+         * NOTE(Metal): we need to base sampler counts off of the maximal texture
+         * index. This is not the most optimal, but in practice, not a use-case
+         * when argument buffers are required.
+         * This is because with explicit texture indices, the binding indices
+         * should match across draws, to allow the high-level to optimize bind-points. */
+        gpu::MTLBuffer *encoder_buffer = nullptr;
+        this->samplers_.num_samplers = shader_interface->get_max_texture_index() + 1;
+
+        gpu::MTLBuffer **cached_smp_buffer_search = this->cached_sampler_buffers_.lookup_ptr(
+            this->samplers_);
+        if (cached_smp_buffer_search != nullptr) {
+          encoder_buffer = *cached_smp_buffer_search;
+        }
+        else {
+          /* Populate argument buffer with current global sampler bindings. */
+          int size = [argument_encoder encodedLength];
+          int alignment = max_uu([argument_encoder alignment], 256);
+          int size_align_delta = (size % alignment);
+          int aligned_alloc_size = ((alignment > 1) && (size_align_delta > 0)) ?
+                                       size + (alignment - (size % alignment)) :
+                                       size;
+
+          /* Allocate buffer to store encoded sampler arguments. */
+          encoder_buffer = MTLContext::get_global_memory_manager().allocate(aligned_alloc_size,
+                                                                            true);
+          BLI_assert(encoder_buffer);
+          BLI_assert(encoder_buffer->get_metal_buffer());
+          [argument_encoder setArgumentBuffer:encoder_buffer->get_metal_buffer() offset:0];
+          [argument_encoder
+              setSamplerStates:this->samplers_.mtl_sampler
+                     withRange:NSMakeRange(0, shader_interface->get_max_texture_index() + 1)];
+          encoder_buffer->flush();
+
+          /* Insert into cache. */
+          this->cached_sampler_buffers_.add_new(this->samplers_, encoder_buffer);
+        }
+
+        BLI_assert(encoder_buffer != nullptr);
+        int vert_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
+                                 vertex_arg_buffer_bind_index);
+        rps.bind_vertex_buffer(encoder_buffer->get_metal_buffer(), 0, vert_buffer_index);
+
+        /* The fragment shader shares its argument buffer binding with the vertex shader, so no
+         * need to re-encode. We can use the same argument buffer. */
+        if (fragment_arg_buffer_bind_index >= 0) {
+          BLI_assert(fragment_arg_buffer_bind_index);
+          int frag_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
+                                   fragment_arg_buffer_bind_index);
+          rps.bind_fragment_buffer(encoder_buffer->get_metal_buffer(), 0, frag_buffer_index);
+        }
+      }
+    }
+  }
+}
+
+/* Encode latest depth-stencil state. */
+void MTLContext::ensure_depth_stencil_state(MTLPrimitiveType prim_type)
+{
+  /* Check if we need to update state. */
+  if (!(this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG)) {
+    return;
+  }
+
+  /* Fetch render command encoder. */
+  id<MTLRenderCommandEncoder> rec = this->main_command_buffer.get_active_render_command_encoder();
+  BLI_assert(rec);
+
+  /* Fetch Render Pass state. */
+  MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  /** Prepare Depth-stencil state based on current global pipeline state.
*/ + MTLFrameBuffer *fb = this->get_current_framebuffer(); + bool hasDepthTarget = fb->has_depth_attachment(); + bool hasStencilTarget = fb->has_stencil_attachment(); + + if (hasDepthTarget || hasStencilTarget) { + /* Update FrameBuffer State. */ + this->pipeline_state.depth_stencil_state.has_depth_target = hasDepthTarget; + this->pipeline_state.depth_stencil_state.has_stencil_target = hasStencilTarget; + + /* Check if current MTLContextDepthStencilState maps to an existing state object in + * the Depth-stencil state cache. */ + id<MTLDepthStencilState> ds_state = nil; + id<MTLDepthStencilState> *depth_stencil_state_lookup = + this->depth_stencil_state_cache.lookup_ptr(this->pipeline_state.depth_stencil_state); + + /* If not, populate DepthStencil state descriptor. */ + if (depth_stencil_state_lookup == nullptr) { + + MTLDepthStencilDescriptor *ds_state_desc = [[[MTLDepthStencilDescriptor alloc] init] + autorelease]; + + if (hasDepthTarget) { + ds_state_desc.depthWriteEnabled = + this->pipeline_state.depth_stencil_state.depth_write_enable; + ds_state_desc.depthCompareFunction = + this->pipeline_state.depth_stencil_state.depth_test_enabled ? + this->pipeline_state.depth_stencil_state.depth_function : + MTLCompareFunctionAlways; + } + + if (hasStencilTarget) { + ds_state_desc.backFaceStencil.readMask = + this->pipeline_state.depth_stencil_state.stencil_read_mask; + ds_state_desc.backFaceStencil.writeMask = + this->pipeline_state.depth_stencil_state.stencil_write_mask; + ds_state_desc.backFaceStencil.stencilFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_stencil_fail; + ds_state_desc.backFaceStencil.depthFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_depth_fail; + ds_state_desc.backFaceStencil.depthStencilPassOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_depthstencil_pass; + ds_state_desc.backFaceStencil.stencilCompareFunction = + (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ? + this->pipeline_state.depth_stencil_state.stencil_func : + MTLCompareFunctionAlways; + + ds_state_desc.frontFaceStencil.readMask = + this->pipeline_state.depth_stencil_state.stencil_read_mask; + ds_state_desc.frontFaceStencil.writeMask = + this->pipeline_state.depth_stencil_state.stencil_write_mask; + ds_state_desc.frontFaceStencil.stencilFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_stencil_fail; + ds_state_desc.frontFaceStencil.depthFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_depth_fail; + ds_state_desc.frontFaceStencil.depthStencilPassOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_depthstencil_pass; + ds_state_desc.frontFaceStencil.stencilCompareFunction = + (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ? + this->pipeline_state.depth_stencil_state.stencil_func : + MTLCompareFunctionAlways; + } + + /* Bake new DS state. */ + ds_state = [this->device newDepthStencilStateWithDescriptor:ds_state_desc]; + + /* Store state in cache. */ + BLI_assert(ds_state != nil); + this->depth_stencil_state_cache.add_new(this->pipeline_state.depth_stencil_state, ds_state); + } + else { + ds_state = *depth_stencil_state_lookup; + BLI_assert(ds_state != nil); + } + + /* Bind Depth Stencil State to render command encoder. 
*/
+  BLI_assert(ds_state != nil);
+  if (ds_state != nil) {
+    if (rps.bound_ds_state != ds_state) {
+      [rec setDepthStencilState:ds_state];
+      rps.bound_ds_state = ds_state;
+    }
+  }
+
+  /* Apply dynamic depth-stencil state on encoder. */
+  if (hasStencilTarget) {
+    uint32_t stencil_ref_value =
+        (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
+            this->pipeline_state.depth_stencil_state.stencil_ref :
+            0;
+    if (stencil_ref_value != rps.last_used_stencil_ref_value) {
+      [rec setStencilReferenceValue:stencil_ref_value];
+      rps.last_used_stencil_ref_value = stencil_ref_value;
+    }
+  }
+
+  if (hasDepthTarget) {
+    bool doBias = false;
+    switch (prim_type) {
+      case MTLPrimitiveTypeTriangle:
+      case MTLPrimitiveTypeTriangleStrip:
+        doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_tris;
+        break;
+      case MTLPrimitiveTypeLine:
+      case MTLPrimitiveTypeLineStrip:
+        doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_lines;
+        break;
+      case MTLPrimitiveTypePoint:
+        doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_points;
+        break;
+    }
+    [rec setDepthBias:(doBias) ? this->pipeline_state.depth_stencil_state.depth_bias : 0
+           slopeScale:(doBias) ? this->pipeline_state.depth_stencil_state.depth_slope_scale : 0
+                clamp:0];
+  }
+  }
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
 /** \name Visibility buffer control for MTLQueryPool.
  * \{ */
@@ -605,4 +1625,148 @@ id<MTLSamplerState> MTLContext::get_default_sampler_state()
 
 /** \} */
 
+/* -------------------------------------------------------------------- */
+/** \name Swap-chain management and Metal presentation.
+ * \{ */
+
+void present(MTLRenderPassDescriptor *blit_descriptor,
+             id<MTLRenderPipelineState> blit_pso,
+             id<MTLTexture> swapchain_texture,
+             id<CAMetalDrawable> drawable)
+{
+
+  MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
+  BLI_assert(ctx);
+
+  /* Flush any outstanding work. */
+  ctx->flush();
+
+  /* Always pace CPU to maximum of 3 drawables in flight.
+   * nextDrawable may have more in flight if backing swapchain
+   * textures are re-allocated, such as during resize events.
+   *
+   * Determine frames in flight based on current latency. If
+   * we are in a high-latency situation, limit frames in flight
+   * to increase app responsiveness and keep GPU execution under control.
+   * If latency improves, increase frames in flight to improve overall
+   * performance. */
+  int perf_max_drawables = MTL_MAX_DRAWABLES;
+  if (MTLContext::avg_drawable_latency_us > 185000) {
+    perf_max_drawables = 1;
+  }
+  else if (MTLContext::avg_drawable_latency_us > 85000) {
+    perf_max_drawables = 2;
+  }
+
+  while (MTLContext::max_drawables_in_flight > min_ii(perf_max_drawables, MTL_MAX_DRAWABLES)) {
+    PIL_sleep_ms(2);
+  }
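/* Worked example of the thresholds above: a rolling average of 90 ms (90000 us) allows two
 * drawables in flight; above 185 ms the swap-chain throttles to one; below 85 ms the full
 * MTL_MAX_DRAWABLES budget (3) applies. The wait loop above sleeps in 2 ms slices until a
 * command buffer completion handler decrements max_drawables_in_flight. */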
+
+  /* Present is submitted in its own command buffer to ensure the drawable reference is
+   * released as early as possible. This command buffer is separate as it does not utilize the
+   * global state for rendering as the main context does. */
+  id<MTLCommandBuffer> cmdbuf = [ctx->queue commandBuffer];
+  MTLCommandBufferManager::num_active_cmd_bufs++;
+
+  if (MTLCommandBufferManager::sync_event != nil) {
+    /* Ensure command buffer ordering. */
+    [cmdbuf encodeWaitForEvent:MTLCommandBufferManager::sync_event
+                         value:MTLCommandBufferManager::event_signal_val];
+  }
+
+  /* Do present call and final blit to MTLDrawable. */
+  id<MTLRenderCommandEncoder> enc = [cmdbuf renderCommandEncoderWithDescriptor:blit_descriptor];
+  [enc setRenderPipelineState:blit_pso];
+  [enc setFragmentTexture:swapchain_texture atIndex:0];
+  [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3];
+  [enc endEncoding];
+
+  /* Present drawable. */
+  BLI_assert(drawable);
+  [cmdbuf presentDrawable:drawable];
+
+  /* Ensure freed buffers have usage tracked against active CommandBuffer submissions. */
+  MTLSafeFreeList *cmd_free_buffer_list =
+      MTLContext::get_global_memory_manager().get_current_safe_list();
+  BLI_assert(cmd_free_buffer_list);
+
+  id<MTLCommandBuffer> cmd_buffer_ref = cmdbuf;
+  [cmd_buffer_ref retain];
+
+  /* Increment drawables in flight limiter. */
+  MTLContext::max_drawables_in_flight++;
+  std::chrono::time_point submission_time = std::chrono::high_resolution_clock::now();
+
+  /* Increment free pool reference and decrement upon command buffer completion. */
+  cmd_free_buffer_list->increment_reference();
+  [cmdbuf addCompletedHandler:^(id<MTLCommandBuffer> cb) {
+    /* Flag freed buffers associated with this CMD buffer as ready to be freed. */
+    cmd_free_buffer_list->decrement_reference();
+    [cmd_buffer_ref release];
+
+    /* Decrement count. */
+    MTLCommandBufferManager::num_active_cmd_bufs--;
+    MTL_LOG_INFO("[Metal] Active command buffers: %d\n",
+                 MTLCommandBufferManager::num_active_cmd_bufs);
+
+    /* Drawable count and latency management. */
+    MTLContext::max_drawables_in_flight--;
+    std::chrono::time_point completion_time = std::chrono::high_resolution_clock::now();
+    int64_t microseconds_per_frame = std::chrono::duration_cast<std::chrono::microseconds>(
+                                         completion_time - submission_time)
+                                         .count();
+    MTLContext::latency_resolve_average(microseconds_per_frame);
+
+    MTL_LOG_INFO("Frame Latency: %f ms  (Rolling avg: %f ms  Drawables: %d)\n",
+                 ((float)microseconds_per_frame) / 1000.0f,
+                 ((float)MTLContext::avg_drawable_latency_us) / 1000.0f,
+                 perf_max_drawables);
+  }];
+
+  if (MTLCommandBufferManager::sync_event == nil) {
+    MTLCommandBufferManager::sync_event = [ctx->device newEvent];
+    BLI_assert(MTLCommandBufferManager::sync_event);
+    [MTLCommandBufferManager::sync_event retain];
+  }
+  BLI_assert(MTLCommandBufferManager::sync_event != nil);
+
+  MTLCommandBufferManager::event_signal_val++;
+  [cmdbuf encodeSignalEvent:MTLCommandBufferManager::sync_event
+                      value:MTLCommandBufferManager::event_signal_val];
+
+  [cmdbuf commit];
+
+  /* When debugging, fetch advanced command buffer errors. */
+  if (G.debug & G_DEBUG_GPU) {
+    [cmdbuf waitUntilCompleted];
+    NSError *error = [cmdbuf error];
+    if (error != nil) {
+      NSLog(@"%@", error);
+      BLI_assert(false);
+
+      @autoreleasepool {
+        const char *stringAsChar = [[NSString stringWithFormat:@"%@", error] UTF8String];
+
+        std::ofstream outfile;
+        outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
+        outfile << stringAsChar;
+        outfile.close();
+      }
+    }
+    else {
+      @autoreleasepool {
+        NSString *str = @"Command buffer completed successfully!\n";
+        const char *stringAsChar = [str UTF8String];
+
+        std::ofstream outfile;
+        outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
+        outfile << stringAsChar;
+        outfile.close();
+      }
+    }
+  }
+}
+
+/** \} */
+
 }  // blender::gpu
diff --git a/source/blender/gpu/metal/mtl_drawlist.hh b/source/blender/gpu/metal/mtl_drawlist.hh
new file mode 100644
index 00000000000..47055f3d7f4
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_drawlist.hh
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Implementation of Multi Draw Indirect for the Metal backend.
+ * Metal has no host-side multi-draw-indirect, so recorded commands are encoded as
+ * individual draws on submission.
+ */
+
+#pragma once
+
+#include "BLI_sys_types.h"
+#include "GPU_batch.h"
+#include "MEM_guardedalloc.h"
+#include "gpu_drawlist_private.hh"
+
+#include "mtl_batch.hh"
+#include "mtl_context.hh"
+
+namespace blender::gpu {
+
+/**
+ * Emulation of Multi Draw Indirect for the Metal backend.
+ **/
+class MTLDrawList : public DrawList {
+
+ private:
+  /** Batch for which we are recording commands. */
+  MTLBatch *batch_;
+  /** Mapped memory bounds. */
+  void *data_;
+  /** Length of the mapped buffer (in bytes). */
+  size_t data_size_;
+  /** Current offset inside the mapped buffer (in bytes). */
+  size_t command_offset_;
+  /** Current number of commands recorded inside the mapped buffer. */
+  uint32_t command_len_;
+  /** Is UINT_MAX if not drawing indexed geometry. Also avoids dereferencing batch. */
+  uint32_t base_index_;
+  /** Also avoids dereferencing batch. */
+  uint32_t v_first_, v_count_;
+  /** Length of the whole buffer (in bytes). */
+  uint32_t buffer_size_;
+
+ public:
+  MTLDrawList(int length);
+  ~MTLDrawList();
+
+  void append(GPUBatch *batch, int i_first, int i_count) override;
+  void submit() override;
+
+ private:
+  void init();
+
+  MEM_CXX_CLASS_ALLOC_FUNCS("MTLDrawList");
+};
+
+}  // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_drawlist.mm b/source/blender/gpu/metal/mtl_drawlist.mm
new file mode 100644
index 00000000000..99194d2b72c
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_drawlist.mm
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Implementation of Multi Draw Indirect for the Metal backend.
+ * Metal has no host-side multi-draw-indirect, so recorded commands are encoded as
+ * individual draws on submission.
+ */
+
+#include "BLI_assert.h"
+
+#include "GPU_batch.h"
+#include "mtl_common.hh"
+#include "mtl_drawlist.hh"
+#include "mtl_primitive.hh"
+
+using namespace blender::gpu;
+
+namespace blender::gpu {
+
+/* Indirect draw call structure for reference. */
+/* MTLDrawPrimitivesIndirectArguments --
+ * https://developer.apple.com/documentation/metal/mtldrawprimitivesindirectarguments?language=objc
+ */
+/* struct MTLDrawPrimitivesIndirectArguments {
+ *   uint32_t vertexCount;
+ *   uint32_t instanceCount;
+ *   uint32_t vertexStart;
+ *   uint32_t baseInstance;
+ * }; */
+
+/* MTLDrawIndexedPrimitivesIndirectArguments --
+ * https://developer.apple.com/documentation/metal/mtldrawindexedprimitivesindirectarguments?language=objc
+ */
+/* struct MTLDrawIndexedPrimitivesIndirectArguments {
+ *   uint32_t indexCount;
+ *   uint32_t instanceCount;
+ *   uint32_t indexStart;
+ *   uint32_t baseVertex;
+ *   uint32_t baseInstance;
+ * }; */
+
+#define MDI_ENABLED (buffer_size_ != 0)
+#define MDI_DISABLED (buffer_size_ == 0)
+#define MDI_INDEXED (base_index_ != UINT_MAX)
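/* For reference, the argument structs above are laid out exactly as Metal's indirect-draw
 * APIs expect, so a buffer filled with them could be consumed GPU-side directly. A hedged
 * sketch (assumes the recorded commands were copied into an id<MTLBuffer> args):
 *
 *   for (uint32_t i = 0; i < command_len; i++) {
 *     [enc drawPrimitives:prim_type
 *          indirectBuffer:args
 *    indirectBufferOffset:i * sizeof(MTLDrawPrimitivesIndirectArguments)];
 *   }
 *
 * MTLDrawList::submit() below instead reads the same structs on the CPU and issues direct
 * draws, since host-side multi-draw-indirect is unavailable. */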
+
+MTLDrawList::MTLDrawList(int length)
+{
+  BLI_assert(length > 0);
+  batch_ = nullptr;
+  command_len_ = 0;
+  base_index_ = 0;
+  command_offset_ = 0;
+  data_size_ = 0;
+  buffer_size_ = sizeof(MTLDrawIndexedPrimitivesIndirectArguments) * length;
+  data_ = (void *)MEM_mallocN(buffer_size_, __func__);
+}
+
+MTLDrawList::~MTLDrawList()
+{
+  if (data_) {
+    MEM_freeN(data_);
+    data_ = nullptr;
+  }
+}
+
+void MTLDrawList::init()
+{
+  MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+  BLI_assert(ctx);
+  BLI_assert(MDI_ENABLED);
+  BLI_assert(data_ != nullptr);
+  UNUSED_VARS_NDEBUG(ctx);
+
+  batch_ = nullptr;
+  command_len_ = 0;
+  BLI_assert(data_);
+
+  command_offset_ = 0;
+}
+
+void MTLDrawList::append(GPUBatch *gpu_batch, int i_first, int i_count)
+{
+  /* Fallback when MultiDrawIndirect is not supported/enabled. */
+  MTLShader *shader = static_cast<MTLShader *>(unwrap(gpu_batch->shader));
+  bool requires_ssbo = (shader->get_uses_ssbo_vertex_fetch());
+  bool requires_emulation = mtl_needs_topology_emulation(gpu_batch->prim_type);
+  if (MDI_DISABLED || requires_ssbo || requires_emulation) {
+    GPU_batch_draw_advanced(gpu_batch, 0, 0, i_first, i_count);
+    return;
+  }
+
+  if (data_ == nullptr) {
+    this->init();
+  }
+  BLI_assert(data_);
+
+  MTLBatch *mtl_batch = static_cast<MTLBatch *>(gpu_batch);
+  BLI_assert(mtl_batch);
+  if (mtl_batch != batch_) {
+    /* Submit existing calls. */
+    this->submit();
+
+    /* Begin new batch. */
+    batch_ = mtl_batch;
+
+    /* Cached for faster access. */
+    MTLIndexBuf *el = batch_->elem_();
+    base_index_ = el ? el->index_base_ : UINT_MAX;
+    v_first_ = el ? el->index_start_ : 0;
+    v_count_ = el ? el->index_len_ : batch_->verts_(0)->vertex_len;
+  }
+
+  if (v_count_ == 0) {
+    /* Nothing to draw. */
+    return;
+  }
+
+  if (MDI_INDEXED) {
+    MTLDrawIndexedPrimitivesIndirectArguments *cmd =
+        reinterpret_cast<MTLDrawIndexedPrimitivesIndirectArguments *>((char *)data_ +
+                                                                      command_offset_);
+    cmd->indexStart = v_first_;
+    cmd->indexCount = v_count_;
+    cmd->instanceCount = i_count;
+    cmd->baseVertex = base_index_;
+    cmd->baseInstance = i_first;
+  }
+  else {
+    MTLDrawPrimitivesIndirectArguments *cmd =
+        reinterpret_cast<MTLDrawPrimitivesIndirectArguments *>((char *)data_ + command_offset_);
+    cmd->vertexStart = v_first_;
+    cmd->vertexCount = v_count_;
+    cmd->instanceCount = i_count;
+    cmd->baseInstance = i_first;
+  }
+
+  size_t command_size = MDI_INDEXED ? sizeof(MTLDrawIndexedPrimitivesIndirectArguments) :
+                                      sizeof(MTLDrawPrimitivesIndirectArguments);
+
+  command_offset_ += command_size;
+  command_len_++;
+
+  /* Check if we can fit at least one other command. */
+  if (command_offset_ + command_size > buffer_size_) {
+    this->submit();
+  }
+
+  return;
+}
+
+void MTLDrawList::submit()
+{
+  /* Metal does not support MDI from the host side, but we still benefit from only executing
+   * the batch bind a single time, rather than per-draw.
+   * NOTE(Metal): Consider using #MTLIndirectCommandBuffer to achieve similar behavior. */
+  if (command_len_ == 0) {
+    return;
+  }
+
+  /* Something's wrong if we get here without MDI support. */
+  BLI_assert(MDI_ENABLED);
+  BLI_assert(data_);
+
+  /* Host-side MDI currently unsupported on Metal. */
+  bool can_use_MDI = false;
+
+  /* Verify context. */
+  MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+  BLI_assert(ctx);
+
+  /* Execute indirect draw calls. */
+  MTLShader *shader = static_cast<MTLShader *>(unwrap(batch_->shader));
+  bool SSBO_MODE = (shader->get_uses_ssbo_vertex_fetch());
+  if (SSBO_MODE) {
+    can_use_MDI = false;
+    BLI_assert(false);
+    return;
+  }
+
+  /* Heuristic to determine whether using indirect drawing is more efficient. */
+  size_t command_size = MDI_INDEXED ? sizeof(MTLDrawIndexedPrimitivesIndirectArguments) :
+                                      sizeof(MTLDrawPrimitivesIndirectArguments);
+  const bool is_finishing_a_buffer = (command_offset_ + command_size > buffer_size_);
+  can_use_MDI = can_use_MDI && (is_finishing_a_buffer || command_len_ > 2);
+
+  /* Bind Batch to setup render pipeline state. */
+  id<MTLRenderCommandEncoder> rec = batch_->bind(0, 0, 0, 0);
+  if (!rec) {
+    BLI_assert_msg(false, "A RenderCommandEncoder should always be available!\n");
+    return;
+  }
+
+  /* Common properties. */
+  MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(batch_->prim_type);
+
+  /* Execute multi-draw indirect. */
+  if (can_use_MDI && false) {
+    /* Metal doesn't support MDI -- singular indirect draw calls are supported,
+     * but multi-draw is not.
+     * TODO(Metal): Consider using #IndirectCommandBuffers to provide similar
+     * behavior. */
+  }
+  else {
+
+    /* Execute draws manually. */
+    if (MDI_INDEXED) {
+      MTLDrawIndexedPrimitivesIndirectArguments *cmd =
+          (MTLDrawIndexedPrimitivesIndirectArguments *)data_;
+      MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(
+          reinterpret_cast<IndexBuf *>(batch_->elem));
+      BLI_assert(mtl_elem);
+      MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_);
+      uint32_t index_size = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 2 : 4;
+      uint32_t v_first_ofs = (mtl_elem->index_start_ * index_size);
+      uint32_t index_count = cmd->indexCount;
+
+      /* Fetch index buffer. May return an index buffer of a differing format,
+       * if index buffer optimization is used. In these cases, mtl_prim_type and
+       * index_count get updated with the new properties. */
+      GPUPrimType final_prim_type = batch_->prim_type;
+      id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count);
+      BLI_assert(index_buffer != nil);
+
+      /* Final primitive type. */
+      mtl_prim_type = gpu_prim_type_to_metal(final_prim_type);
+
+      if (index_buffer != nil) {
+
+        /* Set depth stencil state (requires knowledge of primitive type). */
*/ + ctx->ensure_depth_stencil_state(mtl_prim_type); + + for (int i = 0; i < command_len_; i++, cmd++) { + [rec drawIndexedPrimitives:mtl_prim_type + indexCount:index_count + indexType:index_type + indexBuffer:index_buffer + indexBufferOffset:v_first_ofs + instanceCount:cmd->instanceCount + baseVertex:cmd->baseVertex + baseInstance:cmd->baseInstance]; + ctx->main_command_buffer.register_draw_counters(cmd->indexCount * cmd->instanceCount); + } + } + else { + BLI_assert_msg(false, "Index buffer does not have backing Metal buffer"); + } + } + else { + MTLDrawPrimitivesIndirectArguments *cmd = (MTLDrawPrimitivesIndirectArguments *)data_; + + /* Verify if topology emulation is required. */ + if (mtl_needs_topology_emulation(batch_->prim_type)) { + BLI_assert_msg(false, "topology emulation cases should use fallback."); + } + else { + + /* Set depth stencil state (requires knowledge of primitive type). */ + ctx->ensure_depth_stencil_state(mtl_prim_type); + + for (int i = 0; i < command_len_; i++, cmd++) { + [rec drawPrimitives:mtl_prim_type + vertexStart:cmd->vertexStart + vertexCount:cmd->vertexCount + instanceCount:cmd->instanceCount + baseInstance:cmd->baseInstance]; + ctx->main_command_buffer.register_draw_counters(cmd->vertexCount * cmd->instanceCount); + } + } + } + } + + /* Unbind batch. */ + batch_->unbind(); + + /* Reset command offsets. */ + command_len_ = 0; + command_offset_ = 0; + + /* Avoid keeping reference to the batch. */ + batch_ = nullptr; +} + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_immediate.hh b/source/blender/gpu/metal/mtl_immediate.hh new file mode 100644 index 00000000000..8d852282ac8 --- /dev/null +++ b/source/blender/gpu/metal/mtl_immediate.hh @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * Mimics old style opengl immediate mode drawing. + */ + +#pragma once + +#include "MEM_guardedalloc.h" +#include "gpu_immediate_private.hh" + +#include <Cocoa/Cocoa.h> +#include <Metal/Metal.h> +#include <QuartzCore/QuartzCore.h> + +namespace blender::gpu { + +class MTLImmediate : public Immediate { + private: + MTLContext *context_ = nullptr; + MTLTemporaryBuffer current_allocation_; + MTLPrimitiveTopologyClass metal_primitive_mode_; + MTLPrimitiveType metal_primitive_type_; + bool has_begun_ = false; + + public: + MTLImmediate(MTLContext *ctx); + ~MTLImmediate(); + + uchar *begin() override; + void end() override; + bool imm_is_recording() + { + return has_begun_; + } +}; + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_immediate.mm b/source/blender/gpu/metal/mtl_immediate.mm new file mode 100644 index 00000000000..ee48bdd6ee1 --- /dev/null +++ b/source/blender/gpu/metal/mtl_immediate.mm @@ -0,0 +1,401 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * Mimics old style opengl immediate mode drawing. + */ + +#include "BKE_global.h" + +#include "GPU_vertex_format.h" +#include "gpu_context_private.hh" +#include "gpu_shader_private.hh" +#include "gpu_vertex_format_private.h" + +#include "mtl_context.hh" +#include "mtl_debug.hh" +#include "mtl_immediate.hh" +#include "mtl_primitive.hh" +#include "mtl_shader.hh" + +namespace blender::gpu { + +MTLImmediate::MTLImmediate(MTLContext *ctx) +{ + context_ = ctx; +} + +MTLImmediate::~MTLImmediate() +{ +} + +uchar *MTLImmediate::begin() +{ + BLI_assert(!has_begun_); + + /* Determine primitive type. 
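 *
 * [Editorial note -- illustrative usage, not part of this diff] `begin()` / `end()`
 * are driven by the generic immediate-mode API; a minimal caller, assuming `pos`
 * is an attribute id previously returned by GPU_vertformat_attr_add() for the
 * bound shader's format, looks like:
 *
 *   immBegin(GPU_PRIM_TRIS, 3);    // -> MTLImmediate::begin()
 *   immVertex3f(pos, 0.0f, 0.0f, 0.0f);
 *   immVertex3f(pos, 1.0f, 0.0f, 0.0f);
 *   immVertex3f(pos, 0.0f, 1.0f, 0.0f);
 *   immEnd();                      // -> MTLImmediate::end(), submits the draw
 *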
+   */
+  metal_primitive_type_ = gpu_prim_type_to_metal(this->prim_type);
+  metal_primitive_mode_ = mtl_prim_type_to_topology_class(metal_primitive_type_);
+  has_begun_ = true;
+
+  /* Allocate a range of data and return host-accessible pointer. */
+  const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_len);
+  current_allocation_ = context_->get_scratchbuffer_manager()
+                            .scratch_buffer_allocate_range_aligned(bytes_needed, 256);
+  [current_allocation_.metal_buffer retain];
+  return reinterpret_cast<uchar *>(current_allocation_.data);
+}
+
+void MTLImmediate::end()
+{
+  /* Ensure we are between an `imm::begin` / `imm::end` pair. */
+  BLI_assert(has_begun_);
+  BLI_assert(prim_type != GPU_PRIM_NONE);
+
+  /* Verify context is valid, vertex data is written and a valid shader is bound. */
+  if (context_ && this->vertex_idx > 0 && this->shader) {
+
+    MTLShader *active_mtl_shader = static_cast<MTLShader *>(unwrap(shader));
+
+    /* Skip draw if Metal shader is not valid. */
+    if (active_mtl_shader == nullptr || !active_mtl_shader->is_valid() ||
+        active_mtl_shader->get_interface() == nullptr) {
+
+      const char *ptr = (active_mtl_shader) ? active_mtl_shader->name_get() : nullptr;
+      MTL_LOG_WARNING(
+          "MTLImmediate::end -- cannot perform draw as active shader is NULL or invalid (likely "
+          "unimplemented) (shader %p '%s')\n",
+          active_mtl_shader,
+          ptr);
+      return;
+    }
+
+    /* Ensure we are inside a render pass and fetch active RenderCommandEncoder. */
+    id<MTLRenderCommandEncoder> rec = context_->ensure_begin_render_pass();
+    BLI_assert(rec != nil);
+
+    /* Fetch active render pipeline state. */
+    MTLRenderPassState &rps = context_->main_command_buffer.get_render_pass_state();
+
+    /* Bind Shader. */
+    GPU_shader_bind(this->shader);
+
+    /* Debug markers for frame-capture and detailed error messages. */
+    if (G.debug & G_DEBUG_GPU) {
+      [rec pushDebugGroup:[NSString
+                              stringWithFormat:@"immEnd(verts: %d, shader: %s)",
+                                               this->vertex_idx,
+                                               active_mtl_shader->get_interface()->get_name()]];
+      [rec insertDebugSignpost:[NSString stringWithFormat:@"immEnd(verts: %d, shader: %s)",
+                                                          this->vertex_idx,
+                                                          active_mtl_shader->get_interface()
+                                                              ->get_name()]];
+    }
+
+    /* Populate pipeline state vertex descriptor. */
+    MTLStateManager *state_manager = static_cast<MTLStateManager *>(
+        MTLContext::get()->state_manager);
+    MTLRenderPipelineStateDescriptor &desc = state_manager->get_pipeline_descriptor();
+    const MTLShaderInterface *interface = active_mtl_shader->get_interface();
+
+    /* Reset vertex descriptor to default state. */
+    desc.reset_vertex_descriptor();
+
+    desc.vertex_descriptor.num_attributes = interface->get_total_attributes();
+    desc.vertex_descriptor.num_vert_buffers = 1;
+
+    for (int i = 0; i < desc.vertex_descriptor.num_attributes; i++) {
+      desc.vertex_descriptor.attributes[i].format = MTLVertexFormatInvalid;
+    }
+    desc.vertex_descriptor.uses_ssbo_vertex_fetch =
+        active_mtl_shader->get_uses_ssbo_vertex_fetch();
+    desc.vertex_descriptor.num_ssbo_attributes = 0;
+
+    /* SSBO Vertex Fetch -- Verify attributes. */
+    if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+      active_mtl_shader->ssbo_vertex_fetch_bind_attributes_begin();
+
+      /* Disable indexed rendering in SSBO vertex fetch.
*/ + int uniform_ssbo_use_indexed = active_mtl_shader->uni_ssbo_uses_indexed_rendering; + BLI_assert_msg(uniform_ssbo_use_indexed != -1, + "Expected valid uniform location for ssbo_uses_indexed_rendering."); + int uses_indexed_rendering = 0; + active_mtl_shader->uniform_int(uniform_ssbo_use_indexed, 1, 1, &uses_indexed_rendering); + } + + /* Populate Vertex descriptor and verify attributes. + * TODO(Metal): Cache this vertex state based on Vertex format and shaders. */ + for (int i = 0; i < interface->get_total_attributes(); i++) { + + /* NOTE: Attribute in VERTEX FORMAT does not necessarily share the same array index as + * attributes in shader interface. */ + GPUVertAttr *attr = nullptr; + const MTLShaderInputAttribute &mtl_shader_attribute = interface->get_attribute(i); + + /* Scan through vertex_format attributes until one with a name matching the shader interface + * is found. */ + for (uint32_t a_idx = 0; a_idx < this->vertex_format.attr_len && attr == nullptr; a_idx++) { + GPUVertAttr *check_attribute = &this->vertex_format.attrs[a_idx]; + + /* Attributes can have multiple name aliases associated with them. */ + for (uint32_t n_idx = 0; n_idx < check_attribute->name_len; n_idx++) { + const char *name = GPU_vertformat_attr_name_get( + &this->vertex_format, check_attribute, n_idx); + + if (strcmp(name, interface->get_name_at_offset(mtl_shader_attribute.name_offset)) == 0) { + attr = check_attribute; + break; + } + } + } + + BLI_assert_msg(attr != nullptr, + "Could not find expected attribute in immediate mode vertex format."); + if (attr == nullptr) { + MTL_LOG_ERROR( + "MTLImmediate::end Could not find matching attribute '%s' from Shader Interface in " + "Vertex Format! - TODO: Bind Dummy attribute\n", + interface->get_name_at_offset(mtl_shader_attribute.name_offset)); + return; + } + + /* Determine whether implicit type conversion between input vertex format + * and shader interface vertex format is supported. */ + MTLVertexFormat convertedFormat; + bool can_use_implicit_conversion = mtl_convert_vertex_format( + mtl_shader_attribute.format, + (GPUVertCompType)attr->comp_type, + attr->comp_len, + (GPUVertFetchMode)attr->fetch_mode, + &convertedFormat); + + if (can_use_implicit_conversion) { + /* Metal API can implicitly convert some formats during vertex assembly: + * - Converting from a normalized short2 format to float2 + * - Type truncation e.g. Float4 to Float2. + * - Type expansion from Float3 to Float4. + * - Note: extra components are filled with the corresponding components of (0,0,0,1). + * (See + * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format) + */ + bool is_floating_point_format = (attr->comp_type == GPU_COMP_F32); + desc.vertex_descriptor.attributes[i].format = convertedFormat; + desc.vertex_descriptor.attributes[i].format_conversion_mode = + (is_floating_point_format) ? (GPUVertFetchMode)GPU_FETCH_FLOAT : + (GPUVertFetchMode)GPU_FETCH_INT; + BLI_assert(convertedFormat != MTLVertexFormatInvalid); + } + else { + /* Some conversions are NOT valid, e.g. Int4 to Float4 + * - In this case, we need to implement a conversion routine inside the shader. + * - This is handled using the format_conversion_mode flag + * - This flag is passed into the PSO as a function specialization, + * and will generate an appropriate conversion function when reading the vertex attribute + * value into local shader storage. + * (If no explicit conversion is needed, the function specialize to a pass-through). 
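 *
 * [Editorial note -- hypothetical example, not part of this diff] Assuming the
 * helper supports the pair, a normalized `ushort2` attribute feeding a `float2`
 * shader input can stay on the implicit path:
 *
 *   MTLVertexFormat fmt;
 *   bool ok = mtl_convert_vertex_format(MTLVertexFormatFloat2,  // Shader side.
 *                                       GPU_COMP_U16, 2,        // Buffer side.
 *                                       GPU_FETCH_INT_TO_FLOAT_UNIT,
 *                                       &fmt);  // e.g. UShort2Normalized.
 *
 * whereas an integer source feeding a float destination without a normalized
 * fetch mode has no implicit Metal equivalent and takes this explicit branch.
 *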
*/ + MTLVertexFormat converted_format; + bool can_convert = mtl_vertex_format_resize( + mtl_shader_attribute.format, attr->comp_len, &converted_format); + desc.vertex_descriptor.attributes[i].format = (can_convert) ? converted_format : + mtl_shader_attribute.format; + desc.vertex_descriptor.attributes[i].format_conversion_mode = (GPUVertFetchMode) + attr->fetch_mode; + BLI_assert(desc.vertex_descriptor.attributes[i].format != MTLVertexFormatInvalid); + } + /* Using attribute offset in vertex format, as this will be correct */ + desc.vertex_descriptor.attributes[i].offset = attr->offset; + desc.vertex_descriptor.attributes[i].buffer_index = mtl_shader_attribute.buffer_index; + + /* SSBO Vertex Fetch Attribute bind. */ + if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) { + BLI_assert_msg(mtl_shader_attribute.buffer_index == 0, + "All attributes should be in buffer index zero"); + MTLSSBOAttribute ssbo_attr( + mtl_shader_attribute.index, + mtl_shader_attribute.buffer_index, + attr->offset, + this->vertex_format.stride, + MTLShader::ssbo_vertex_type_to_attr_type(desc.vertex_descriptor.attributes[i].format), + false); + desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] = + ssbo_attr; + desc.vertex_descriptor.num_ssbo_attributes++; + active_mtl_shader->ssbo_vertex_fetch_bind_attribute(ssbo_attr); + } + } + + /* Buffer bindings for singular vertex buffer. */ + desc.vertex_descriptor.buffer_layouts[0].step_function = MTLVertexStepFunctionPerVertex; + desc.vertex_descriptor.buffer_layouts[0].step_rate = 1; + desc.vertex_descriptor.buffer_layouts[0].stride = this->vertex_format.stride; + BLI_assert(this->vertex_format.stride > 0); + + /* SSBO Vertex Fetch -- Verify Attributes. */ + if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) { + active_mtl_shader->ssbo_vertex_fetch_bind_attributes_end(rec); + + /* Set Status uniforms. */ + BLI_assert_msg(active_mtl_shader->uni_ssbo_input_prim_type_loc != -1, + "ssbo_input_prim_type uniform location invalid!"); + BLI_assert_msg(active_mtl_shader->uni_ssbo_input_vert_count_loc != -1, + "ssbo_input_vert_count uniform location invalid!"); + GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)), + active_mtl_shader->uni_ssbo_input_prim_type_loc, + 1, + 1, + (const int *)(&this->prim_type)); + GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)), + active_mtl_shader->uni_ssbo_input_vert_count_loc, + 1, + 1, + (const int *)(&this->vertex_idx)); + } + + MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type); + if (context_->ensure_render_pipeline_state(mtl_prim_type)) { + + /* Issue draw call. */ + BLI_assert(this->vertex_idx > 0); + + /* Metal API does not support triangle fan, so we can emulate this + * input data by generating an index buffer to re-map indices to + * a TriangleList. + * + * NOTE(Metal): Consider caching generated triangle fan index buffers. + * For immediate mode, generating these is currently very cheap, as we use + * fast scratch buffer allocations. Though we may benefit from caching of + * frequently used buffer sizes. */ + if (mtl_needs_topology_emulation(this->prim_type)) { + + /* Debug safety check for SSBO FETCH MODE. */ + if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) { + BLI_assert(false && "Topology emulation not supported with SSBO Vertex Fetch mode"); + } + + /* Emulate Tri-fan. 
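 *
 * [Editorial note -- worked example, not part of this diff] A fan over vertices
 * {0, 1, 2, 3, 4} decomposes into (5 - 2) = 3 triangles that all share vertex 0,
 * so the generated triangle-list index buffer is:
 *
 *   0, 1, 2,   0, 2, 3,   0, 3, 4
 *
 * which matches the loop below, emitting (0, i + 1, i + 2) per triangle.
 *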
*/ + if (this->prim_type == GPU_PRIM_TRI_FAN) { + /* Prepare Triangle-Fan emulation index buffer on CPU based on number of input + * vertices. */ + uint32_t base_vert_count = this->vertex_idx; + uint32_t num_triangles = max_ii(base_vert_count - 2, 0); + uint32_t fan_index_count = num_triangles * 3; + BLI_assert(num_triangles > 0); + + uint32_t alloc_size = sizeof(uint32_t) * fan_index_count; + uint32_t *index_buffer = nullptr; + + MTLTemporaryBuffer allocation = + context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned( + alloc_size, 128); + index_buffer = (uint32_t *)allocation.data; + + int a = 0; + for (int i = 0; i < num_triangles; i++) { + index_buffer[a++] = 0; + index_buffer[a++] = i + 1; + index_buffer[a++] = i + 2; + } + + @autoreleasepool { + + id<MTLBuffer> index_buffer_mtl = nil; + uint32_t index_buffer_offset = 0; + + /* Region of scratch buffer used for topology emulation element data. + * NOTE(Metal): We do not need to manually flush as the entire scratch + * buffer for current command buffer is flushed upon submission. */ + index_buffer_mtl = allocation.metal_buffer; + index_buffer_offset = allocation.buffer_offset; + + /* Set depth stencil state (requires knowledge of primitive type). */ + context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle); + + /* Bind Vertex Buffer. */ + rps.bind_vertex_buffer( + current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0); + + /* Draw. */ + [rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle + indexCount:fan_index_count + indexType:MTLIndexTypeUInt32 + indexBuffer:index_buffer_mtl + indexBufferOffset:index_buffer_offset]; + } + } + else { + /* TODO(Metal): Topology emulation for line loop. + * NOTE(Metal): This is currently not used anywhere and modified at the high + * level for efficiency in such cases. */ + BLI_assert_msg(false, "LineLoop requires emulation support in immediate mode."); + } + } + else { + MTLPrimitiveType primitive_type = metal_primitive_type_; + int vertex_count = this->vertex_idx; + + /* Bind Vertex Buffer. */ + rps.bind_vertex_buffer( + current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0); + + /* Set depth stencil state (requires knowledge of primitive type). */ + context_->ensure_depth_stencil_state(primitive_type); + + if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) { + + /* Bind Null Buffers for empty/missing bind slots. */ + id<MTLBuffer> null_buffer = context_->get_null_buffer(); + BLI_assert(null_buffer != nil); + for (int i = 1; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) { + + /* We only need to ensure a buffer is bound to the context, its contents do not matter + * as it will not be used. */ + if (rps.cached_vertex_buffer_bindings[i].metal_buffer == nil) { + rps.bind_vertex_buffer(null_buffer, 0, i); + } + } + + /* SSBO vertex fetch - Nullify elements buffer. */ + if (rps.cached_vertex_buffer_bindings[MTL_SSBO_VERTEX_FETCH_IBO_INDEX].metal_buffer == + nil) { + rps.bind_vertex_buffer(null_buffer, 0, MTL_SSBO_VERTEX_FETCH_IBO_INDEX); + } + + /* Submit draw call with modified vertex count, which reflects vertices per primitive + * defined in the USE_SSBO_VERTEX_FETCH `pragma`. 
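 *
 * [Editorial note -- hypothetical numbers, not part of this diff] For example,
 * drawing GPU_PRIM_LINES with vertex_count = 10 yields 5 input primitives; if
 * the shader's pragma declares an output of 6 vertices per input primitive, the
 * emitted draw covers 30 vertices:
 *
 *   int num_input_primitives = 10 / 2;  // 5 lines.
 *   int output_num_verts = 5 * 6;       // 30 vertices in the emitted draw.
 *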
*/ + int num_input_primitives = gpu_get_prim_count_from_type(vertex_count, this->prim_type); + int output_num_verts = num_input_primitives * + active_mtl_shader->get_ssbo_vertex_fetch_output_num_verts(); +#ifndef NDEBUG + BLI_assert( + mtl_vertex_count_fits_primitive_type( + output_num_verts, active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type()) && + "Output Vertex count is not compatible with the requested output vertex primitive " + "type"); +#endif + [rec drawPrimitives:active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type() + vertexStart:0 + vertexCount:output_num_verts]; + context_->main_command_buffer.register_draw_counters(output_num_verts); + } + else { + /* Regular draw. */ + [rec drawPrimitives:primitive_type vertexStart:0 vertexCount:vertex_count]; + context_->main_command_buffer.register_draw_counters(vertex_count); + } + } + } + if (G.debug & G_DEBUG_GPU) { + [rec popDebugGroup]; + } + } + + /* Reset allocation after draw submission. */ + has_begun_ = false; + if (current_allocation_.metal_buffer) { + [current_allocation_.metal_buffer release]; + current_allocation_.metal_buffer = nil; + } +} + +} // blender::gpu diff --git a/source/blender/gpu/metal/mtl_index_buffer.mm b/source/blender/gpu/metal/mtl_index_buffer.mm index 2195ab7538d..9712dce7b40 100644 --- a/source/blender/gpu/metal/mtl_index_buffer.mm +++ b/source/blender/gpu/metal/mtl_index_buffer.mm @@ -138,7 +138,7 @@ void MTLIndexBuf::update_sub(uint32_t start, uint32_t len, const void *data) BLI_assert(ibo_ != nullptr); /* Otherwise, we will inject a data update, using staged data, into the command stream. - * Stage update contents in temporary buffer*/ + * Stage update contents in temporary buffer. */ MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(ctx); MTLTemporaryBuffer range = ctx->get_scratchbuffer_manager().scratch_buffer_allocate_range(len); diff --git a/source/blender/gpu/metal/mtl_memory.hh b/source/blender/gpu/metal/mtl_memory.hh index df80df6543f..bd354376b12 100644 --- a/source/blender/gpu/metal/mtl_memory.hh +++ b/source/blender/gpu/metal/mtl_memory.hh @@ -340,13 +340,13 @@ class MTLBufferPool { private: /* Memory statistics. */ - long long int total_allocation_bytes_ = 0; + int64_t total_allocation_bytes_ = 0; #if MTL_DEBUG_MEMORY_STATISTICS == 1 /* Debug statistics. */ std::atomic<int> per_frame_allocation_count_; - std::atomic<long long int> allocations_in_pool_; - std::atomic<long long int> buffers_in_pool_; + std::atomic<int64_t> allocations_in_pool_; + std::atomic<int64_t> buffers_in_pool_; #endif /* Metal resources. 
*/ diff --git a/source/blender/gpu/metal/mtl_pso_descriptor_state.hh b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh index 1906350679a..04ceb5bdf03 100644 --- a/source/blender/gpu/metal/mtl_pso_descriptor_state.hh +++ b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh @@ -28,8 +28,8 @@ struct MTLVertexAttributeDescriptorPSO { uint64_t hash() const { - return (uint64_t)((uint64_t)this->format ^ (this->offset << 4) ^ (this->buffer_index << 8) ^ - (this->format_conversion_mode << 12)); + return uint64_t((uint64_t(this->format) ^ (this->offset << 4) ^ (this->buffer_index << 8) ^ + (this->format_conversion_mode << 12))); } }; @@ -46,8 +46,7 @@ struct MTLVertexBufferLayoutDescriptorPSO { uint64_t hash() const { - return (uint64_t)((uint64_t)this->step_function ^ (this->step_rate << 4) ^ - (this->stride << 8)); + return uint64_t(uint64_t(this->step_function) ^ (this->step_rate << 4) ^ (this->stride << 8)); } }; @@ -217,34 +216,46 @@ struct MTLRenderPipelineStateDescriptor { * has collisions. */ uint64_t hash = this->vertex_descriptor.hash(); - hash ^= (uint64_t)this->num_color_attachments << 16; /* up to 6 (3 bits). */ - hash ^= (uint64_t)this->depth_attachment_format << 18; /* up to 555 (9 bits). */ - hash ^= (uint64_t)this->stencil_attachment_format << 20; /* up to 555 (9 bits). */ - hash ^= (uint64_t)(*( - (uint64_t *)&this->vertex_descriptor.prim_topology_class)); /* Up to 3 (2 bits). */ + hash ^= uint64_t(this->num_color_attachments) << 16; /* up to 6 (3 bits). */ + hash ^= uint64_t(this->depth_attachment_format) << 18; /* up to 555 (9 bits). */ + hash ^= uint64_t(this->stencil_attachment_format) << 20; /* up to 555 (9 bits). */ + hash ^= uint64_t( + *((uint64_t *)&this->vertex_descriptor.prim_topology_class)); /* Up to 3 (2 bits). */ /* Only include elements in Hash if they are needed - avoids variable null assignments * influencing hash. */ if (this->num_color_attachments > 0) { - hash ^= (uint64_t)this->color_write_mask << 22; /* 4 bit bit-mask. */ - hash ^= (uint64_t)this->alpha_blend_op << 26; /* Up to 4 (3 bits). */ - hash ^= (uint64_t)this->rgb_blend_op << 29; /* Up to 4 (3 bits). */ - hash ^= (uint64_t)this->dest_alpha_blend_factor << 32; /* Up to 18 (5 bits). */ - hash ^= (uint64_t)this->dest_rgb_blend_factor << 37; /* Up to 18 (5 bits). */ - hash ^= (uint64_t)this->src_alpha_blend_factor << 42; /* Up to 18 (5 bits). */ - hash ^= (uint64_t)this->src_rgb_blend_factor << 47; /* Up to 18 (5 bits). */ + hash ^= uint64_t(this->color_write_mask) << 22; /* 4 bit bit-mask. */ + hash ^= uint64_t(this->alpha_blend_op) << 26; /* Up to 4 (3 bits). */ + hash ^= uint64_t(this->rgb_blend_op) << 29; /* Up to 4 (3 bits). */ + hash ^= uint64_t(this->dest_alpha_blend_factor) << 32; /* Up to 18 (5 bits). */ + hash ^= uint64_t(this->dest_rgb_blend_factor) << 37; /* Up to 18 (5 bits). */ + hash ^= uint64_t(this->src_alpha_blend_factor) << 42; /* Up to 18 (5 bits). */ + hash ^= uint64_t(this->src_rgb_blend_factor) << 47; /* Up to 18 (5 bits). */ } for (const uint c : IndexRange(GPU_FB_MAX_COLOR_ATTACHMENT)) { - hash ^= (uint64_t)this->color_attachment_format[c] << (c + 52); // up to 555 (9 bits) + hash ^= uint64_t(this->color_attachment_format[c]) << (c + 52); /* Up to 555 (9 bits). */ } - hash |= (uint64_t)((this->blending_enabled && (this->num_color_attachments > 0)) ? 1 : 0) - << 62; - hash ^= (uint64_t)this->point_size; + hash |= uint64_t((this->blending_enabled && (this->num_color_attachments > 0)) ? 
1 : 0) << 62; + hash ^= uint64_t(this->point_size); return hash; } + + /* Reset the Vertex Descriptor to default. */ + void reset_vertex_descriptor() + { + vertex_descriptor.num_attributes = 0; + vertex_descriptor.num_vert_buffers = 0; + for (int i = 0; i < GPU_VERT_ATTR_MAX_LEN; i++) { + vertex_descriptor.attributes[i].format = MTLVertexFormatInvalid; + vertex_descriptor.attributes[i].offset = 0; + } + vertex_descriptor.uses_ssbo_vertex_fetch = false; + vertex_descriptor.num_ssbo_attributes = 0; + } }; } // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_shader.hh b/source/blender/gpu/metal/mtl_shader.hh index 64d9d1cf849..5485b32dd31 100644 --- a/source/blender/gpu/metal/mtl_shader.hh +++ b/source/blender/gpu/metal/mtl_shader.hh @@ -261,8 +261,6 @@ class MTLShader : public Shader { bool get_push_constant_is_dirty(); void push_constant_bindstate_mark_dirty(bool is_dirty); - void vertformat_from_shader(GPUVertFormat *format) const override; - /* DEPRECATED: Kept only because of BGL API. (Returning -1 in METAL). */ int program_handle_get() const override { diff --git a/source/blender/gpu/metal/mtl_shader.mm b/source/blender/gpu/metal/mtl_shader.mm index 23097f312f0..006d3394378 100644 --- a/source/blender/gpu/metal/mtl_shader.mm +++ b/source/blender/gpu/metal/mtl_shader.mm @@ -129,6 +129,7 @@ MTLShader::~MTLShader() if (shd_builder_ != nullptr) { delete shd_builder_; + shd_builder_ = nullptr; } } @@ -209,6 +210,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info) /* Release temporary compilation resources. */ delete shd_builder_; + shd_builder_ = nullptr; return false; } } @@ -279,6 +281,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info) /* Release temporary compilation resources. */ delete shd_builder_; + shd_builder_ = nullptr; return false; } } @@ -324,6 +327,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info) /* Release temporary compilation resources. */ delete shd_builder_; + shd_builder_ = nullptr; return true; } @@ -536,27 +540,6 @@ void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty) push_constant_modified_ = is_dirty; } -void MTLShader::vertformat_from_shader(GPUVertFormat *format) const -{ - GPU_vertformat_clear(format); - - const MTLShaderInterface *mtl_interface = static_cast<const MTLShaderInterface *>(interface); - for (const uint attr_id : IndexRange(mtl_interface->get_total_attributes())) { - const MTLShaderInputAttribute &attr = mtl_interface->get_attribute(attr_id); - - /* Extract type parameters from Metal type. */ - GPUVertCompType comp_type = comp_type_from_vert_format(attr.format); - uint comp_len = comp_count_from_vert_format(attr.format); - GPUVertFetchMode fetch_mode = fetchmode_from_vert_format(attr.format); - - GPU_vertformat_attr_add(format, - mtl_interface->get_name_at_offset(attr.name_offset), - comp_type, - comp_len, - fetch_mode); - } -} - /** \} */ /* -------------------------------------------------------------------- */ @@ -1167,6 +1150,7 @@ void MTLShader::ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_at MTLShaderInterface *mtl_interface = this->get_interface(); BLI_assert(ssbo_attr.mtl_attribute_index >= 0 && ssbo_attr.mtl_attribute_index < mtl_interface->get_total_attributes()); + UNUSED_VARS_NDEBUG(mtl_interface); /* Update bind-mask to verify this attribute has been used. 
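 *
 * [Editorial note -- illustrative sketch, not part of this diff] The bind-mask
 * is a plain bit-set keyed by attribute index: binding marks a bit, and debug
 * checks can later confirm the slot's state, along the lines of:
 *
 *   ssbo_vertex_attribute_bind_mask_ |= (1 << ssbo_attr.mtl_attribute_index);
 *   ...
 *   BLI_assert(ssbo_vertex_attribute_bind_mask_ &
 *              (1 << ssbo_attr.mtl_attribute_index));  // Slot was bound.
 *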
*/ BLI_assert((ssbo_vertex_attribute_bind_mask_ & (1 << ssbo_attr.mtl_attribute_index)) == diff --git a/source/blender/gpu/metal/mtl_shader_generator.hh b/source/blender/gpu/metal/mtl_shader_generator.hh index 43890ca0170..63e2e6d5924 100644 --- a/source/blender/gpu/metal/mtl_shader_generator.hh +++ b/source/blender/gpu/metal/mtl_shader_generator.hh @@ -497,7 +497,7 @@ inline std::string get_stage_class_name(ShaderStage stage) inline bool is_builtin_type(std::string type) { /* Add Types as needed. */ - /* TODO(Metal): Consider replacing this with a switch and constexpr hash and switch. + /* TODO(Metal): Consider replacing this with a switch and `constexpr` hash and switch. * Though most efficient and maintainable approach to be determined. */ static std::map<std::string, eMTLDataType> glsl_builtin_types = { {"float", MTL_DATATYPE_FLOAT}, diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm index 977e97dbd82..4a2be0753bb 100644 --- a/source/blender/gpu/metal/mtl_shader_generator.mm +++ b/source/blender/gpu/metal/mtl_shader_generator.mm @@ -724,10 +724,6 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info) } if (msl_iface.uses_ssbo_vertex_fetch_mode) { ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl; - ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_MAX_VBOS " << MTL_SSBO_VERTEX_FETCH_MAX_VBOS - << std::endl; - ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_IBO_INDEX " << MTL_SSBO_VERTEX_FETCH_IBO_INDEX - << std::endl; for (const MSLVertexInputAttribute &attr : msl_iface.vertex_input_attributes) { ss_vertex << "#define SSBO_ATTR_TYPE_" << attr.name << " " << attr.type << std::endl; } diff --git a/source/blender/gpu/metal/mtl_shader_interface.mm b/source/blender/gpu/metal/mtl_shader_interface.mm index 3703d5b5684..97a82345761 100644 --- a/source/blender/gpu/metal/mtl_shader_interface.mm +++ b/source/blender/gpu/metal/mtl_shader_interface.mm @@ -117,9 +117,7 @@ uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset, MTLShaderUniformBlock &uni_block = ubos_[total_uniform_blocks_]; uni_block.name_offset = name_offset; - /* We offset the buffer binding index by one, as the first slot is reserved for push constant - * data. */ - uni_block.buffer_index = buffer_index + 1; + uni_block.buffer_index = buffer_index; uni_block.size = size; uni_block.current_offset = 0; uni_block.stage_mask = ShaderStage::BOTH; @@ -297,8 +295,10 @@ void MTLShaderInterface::prepare_common_shader_inputs() current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ubo.name_offset)); /* Location refers to the index in the ubos_ array. */ current_input->location = ubo_index; - /* Final binding location refers to the buffer binding index within the shader (Relative to - * MTL_uniform_buffer_base_index). */ + /* Binding location refers to the UBO bind slot in + * #MTLContextGlobalShaderPipelineState::ubo_bindings. The buffer bind index [[buffer(N)]] + * within the shader will apply an offset for bound vertex buffers and the default uniform + * PushConstantBlock. 
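 *
 * [Editorial note -- hypothetical layout, not spelled out by this diff] The
 * indirection keeps API-visible UBO slots stable while the MSL argument table
 * packs other buffers first; a plausible final layout for V vertex buffers is:
 *
 *   [[buffer(0 .. V-1)]]   -> vertex buffers
 *   [[buffer(V)]]          -> default PushConstantBlock uniforms
 *   [[buffer(V + 1 + n)]]  -> UBO with interface binding `n`
 *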
*/ current_input->binding = shd_ubo.buffer_index; current_input++; } diff --git a/source/blender/gpu/metal/mtl_texture.hh b/source/blender/gpu/metal/mtl_texture.hh index be6f3a3a02b..28b55306707 100644 --- a/source/blender/gpu/metal/mtl_texture.hh +++ b/source/blender/gpu/metal/mtl_texture.hh @@ -108,10 +108,10 @@ struct TextureReadRoutineSpecialisation { uint64_t hash() const { blender::DefaultHash<std::string> string_hasher; - return (uint64_t)string_hasher(this->input_data_type + this->output_data_type + - std::to_string((this->component_count_input << 8) + - this->component_count_output + - (this->depth_format_mode << 28))); + return uint64_t(string_hasher(this->input_data_type + this->output_data_type + + std::to_string((this->component_count_input << 8) + + this->component_count_output + + (this->depth_format_mode << 28)))); } }; @@ -138,12 +138,12 @@ struct MTLSamplerState { operator uint() const { - return (uint)state; + return uint(state); } operator uint64_t() const { - return (uint64_t)state; + return uint64_t(state); } }; @@ -200,7 +200,7 @@ class MTLTexture : public Texture { TEXTURE_VIEW_SWIZZLE_DIRTY = (1 << 0), TEXTURE_VIEW_MIP_DIRTY = (1 << 1) }; - id<MTLTexture> mip_swizzle_view_; + id<MTLTexture> mip_swizzle_view_ = nil; char tex_swizzle_mask_[4]; MTLTextureSwizzleChannels mtl_swizzle_mask_; bool mip_range_dirty_ = false; @@ -216,7 +216,6 @@ class MTLTexture : public Texture { /* VBO. */ MTLVertBuf *vert_buffer_; id<MTLBuffer> vert_buffer_mtl_; - int vert_buffer_offset_; /* Core parameters and sub-resources. */ eGPUTextureUsage gpu_image_usage_flags_; @@ -247,7 +246,7 @@ class MTLTexture : public Texture { void mip_range_set(int min, int max) override; void *read(int mip, eGPUDataFormat type) override; - /* Remove once no longer required -- will just return 0 for now in MTL path*/ + /* Remove once no longer required -- will just return 0 for now in MTL path. */ uint gl_bindcode_get() const override; bool texture_is_baked(); @@ -256,6 +255,14 @@ class MTLTexture : public Texture { return name_; } + id<MTLBuffer> get_vertex_buffer() const + { + if (resource_mode_ == MTL_TEXTURE_MODE_VBO) { + return vert_buffer_mtl_; + } + return nil; + } + protected: bool init_internal() override; bool init_internal(GPUVertBuf *vbo) override; @@ -324,8 +331,6 @@ class MTLTexture : public Texture { int height); GPUFrameBuffer *get_blit_framebuffer(uint dst_slice, uint dst_mip); - MEM_CXX_CLASS_ALLOC_FUNCS("gpu::MTLTexture") - /* Texture Update function Utilities. */ /* Metal texture updating does not provide the same range of functionality for type conversion * and format compatibility as are available in OpenGL. To achieve the same level of @@ -357,9 +362,9 @@ class MTLTexture : public Texture { */ struct TextureUpdateParams { int mip_index; - int extent[3]; /* Width, Height, Slice on 2D Array tex*/ - int offset[3]; /* Width, Height, Slice on 2D Array tex*/ - uint unpack_row_length; /* Number of pixels between bytes in input data */ + int extent[3]; /* Width, Height, Slice on 2D Array tex. */ + int offset[3]; /* Width, Height, Slice on 2D Array tex. */ + uint unpack_row_length; /* Number of pixels between bytes in input data. */ }; id<MTLComputePipelineState> texture_update_1d_get_kernel( @@ -383,7 +388,7 @@ class MTLTexture : public Texture { /* Depth texture updates are not directly supported with Blit operations, similarly, we cannot * use a compute shader to write to depth, so we must instead render to a depth target. 
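 *
 * [Editorial note -- illustrative outline, not part of this diff] In practice
 * this render-based update amounts to:
 *   1. Stage the source texels in an intermediate buffer/texture.
 *   2. Bind a framebuffer whose depth attachment covers the destination region.
 *   3. Draw a viewport-sized quad whose fragment shader writes the staged value
 *      to gl_FragDepth, with the depth test set to ALWAYS.
 *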
* These processes use vertex/fragment shaders to render texture data from an intermediate - * source, in order to prime the depth buffer*/ + * source, in order to prime the depth buffer. */ GPUShader *depth_2d_update_sh_get(DepthTextureUpdateRoutineSpecialisation specialization); void update_sub_depth_2d( @@ -392,8 +397,8 @@ class MTLTexture : public Texture { /* Texture Read function utilities -- Follows a similar mechanism to the updating routines */ struct TextureReadParams { int mip_index; - int extent[3]; /* Width, Height, Slice on 2D Array tex*/ - int offset[3]; /* Width, Height, Slice on 2D Array tex*/ + int extent[3]; /* Width, Height, Slice on 2D Array tex. */ + int offset[3]; /* Width, Height, Slice on 2D Array tex. */ }; id<MTLComputePipelineState> texture_read_1d_get_kernel( @@ -415,6 +420,8 @@ class MTLTexture : public Texture { /* fullscreen blit utilities. */ GPUShader *fullscreen_blit_sh_get(); + + MEM_CXX_CLASS_ALLOC_FUNCS("MTLTexture") }; /* Utility */ diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm index 2b7c2333bff..29dcc8d32ee 100644 --- a/source/blender/gpu/metal/mtl_texture.mm +++ b/source/blender/gpu/metal/mtl_texture.mm @@ -12,6 +12,7 @@ #include "GPU_batch_presets.h" #include "GPU_capabilities.h" #include "GPU_framebuffer.h" +#include "GPU_immediate.h" #include "GPU_platform.h" #include "GPU_state.h" @@ -20,6 +21,7 @@ #include "mtl_context.hh" #include "mtl_debug.hh" #include "mtl_texture.hh" +#include "mtl_vertex_buffer.hh" #include "GHOST_C-api.h" @@ -50,7 +52,6 @@ void gpu::MTLTexture::mtl_texture_init() /* VBO. */ vert_buffer_ = nullptr; vert_buffer_mtl_ = nil; - vert_buffer_offset_ = -1; /* Default Swizzle. */ tex_swizzle_mask_[0] = 'r'; @@ -169,26 +170,39 @@ void gpu::MTLTexture::bake_mip_swizzle_view() id<MTLTexture> gpu::MTLTexture::get_metal_handle() { - /* ensure up to date and baked. */ - this->ensure_baked(); - /* Verify VBO texture shares same buffer. */ if (resource_mode_ == MTL_TEXTURE_MODE_VBO) { - int r_offset = -1; + id<MTLBuffer> buf = vert_buffer_->get_metal_buffer(); + + /* Source vertex buffer has been re-generated, require re-initialization. */ + if (buf != vert_buffer_mtl_) { + MTL_LOG_INFO( + "MTLTexture '%p' using MTL_TEXTURE_MODE_VBO requires re-generation due to updated " + "Vertex-Buffer.\n", + this); + /* Clear state. */ + this->reset(); + + /* Re-initialize. */ + this->init_internal(wrap(vert_buffer_)); + + /* Update for assertion check below. */ + buf = vert_buffer_->get_metal_buffer(); + } - /* TODO(Metal): Fetch buffer from MTLVertBuf when implemented. */ - id<MTLBuffer> buf = nil; /*vert_buffer_->get_metal_buffer(&r_offset);*/ + /* Ensure buffer is valid. + * Fetch-vert buffer handle directly in-case it changed above. */ BLI_assert(vert_buffer_mtl_ != nil); - BLI_assert(buf == vert_buffer_mtl_ && r_offset == vert_buffer_offset_); - - UNUSED_VARS(buf); - UNUSED_VARS_NDEBUG(r_offset); + BLI_assert(vert_buffer_->get_metal_buffer() == vert_buffer_mtl_); } + /* ensure up to date and baked. */ + this->ensure_baked(); + if (is_baked_) { /* For explicit texture views, ensure we always return the texture view. 
*/ if (resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) { - BLI_assert(mip_swizzle_view_ && "Texture view should always have a valid handle."); + BLI_assert_msg(mip_swizzle_view_, "Texture view should always have a valid handle."); } if (mip_swizzle_view_ != nil || texture_view_dirty_flags_) { @@ -208,7 +222,7 @@ id<MTLTexture> gpu::MTLTexture::get_metal_handle_base() /* For explicit texture views, always return the texture view. */ if (resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) { - BLI_assert(mip_swizzle_view_ && "Texture view should always have a valid handle."); + BLI_assert_msg(mip_swizzle_view_, "Texture view should always have a valid handle."); if (mip_swizzle_view_ != nil || texture_view_dirty_flags_) { bake_mip_swizzle_view(); } @@ -290,7 +304,6 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst, /* Execute graphics draw call to perform the blit. */ GPUBatch *quad = GPU_batch_preset_quad(); - GPU_batch_set_shader(quad, shader); float w = dst->width_get(); @@ -915,7 +928,7 @@ void gpu::MTLTexture::generate_mipmap() /* Ensure texture is baked. */ this->ensure_baked(); - BLI_assert(is_baked_ && texture_ && "MTLTexture is not valid"); + BLI_assert_msg(is_baked_ && texture_, "MTLTexture is not valid"); if (mipmaps_ == 1 || mtl_max_mips_ == 1) { MTL_LOG_WARNING("Call to generate mipmaps on texture with 'mipmaps_=1\n'"); @@ -1231,7 +1244,7 @@ void gpu::MTLTexture::read_internal(int mip, depth_format_mode = 4; break; default: - BLI_assert(false && "Unhandled depth read format case"); + BLI_assert_msg(false, "Unhandled depth read format case"); break; } } @@ -1445,11 +1458,84 @@ bool gpu::MTLTexture::init_internal() bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo) { - /* Zero initialize. */ - this->prepare_internal(); + if (this->format_ == GPU_DEPTH24_STENCIL8) { + /* Apple Silicon requires GPU_DEPTH32F_STENCIL8 instead of GPU_DEPTH24_STENCIL8. */ + this->format_ = GPU_DEPTH32F_STENCIL8; + } + + MTLPixelFormat mtl_format = gpu_texture_format_to_metal(this->format_); + mtl_max_mips_ = 1; + mipmaps_ = 0; + this->mip_range_set(0, 0); + + /* Create texture from GPUVertBuf's buffer. */ + MTLVertBuf *mtl_vbo = static_cast<MTLVertBuf *>(unwrap(vbo)); + mtl_vbo->bind(); + mtl_vbo->flag_used(); + + /* Get Metal Buffer. */ + id<MTLBuffer> source_buffer = mtl_vbo->get_metal_buffer(); + BLI_assert(source_buffer); + + /* Verify size. */ + if (w_ <= 0) { + MTL_LOG_WARNING("Allocating texture buffer of width 0!\n"); + w_ = 1; + } + + /* Verify Texture and vertex buffer alignment. */ + int bytes_per_pixel = get_mtl_format_bytesize(mtl_format); + int bytes_per_row = bytes_per_pixel * w_; + + MTLContext *mtl_ctx = MTLContext::get(); + uint32_t align_requirement = static_cast<uint32_t>( + [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]); + + /* Verify per-vertex size aligns with texture size. */ + const GPUVertFormat *format = GPU_vertbuf_get_format(vbo); + BLI_assert(bytes_per_pixel == format->stride && + "Pixel format stride MUST match the texture format stride -- These being different " + "is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex"); + UNUSED_VARS_NDEBUG(format); + + /* Create texture descriptor. 
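 *
 * [Editorial note -- worked example, not part of this diff] For a hypothetical
 * GPU_RGBA32F buffer texture of width 1000:
 *
 *   bytes_per_pixel = 16
 *   bytes_per_row = 16 * 1000 = 16000
 *   align_requirement = 256 (a typical device value)
 *   ceil_to_multiple_u(16000, 256) = 16128  // bytesPerRow handed to Metal.
 *
 * The padding only affects the row stride of the texture view, not the vertex
 * buffer contents themselves.
 *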
*/ + BLI_assert(type_ == GPU_TEXTURE_BUFFER); + texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; + texture_descriptor_.pixelFormat = mtl_format; + texture_descriptor_.textureType = MTLTextureTypeTextureBuffer; + texture_descriptor_.width = w_; + texture_descriptor_.height = 1; + texture_descriptor_.depth = 1; + texture_descriptor_.arrayLength = 1; + texture_descriptor_.mipmapLevelCount = mtl_max_mips_; + texture_descriptor_.usage = + MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite | + MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. */ + texture_descriptor_.storageMode = [source_buffer storageMode]; + texture_descriptor_.sampleCount = 1; + texture_descriptor_.cpuCacheMode = [source_buffer cpuCacheMode]; + texture_descriptor_.hazardTrackingMode = [source_buffer hazardTrackingMode]; + + texture_ = [source_buffer + newTextureWithDescriptor:texture_descriptor_ + offset:0 + bytesPerRow:ceil_to_multiple_u(bytes_per_row, align_requirement)]; + aligned_w_ = bytes_per_row / bytes_per_pixel; + + BLI_assert(texture_); + texture_.label = [NSString stringWithUTF8String:this->get_name()]; + is_baked_ = true; + is_dirty_ = false; + resource_mode_ = MTL_TEXTURE_MODE_VBO; - /* TODO(Metal): Add implementation for GPU Vert buf. */ - return false; + /* Track Status. */ + vert_buffer_ = mtl_vbo; + vert_buffer_mtl_ = source_buffer; + /* Cleanup. */ + [texture_descriptor_ release]; + texture_descriptor_ = nullptr; + + return true; } bool gpu::MTLTexture::init_internal(const GPUTexture *src, int mip_offset, int layer_offset) @@ -1494,7 +1580,6 @@ bool gpu::MTLTexture::texture_is_baked() /* Prepare texture parameters after initialization, but before baking. */ void gpu::MTLTexture::prepare_internal() { - /* Derive implicit usage flags for Depth/Stencil attachments. */ if (format_flag_ & GPU_FORMAT_DEPTH || format_flag_ & GPU_FORMAT_STENCIL) { gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_ATTACHMENT; @@ -1659,7 +1744,7 @@ void gpu::MTLTexture::ensure_baked() /* Determine Resource Mode. */ resource_mode_ = MTL_TEXTURE_MODE_DEFAULT; - /* Create texture. */ + /* Standard texture allocation. */ texture_ = [ctx->device newTextureWithDescriptor:texture_descriptor_]; [texture_descriptor_ release]; diff --git a/source/blender/gpu/metal/mtl_texture_util.mm b/source/blender/gpu/metal/mtl_texture_util.mm index 928393fb39e..33a62e2e3ef 100644 --- a/source/blender/gpu/metal/mtl_texture_util.mm +++ b/source/blender/gpu/metal/mtl_texture_util.mm @@ -22,13 +22,7 @@ /* Utility file for secondary functionality which supports mtl_texture.mm. */ extern char datatoc_compute_texture_update_msl[]; -extern char datatoc_depth_2d_update_vert_glsl[]; -extern char datatoc_depth_2d_update_float_frag_glsl[]; -extern char datatoc_depth_2d_update_int24_frag_glsl[]; -extern char datatoc_depth_2d_update_int32_frag_glsl[]; extern char datatoc_compute_texture_read_msl[]; -extern char datatoc_gpu_shader_fullscreen_blit_vert_glsl[]; -extern char datatoc_gpu_shader_fullscreen_blit_frag_glsl[]; namespace blender::gpu { @@ -40,7 +34,7 @@ MTLPixelFormat gpu_texture_format_to_metal(eGPUTextureFormat tex_format) { switch (tex_format) { - /* Formats texture & renderbuffer. */ + /* Formats texture & render-buffer. 
*/ case GPU_RGBA8UI: return MTLPixelFormatRGBA8Uint; case GPU_RGBA8I: @@ -447,42 +441,34 @@ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get( return *result; } - const char *fragment_source = nullptr; + const char *depth_2d_info_variant = nullptr; switch (specialization.data_mode) { case MTL_DEPTH_UPDATE_MODE_FLOAT: - fragment_source = datatoc_depth_2d_update_float_frag_glsl; + depth_2d_info_variant = "depth_2d_update_float"; break; case MTL_DEPTH_UPDATE_MODE_INT24: - fragment_source = datatoc_depth_2d_update_int24_frag_glsl; + depth_2d_info_variant = "depth_2d_update_int24"; break; case MTL_DEPTH_UPDATE_MODE_INT32: - fragment_source = datatoc_depth_2d_update_int32_frag_glsl; + depth_2d_info_variant = "depth_2d_update_int32"; break; default: BLI_assert(false && "Invalid format mode\n"); return nullptr; } - GPUShader *shader = GPU_shader_create(datatoc_depth_2d_update_vert_glsl, - fragment_source, - nullptr, - nullptr, - nullptr, - "depth_2d_update_sh_get"); + GPUShader *shader = GPU_shader_create_from_info_name(depth_2d_info_variant); mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialization, shader); return shader; } GPUShader *gpu::MTLTexture::fullscreen_blit_sh_get() { - MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); if (mtl_context->get_texture_utils().fullscreen_blit_shader == nullptr) { - const char *vertex_source = datatoc_gpu_shader_fullscreen_blit_vert_glsl; - const char *fragment_source = datatoc_gpu_shader_fullscreen_blit_frag_glsl; - GPUShader *shader = GPU_shader_create( - vertex_source, fragment_source, nullptr, nullptr, nullptr, "fullscreen_blit"); + GPUShader *shader = GPU_shader_create_from_info_name("fullscreen_blit"); + mtl_context->get_texture_utils().fullscreen_blit_shader = shader; } return mtl_context->get_texture_utils().fullscreen_blit_shader; @@ -614,7 +600,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl( stringWithUTF8String:datatoc_compute_texture_read_msl]; /* Defensive Debug Checks. */ - long long int depth_scale_factor = 1; + int64_t depth_scale_factor = 1; if (specialization_params.depth_format_mode > 0) { BLI_assert(specialization_params.component_count_input == 1); BLI_assert(specialization_params.component_count_output == 1); diff --git a/source/blender/gpu/metal/mtl_vertex_buffer.hh b/source/blender/gpu/metal/mtl_vertex_buffer.hh new file mode 100644 index 00000000000..2cc8b0a9636 --- /dev/null +++ b/source/blender/gpu/metal/mtl_vertex_buffer.hh @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include <Cocoa/Cocoa.h> +#include <Metal/Metal.h> +#include <QuartzCore/QuartzCore.h> + +#include "MEM_guardedalloc.h" + +#include "GPU_vertex_buffer.h" +#include "gpu_vertex_buffer_private.hh" +#include "mtl_context.hh" + +namespace blender::gpu { + +class MTLVertBuf : public VertBuf { + friend class gpu::MTLTexture; /* For buffer texture. */ + friend class MTLShader; /* For transform feedback. */ + friend class MTLBatch; + friend class MTLContext; /* For transform feedback. */ + + private: + /** Metal buffer allocation. **/ + gpu::MTLBuffer *vbo_ = nullptr; + /** Texture used if the buffer is bound as buffer texture. Init on first use. */ + struct ::GPUTexture *buffer_texture_ = nullptr; + /** Defines whether the buffer handle is wrapped by this MTLVertBuf, i.e. we do not own it and + * should not free it. 
*/ + bool is_wrapper_ = false; + /** Requested allocation size for Metal buffer. + * Differs from raw buffer size as alignment is not included. */ + uint64_t alloc_size_ = 0; + /** Whether existing allocation has been submitted for use by the GPU. */ + bool contents_in_flight_ = false; + + /* Fetch Metal buffer and offset into allocation if necessary. + * Access limited to friend classes. */ + id<MTLBuffer> get_metal_buffer() + { + vbo_->debug_ensure_used(); + return vbo_->get_metal_buffer(); + } + + public: + MTLVertBuf(); + ~MTLVertBuf(); + + void bind(); + void flag_used(); + + void update_sub(uint start, uint len, const void *data) override; + + const void *read() const override; + void *unmap(const void *mapped_data) const override; + + void wrap_handle(uint64_t handle) override; + + protected: + void acquire_data() override; + void resize_data() override; + void release_data() override; + void upload_data() override; + void duplicate_data(VertBuf *dst) override; + void bind_as_ssbo(uint binding) override; + void bind_as_texture(uint binding) override; + + MEM_CXX_CLASS_ALLOC_FUNCS("MTLVertBuf"); +}; + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_vertex_buffer.mm b/source/blender/gpu/metal/mtl_vertex_buffer.mm new file mode 100644 index 00000000000..1c7201ce5f9 --- /dev/null +++ b/source/blender/gpu/metal/mtl_vertex_buffer.mm @@ -0,0 +1,368 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ +#include "mtl_vertex_buffer.hh" +#include "mtl_debug.hh" + +namespace blender::gpu { + +MTLVertBuf::MTLVertBuf() : VertBuf() +{ +} + +MTLVertBuf::~MTLVertBuf() +{ + this->release_data(); +} + +void MTLVertBuf::acquire_data() +{ + /* Discard previous data, if any. */ + MEM_SAFE_FREE(data); + if (usage_ == GPU_USAGE_DEVICE_ONLY) { + data = nullptr; + } + else { + data = (uchar *)MEM_mallocN(sizeof(uchar) * this->size_alloc_get(), __func__); + } +} + +void MTLVertBuf::resize_data() +{ + if (usage_ == GPU_USAGE_DEVICE_ONLY) { + data = nullptr; + } + else { + data = (uchar *)MEM_reallocN(data, sizeof(uchar) * this->size_alloc_get()); + } +} + +void MTLVertBuf::release_data() +{ + if (vbo_ != nullptr) { + vbo_->free(); + vbo_ = nullptr; + is_wrapper_ = false; + } + + GPU_TEXTURE_FREE_SAFE(buffer_texture_); + + MEM_SAFE_FREE(data); +} + +void MTLVertBuf::duplicate_data(VertBuf *dst_) +{ + BLI_assert(MTLContext::get() != NULL); + MTLVertBuf *src = this; + MTLVertBuf *dst = static_cast<MTLVertBuf *>(dst_); + + /* Ensure buffer has been initialized. */ + src->bind(); + + if (src->vbo_) { + + /* Fetch active context. */ + MTLContext *ctx = MTLContext::get(); + BLI_assert(ctx); + + /* Ensure destination does not have an active VBO. */ + BLI_assert(dst->vbo_ == nullptr); + + /* Allocate VBO for destination vertbuf. */ + uint length = src->vbo_->get_size(); + dst->vbo_ = MTLContext::get_global_memory_manager().allocate( + length, (dst->get_usage_type() != GPU_USAGE_DEVICE_ONLY)); + dst->alloc_size_ = length; + + /* Fetch Metal buffer handles. */ + id<MTLBuffer> src_buffer = src->vbo_->get_metal_buffer(); + id<MTLBuffer> dest_buffer = dst->vbo_->get_metal_buffer(); + + /* Use blit encoder to copy data to duplicate buffer allocation. 
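 *
 * [Editorial note -- not part of this diff] On MTLStorageModeManaged resources
 * the CPU and GPU copies are distinct, so after the GPU-side blit the encoder
 * must schedule a sync before the host may read the result:
 *
 *   [enc synchronizeResource:dest_buffer];  // GPU copy -> host copy.
 *
 * On shared-storage (Apple silicon) buffers this is unnecessary, which is why
 * the `synchronizeResource` call below is wrapped in a storageMode check.
 *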
*/ + id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder(); + if (G.debug & G_DEBUG_GPU) { + [enc insertDebugSignpost:@"VertexBufferDuplicate"]; + } + [enc copyFromBuffer:src_buffer + sourceOffset:0 + toBuffer:dest_buffer + destinationOffset:0 + size:length]; + + /* Flush results back to host buffer, if one exists. */ + if (dest_buffer.storageMode == MTLStorageModeManaged) { + [enc synchronizeResource:dest_buffer]; + } + + if (G.debug & G_DEBUG_GPU) { + [enc insertDebugSignpost:@"VertexBufferDuplicateEnd"]; + } + + /* Mark as in-use, as contents are updated via GPU command. */ + src->flag_used(); + } + + /* Copy raw CPU data. */ + if (data != nullptr) { + dst->data = (uchar *)MEM_dupallocN(src->data); + } +} + +void MTLVertBuf::upload_data() +{ + this->bind(); +} + +void MTLVertBuf::bind() +{ + /* Determine allocation size. Set minimum allocation size to be + * the maximal of a single attribute to avoid validation and + * correctness errors. */ + uint64_t required_size_raw = sizeof(uchar) * this->size_used_get(); + uint64_t required_size = max_ulul(required_size_raw, 128); + + if (required_size_raw == 0) { + MTL_LOG_WARNING("Warning: Vertex buffer required_size = 0\n"); + } + + /* If the vertex buffer has already been allocated, but new data is ready, + * or the usage size has changed, we release the existing buffer and + * allocate a new buffer to ensure we do not overwrite in-use GPU resources. + * + * NOTE: We only need to free the existing allocation if contents have been + * submitted to the GPU. Otherwise we can simply upload new data to the + * existing buffer, if it will fit. + * + * NOTE: If a buffer is re-sized, but no new data is provided, the previous + * contents are copied into the newly allocated buffer. */ + bool requires_reallocation = (vbo_ != nullptr) && (alloc_size_ != required_size); + bool new_data_ready = (this->flag & GPU_VERTBUF_DATA_DIRTY) && this->data; + + gpu::MTLBuffer *prev_vbo = nullptr; + GPUVertBufStatus prev_flag = this->flag; + + if (vbo_ != nullptr) { + if (requires_reallocation || (new_data_ready && contents_in_flight_)) { + /* Track previous VBO to copy data from. */ + prev_vbo = vbo_; + + /* Reset current allocation status. */ + vbo_ = nullptr; + is_wrapper_ = false; + alloc_size_ = 0; + + /* Flag as requiring data upload. */ + if (requires_reallocation) { + this->flag &= ~GPU_VERTBUF_DATA_UPLOADED; + } + } + } + + /* Create MTLBuffer of requested size. */ + if (vbo_ == nullptr) { + vbo_ = MTLContext::get_global_memory_manager().allocate( + required_size, (this->get_usage_type() != GPU_USAGE_DEVICE_ONLY)); + vbo_->set_label(@"Vertex Buffer"); + BLI_assert(vbo_ != nullptr); + BLI_assert(vbo_->get_metal_buffer() != nil); + + is_wrapper_ = false; + alloc_size_ = required_size; + contents_in_flight_ = false; + } + + /* Upload new data, if provided. */ + if (new_data_ready) { + + /* Only upload data if usage size is greater than zero. + * Do not upload data for device-only buffers. */ + if (required_size_raw > 0 && usage_ != GPU_USAGE_DEVICE_ONLY) { + + /* Debug: Verify allocation is large enough. */ + BLI_assert(vbo_->get_size() >= required_size_raw); + + /* Fetch mapped buffer host ptr and upload data. */ + void *dst_data = vbo_->get_host_ptr(); + memcpy((uint8_t *)dst_data, this->data, required_size_raw); + vbo_->flush_range(0, required_size_raw); + } + + /* If static usage, free host-side data. */ + if (usage_ == GPU_USAGE_STATIC) { + MEM_SAFE_FREE(data); + } + + /* Flag data as having been uploaded. 
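 *
 * [Editorial note -- illustrative summary, not part of this diff] The flag
 * transitions in `bind()` form a small state machine:
 *
 *   GPU_VERTBUF_DATA_DIRTY    : set by the generic VertBuf API when host data
 *                               changes; cleared here once the upload lands.
 *   GPU_VERTBUF_DATA_UPLOADED : set here after upload or GPU-side copy; cleared
 *                               when a re-size forces a fresh allocation.
 *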
*/ + this->flag &= ~GPU_VERTBUF_DATA_DIRTY; + this->flag |= GPU_VERTBUF_DATA_UPLOADED; + } + else if (requires_reallocation) { + + /* If buffer has been re-sized, copy existing data if host + * data had been previously uploaded. */ + BLI_assert(prev_vbo != nullptr); + + if (prev_flag & GPU_VERTBUF_DATA_UPLOADED) { + + /* Fetch active context. */ + MTLContext *ctx = MTLContext::get(); + BLI_assert(ctx); + + id<MTLBuffer> copy_prev_buffer = prev_vbo->get_metal_buffer(); + id<MTLBuffer> copy_new_buffer = vbo_->get_metal_buffer(); + BLI_assert(copy_prev_buffer != nil); + BLI_assert(copy_new_buffer != nil); + + /* Ensure a blit command encoder is active for buffer copy operation. */ + id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder(); + [enc copyFromBuffer:copy_prev_buffer + sourceOffset:0 + toBuffer:copy_new_buffer + destinationOffset:0 + size:min_ii([copy_new_buffer length], [copy_prev_buffer length])]; + + /* Flush newly copied data back to host-side buffer, if one exists. + * Ensures data and cache coherency for managed MTLBuffers. */ + if (copy_new_buffer.storageMode == MTLStorageModeManaged) { + [enc synchronizeResource:copy_new_buffer]; + } + + /* For VBOs flagged as static, release host data as it will no longer be needed. */ + if (usage_ == GPU_USAGE_STATIC) { + MEM_SAFE_FREE(data); + } + + /* Flag data as uploaded. */ + this->flag |= GPU_VERTBUF_DATA_UPLOADED; + + /* Flag as in-use, as contents have been updated via GPU commands. */ + this->flag_used(); + } + } + + /* Release previous buffer if re-allocated. */ + if (prev_vbo != nullptr) { + prev_vbo->free(); + } + + /* Ensure buffer has been created. */ + BLI_assert(vbo_ != nullptr); +} + +/* Update Sub currently only used by hair */ +void MTLVertBuf::update_sub(uint start, uint len, const void *data) +{ + /* Fetch and verify active context. */ + MTLContext *ctx = reinterpret_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(ctx); + BLI_assert(ctx->device); + + /* Ensure vertbuf has been created. */ + this->bind(); + BLI_assert(start + len <= alloc_size_); + + /* Create temporary scratch buffer allocation for sub-range of data. */ + MTLTemporaryBuffer scratch_allocation = + ctx->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(len, 256); + memcpy(scratch_allocation.data, data, len); + [scratch_allocation.metal_buffer + didModifyRange:NSMakeRange(scratch_allocation.buffer_offset, len)]; + id<MTLBuffer> data_buffer = scratch_allocation.metal_buffer; + uint data_buffer_offset = scratch_allocation.buffer_offset; + + BLI_assert(vbo_ != nullptr && data != nullptr); + BLI_assert((start + len) <= vbo_->get_size()); + + /* Fetch destination buffer. */ + id<MTLBuffer> dst_buffer = vbo_->get_metal_buffer(); + + /* Ensure blit command encoder for copying data. */ + id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder(); + [enc copyFromBuffer:data_buffer + sourceOffset:data_buffer_offset + toBuffer:dst_buffer + destinationOffset:start + size:len]; + + /* Flush modified buffer back to host buffer, if one exists. */ + if (dst_buffer.storageMode == MTLStorageModeManaged) { + [enc synchronizeResource:dst_buffer]; + } +} + +void MTLVertBuf::bind_as_ssbo(uint binding) +{ + /* TODO(Metal): Support binding of buffers as SSBOs. + * Pending overall compute support for Metal backend. 
*/ + MTL_LOG_WARNING("MTLVertBuf::bind_as_ssbo not yet implemented!\n"); + this->flag_used(); +} + +void MTLVertBuf::bind_as_texture(uint binding) +{ + /* Ensure allocations are ready, and data uploaded. */ + this->bind(); + BLI_assert(vbo_ != nullptr); + + /* If vertex buffer updated, release existing texture and re-create. */ + id<MTLBuffer> buf = this->get_metal_buffer(); + if (buffer_texture_ != nullptr) { + gpu::MTLTexture *mtl_buffer_tex = static_cast<gpu::MTLTexture *>( + unwrap(this->buffer_texture_)); + id<MTLBuffer> tex_buf = mtl_buffer_tex->get_vertex_buffer(); + if (tex_buf != buf) { + GPU_TEXTURE_FREE_SAFE(buffer_texture_); + buffer_texture_ = nullptr; + } + } + + /* Create texture from vertex buffer. */ + if (buffer_texture_ == nullptr) { + buffer_texture_ = GPU_texture_create_from_vertbuf("vertbuf_as_texture", wrap(this)); + } + + /* Verify successful creation and bind. */ + BLI_assert(buffer_texture_ != nullptr); + GPU_texture_bind(buffer_texture_, binding); +} + +const void *MTLVertBuf::read() const +{ + BLI_assert(vbo_ != nullptr); + BLI_assert(usage_ != GPU_USAGE_DEVICE_ONLY); + void *return_ptr = vbo_->get_host_ptr(); + BLI_assert(return_ptr != nullptr); + + return return_ptr; +} + +void *MTLVertBuf::unmap(const void *mapped_data) const +{ + void *result = MEM_mallocN(alloc_size_, __func__); + memcpy(result, mapped_data, alloc_size_); + return result; +} + +void MTLVertBuf::wrap_handle(uint64_t handle) +{ + BLI_assert(vbo_ == nullptr); + + /* Attempt to cast to Metal buffer handle. */ + BLI_assert(handle != 0); + id<MTLBuffer> buffer = reinterpret_cast<id<MTLBuffer>>((void *)handle); + + is_wrapper_ = true; + vbo_ = new gpu::MTLBuffer(buffer); + + /* We assume the data is already on the device, so no need to allocate or send it. */ + flag = GPU_VERTBUF_DATA_UPLOADED; +} + +void MTLVertBuf::flag_used() +{ + contents_in_flight_ = true; +} + +} // namespace blender::gpu |