git.blender.org/blender.git

Diffstat (limited to 'source/blender/gpu')
-rw-r--r-- source/blender/gpu/CMakeLists.txt | 4
-rw-r--r-- source/blender/gpu/GPU_context.h | 2
-rw-r--r-- source/blender/gpu/GPU_material.h | 25
-rw-r--r-- source/blender/gpu/intern/gpu_backend.hh | 2
-rw-r--r-- source/blender/gpu/intern/gpu_codegen.cc | 101
-rw-r--r-- source/blender/gpu/intern/gpu_codegen.h | 4
-rw-r--r-- source/blender/gpu/intern/gpu_context.cc | 7
-rw-r--r-- source/blender/gpu/intern/gpu_material.c | 215
-rw-r--r-- source/blender/gpu/intern/gpu_node_graph.c | 19
-rw-r--r-- source/blender/gpu/intern/gpu_node_graph.h | 15
-rw-r--r-- source/blender/gpu/intern/gpu_shader_builder.cc | 2
-rw-r--r-- source/blender/gpu/intern/gpu_shader_interface.cc | 4
-rw-r--r-- source/blender/gpu/metal/mtl_backend.hh | 2
-rw-r--r-- source/blender/gpu/metal/mtl_backend.mm | 15
-rw-r--r-- source/blender/gpu/metal/mtl_batch.hh | 41
-rw-r--r-- source/blender/gpu/metal/mtl_command_buffer.mm | 172
-rw-r--r-- source/blender/gpu/metal/mtl_common.hh | 4
-rw-r--r-- source/blender/gpu/metal/mtl_context.hh | 132
-rw-r--r-- source/blender/gpu/metal/mtl_context.mm | 1191
-rw-r--r-- source/blender/gpu/metal/mtl_drawlist.hh | 34
-rw-r--r-- source/blender/gpu/metal/mtl_immediate.hh | 41
-rw-r--r-- source/blender/gpu/metal/mtl_immediate.mm | 397
-rw-r--r-- source/blender/gpu/metal/mtl_memory.hh | 6
-rw-r--r-- source/blender/gpu/metal/mtl_shader.hh | 2
-rw-r--r-- source/blender/gpu/metal/mtl_shader.mm | 27
-rw-r--r-- source/blender/gpu/metal/mtl_shader_generator.mm | 4
-rw-r--r-- source/blender/gpu/metal/mtl_texture.mm | 89
-rw-r--r-- source/blender/gpu/metal/mtl_texture_util.mm | 30
-rw-r--r-- source/blender/gpu/opengl/gl_backend.hh | 2
-rw-r--r-- source/blender/gpu/tests/gpu_testing.cc | 2
30 files changed, 2372 insertions, 219 deletions
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt
index 18da5169620..0ce4011b2b4 100644
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -192,6 +192,7 @@ set(METAL_SRC
metal/mtl_context.mm
metal/mtl_debug.mm
metal/mtl_framebuffer.mm
+ metal/mtl_immediate.mm
metal/mtl_index_buffer.mm
metal/mtl_memory.mm
metal/mtl_query.mm
@@ -205,11 +206,14 @@ set(METAL_SRC
metal/mtl_vertex_buffer.mm
metal/mtl_backend.hh
+ metal/mtl_batch.hh
metal/mtl_capabilities.hh
metal/mtl_common.hh
metal/mtl_context.hh
metal/mtl_debug.hh
+ metal/mtl_drawlist.hh
metal/mtl_framebuffer.hh
+ metal/mtl_immediate.hh
metal/mtl_index_buffer.hh
metal/mtl_memory.hh
metal/mtl_primitive.hh
diff --git a/source/blender/gpu/GPU_context.h b/source/blender/gpu/GPU_context.h
index a242bb7cc94..b59ea9e55d2 100644
--- a/source/blender/gpu/GPU_context.h
+++ b/source/blender/gpu/GPU_context.h
@@ -26,7 +26,7 @@ eGPUBackendType GPU_backend_get_type(void);
/** Opaque type hiding blender::gpu::Context. */
typedef struct GPUContext GPUContext;
-GPUContext *GPU_context_create(void *ghost_window);
+GPUContext *GPU_context_create(void *ghost_window, void *ghost_context);
/**
* To be called after #GPU_context_active_set(ctx_to_destroy).
*/
diff --git a/source/blender/gpu/GPU_material.h b/source/blender/gpu/GPU_material.h
index 922988bf95a..11500f5af60 100644
--- a/source/blender/gpu/GPU_material.h
+++ b/source/blender/gpu/GPU_material.h
@@ -117,6 +117,15 @@ typedef enum eGPUMaterialStatus {
GPU_MAT_SUCCESS,
} eGPUMaterialStatus;
+/* GPU_MAT_OPTIMIZATION_SKIP for cases where we do not
+ * plan to perform optimization on a given material. */
+typedef enum eGPUMaterialOptimizationStatus {
+ GPU_MAT_OPTIMIZATION_SKIP = 0,
+ GPU_MAT_OPTIMIZATION_READY,
+ GPU_MAT_OPTIMIZATION_QUEUED,
+ GPU_MAT_OPTIMIZATION_SUCCESS,
+} eGPUMaterialOptimizationStatus;
+
typedef enum eGPUDefaultValue {
GPU_DEFAULT_0 = 0,
GPU_DEFAULT_1,
@@ -246,6 +255,15 @@ struct Scene *GPU_material_scene(GPUMaterial *material);
struct GPUPass *GPU_material_get_pass(GPUMaterial *material);
struct GPUShader *GPU_material_get_shader(GPUMaterial *material);
const char *GPU_material_get_name(GPUMaterial *material);
+
+/**
+ * Material Optimization.
+ * \note Compiles optimal version of shader graph, populating mat->optimized_pass.
+ * This operation should always be deferred until existing compilations have completed.
+ * Default un-optimized materials will still exist for interactive material editing performance.
+ */
+void GPU_material_optimize(GPUMaterial *mat);
+
/**
* Return can be NULL if it's a world material.
*/
@@ -256,6 +274,13 @@ struct Material *GPU_material_get_material(GPUMaterial *material);
eGPUMaterialStatus GPU_material_status(GPUMaterial *mat);
void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status);
+/**
+ * Return status for async optimization jobs.
+ */
+eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat);
+void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status);
+bool GPU_material_optimization_ready(GPUMaterial *mat);
+
struct GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material);
/**
* Create dynamic UBO from parameters
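
For context, the sketch below (not part of this patch) shows how a caller could drive the optimization API declared above once a material's default pass has compiled. The polling policy is illustrative only, and material_try_optimize is a hypothetical helper name.

/* Hypothetical caller-side sketch using the API added in GPU_material.h above.
 * Assumes `mat` has already reached GPU_MAT_SUCCESS for its default pass. */
#include "GPU_material.h"

static void material_try_optimize(GPUMaterial *mat)
{
  /* Only materials flagged READY by the complexity heuristic are considered. */
  if (GPU_material_optimization_status(mat) != GPU_MAT_OPTIMIZATION_READY) {
    return;
  }
  /* Respect the time threshold so rapid material edits do not trigger
   * excessive recompilation. */
  if (!GPU_material_optimization_ready(mat)) {
    return;
  }
  /* Compiles the optimized pass; on success the status becomes
   * GPU_MAT_OPTIMIZATION_SUCCESS and GPU_material_get_shader() will return
   * the optimized shader from then on. */
  GPU_material_optimize(mat);
}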
diff --git a/source/blender/gpu/intern/gpu_backend.hh b/source/blender/gpu/intern/gpu_backend.hh
index d2890efee72..2a545c8114e 100644
--- a/source/blender/gpu/intern/gpu_backend.hh
+++ b/source/blender/gpu/intern/gpu_backend.hh
@@ -38,7 +38,7 @@ class GPUBackend {
virtual void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) = 0;
virtual void compute_dispatch_indirect(StorageBuf *indirect_buf) = 0;
- virtual Context *context_alloc(void *ghost_window) = 0;
+ virtual Context *context_alloc(void *ghost_window, void *ghost_context) = 0;
virtual Batch *batch_alloc() = 0;
virtual DrawList *drawlist_alloc(int list_length) = 0;
diff --git a/source/blender/gpu/intern/gpu_codegen.cc b/source/blender/gpu/intern/gpu_codegen.cc
index 2241bcf9f9b..85cfa9749fa 100644
--- a/source/blender/gpu/intern/gpu_codegen.cc
+++ b/source/blender/gpu/intern/gpu_codegen.cc
@@ -95,6 +95,9 @@ struct GPUPass {
uint32_t hash;
/** Did we already tried to compile the attached GPUShader. */
bool compiled;
+ /** Hint that an optimized variant of this pass should be created based on a complexity heuristic
+ * during pass code generation. */
+ bool should_optimize;
};
/* -------------------------------------------------------------------- */
@@ -242,6 +245,11 @@ class GPUCodegen {
ListBase ubo_inputs_ = {nullptr, nullptr};
GPUInput *cryptomatte_input_ = nullptr;
+ /** Cache parameters for the complexity heuristic. */
+ uint nodes_total_ = 0;
+ uint textures_total_ = 0;
+ uint uniforms_total_ = 0;
+
public:
GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_) : mat(*mat_), graph(*graph_)
{
@@ -282,6 +290,14 @@ class GPUCodegen {
return hash_;
}
+ /* Heuristic determined during pass codegen for whether a
+ * more optimal variant of this material should be compiled. */
+ bool should_optimize_heuristic() const
+ {
+ bool do_optimize = (nodes_total_ >= 100 || textures_total_ >= 4 || uniforms_total_ >= 64);
+ return do_optimize;
+ }
+
private:
void set_unique_ids();
@@ -403,6 +419,9 @@ void GPUCodegen::generate_resources()
}
}
+ /* Increment heuristic. */
+ textures_total_ = slot;
+
if (!BLI_listbase_is_empty(&ubo_inputs_)) {
/* NOTE: generate_uniform_buffer() should have sorted the inputs before this. */
ss << "struct NodeTree {\n";
@@ -440,11 +459,16 @@ void GPUCodegen::generate_library()
GPUCodegenCreateInfo &info = *create_info;
void *value;
- GSetIterState pop_state = {};
- while (BLI_gset_pop(graph.used_libraries, &pop_state, &value)) {
+ /* Iterate over libraries. We need to keep this set intact in case
+ * it is required for generating the optimized pass. */
+ GHashIterator *ihash = BLI_ghashIterator_new((GHash *)graph.used_libraries);
+ while (!BLI_ghashIterator_done(ihash)) {
+ value = BLI_ghashIterator_getKey(ihash);
auto deps = gpu_shader_dependency_get_resolved_source((const char *)value);
info.dependencies_generated.extend_non_duplicates(deps);
+ BLI_ghashIterator_step(ihash);
}
+ BLI_ghashIterator_free(ihash);
}
void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
@@ -512,6 +536,9 @@ void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
}
}
eval_ss << ");\n\n";
+
+ /* Increment heuristic. */
+ nodes_total_++;
}
char *GPUCodegen::graph_serialize(eGPUNodeTag tree_tag, GPUNodeLink *output_link)
@@ -575,6 +602,7 @@ void GPUCodegen::generate_uniform_buffer()
if (input->source == GPU_SOURCE_UNIFORM && !input->link) {
/* We handle the UBO uniforms separately. */
BLI_addtail(&ubo_inputs_, BLI_genericNodeN(input));
+ uniforms_total_++;
}
}
}
@@ -602,6 +630,7 @@ void GPUCodegen::generate_graphs()
{
set_unique_ids();
+ /* Serialize graph. */
output.surface = graph_serialize(GPU_NODE_TAG_SURFACE | GPU_NODE_TAG_AOV, graph.outlink_surface);
output.volume = graph_serialize(GPU_NODE_TAG_VOLUME, graph.outlink_volume);
output.displacement = graph_serialize(GPU_NODE_TAG_DISPLACEMENT, graph.outlink_displacement);
@@ -637,10 +666,17 @@ void GPUCodegen::generate_graphs()
GPUPass *GPU_generate_pass(GPUMaterial *material,
GPUNodeGraph *graph,
GPUCodegenCallbackFn finalize_source_cb,
- void *thunk)
+ void *thunk,
+ bool optimize_graph)
{
gpu_node_graph_prune_unused(graph);
+ /* If the optimize_graph flag is set, we are generating an optimized
+ * variant of the GPUMaterial's GPUPass. */
+ if (optimize_graph) {
+ gpu_node_graph_optimize(graph);
+ }
+
/* Extract attributes before compiling so the generated VBOs are ready to accept the future
* shader. */
gpu_node_graph_finalize_uniform_attrs(graph);
@@ -648,23 +684,33 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
GPUCodegen codegen(material, graph);
codegen.generate_graphs();
codegen.generate_cryptomatte();
- codegen.generate_uniform_buffer();
- /* Cache lookup: Reuse shaders already compiled. */
- GPUPass *pass_hash = gpu_pass_cache_lookup(codegen.hash_get());
-
- /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
- * there is no way to have a collision currently. Some advocated to only use a bigger hash. */
- if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
- if (!gpu_pass_is_valid(pass_hash)) {
- /* Shader has already been created but failed to compile. */
- return nullptr;
+ GPUPass *pass_hash = nullptr;
+
+ if (!optimize_graph) {
+ /* The optimized version of the shader should not re-generate a UBO.
+ * The UBO will not be used for this variant. */
+ codegen.generate_uniform_buffer();
+
+ /* Cache lookup: Reuse shaders already compiled.
+ * NOTE: We only perform cache lookup for non-optimized shader
+ * graphs, as baked constant data, among other optimizations, will generate too many
+ * shader source permutations with minimal reusability. */
+ pass_hash = gpu_pass_cache_lookup(codegen.hash_get());
+
+ /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
+ * there is no way to have a collision currently. Some advocated to only use a bigger hash. */
+ if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
+ if (!gpu_pass_is_valid(pass_hash)) {
+ /* Shader has already been created but failed to compile. */
+ return nullptr;
+ }
+ /* No collision, just return the pass. */
+ BLI_spin_lock(&pass_cache_spin);
+ pass_hash->refcount += 1;
+ BLI_spin_unlock(&pass_cache_spin);
+ return pass_hash;
}
- /* No collision, just return the pass. */
- BLI_spin_lock(&pass_cache_spin);
- pass_hash->refcount += 1;
- BLI_spin_unlock(&pass_cache_spin);
- return pass_hash;
}
/* Either the shader is not compiled or there is a hash collision...
@@ -702,14 +748,31 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
pass->create_info = codegen.create_info;
pass->hash = codegen.hash_get();
pass->compiled = false;
+ /* Only flag the pass optimization hint if this is the first generated pass for a material.
+ * Optimized passes cannot be optimized further, even if the complexity heuristic would
+ * still flag them. */
+ pass->should_optimize = (!optimize_graph) && codegen.should_optimize_heuristic();
codegen.create_info = nullptr;
- gpu_pass_cache_insert_after(pass_hash, pass);
+ /* Only insert non-optimized graphs into the cache.
+ * Optimized graphs will continuously be recompiled with new unique source during material
+ * editing, which would quickly fill the cache with shaders offering minimal re-use. */
+ if (!optimize_graph) {
+ gpu_pass_cache_insert_after(pass_hash, pass);
+ }
}
return pass;
}
+bool GPU_pass_should_optimize(GPUPass *pass)
+{
+ /* Returns optimization heuristic prepared during
+ * initial codegen. */
+ return pass->should_optimize;
+}
+
/** \} */
/* -------------------------------------------------------------------- */
diff --git a/source/blender/gpu/intern/gpu_codegen.h b/source/blender/gpu/intern/gpu_codegen.h
index 95a672c0400..aabdf1ac003 100644
--- a/source/blender/gpu/intern/gpu_codegen.h
+++ b/source/blender/gpu/intern/gpu_codegen.h
@@ -25,10 +25,12 @@ typedef struct GPUPass GPUPass;
GPUPass *GPU_generate_pass(GPUMaterial *material,
struct GPUNodeGraph *graph,
GPUCodegenCallbackFn finalize_source_cb,
- void *thunk);
+ void *thunk,
+ bool optimize_graph);
GPUShader *GPU_pass_shader_get(GPUPass *pass);
bool GPU_pass_compile(GPUPass *pass, const char *shname);
void GPU_pass_release(GPUPass *pass);
+bool GPU_pass_should_optimize(GPUPass *pass);
/* Module */
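
For context, a minimal sketch (not part of this patch) of the updated internal code-generation flow using the new optimize_graph parameter and GPU_pass_should_optimize(); material_generate_passes is a hypothetical helper that mirrors the call sites changed in gpu_material.c below.

/* Hypothetical sketch of generating the default and optimized pass variants. */
#include "GPU_material.h"
#include "gpu_codegen.h"

static void material_generate_passes(GPUMaterial *mat,
                                      struct GPUNodeGraph *graph,
                                      GPUCodegenCallbackFn cb,
                                      void *thunk)
{
  /* Default pass: cached and un-optimized, keeps the NodeTree UBO. */
  GPUPass *pass = GPU_generate_pass(mat, graph, cb, thunk, false);
  if (pass == NULL) {
    return; /* Cache hit on a shader that previously failed to compile. */
  }
  /* Optimized variant: only generated when the complexity heuristic,
   * evaluated during the first code generation, flags this pass. */
  if (GPU_pass_should_optimize(pass)) {
    GPUPass *optimized_pass = GPU_generate_pass(mat, graph, cb, thunk, true);
    (void)optimized_pass; /* Stored on the material in the real call sites. */
  }
}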
diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc
index bcc418169b7..92cbbc5b4b0 100644
--- a/source/blender/gpu/intern/gpu_context.cc
+++ b/source/blender/gpu/intern/gpu_context.cc
@@ -94,7 +94,7 @@ Context *Context::get()
/* -------------------------------------------------------------------- */
-GPUContext *GPU_context_create(void *ghost_window)
+GPUContext *GPU_context_create(void *ghost_window, void *ghost_context)
{
{
std::scoped_lock lock(backend_users_mutex);
@@ -105,7 +105,7 @@ GPUContext *GPU_context_create(void *ghost_window)
num_backend_users++;
}
- Context *ctx = GPUBackend::get()->context_alloc(ghost_window);
+ Context *ctx = GPUBackend::get()->context_alloc(ghost_window, ghost_context);
GPU_context_active_set(wrap(ctx));
return wrap(ctx);
@@ -216,6 +216,9 @@ void GPU_render_step()
/** \name Backend selection
* \{ */
+/* NOTE: To enable Metal API, we need to temporarily change this to `GPU_BACKEND_METAL`.
+ * Until a global switch is added, Metal also needs to be enabled in GHOST_ContextCGL:
+ * `m_useMetalForRendering = true`. */
static const eGPUBackendType g_backend_type = GPU_BACKEND_OPENGL;
static GPUBackend *g_backend = nullptr;
diff --git a/source/blender/gpu/intern/gpu_material.c b/source/blender/gpu/intern/gpu_material.c
index 96809db1587..991cb229eda 100644
--- a/source/blender/gpu/intern/gpu_material.c
+++ b/source/blender/gpu/intern/gpu_material.c
@@ -34,6 +34,8 @@
#include "DRW_engine.h"
+#include "PIL_time.h"
+
#include "gpu_codegen.h"
#include "gpu_node_graph.h"
@@ -43,6 +45,17 @@
#define MAX_COLOR_BAND 128
#define MAX_GPU_SKIES 8
+/** Whether the optimized variant of the GPUPass should be created asynchronously.
+ * Usage of this depends on whether there are possible threading challenges of doing so.
+ * Currently, the overhead of GPU_generate_pass is relatively small compared to shader
+ * compilation, though this option exists in case material graph optimization ever causes
+ * a slowdown on the main thread.
+ *
+ * NOTE: The actual shader program for the optimized pass will always be compiled
+ * asynchronously; this flag only controls whether shader node graph source serialization
+ * happens on the compilation worker thread. */
+#define ASYNC_OPTIMIZED_PASS_CREATION 0
+
typedef struct GPUColorBandBuilder {
float pixels[MAX_COLOR_BAND][CM_TABLE + 1][4];
int current_layer;
@@ -57,6 +70,27 @@ struct GPUMaterial {
/* Contains GPUShader and source code for deferred compilation.
* Can be shared between similar material (i.e: sharing same nodetree topology). */
GPUPass *pass;
+ /* Optimized GPUPass, situationally compiled after the initial pass for optimal run-time
+ * performance. This shader variant bakes dynamic uniform data as constants directly into
+ * the shader source, rather than reading them from the UBO. */
+ GPUPass *optimized_pass;
+ /* Optimization status.
+ * We also use this status to determine whether this material should be considered for
+ * optimization. Only sufficiently complex shaders benefit from constant-folding optimizations.
+ * `GPU_MAT_OPTIMIZATION_READY` -> shader should be optimized and is ready for optimization.
+ * `GPU_MAT_OPTIMIZATION_SKIP` -> Shader should not be optimized as it would not benefit
+ * performance to do so, based on the heuristic.
+ */
+ eGPUMaterialOptimizationStatus optimization_status;
+ double creation_time;
+#if ASYNC_OPTIMIZED_PASS_CREATION == 1
+ struct DeferredOptimizePass {
+ GPUCodegenCallbackFn callback;
+ void *thunk;
+ };
+ struct DeferredOptimizePass optimize_pass_info;
+#endif
+
/** UBOs for this material parameters. */
GPUUniformBuf *ubo;
/** Compilation status. Do not use if shader is not GPU_MAT_SUCCESS. */
@@ -209,6 +243,9 @@ void GPU_material_free_single(GPUMaterial *material)
gpu_node_graph_free(&material->graph);
+ if (material->optimized_pass != NULL) {
+ GPU_pass_release(material->optimized_pass);
+ }
if (material->pass != NULL) {
GPU_pass_release(material->pass);
}
@@ -247,12 +284,15 @@ Scene *GPU_material_scene(GPUMaterial *material)
GPUPass *GPU_material_get_pass(GPUMaterial *material)
{
- return material->pass;
+ return (material->optimized_pass) ? material->optimized_pass : material->pass;
}
GPUShader *GPU_material_get_shader(GPUMaterial *material)
{
- return material->pass ? GPU_pass_shader_get(material->pass) : NULL;
+ /* First attempt to select optimized shader. If not available, fetch original. */
+ GPUShader *shader = (material->optimized_pass) ? GPU_pass_shader_get(material->optimized_pass) :
+ NULL;
+ return (shader) ? shader : ((material->pass) ? GPU_pass_shader_get(material->pass) : NULL);
}
const char *GPU_material_get_name(GPUMaterial *material)
@@ -665,6 +705,29 @@ void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status)
mat->status = status;
}
+eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat)
+{
+ return mat->optimization_status;
+}
+
+void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status)
+{
+ mat->optimization_status = status;
+ if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
+ /* Reset creation timer to delay optimization pass. */
+ mat->creation_time = PIL_check_seconds_timer();
+ }
+}
+
+bool GPU_material_optimization_ready(GPUMaterial *mat)
+{
+ /* Timer threshold before optimizations will be queued.
+ * When materials are frequently being modified, optimization
+ * can incur CPU overhead from excessive compilation. */
+ const double optimization_time_threshold_s = 5.0;
+ return ((PIL_check_seconds_timer() - mat->creation_time) >= optimization_time_threshold_s);
+}
+
/* Code generation */
bool GPU_material_has_surface_output(GPUMaterial *mat)
@@ -730,6 +793,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
mat->uuid = shader_uuid;
mat->flag = GPU_MATFLAG_UPDATED;
mat->status = GPU_MAT_CREATED;
+ mat->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
mat->is_volume_shader = is_volume_shader;
mat->graph.used_libraries = BLI_gset_new(
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
@@ -748,7 +812,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
{
/* Create source code and search pass cache for an already compiled version. */
- mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk);
+ mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, false);
if (mat->pass == NULL) {
/* We had a cache hit and the shader has already failed to compile. */
@@ -756,11 +820,44 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
gpu_node_graph_free(&mat->graph);
}
else {
+ /* Determine whether we should generate an optimized variant of the graph.
+ * Heuristic is based on complexity of default material pass and shader node graph. */
+ if (GPU_pass_should_optimize(mat->pass)) {
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
+ }
+
GPUShader *sh = GPU_pass_shader_get(mat->pass);
if (sh != NULL) {
/* We had a cache hit and the shader is already compiled. */
mat->status = GPU_MAT_SUCCESS;
- gpu_node_graph_free_nodes(&mat->graph);
+
+ if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
+ gpu_node_graph_free_nodes(&mat->graph);
+ }
+ }
+
+ /* Generate optimized pass. */
+ if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
+#if ASYNC_OPTIMIZED_PASS_CREATION == 1
+ mat->optimized_pass = NULL;
+ mat->optimize_pass_info.callback = callback;
+ mat->optimize_pass_info.thunk = thunk;
+#else
+ mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, true);
+ if (mat->optimized_pass == NULL) {
+ /* Failed to create optimized pass. */
+ gpu_node_graph_free_nodes(&mat->graph);
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
+ }
+ else {
+ GPUShader *optimized_sh = GPU_pass_shader_get(mat->optimized_pass);
+ if (optimized_sh != NULL) {
+ /* Optimized shader already available. */
+ gpu_node_graph_free_nodes(&mat->graph);
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
+ }
+ }
+#endif
}
}
}
@@ -811,7 +908,11 @@ void GPU_material_compile(GPUMaterial *mat)
GPUShader *sh = GPU_pass_shader_get(mat->pass);
if (sh != NULL) {
mat->status = GPU_MAT_SUCCESS;
- gpu_node_graph_free_nodes(&mat->graph);
+
+ if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
+ /* Only free node graph nodes if not required by secondary optimization pass. */
+ gpu_node_graph_free_nodes(&mat->graph);
+ }
}
else {
mat->status = GPU_MAT_FAILED;
@@ -825,6 +926,71 @@ void GPU_material_compile(GPUMaterial *mat)
}
}
+void GPU_material_optimize(GPUMaterial *mat)
+{
+ /* If shader is flagged for skipping optimization or has already been successfully
+ * optimized, skip. */
+ if (ELEM(mat->optimization_status, GPU_MAT_OPTIMIZATION_SKIP, GPU_MAT_OPTIMIZATION_SUCCESS)) {
+ return;
+ }
+
+ /* If original shader has not been fully compiled, we are not
+ * ready to perform optimization. */
+ if (mat->status != GPU_MAT_SUCCESS) {
+ /* Reset optimization status. */
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
+ return;
+ }
+
+#if ASYNC_OPTIMIZED_PASS_CREATION == 1
+ /* If the optimized pass is not valid, first generate the optimized pass.
+ * NOTE(Threading): Need to verify whether GPU_generate_pass can cause side effects, especially
+ * when used with "thunk". So far this appears to work, and deferring optimized pass creation is
+ * preferable, as these passes do not benefit from caching due to baked constants. However, this
+ * could be a concern in certain cases. */
+ if (!mat->optimized_pass) {
+ mat->optimized_pass = GPU_generate_pass(
+ mat, &mat->graph, mat->optimize_pass_info.callback, mat->optimize_pass_info.thunk, true);
+ BLI_assert(mat->optimized_pass);
+ }
+#else
+ if (!mat->optimized_pass) {
+ /* Optimized pass has not been created, skip future optimization attempts. */
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
+ return;
+ }
+#endif
+
+ bool success;
+ /* NOTE: The shader may have already been compiled here since we are
+ * sharing GPUShader across GPUMaterials. In this case it's a no-op. */
+#ifndef NDEBUG
+ success = GPU_pass_compile(mat->optimized_pass, mat->name);
+#else
+ success = GPU_pass_compile(mat->optimized_pass, __func__);
+#endif
+
+ if (success) {
+ GPUShader *sh = GPU_pass_shader_get(mat->optimized_pass);
+ if (sh != NULL) {
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
+ }
+ else {
+ /* Optimized pass failed to compile. Disable any future optimization attempts. */
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
+ }
+ }
+ else {
+ /* Optimization pass generation failed. Disable future attempts to optimize. */
+ GPU_pass_release(mat->optimized_pass);
+ mat->optimized_pass = NULL;
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
+ }
+
+ /* Release node graph as no longer needed. */
+ gpu_node_graph_free_nodes(&mat->graph);
+}
+
void GPU_materials_free(Main *bmain)
{
LISTBASE_FOREACH (Material *, ma, &bmain->materials) {
@@ -848,6 +1014,8 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
material->graph.used_libraries = BLI_gset_new(
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
material->refcount = 1;
+ material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
+ material->optimized_pass = NULL;
/* Construct the material graph by adding and linking the necessary GPU material nodes. */
construct_function_cb(thunk, material);
@@ -856,7 +1024,9 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
gpu_material_ramp_texture_build(material);
/* Lookup an existing pass in the cache or generate a new one. */
- material->pass = GPU_generate_pass(material, &material->graph, generate_code_function_cb, thunk);
+ material->pass = GPU_generate_pass(
+ material, &material->graph, generate_code_function_cb, thunk, false);
+ material->optimized_pass = NULL;
/* The pass already exists in the pass cache but its shader already failed to compile. */
if (material->pass == NULL) {
@@ -865,11 +1035,42 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
return material;
}
+ /* Generate optimized pass. */
+ if (GPU_pass_should_optimize(material->pass)) {
+
+#if ASYNC_OPTIMIZED_PASS_CREATION == 1
+ material->optimized_pass = NULL;
+ material->optimize_pass_info.callback = generate_code_function_cb;
+ material->optimize_pass_info.thunk = thunk;
+ GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_READY);
+#else
+ material->optimized_pass = GPU_generate_pass(
+ material, &material->graph, generate_code_function_cb, thunk, true);
+
+ if (material->optimized_pass == NULL) {
+ /* Failed to create optimized pass. */
+ gpu_node_graph_free_nodes(&material->graph);
+ GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SKIP);
+ }
+ else {
+ GPUShader *optimized_sh = GPU_pass_shader_get(material->optimized_pass);
+ if (optimized_sh != NULL) {
+ /* Optimized shader already available. */
+ gpu_node_graph_free_nodes(&material->graph);
+ GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SUCCESS);
+ }
+ }
+#endif
+ }
+
/* The pass already exists in the pass cache and its shader is already compiled. */
GPUShader *shader = GPU_pass_shader_get(material->pass);
if (shader != NULL) {
material->status = GPU_MAT_SUCCESS;
- gpu_node_graph_free_nodes(&material->graph);
+ if (material->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
+ /* Only free node graph if not required by secondary optimization pass. */
+ gpu_node_graph_free_nodes(&material->graph);
+ }
return material;
}
diff --git a/source/blender/gpu/intern/gpu_node_graph.c b/source/blender/gpu/intern/gpu_node_graph.c
index a305413905b..3ca2399a547 100644
--- a/source/blender/gpu/intern/gpu_node_graph.c
+++ b/source/blender/gpu/intern/gpu_node_graph.c
@@ -914,3 +914,22 @@ void gpu_node_graph_prune_unused(GPUNodeGraph *graph)
}
}
}
+
+void gpu_node_graph_optimize(GPUNodeGraph *graph)
+{
+ /* Replace all uniform node links with constants. */
+ LISTBASE_FOREACH (GPUNode *, node, &graph->nodes) {
+ LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
+ if (input->link) {
+ if (input->link->link_type == GPU_NODE_LINK_UNIFORM) {
+ input->link->link_type = GPU_NODE_LINK_CONSTANT;
+ }
+ }
+ if (input->source == GPU_SOURCE_UNIFORM) {
+ input->source = (input->type == GPU_CLOSURE) ? GPU_SOURCE_STRUCT : GPU_SOURCE_CONSTANT;
+ }
+ }
+ }
+
+ /* TODO: Consider performing other node graph optimizations here. */
+}
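
For context, a minimal sketch (not part of this patch) of where gpu_node_graph_optimize() sits relative to the other graph passes when the optimized variant is generated, mirroring GPU_generate_pass() with optimize_graph set to true; prepare_optimized_graph is a hypothetical helper name.

/* Hypothetical sketch of the graph pass ordering for the optimized variant. */
#include "gpu_node_graph.h"

static void prepare_optimized_graph(GPUNodeGraph *graph)
{
  /* Drop nodes that do not contribute to the graph outputs. */
  gpu_node_graph_prune_unused(graph);
  /* Rewrite uniform inputs as inline constants so the shader compiler can
   * constant-fold them; the NodeTree UBO is not generated for this variant. */
  gpu_node_graph_optimize(graph);
  /* Uniform-attribute finalization still runs afterwards. */
  gpu_node_graph_finalize_uniform_attrs(graph);
}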
diff --git a/source/blender/gpu/intern/gpu_node_graph.h b/source/blender/gpu/intern/gpu_node_graph.h
index 085620b30e4..75ca05ffaea 100644
--- a/source/blender/gpu/intern/gpu_node_graph.h
+++ b/source/blender/gpu/intern/gpu_node_graph.h
@@ -179,6 +179,21 @@ typedef struct GPUNodeGraph {
void gpu_node_graph_prune_unused(GPUNodeGraph *graph);
void gpu_node_graph_finalize_uniform_attrs(GPUNodeGraph *graph);
+
+/**
+ * Optimize node graph for optimized material shader path.
+ * Once the base material has been generated, we can modify the shader
+ * node graph to create one which will produce an optimally performing shader.
+ * This currently involves baking uniform data into constant data to enable
+ * aggressive constant folding by the compiler in order to reduce complexity and
+ * shader core memory pressure.
+ *
+ * NOTE: Graph optimizations will produce a shader which needs to be re-compiled
+ * more frequently; however, the default material pass will always exist to fall
+ * back on.
+ */
+void gpu_node_graph_optimize(GPUNodeGraph *graph);
+
/**
* Free intermediate node graph.
*/
diff --git a/source/blender/gpu/intern/gpu_shader_builder.cc b/source/blender/gpu/intern/gpu_shader_builder.cc
index 9b699c60126..3aa2963ecd0 100644
--- a/source/blender/gpu/intern/gpu_shader_builder.cc
+++ b/source/blender/gpu/intern/gpu_shader_builder.cc
@@ -45,7 +45,7 @@ void ShaderBuilder::init()
ghost_context_ = GHOST_CreateOpenGLContext(ghost_system_, glSettings);
GHOST_ActivateOpenGLContext(ghost_context_);
- gpu_context_ = GPU_context_create(nullptr);
+ gpu_context_ = GPU_context_create(nullptr, ghost_context_);
GPU_init();
}
diff --git a/source/blender/gpu/intern/gpu_shader_interface.cc b/source/blender/gpu/intern/gpu_shader_interface.cc
index 6f43b379d31..d9e5e066fea 100644
--- a/source/blender/gpu/intern/gpu_shader_interface.cc
+++ b/source/blender/gpu/intern/gpu_shader_interface.cc
@@ -22,8 +22,8 @@ ShaderInterface::ShaderInterface() = default;
ShaderInterface::~ShaderInterface()
{
/* Free memory used by name_buffer. */
- MEM_freeN(name_buffer_);
- MEM_freeN(inputs_);
+ MEM_SAFE_FREE(name_buffer_);
+ MEM_SAFE_FREE(inputs_);
}
static void sort_input_list(MutableSpan<ShaderInput> dst)
diff --git a/source/blender/gpu/metal/mtl_backend.hh b/source/blender/gpu/metal/mtl_backend.hh
index 214a5d738a9..082fab24ba4 100644
--- a/source/blender/gpu/metal/mtl_backend.hh
+++ b/source/blender/gpu/metal/mtl_backend.hh
@@ -63,7 +63,7 @@ class MTLBackend : public GPUBackend {
/* MTL Allocators need to be implemented in separate .mm files, due to allocation of Objective-C
* objects. */
- Context *context_alloc(void *ghost_window) override;
+ Context *context_alloc(void *ghost_window, void *ghost_context) override;
Batch *batch_alloc() override;
DrawList *drawlist_alloc(int list_length) override;
FrameBuffer *framebuffer_alloc(const char *name) override;
diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm
index ec9e8ab4d15..2ca1fd3f3d0 100644
--- a/source/blender/gpu/metal/mtl_backend.mm
+++ b/source/blender/gpu/metal/mtl_backend.mm
@@ -8,8 +8,11 @@
#include "gpu_backend.hh"
#include "mtl_backend.hh"
+#include "mtl_batch.hh"
#include "mtl_context.hh"
+#include "mtl_drawlist.hh"
#include "mtl_framebuffer.hh"
+#include "mtl_immediate.hh"
#include "mtl_index_buffer.hh"
#include "mtl_query.hh"
#include "mtl_shader.hh"
@@ -37,21 +40,21 @@ void MTLBackend::samplers_update(){
/* Placeholder -- Handled in MTLContext. */
};
-Context *MTLBackend::context_alloc(void *ghost_window)
+Context *MTLBackend::context_alloc(void *ghost_window, void *ghost_context)
{
- return new MTLContext(ghost_window);
+ return new MTLContext(ghost_window, ghost_context);
};
Batch *MTLBackend::batch_alloc()
{
- /* TODO(Metal): Implement MTLBatch. */
- return nullptr;
+ /* TODO(Metal): Full MTLBatch implementation. */
+ return new MTLBatch();
};
DrawList *MTLBackend::drawlist_alloc(int list_length)
{
- /* TODO(Metal): Implement MTLDrawList. */
- return nullptr;
+ /* TODO(Metal): Full MTLDrawList implementation. */
+ return new MTLDrawList(list_length);
};
FrameBuffer *MTLBackend::framebuffer_alloc(const char *name)
diff --git a/source/blender/gpu/metal/mtl_batch.hh b/source/blender/gpu/metal/mtl_batch.hh
new file mode 100644
index 00000000000..66603dabd15
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_batch.hh
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * GPU geometry batch
+ * Contains VAOs + VBOs + Shader representing a drawable entity.
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+
+#include "gpu_batch_private.hh"
+
+namespace blender {
+namespace gpu {
+
+
+/* Pass-through MTLBatch. TODO(Metal): Implement. */
+class MTLBatch : public Batch {
+ public:
+ void draw(int v_first, int v_count, int i_first, int i_count) override {
+
+ }
+
+ void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override {
+
+ }
+
+ void multi_draw_indirect(GPUStorageBuf *indirect_buf,
+ int count,
+ intptr_t offset,
+ intptr_t stride) override {
+
+ }
+ MEM_CXX_CLASS_ALLOC_FUNCS("MTLBatch");
+};
+
+} // namespace gpu
+} // namespace blender
diff --git a/source/blender/gpu/metal/mtl_command_buffer.mm b/source/blender/gpu/metal/mtl_command_buffer.mm
index d2936e8e91f..a9cabbb111f 100644
--- a/source/blender/gpu/metal/mtl_command_buffer.mm
+++ b/source/blender/gpu/metal/mtl_command_buffer.mm
@@ -54,6 +54,7 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin()
MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init];
desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
desc.retainedReferences = YES;
+ BLI_assert(context_.queue != nil);
active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc];
}
else {
@@ -611,40 +612,187 @@ void MTLRenderPassState::bind_vertex_sampler(MTLSamplerBinding &sampler_binding,
bool use_argument_buffer_for_samplers,
uint slot)
{
- /* TODO(Metal): Implement RenderCommandEncoder vertex sampler binding utility. This will be
- * implemented alongside MTLShader. */
+ /* Range check. */
+ const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface();
+ BLI_assert(slot >= 0);
+ BLI_assert(slot <= shader_interface->get_max_texture_index());
+ BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS);
+ UNUSED_VARS_NDEBUG(shader_interface);
+
+ /* If sampler state has not changed for the given slot, we do not need to fetch. */
+ if (this->cached_vertex_sampler_state_bindings[slot].sampler_state == nil ||
+ !(this->cached_vertex_sampler_state_bindings[slot].binding_state == sampler_binding.state) ||
+ use_argument_buffer_for_samplers) {
+
+ id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ?
+ ctx.get_default_sampler_state() :
+ ctx.get_sampler_from_state(sampler_binding.state);
+ if (!use_argument_buffer_for_samplers) {
+ /* Update binding and cached state. */
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ BLI_assert(rec != nil);
+ [rec setVertexSamplerState:sampler_state atIndex:slot];
+ this->cached_vertex_sampler_state_bindings[slot].binding_state = sampler_binding.state;
+ this->cached_vertex_sampler_state_bindings[slot].sampler_state = sampler_state;
+ }
+
+ /* Flag last binding type. */
+ this->cached_vertex_sampler_state_bindings[slot].is_arg_buffer_binding =
+ use_argument_buffer_for_samplers;
+
+ /* Always assign to argument buffer samplers binding array - Efficiently ensures the value in
+ * the samplers array is always up to date. */
+ ctx.samplers_.mtl_sampler[slot] = sampler_state;
+ ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state;
+ }
}
void MTLRenderPassState::bind_fragment_sampler(MTLSamplerBinding &sampler_binding,
bool use_argument_buffer_for_samplers,
uint slot)
{
- /* TODO(Metal): Implement RenderCommandEncoder fragment sampler binding utility. This will be
- * implemented alongside MTLShader. */
+ /* Range check. */
+ const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface();
+ BLI_assert(slot >= 0);
+ BLI_assert(slot <= shader_interface->get_max_texture_index());
+ BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS);
+ UNUSED_VARS_NDEBUG(shader_interface);
+
+ /* If sampler state has not changed for the given slot, we do not need to fetch. */
+ if (this->cached_fragment_sampler_state_bindings[slot].sampler_state == nil ||
+ !(this->cached_fragment_sampler_state_bindings[slot].binding_state ==
+ sampler_binding.state) ||
+ use_argument_buffer_for_samplers) {
+
+ id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ?
+ ctx.get_default_sampler_state() :
+ ctx.get_sampler_from_state(sampler_binding.state);
+ if (!use_argument_buffer_for_samplers) {
+ /* Update binding and cached state. */
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ BLI_assert(rec != nil);
+ [rec setFragmentSamplerState:sampler_state atIndex:slot];
+ this->cached_fragment_sampler_state_bindings[slot].binding_state = sampler_binding.state;
+ this->cached_fragment_sampler_state_bindings[slot].sampler_state = sampler_state;
+ }
+
+ /* Flag last binding type */
+ this->cached_fragment_sampler_state_bindings[slot].is_arg_buffer_binding =
+ use_argument_buffer_for_samplers;
+
+ /* Always assign to argument buffer samplers binding array - Efficiently ensures the value in
+ * the samplers array is always up to date. */
+ ctx.samplers_.mtl_sampler[slot] = sampler_state;
+ ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state;
+ }
}
void MTLRenderPassState::bind_vertex_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index)
{
- /* TODO(Metal): Implement RenderCommandEncoder vertex buffer binding utility. This will be
- * implemented alongside the full MTLMemoryManager. */
+ BLI_assert(index >= 0);
+ BLI_assert(buffer_offset >= 0);
+ BLI_assert(buffer != nil);
+
+ BufferBindingCached &current_vert_ubo_binding = this->cached_vertex_buffer_bindings[index];
+ if (current_vert_ubo_binding.offset != buffer_offset ||
+ current_vert_ubo_binding.metal_buffer != buffer || current_vert_ubo_binding.is_bytes) {
+
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ BLI_assert(rec != nil);
+
+ if (current_vert_ubo_binding.metal_buffer == buffer) {
+ /* If buffer is the same, but offset has changed. */
+ [rec setVertexBufferOffset:buffer_offset atIndex:index];
+ }
+ else {
+ /* Bind Vertex Buffer. */
+ [rec setVertexBuffer:buffer offset:buffer_offset atIndex:index];
+ }
+
+ /* Update Bind-state cache. */
+ this->cached_vertex_buffer_bindings[index].is_bytes = false;
+ this->cached_vertex_buffer_bindings[index].metal_buffer = buffer;
+ this->cached_vertex_buffer_bindings[index].offset = buffer_offset;
+ }
}
void MTLRenderPassState::bind_fragment_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index)
{
- /* TODO(Metal): Implement RenderCommandEncoder fragment buffer binding utility. This will be
- * implemented alongside the full MTLMemoryManager. */
+ BLI_assert(index >= 0);
+ BLI_assert(buffer_offset >= 0);
+ BLI_assert(buffer != nil);
+
+ BufferBindingCached &current_frag_ubo_binding = this->cached_fragment_buffer_bindings[index];
+ if (current_frag_ubo_binding.offset != buffer_offset ||
+ current_frag_ubo_binding.metal_buffer != buffer || current_frag_ubo_binding.is_bytes) {
+
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ BLI_assert(rec != nil);
+
+ if (current_frag_ubo_binding.metal_buffer == buffer) {
+ /* If buffer is the same, but offset has changed. */
+ [rec setFragmentBufferOffset:buffer_offset atIndex:index];
+ }
+ else {
+ /* Bind Fragment Buffer */
+ [rec setFragmentBuffer:buffer offset:buffer_offset atIndex:index];
+ }
+
+ /* Update Bind-state cache */
+ this->cached_fragment_buffer_bindings[index].is_bytes = false;
+ this->cached_fragment_buffer_bindings[index].metal_buffer = buffer;
+ this->cached_fragment_buffer_bindings[index].offset = buffer_offset;
+ }
}
void MTLRenderPassState::bind_vertex_bytes(void *bytes, uint length, uint index)
{
- /* TODO(Metal): Implement RenderCommandEncoder vertex bytes binding utility. This will be
- * implemented alongside the full MTLMemoryManager. */
+ /* Bytes always updated as source data may have changed. */
+ BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ BLI_assert(length > 0);
+ BLI_assert(bytes != nullptr);
+
+ if (length < MTL_MAX_SET_BYTES_SIZE) {
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ [rec setVertexBytes:bytes length:length atIndex:index];
+ }
+ else {
+ /* We have run over the setBytes limit, bind buffer instead. */
+ MTLTemporaryBuffer range =
+ ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256);
+ memcpy(range.data, bytes, length);
+ this->bind_vertex_buffer(range.metal_buffer, range.buffer_offset, index);
+ }
+
+ /* Update Bind-state cache */
+ this->cached_vertex_buffer_bindings[index].is_bytes = true;
+ this->cached_vertex_buffer_bindings[index].metal_buffer = nil;
+ this->cached_vertex_buffer_bindings[index].offset = -1;
}
void MTLRenderPassState::bind_fragment_bytes(void *bytes, uint length, uint index)
{
- /* TODO(Metal): Implement RenderCommandEncoder fragment bytes binding utility. This will be
- * implemented alongside the full MTLMemoryManager. */
+ /* Bytes always updated as source data may have changed. */
+ BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ BLI_assert(length > 0);
+ BLI_assert(bytes != nullptr);
+
+ if (length < MTL_MAX_SET_BYTES_SIZE) {
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ [rec setFragmentBytes:bytes length:length atIndex:index];
+ }
+ else {
+ /* We have run over the setBytes limit, bind buffer instead. */
+ MTLTemporaryBuffer range =
+ ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256);
+ memcpy(range.data, bytes, length);
+ this->bind_fragment_buffer(range.metal_buffer, range.buffer_offset, index);
+ }
+
+ /* Update Bind-state cache. */
+ this->cached_fragment_buffer_bindings[index].is_bytes = true;
+ this->cached_fragment_buffer_bindings[index].metal_buffer = nil;
+ this->cached_fragment_buffer_bindings[index].offset = -1;
}
/** \} */
diff --git a/source/blender/gpu/metal/mtl_common.hh b/source/blender/gpu/metal/mtl_common.hh
index b6f9c0050a9..5c322efa3f9 100644
--- a/source/blender/gpu/metal/mtl_common.hh
+++ b/source/blender/gpu/metal/mtl_common.hh
@@ -3,7 +3,9 @@
#ifndef __MTL_COMMON
#define __MTL_COMMON
-// -- Renderer Options --
+/** -- Renderer Options -- */
+/* Number of frames over which rolling averages are taken. */
+#define MTL_FRAME_AVERAGE_COUNT 5
#define MTL_MAX_DRAWABLES 3
#define MTL_MAX_SET_BYTES_SIZE 4096
#define MTL_FORCE_WAIT_IDLE 0
diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh
index 577438667d6..5991fe2bc3e 100644
--- a/source/blender/gpu/metal/mtl_context.hh
+++ b/source/blender/gpu/metal/mtl_context.hh
@@ -12,6 +12,10 @@
#include "GPU_common_types.h"
#include "GPU_context.h"
+#include "intern/GHOST_Context.h"
+#include "intern/GHOST_ContextCGL.h"
+#include "intern/GHOST_Window.h"
+
#include "mtl_backend.hh"
#include "mtl_capabilities.hh"
#include "mtl_common.hh"
@@ -570,12 +574,44 @@ class MTLCommandBufferManager {
class MTLContext : public Context {
friend class MTLBackend;
+ friend class MTLRenderPassState;
+
+ public:
+ /* Swapchain and latency management. */
+ static std::atomic<int> max_drawables_in_flight;
+ static std::atomic<int64_t> avg_drawable_latency_us;
+ static int64_t frame_latency[MTL_FRAME_AVERAGE_COUNT];
+
+ public:
+ /* Shaders and Pipeline state. */
+ MTLContextGlobalShaderPipelineState pipeline_state;
+
+ /* Metal API Resource Handles. */
+ id<MTLCommandQueue> queue = nil;
+ id<MTLDevice> device = nil;
+
+#ifndef NDEBUG
+ /* Label for context debug name assignment. */
+ NSString *label = nil;
+#endif
+
+ /* Memory Management. */
+ MTLScratchBufferManager memory_manager;
+ static MTLBufferPool global_memory_manager;
+
+ /* CommandBuffer managers. */
+ MTLCommandBufferManager main_command_buffer;
private:
- /* Null buffers for empty/uninitialized bindings.
- * Null attribute buffer follows default attribute format of OpenGL Back-end. */
- id<MTLBuffer> null_buffer_; /* All zero's. */
- id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */
+ /* Parent Context. */
+ GHOST_ContextCGL *ghost_context_;
+
+ /* Render Passes and Framebuffers. */
+ id<MTLTexture> default_fbo_mtltexture_ = nil;
+ gpu::MTLTexture *default_fbo_gputexture_ = nullptr;
+
+ /* Depth-stencil state cache. */
+ blender::Map<MTLContextDepthStencilState, id<MTLDepthStencilState>> depth_stencil_state_cache;
/* Compute and specialization caches. */
MTLContextTextureUtils texture_utils_;
@@ -601,23 +637,20 @@ class MTLContext : public Context {
gpu::MTLBuffer *visibility_buffer_ = nullptr;
bool visibility_is_dirty_ = false;
- public:
- /* Shaders and Pipeline state. */
- MTLContextGlobalShaderPipelineState pipeline_state;
-
- /* Metal API Resource Handles. */
- id<MTLCommandQueue> queue = nil;
- id<MTLDevice> device = nil;
-
- /* Memory Management */
- MTLScratchBufferManager memory_manager;
- static MTLBufferPool global_memory_manager;
+ /* Null buffers for empty/uninitialized bindings.
+ * Null attribute buffer follows the default attribute format of the OpenGL backend. */
+ id<MTLBuffer> null_buffer_; /* All zeros. */
+ id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0, 0.0, 0.0, 1.0). */
- /* CommandBuffer managers. */
- MTLCommandBufferManager main_command_buffer;
+ /** Dummy resources. */
+ /* Maximum of 32 texture types, though most combinations are invalid. */
+ gpu::MTLTexture *dummy_textures_[GPU_TEXTURE_BUFFER] = {nullptr};
+ GPUVertFormat dummy_vertformat_;
+ GPUVertBuf *dummy_verts_ = nullptr;
+ public:
/* GPUContext interface. */
- MTLContext(void *ghost_window);
+ MTLContext(void *ghost_window, void *ghost_context);
~MTLContext();
static void check_error(const char *info);
@@ -673,6 +706,35 @@ class MTLContext : public Context {
void pipeline_state_init();
MTLShader *get_active_shader();
+ /* These functions ensure that the current RenderCommandEncoder has
+ * the correct global state assigned. This should be called prior
+ * to every draw call, to ensure that all state is applied and up
+ * to date. We handle:
+ *
+ * - Buffer bindings (Vertex buffers, Uniforms, UBOs, transform feedback)
+ * - Texture bindings
+ * - Sampler bindings (+ argument buffer bindings)
+ * - Dynamic Render pipeline state (on encoder)
+ * - Baking Pipeline State Objects (PSOs) for current shader, based
+ * on final pipeline state.
+ *
+ * `ensure_render_pipeline_state` will return false if the state is
+ * invalid and cannot be applied. This should cancel a draw call. */
+ bool ensure_render_pipeline_state(MTLPrimitiveType prim_type);
+ bool ensure_uniform_buffer_bindings(
+ id<MTLRenderCommandEncoder> rec,
+ const MTLShaderInterface *shader_interface,
+ const MTLRenderPipelineStateInstance *pipeline_state_instance);
+ void ensure_texture_bindings(id<MTLRenderCommandEncoder> rec,
+ MTLShaderInterface *shader_interface,
+ const MTLRenderPipelineStateInstance *pipeline_state_instance);
+ void ensure_depth_stencil_state(MTLPrimitiveType prim_type);
+
+ id<MTLBuffer> get_null_buffer();
+ id<MTLBuffer> get_null_attribute_buffer();
+ gpu::MTLTexture *get_dummy_texture(eGPUTextureType type);
+ void free_dummy_resources();
+
/* State assignment. */
void set_viewport(int origin_x, int origin_y, int width, int height);
void set_scissor(int scissor_x, int scissor_y, int scissor_width, int scissor_height);
@@ -720,9 +782,37 @@ class MTLContext : public Context {
{
return MTLContext::global_memory_manager;
}
- /* Uniform Buffer Bindings to command encoders. */
- id<MTLBuffer> get_null_buffer();
- id<MTLBuffer> get_null_attribute_buffer();
+
+ /* Swapchain and latency management. */
+ static void latency_resolve_average(int64_t frame_latency_us)
+ {
+ int64_t avg = 0;
+ int64_t frame_c = 0;
+ for (int i = MTL_FRAME_AVERAGE_COUNT - 1; i > 0; i--) {
+ MTLContext::frame_latency[i] = MTLContext::frame_latency[i - 1];
+ avg += MTLContext::frame_latency[i];
+ frame_c += (MTLContext::frame_latency[i] > 0) ? 1 : 0;
+ }
+ MTLContext::frame_latency[0] = frame_latency_us;
+ avg += MTLContext::frame_latency[0];
+ if (frame_c > 0) {
+ avg /= frame_c;
+ }
+ else {
+ avg = 0;
+ }
+ MTLContext::avg_drawable_latency_us = avg;
+ }
+
+ private:
+ void set_ghost_context(GHOST_ContextHandle ghostCtxHandle);
+ void set_ghost_window(GHOST_WindowHandle ghostWinHandle);
};
+/* GHOST Context callback and present. */
+void present(MTLRenderPassDescriptor *blit_descriptor,
+ id<MTLRenderPipelineState> blit_pso,
+ id<MTLTexture> swapchain_texture,
+ id<CAMetalDrawable> drawable);
+
} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm
index 1302cf0dabd..a89339d0d14 100644
--- a/source/blender/gpu/metal/mtl_context.mm
+++ b/source/blender/gpu/metal/mtl_context.mm
@@ -5,13 +5,29 @@
*/
#include "mtl_context.hh"
#include "mtl_debug.hh"
+#include "mtl_framebuffer.hh"
+#include "mtl_immediate.hh"
+#include "mtl_memory.hh"
+#include "mtl_primitive.hh"
#include "mtl_shader.hh"
#include "mtl_shader_interface.hh"
#include "mtl_state.hh"
+#include "mtl_uniform_buffer.hh"
#include "DNA_userdef_types.h"
#include "GPU_capabilities.h"
+#include "GPU_matrix.h"
+#include "GPU_shader.h"
+#include "GPU_texture.h"
+#include "GPU_uniform_buffer.h"
+#include "GPU_vertex_buffer.h"
+#include "intern/gpu_matrix_private.h"
+
+#include "PIL_time.h"
+
+#include <fstream>
+#include <string>
using namespace blender;
using namespace blender::gpu;
@@ -21,21 +37,118 @@ namespace blender::gpu {
/* Global memory manager. */
MTLBufferPool MTLContext::global_memory_manager;
+/* Swapchain and latency management. */
+std::atomic<int> MTLContext::max_drawables_in_flight = 0;
+std::atomic<int64_t> MTLContext::avg_drawable_latency_us = 0;
+int64_t MTLContext::frame_latency[MTL_FRAME_AVERAGE_COUNT] = {0};
+
+/* -------------------------------------------------------------------- */
+/** \name GHOST Context interaction.
+ * \{ */
+
+void MTLContext::set_ghost_context(GHOST_ContextHandle ghostCtxHandle)
+{
+ GHOST_Context *ghost_ctx = reinterpret_cast<GHOST_Context *>(ghostCtxHandle);
+ BLI_assert(ghost_ctx != nullptr);
+
+ /* Release old MTLTexture handle */
+ if (default_fbo_mtltexture_) {
+ [default_fbo_mtltexture_ release];
+ default_fbo_mtltexture_ = nil;
+ }
+
+ /* Release Framebuffer attachments */
+ MTLFrameBuffer *mtl_front_left = static_cast<MTLFrameBuffer *>(this->front_left);
+ MTLFrameBuffer *mtl_back_left = static_cast<MTLFrameBuffer *>(this->back_left);
+ mtl_front_left->remove_all_attachments();
+ mtl_back_left->remove_all_attachments();
+
+ GHOST_ContextCGL *ghost_cgl_ctx = dynamic_cast<GHOST_ContextCGL *>(ghost_ctx);
+ if (ghost_cgl_ctx != NULL) {
+ default_fbo_mtltexture_ = ghost_cgl_ctx->metalOverlayTexture();
+
+ MTL_LOG_INFO(
+ "Binding GHOST context CGL %p to GPU context %p. (Device: %p, queue: %p, texture: %p)\n",
+ ghost_cgl_ctx,
+ this,
+ this->device,
+ this->queue,
+ default_fbo_gputexture_);
+
+ /* Check if the GHOST Context provides a default framebuffer: */
+ if (default_fbo_mtltexture_) {
+
+ /* Release old GPUTexture handle */
+ if (default_fbo_gputexture_) {
+ GPU_texture_free(wrap(static_cast<Texture *>(default_fbo_gputexture_)));
+ default_fbo_gputexture_ = nullptr;
+ }
+
+ /* Retain handle */
+ [default_fbo_mtltexture_ retain];
+
+ /*** Create front and back-buffers ***/
+ /* Create gpu::MTLTexture objects */
+ default_fbo_gputexture_ = new gpu::MTLTexture(
+ "MTL_BACKBUFFER", GPU_RGBA16F, GPU_TEXTURE_2D, default_fbo_mtltexture_);
+
+ /* Update framebuffers with new texture attachments */
+ mtl_front_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+ mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+#ifndef NDEBUG
+ this->label = default_fbo_mtltexture_.label;
+#endif
+ }
+ else {
+
+ /* Add default texture for cases where no other framebuffer is bound */
+ if (!default_fbo_gputexture_) {
+ default_fbo_gputexture_ = static_cast<gpu::MTLTexture *>(
+ unwrap(GPU_texture_create_2d(__func__, 16, 16, 1, GPU_RGBA16F, nullptr)));
+ }
+ mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+
+ MTL_LOG_INFO(
+ "-- Bound context %p for GPU context: %p is offscreen and does not have a default "
+ "framebuffer\n",
+ ghost_cgl_ctx,
+ this);
+#ifndef NDEBUG
+ this->label = @"Offscreen Metal Context";
+#endif
+ }
+ }
+ else {
+ MTL_LOG_INFO(
+ "[ERROR] Failed to bind GHOST context to MTLContext -- GHOST_ContextCGL is null "
+ "(GhostContext: %p, GhostContext_CGL: %p)\n",
+ ghost_ctx,
+ ghost_cgl_ctx);
+ BLI_assert(false);
+ }
+}
+
+void MTLContext::set_ghost_window(GHOST_WindowHandle ghostWinHandle)
+{
+ GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghostWinHandle);
+ this->set_ghost_context((GHOST_ContextHandle)(ghostWin ? ghostWin->getContext() : NULL));
+}
+
+/** \} */
+
/* -------------------------------------------------------------------- */
/** \name MTLContext
* \{ */
/* Placeholder functions */
-MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command_buffer(*this)
+MTLContext::MTLContext(void *ghost_window, void *ghost_context)
+ : memory_manager(*this), main_command_buffer(*this)
{
/* Init debug. */
debug::mtl_debug_init();
- /* Device creation.
- * TODO(Metal): This is a temporary initialization path to enable testing of features
- * and shader compilation tests. Future functionality should fetch the existing device
- * from GHOST_ContextCGL.mm. Plumbing to be updated in future. */
- this->device = MTLCreateSystemDefaultDevice();
+ /* Initialize render pass and framebuffer state. */
+ this->back_left = nullptr;
/* Initialize command buffer state. */
this->main_command_buffer.prepare();
@@ -47,10 +160,35 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command
is_inside_frame_ = false;
current_frame_index_ = 0;
- /* Prepare null data buffer */
+ /* Prepare null data buffer. */
null_buffer_ = nil;
null_attribute_buffer_ = nil;
+ /* Zero-initialize MTL textures. */
+ default_fbo_mtltexture_ = nil;
+ default_fbo_gputexture_ = nullptr;
+
+ /** Fetch the GHOST context and the Metal device/queue. */
+ ghost_window_ = ghost_window;
+ if (ghost_window_ && ghost_context == NULL) {
+ /* NOTE(Metal): Fetch ghost_context from ghost_window if it is not provided.
+ * Regardless of whether windowed or not, we need access to the GhostContext
+ * for presentation, and device/queue access. */
+ GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghost_window_);
+ ghost_context = (ghostWin ? ghostWin->getContext() : NULL);
+ }
+ BLI_assert(ghost_context);
+ this->ghost_context_ = static_cast<GHOST_ContextCGL *>(ghost_context);
+ this->queue = (id<MTLCommandQueue>)this->ghost_context_->metalCommandQueue();
+ this->device = (id<MTLDevice>)this->ghost_context_->metalDevice();
+ BLI_assert(this->queue);
+ BLI_assert(this->device);
+ [this->queue retain];
+ [this->device retain];
+
+ /* Register present callback. */
+ this->ghost_context_->metalRegisterPresentCallback(&present);
+
/* Create FrameBuffer handles. */
MTLFrameBuffer *mtl_front_left = new MTLFrameBuffer(this, "front_left");
MTLFrameBuffer *mtl_back_left = new MTLFrameBuffer(this, "back_left");
@@ -66,6 +204,7 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command
/* Initialize Metal modules. */
this->memory_manager.init();
this->state_manager = new MTLStateManager(this);
+ this->imm = new MTLImmediate(this);
/* Ensure global memory manager is initialized. */
MTLContext::global_memory_manager.init(this->device);
@@ -99,9 +238,29 @@ MTLContext::~MTLContext()
this->end_frame();
}
}
+
+ /* Release Memory Manager */
+ this->get_scratchbuffer_manager().free();
+
/* Release update/blit shaders. */
this->get_texture_utils().cleanup();
+ /* Detach resource references */
+ GPU_texture_unbind_all();
+
+ /* Unbind UBOs */
+ for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
+ if (this->pipeline_state.ubo_bindings[i].bound &&
+ this->pipeline_state.ubo_bindings[i].ubo != nullptr) {
+ GPUUniformBuf *ubo = wrap(
+ static_cast<UniformBuf *>(this->pipeline_state.ubo_bindings[i].ubo));
+ GPU_uniformbuf_unbind(ubo);
+ }
+ }
+
+ /* Release Dummy resources */
+ this->free_dummy_resources();
+
/* Release Sampler States. */
for (int i = 0; i < GPU_SAMPLER_MAX; i++) {
if (sampler_state_cache_[i] != nil) {
@@ -109,12 +268,28 @@ MTLContext::~MTLContext()
sampler_state_cache_[i] = nil;
}
}
+
+ /* Empty cached sampler argument buffers. */
+ for (auto entry : cached_sampler_buffers_.values()) {
+ entry->free();
+ }
+ cached_sampler_buffers_.clear();
+
+ /* Free null buffers. */
if (null_buffer_) {
[null_buffer_ release];
}
if (null_attribute_buffer_) {
[null_attribute_buffer_ release];
}
+
+ /* Free Metal objects. */
+ if (this->queue) {
+ [this->queue release];
+ }
+ if (this->device) {
+ [this->device release];
+ }
}
void MTLContext::begin_frame()
@@ -146,20 +321,49 @@ void MTLContext::check_error(const char *info)
void MTLContext::activate()
{
- /* TODO(Metal): Implement. */
+ /* Make sure no other context is already bound to this thread. */
+ BLI_assert(is_active_ == false);
+ is_active_ = true;
+ thread_ = pthread_self();
+
+ /* Re-apply ghost window/context for resizing */
+ if (ghost_window_) {
+ this->set_ghost_window((GHOST_WindowHandle)ghost_window_);
+ }
+ else if (ghost_context_) {
+ this->set_ghost_context((GHOST_ContextHandle)ghost_context_);
+ }
+
+ /* Reset UBO bind state. */
+ for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
+ if (this->pipeline_state.ubo_bindings[i].bound &&
+ this->pipeline_state.ubo_bindings[i].ubo != nullptr) {
+ this->pipeline_state.ubo_bindings[i].bound = false;
+ this->pipeline_state.ubo_bindings[i].ubo = nullptr;
+ }
+ }
+
+ /* Ensure imm active. */
+ immActivate();
}
+
void MTLContext::deactivate()
{
- /* TODO(Metal): Implement. */
+ BLI_assert(this->is_active_on_thread());
+ /* Flush context on deactivate. */
+ this->flush();
+ is_active_ = false;
+ immDeactivate();
}
void MTLContext::flush()
{
- /* TODO(Metal): Implement. */
+ this->main_command_buffer.submit(false);
}
+
void MTLContext::finish()
{
- /* TODO(Metal): Implement. */
+ this->main_command_buffer.submit(true);
}
void MTLContext::memory_statistics_get(int *total_mem, int *free_mem)
@@ -200,10 +404,8 @@ id<MTLRenderCommandEncoder> MTLContext::ensure_begin_render_pass()
/* Ensure command buffer workload submissions are optimal --
* Though do not split a batch mid-IMM recording. */
- /* TODO(Metal): Add IMM Check once MTLImmediate has been implemented. */
- if (this->main_command_buffer.do_break_submission()
- // && !((MTLImmediate *)(this->imm))->imm_is_recording()
- ) {
+ if (this->main_command_buffer.do_break_submission() &&
+ !((MTLImmediate *)(this->imm))->imm_is_recording()) {
this->flush();
}
@@ -294,6 +496,72 @@ id<MTLBuffer> MTLContext::get_null_attribute_buffer()
return null_attribute_buffer_;
}
+gpu::MTLTexture *MTLContext::get_dummy_texture(eGPUTextureType type)
+{
+  /* Texture types start at 1 and go to 32 (inclusive); decrement by 1 to remap to 0..31. */
+ gpu::MTLTexture *dummy_tex = dummy_textures_[type - 1];
+ if (dummy_tex != nullptr) {
+ return dummy_tex;
+ }
+ else {
+ GPUTexture *tex = nullptr;
+ switch (type) {
+ case GPU_TEXTURE_1D:
+ tex = GPU_texture_create_1d("Dummy 1D", 128, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_1D_ARRAY:
+ tex = GPU_texture_create_1d_array("Dummy 1DArray", 128, 1, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_2D:
+ tex = GPU_texture_create_2d("Dummy 2D", 128, 128, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_2D_ARRAY:
+ tex = GPU_texture_create_2d_array("Dummy 2DArray", 128, 128, 1, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_3D:
+ tex = GPU_texture_create_3d(
+ "Dummy 3D", 128, 128, 1, 1, GPU_RGBA8, GPU_DATA_UBYTE, nullptr);
+ break;
+ case GPU_TEXTURE_CUBE:
+ tex = GPU_texture_create_cube("Dummy Cube", 128, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_CUBE_ARRAY:
+ tex = GPU_texture_create_cube_array("Dummy CubeArray", 128, 1, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_BUFFER:
+ if (!dummy_verts_) {
+ GPU_vertformat_clear(&dummy_vertformat_);
+ GPU_vertformat_attr_add(&dummy_vertformat_, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+ dummy_verts_ = GPU_vertbuf_create_with_format_ex(&dummy_vertformat_, GPU_USAGE_STATIC);
+ GPU_vertbuf_data_alloc(dummy_verts_, 64);
+ }
+ tex = GPU_texture_create_from_vertbuf("Dummy TextureBuffer", dummy_verts_);
+ break;
+ default:
+ BLI_assert_msg(false, "Unrecognised texture type");
+ return nullptr;
+ }
+ gpu::MTLTexture *metal_tex = static_cast<gpu::MTLTexture *>(reinterpret_cast<Texture *>(tex));
+ dummy_textures_[type - 1] = metal_tex;
+ return metal_tex;
+ }
+ return nullptr;
+}
+
+void MTLContext::free_dummy_resources()
+{
+ for (int tex = 0; tex < GPU_TEXTURE_BUFFER; tex++) {
+ if (dummy_textures_[tex]) {
+ GPU_texture_free(
+ reinterpret_cast<GPUTexture *>(static_cast<Texture *>(dummy_textures_[tex])));
+ dummy_textures_[tex] = nullptr;
+ }
+ }
+ if (dummy_verts_) {
+ GPU_vertbuf_discard(dummy_verts_);
+ }
+}
+
/** \} */
/* -------------------------------------------------------------------- */
@@ -440,6 +708,755 @@ void MTLContext::set_scissor_enabled(bool scissor_enabled)
/** \} */
/* -------------------------------------------------------------------- */
+/** \name Command Encoder and pipeline state
+ * These utilities ensure that all of the globally bound resources and state have been
+ * correctly encoded within the current RenderCommandEncoder. This involves managing
+ * buffer bindings, texture bindings, depth stencil state and dynamic pipeline state.
+ *
+ * We will also trigger compilation of new PSOs where the input state has changed
+ * and a new permutation is required.
+ * All of this setup is required in order to perform a valid draw call.
+ * \{ */
+
+bool MTLContext::ensure_render_pipeline_state(MTLPrimitiveType mtl_prim_type)
+{
+ BLI_assert(this->pipeline_state.initialised);
+
+ /* Check if an active shader is bound. */
+ if (!this->pipeline_state.active_shader) {
+ MTL_LOG_WARNING("No Metal shader for bound GL shader\n");
+ return false;
+ }
+
+ /* Also ensure active shader is valid. */
+ if (!this->pipeline_state.active_shader->is_valid()) {
+ MTL_LOG_WARNING(
+ "Bound active shader is not valid (Missing/invalid implementation for Metal).\n", );
+ return false;
+ }
+
+ /* Apply global state. */
+ this->state_manager->apply_state();
+
+ /* Main command buffer tracks the current state of the render pass, based on bound
+ * MTLFrameBuffer. */
+ MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+ /* Debug Check: Ensure Framebuffer instance is not dirty. */
+ BLI_assert(!this->main_command_buffer.get_active_framebuffer()->get_dirty());
+
+ /* Fetch shader interface. */
+ MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
+ if (shader_interface == nullptr) {
+ MTL_LOG_WARNING("Bound active shader does not have a valid shader interface!\n", );
+ return false;
+ }
+
+ /* Fetch shader and bake valid PipelineStateObject (PSO) based on current
+ * shader and state combination. This PSO represents the final GPU-executable
+ * permutation of the shader. */
+ MTLRenderPipelineStateInstance *pipeline_state_instance =
+ this->pipeline_state.active_shader->bake_current_pipeline_state(
+ this, mtl_prim_type_to_topology_class(mtl_prim_type));
+ if (!pipeline_state_instance) {
+ MTL_LOG_ERROR("Failed to bake Metal pipeline state for shader: %s\n",
+ shader_interface->get_name());
+ return false;
+ }
+
+ bool result = false;
+ if (pipeline_state_instance->pso) {
+
+ /* Fetch render command encoder. A render pass should already be active.
+ * This will be NULL if invalid. */
+ id<MTLRenderCommandEncoder> rec =
+ this->main_command_buffer.get_active_render_command_encoder();
+ BLI_assert(rec);
+ if (rec == nil) {
+ MTL_LOG_ERROR("ensure_render_pipeline_state called while render pass is not active.\n");
+ return false;
+ }
+
+ /* Bind Render Pipeline State. */
+ BLI_assert(pipeline_state_instance->pso);
+ if (rps.bound_pso != pipeline_state_instance->pso) {
+ [rec setRenderPipelineState:pipeline_state_instance->pso];
+ rps.bound_pso = pipeline_state_instance->pso;
+ }
+
+ /** Ensure resource bindings. */
+ /* Texture Bindings. */
+ /* We will iterate through all texture bindings on the context and determine if any of the
+ * active slots match those in our shader interface. If so, textures will be bound. */
+ if (shader_interface->get_total_textures() > 0) {
+ this->ensure_texture_bindings(rec, shader_interface, pipeline_state_instance);
+ }
+
+ /* Transform feedback buffer binding. */
+    /* TODO(Metal): Include this code once MTLVertBuf is merged. We bind the vertex buffer to which
+ * transform feedback data will be written. */
+ // GPUVertBuf *tf_vbo =
+ // this->pipeline_state.active_shader->get_transform_feedback_active_buffer();
+ // if (tf_vbo != nullptr && pipeline_state_instance->transform_feedback_buffer_index >= 0) {
+
+ // /* Ensure primitive type is either GPU_LINES, GPU_TRIANGLES or GPU_POINT */
+ // BLI_assert(mtl_prim_type == MTLPrimitiveTypeLine ||
+ // mtl_prim_type == MTLPrimitiveTypeTriangle ||
+ // mtl_prim_type == MTLPrimitiveTypePoint);
+
+ // /* Fetch active transform feedback buffer from vertbuf */
+ // MTLVertBuf *tf_vbo_mtl = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(tf_vbo));
+ // int tf_buffer_offset = 0;
+ // id<MTLBuffer> tf_buffer_mtl = tf_vbo_mtl->get_metal_buffer(&tf_buffer_offset);
+
+ // if (tf_buffer_mtl != nil && tf_buffer_offset >= 0) {
+ // [rec setVertexBuffer:tf_buffer_mtl
+ // offset:tf_buffer_offset
+ // atIndex:pipeline_state_instance->transform_feedback_buffer_index];
+ // printf("Successfully bound VBO: %p for transform feedback (MTL Buffer: %p)\n",
+ // tf_vbo_mtl,
+ // tf_buffer_mtl);
+ // }
+ // }
+
+ /* Matrix Bindings. */
+ /* This is now called upon shader bind. We may need to re-evaluate this though,
+     * as was done here to ensure uniform changes between draws were tracked.
+ * NOTE(Metal): We may be able to remove this. */
+ GPU_matrix_bind(reinterpret_cast<struct GPUShader *>(
+ static_cast<Shader *>(this->pipeline_state.active_shader)));
+
+ /* Bind Uniforms */
+ this->ensure_uniform_buffer_bindings(rec, shader_interface, pipeline_state_instance);
+
+ /* Bind Null attribute buffer, if needed. */
+ if (pipeline_state_instance->null_attribute_buffer_index >= 0) {
+ if (G.debug & G_DEBUG_GPU) {
+ MTL_LOG_INFO("Binding null attribute buffer at index: %d\n",
+ pipeline_state_instance->null_attribute_buffer_index);
+ }
+ rps.bind_vertex_buffer(this->get_null_attribute_buffer(),
+ 0,
+ pipeline_state_instance->null_attribute_buffer_index);
+ }
+
+ /** Dynamic Per-draw Render State on RenderCommandEncoder. */
+ /* State: Viewport. */
+ if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_VIEWPORT_FLAG) {
+ MTLViewport viewport;
+ viewport.originX = (double)this->pipeline_state.viewport_offset_x;
+ viewport.originY = (double)this->pipeline_state.viewport_offset_y;
+ viewport.width = (double)this->pipeline_state.viewport_width;
+ viewport.height = (double)this->pipeline_state.viewport_height;
+ viewport.znear = this->pipeline_state.depth_stencil_state.depth_range_near;
+ viewport.zfar = this->pipeline_state.depth_stencil_state.depth_range_far;
+ [rec setViewport:viewport];
+
+ this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+ ~MTL_PIPELINE_STATE_VIEWPORT_FLAG);
+ }
+
+ /* State: Scissor. */
+ if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_SCISSOR_FLAG) {
+
+ /* Get FrameBuffer associated with active RenderCommandEncoder. */
+ MTLFrameBuffer *render_fb = this->main_command_buffer.get_active_framebuffer();
+
+ MTLScissorRect scissor;
+ if (this->pipeline_state.scissor_enabled) {
+ scissor.x = this->pipeline_state.scissor_x;
+ scissor.y = this->pipeline_state.scissor_y;
+ scissor.width = this->pipeline_state.scissor_width;
+ scissor.height = this->pipeline_state.scissor_height;
+
+        /* Some scissor assignments exceed the bounds of the viewport due to implicitly added
+         * padding to the width/height -- clamp width/height. */
+ BLI_assert(scissor.x >= 0 && scissor.x < render_fb->get_width());
+ BLI_assert(scissor.y >= 0 && scissor.y < render_fb->get_height());
+ scissor.width = min_ii(scissor.width, render_fb->get_width() - scissor.x);
+ scissor.height = min_ii(scissor.height, render_fb->get_height() - scissor.y);
+ BLI_assert(scissor.width > 0 && (scissor.x + scissor.width <= render_fb->get_width()));
+        BLI_assert(scissor.height > 0 && (scissor.y + scissor.height <= render_fb->get_height()));
+ }
+ else {
+ /* Scissor is disabled, reset to default size as scissor state may have been previously
+ * assigned on this encoder. */
+ scissor.x = 0;
+ scissor.y = 0;
+ scissor.width = render_fb->get_width();
+ scissor.height = render_fb->get_height();
+ }
+
+ /* Scissor state can still be flagged as changed if it is toggled on and off, without
+ * parameters changing between draws. */
+ if (memcmp(&scissor, &rps.last_scissor_rect, sizeof(MTLScissorRect))) {
+ [rec setScissorRect:scissor];
+ rps.last_scissor_rect = scissor;
+ }
+ this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+ ~MTL_PIPELINE_STATE_SCISSOR_FLAG);
+ }
+
+ /* State: Face winding. */
+ if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_FRONT_FACING_FLAG) {
+      /* We need to invert the face winding in Metal, to account for the inverted-Y coordinate
+ * system. */
+ MTLWinding winding = (this->pipeline_state.front_face == GPU_CLOCKWISE) ?
+ MTLWindingClockwise :
+ MTLWindingCounterClockwise;
+ [rec setFrontFacingWinding:winding];
+ this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+ ~MTL_PIPELINE_STATE_FRONT_FACING_FLAG);
+ }
+
+ /* State: cullmode. */
+ if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_CULLMODE_FLAG) {
+
+ MTLCullMode mode = MTLCullModeNone;
+ if (this->pipeline_state.culling_enabled) {
+ switch (this->pipeline_state.cull_mode) {
+ case GPU_CULL_NONE:
+ mode = MTLCullModeNone;
+ break;
+ case GPU_CULL_FRONT:
+ mode = MTLCullModeFront;
+ break;
+ case GPU_CULL_BACK:
+ mode = MTLCullModeBack;
+ break;
+ default:
+ BLI_assert_unreachable();
+ break;
+ }
+ }
+ [rec setCullMode:mode];
+ this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+ ~MTL_PIPELINE_STATE_CULLMODE_FLAG);
+ }
+
+ /* Pipeline state is now good. */
+ result = true;
+ }
+ return result;
+}
+
+/* Bind uniform buffers to an active render command encoder using the rendering state of the
+ * current context (active shader, bound UBOs). */
+bool MTLContext::ensure_uniform_buffer_bindings(
+ id<MTLRenderCommandEncoder> rec,
+ const MTLShaderInterface *shader_interface,
+ const MTLRenderPipelineStateInstance *pipeline_state_instance)
+{
+ /* Fetch Render Pass state. */
+ MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  /* Shader-owned push constant block for uniforms. */
+ bool active_shader_changed = (rps.last_bound_shader_state.shader_ !=
+ this->pipeline_state.active_shader ||
+ rps.last_bound_shader_state.shader_ == nullptr ||
+ rps.last_bound_shader_state.pso_index_ !=
+ pipeline_state_instance->shader_pso_index);
+
+ const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
+ if (push_constant_block.size > 0) {
+
+    /* Fetch uniform buffer base binding index from pipeline_state_instance - The buffer index
+     * will be offset by the number of bound VBOs. */
+ uint32_t block_size = push_constant_block.size;
+ uint32_t buffer_index = pipeline_state_instance->base_uniform_buffer_index +
+ push_constant_block.buffer_index;
+
+ /* Only need to rebind block if push constants have been modified -- or if no data is bound for
+ * the current RenderCommandEncoder. */
+ if (this->pipeline_state.active_shader->get_push_constant_is_dirty() ||
+ active_shader_changed || !rps.cached_vertex_buffer_bindings[buffer_index].is_bytes ||
+ !rps.cached_fragment_buffer_bindings[buffer_index].is_bytes || true) {
+
+ /* Bind push constant data. */
+ BLI_assert(this->pipeline_state.active_shader->get_push_constant_data() != nullptr);
+ rps.bind_vertex_bytes(
+ this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index);
+ rps.bind_fragment_bytes(
+ this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index);
+
+      /* Clear dirty flag now that the push constant block has been rebound. */
+ this->pipeline_state.active_shader->push_constant_bindstate_mark_dirty(false);
+ }
+ }
+ rps.last_bound_shader_state.set(this->pipeline_state.active_shader,
+ pipeline_state_instance->shader_pso_index);
+
+ /* Bind Global GPUUniformBuffers */
+ /* Iterate through expected UBOs in the shader interface, and check if the globally bound ones
+ * match. This is used to support the gpu_uniformbuffer module, where the uniform data is global,
+ * and not owned by the shader instance. */
+ for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) {
+ const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index);
+
+ if (ubo.buffer_index >= 0) {
+
+ const uint32_t buffer_index = ubo.buffer_index;
+ int ubo_offset = 0;
+ id<MTLBuffer> ubo_buffer = nil;
+ int ubo_size = 0;
+
+ bool bind_dummy_buffer = false;
+ if (this->pipeline_state.ubo_bindings[buffer_index].bound) {
+
+ /* Fetch UBO global-binding properties from slot. */
+ ubo_offset = 0;
+ ubo_buffer = this->pipeline_state.ubo_bindings[buffer_index].ubo->get_metal_buffer(
+ &ubo_offset);
+ ubo_size = this->pipeline_state.ubo_bindings[buffer_index].ubo->get_size();
+
+        /* Use the shared null buffer if no Metal buffer is assigned -- this is an optimization
+         * to avoid allocating dedicated zero buffers. */
+ if (ubo_buffer == nil) {
+ bind_dummy_buffer = true;
+ }
+ else {
+ BLI_assert(ubo_buffer != nil);
+ BLI_assert(ubo_size > 0);
+
+ if (pipeline_state_instance->reflection_data_available) {
+ /* NOTE: While the vertex and fragment stages have different UBOs, the indices in each
+ * case will be the same for the same UBO.
+ * We also determine expected size and then ensure buffer of the correct size
+ * exists in one of the vertex/fragment shader binding tables. This path is used
+ * to verify that the size of the bound UBO matches what is expected in the shader. */
+ uint32_t expected_size =
+ (buffer_index <
+ pipeline_state_instance->buffer_bindings_reflection_data_vert.size()) ?
+ pipeline_state_instance->buffer_bindings_reflection_data_vert[buffer_index]
+ .size :
+ 0;
+ if (expected_size == 0) {
+ expected_size =
+ (buffer_index <
+ pipeline_state_instance->buffer_bindings_reflection_data_frag.size()) ?
+ pipeline_state_instance->buffer_bindings_reflection_data_frag[buffer_index]
+ .size :
+ 0;
+ }
+ BLI_assert_msg(
+ expected_size > 0,
+ "Shader interface expects UBO, but shader reflection data reports that it "
+ "is not present");
+
+ /* If ubo size is smaller than the size expected by the shader, we need to bind the
+ * dummy buffer, which will be big enough, to avoid an OOB error. */
+ if (ubo_size < expected_size) {
+ MTL_LOG_INFO(
+ "[Error][UBO] UBO (UBO Name: %s) bound at index: %d with size %d (Expected size "
+ "%d) (Shader Name: %s) is too small -- binding NULL buffer. This is likely an "
+ "over-binding, which is not used, but we need this to avoid validation "
+ "issues\n",
+ shader_interface->get_name_at_offset(ubo.name_offset),
+ buffer_index,
+ ubo_size,
+ expected_size,
+ shader_interface->get_name());
+ bind_dummy_buffer = true;
+ }
+ }
+ }
+ }
+ else {
+ MTL_LOG_INFO(
+ "[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- but "
+ "nothing was bound -- binding dummy buffer\n",
+ shader_interface->get_name(),
+ shader_interface->get_name_at_offset(ubo.name_offset),
+ buffer_index);
+ bind_dummy_buffer = true;
+ }
+
+ if (bind_dummy_buffer) {
+ /* Perform Dummy binding. */
+ ubo_offset = 0;
+ ubo_buffer = this->get_null_buffer();
+ ubo_size = [ubo_buffer length];
+ }
+
+ if (ubo_buffer != nil) {
+
+ uint32_t buffer_bind_index = pipeline_state_instance->base_uniform_buffer_index +
+ buffer_index;
+
+ /* Bind Vertex UBO. */
+ if (bool(ubo.stage_mask & ShaderStage::VERTEX)) {
+ BLI_assert(buffer_bind_index >= 0 &&
+ buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ rps.bind_vertex_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
+ }
+
+ /* Bind Fragment UBOs. */
+ if (bool(ubo.stage_mask & ShaderStage::FRAGMENT)) {
+ BLI_assert(buffer_bind_index >= 0 &&
+ buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ rps.bind_fragment_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
+ }
+ }
+ else {
+ MTL_LOG_WARNING(
+ "[UBO] Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer "
+ "is NULL!\n",
+ shader_interface->get_name(),
+ shader_interface->get_name_at_offset(ubo.name_offset),
+ buffer_index);
+ }
+ }
+ }
+ return true;
+}
+
+/* Ensure texture bindings are correct and up to date for current draw call. */
+void MTLContext::ensure_texture_bindings(
+ id<MTLRenderCommandEncoder> rec,
+ MTLShaderInterface *shader_interface,
+ const MTLRenderPipelineStateInstance *pipeline_state_instance)
+{
+ BLI_assert(shader_interface != nil);
+ BLI_assert(rec != nil);
+
+ /* Fetch Render Pass state. */
+ MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+ @autoreleasepool {
+ int vertex_arg_buffer_bind_index = -1;
+ int fragment_arg_buffer_bind_index = -1;
+
+ /* Argument buffers are used for samplers, when the limit of 16 is exceeded. */
+ bool use_argument_buffer_for_samplers = shader_interface->get_use_argument_buffer_for_samplers(
+ &vertex_arg_buffer_bind_index, &fragment_arg_buffer_bind_index);
+
+ /* Loop through expected textures in shader interface and resolve bindings with currently
+     * bound textures. */
+ for (const uint t : IndexRange(shader_interface->get_max_texture_index() + 1)) {
+ /* Ensure the bound texture is compatible with the shader interface. If the
+ * shader does not expect a texture to be bound for the current slot, we skip
+ * binding.
+ * NOTE: Global texture bindings may be left over from prior draw calls. */
+ const MTLShaderTexture &shader_texture_info = shader_interface->get_texture(t);
+ if (!shader_texture_info.used) {
+ /* Skip unused binding points if explicit indices are specified. */
+ continue;
+ }
+
+ int slot = shader_texture_info.slot_index;
+ if (slot >= 0 && slot < GPU_max_textures()) {
+ bool bind_dummy_texture = true;
+ if (this->pipeline_state.texture_bindings[slot].used) {
+ gpu::MTLTexture *bound_texture =
+ this->pipeline_state.texture_bindings[slot].texture_resource;
+ MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot];
+ BLI_assert(bound_texture);
+ BLI_assert(bound_sampler.used);
+
+ if (shader_texture_info.type == bound_texture->type_) {
+ /* Bind texture and sampler if the bound texture matches the type expected by the
+ * shader. */
+ id<MTLTexture> tex = bound_texture->get_metal_handle();
+
+ if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) {
+ rps.bind_vertex_texture(tex, slot);
+ rps.bind_vertex_sampler(bound_sampler, use_argument_buffer_for_samplers, slot);
+ }
+
+ if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) {
+ rps.bind_fragment_texture(tex, slot);
+ rps.bind_fragment_sampler(bound_sampler, use_argument_buffer_for_samplers, slot);
+ }
+
+ /* Texture state resolved, no need to bind dummy texture */
+ bind_dummy_texture = false;
+ }
+ else {
+ /* Texture type for bound texture (e.g. Texture2DArray) does not match what was
+ * expected in the shader interface. This is a problem and we will need to bind
+ * a dummy texture to ensure correct API usage. */
+ MTL_LOG_WARNING(
+ "(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong "
+ "texture target type. (Expecting type %d, actual type %d) (binding "
+ "name:'%s')(texture name:'%s')\n",
+ shader_interface->get_name(),
+ bound_texture,
+ slot,
+ shader_texture_info.type,
+ bound_texture->type_,
+ shader_interface->get_name_at_offset(shader_texture_info.name_offset),
+ bound_texture->get_name());
+ }
+ }
+ else {
+ MTL_LOG_WARNING(
+ "Shader '%s' expected texture to be bound to slot %d -- No texture was "
+ "bound. (name:'%s')\n",
+ shader_interface->get_name(),
+ slot,
+ shader_interface->get_name_at_offset(shader_texture_info.name_offset));
+ }
+
+ /* Bind Dummy texture -- will temporarily resolve validation issues while incorrect formats
+ * are provided -- as certain configurations may not need any binding. These issues should
+           * be fixed at the high level, if problems crop up. */
+ if (bind_dummy_texture) {
+ if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) {
+ rps.bind_vertex_texture(
+ get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot);
+
+ /* Bind default sampler state. */
+ MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
+ rps.bind_vertex_sampler(default_binding, use_argument_buffer_for_samplers, slot);
+ }
+ if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) {
+ rps.bind_fragment_texture(
+ get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot);
+
+ /* Bind default sampler state. */
+ MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
+ rps.bind_fragment_sampler(default_binding, use_argument_buffer_for_samplers, slot);
+ }
+ }
+ }
+ else {
+ MTL_LOG_WARNING(
+ "Shader %p expected texture to be bound to slot %d -- Slot exceeds the "
+ "hardware/API limit of '%d'. (name:'%s')\n",
+ this->pipeline_state.active_shader,
+ slot,
+ GPU_max_textures(),
+ shader_interface->get_name_at_offset(shader_texture_info.name_offset));
+ }
+ }
+
+ /* Construct and Bind argument buffer.
+ * NOTE(Metal): Samplers use an argument buffer when the limit of 16 samplers is exceeded. */
+ if (use_argument_buffer_for_samplers) {
+#ifndef NDEBUG
+ /* Debug check to validate each expected texture in the shader interface has a valid
+ * sampler object bound to the context. We will need all of these to be valid
+ * when constructing the sampler argument buffer. */
+ for (const uint i : IndexRange(shader_interface->get_max_texture_index() + 1)) {
+ const MTLShaderTexture &texture = shader_interface->get_texture(i);
+ if (texture.used) {
+ BLI_assert(this->samplers_.mtl_sampler[i] != nil);
+ }
+ }
+#endif
+
+ /* Check to ensure the buffer binding index for the argument buffer has been assigned.
+ * This PSO property will be set if we expect to use argument buffers, and the shader
+ * uses any amount of textures. */
+ BLI_assert(vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0);
+ if (vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0) {
+ /* Offset binding index to be relative to the start of static uniform buffer binding slots.
+ * The first N slots, prior to `pipeline_state_instance->base_uniform_buffer_index` are
+ * used by vertex and index buffer bindings, and the number of buffers present will vary
+ * between PSOs. */
+ int arg_buffer_idx = (pipeline_state_instance->base_uniform_buffer_index +
+ vertex_arg_buffer_bind_index);
+        BLI_assert(arg_buffer_idx < 32);
+ id<MTLArgumentEncoder> argument_encoder = shader_interface->find_argument_encoder(
+ arg_buffer_idx);
+ if (argument_encoder == nil) {
+ argument_encoder = [pipeline_state_instance->vert
+ newArgumentEncoderWithBufferIndex:arg_buffer_idx];
+ shader_interface->insert_argument_encoder(arg_buffer_idx, argument_encoder);
+ }
+
+ /* Generate or Fetch argument buffer sampler configuration.
+       * NOTE(Metal): we need to base sampler counts off of the maximal texture
+       * index. This is not the most optimal, but in practice it is not a common
+       * use-case when argument buffers are required.
+       * This is because with explicit texture indices, the binding indices
+       * should match across draws, to allow the high level to optimize bind-points. */
+ gpu::MTLBuffer *encoder_buffer = nullptr;
+ this->samplers_.num_samplers = shader_interface->get_max_texture_index() + 1;
+
+ gpu::MTLBuffer **cached_smp_buffer_search = this->cached_sampler_buffers_.lookup_ptr(
+ this->samplers_);
+ if (cached_smp_buffer_search != nullptr) {
+ encoder_buffer = *cached_smp_buffer_search;
+ }
+ else {
+ /* Populate argument buffer with current global sampler bindings. */
+ int size = [argument_encoder encodedLength];
+ int alignment = max_uu([argument_encoder alignment], 256);
+ int size_align_delta = (size % alignment);
+ int aligned_alloc_size = ((alignment > 1) && (size_align_delta > 0)) ?
+ size + (alignment - (size % alignment)) :
+ size;
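+          /* e.g. an encoded length of 72 bytes with 256 byte alignment rounds up to 256. */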
+
+ /* Allocate buffer to store encoded sampler arguments. */
+ encoder_buffer = MTLContext::get_global_memory_manager().allocate(aligned_alloc_size,
+ true);
+ BLI_assert(encoder_buffer);
+ BLI_assert(encoder_buffer->get_metal_buffer());
+ [argument_encoder setArgumentBuffer:encoder_buffer->get_metal_buffer() offset:0];
+ [argument_encoder
+ setSamplerStates:this->samplers_.mtl_sampler
+ withRange:NSMakeRange(0, shader_interface->get_max_texture_index() + 1)];
+ encoder_buffer->flush();
+
+ /* Insert into cache. */
+ this->cached_sampler_buffers_.add_new(this->samplers_, encoder_buffer);
+ }
+
+ BLI_assert(encoder_buffer != nullptr);
+ int vert_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
+ vertex_arg_buffer_bind_index);
+ rps.bind_vertex_buffer(encoder_buffer->get_metal_buffer(), 0, vert_buffer_index);
+
+      /* Fragment shader shares its argument buffer binding with the vertex shader, so no need to
+ * re-encode. We can use the same argument buffer. */
+ if (fragment_arg_buffer_bind_index >= 0) {
+        BLI_assert(fragment_arg_buffer_bind_index >= 0);
+ int frag_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
+ fragment_arg_buffer_bind_index);
+ rps.bind_fragment_buffer(encoder_buffer->get_metal_buffer(), 0, frag_buffer_index);
+ }
+ }
+ }
+ }
+}
+
+/* Encode latest depth-stencil state. */
+void MTLContext::ensure_depth_stencil_state(MTLPrimitiveType prim_type)
+{
+ /* Check if we need to update state. */
+ if (!(this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG)) {
+ return;
+ }
+
+ /* Fetch render command encoder. */
+ id<MTLRenderCommandEncoder> rec = this->main_command_buffer.get_active_render_command_encoder();
+ BLI_assert(rec);
+
+ /* Fetch Render Pass state. */
+ MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+ /** Prepare Depth-stencil state based on current global pipeline state. */
+ MTLFrameBuffer *fb = this->get_current_framebuffer();
+ bool hasDepthTarget = fb->has_depth_attachment();
+ bool hasStencilTarget = fb->has_stencil_attachment();
+
+ if (hasDepthTarget || hasStencilTarget) {
+ /* Update FrameBuffer State. */
+ this->pipeline_state.depth_stencil_state.has_depth_target = hasDepthTarget;
+ this->pipeline_state.depth_stencil_state.has_stencil_target = hasStencilTarget;
+
+ /* Check if current MTLContextDepthStencilState maps to an existing state object in
+ * the Depth-stencil state cache. */
+ id<MTLDepthStencilState> ds_state = nil;
+ id<MTLDepthStencilState> *depth_stencil_state_lookup =
+ this->depth_stencil_state_cache.lookup_ptr(this->pipeline_state.depth_stencil_state);
+
+ /* If not, populate DepthStencil state descriptor. */
+ if (depth_stencil_state_lookup == nullptr) {
+
+ MTLDepthStencilDescriptor *ds_state_desc = [[[MTLDepthStencilDescriptor alloc] init]
+ autorelease];
+
+ if (hasDepthTarget) {
+ ds_state_desc.depthWriteEnabled =
+ this->pipeline_state.depth_stencil_state.depth_write_enable;
+ ds_state_desc.depthCompareFunction =
+ this->pipeline_state.depth_stencil_state.depth_test_enabled ?
+ this->pipeline_state.depth_stencil_state.depth_function :
+ MTLCompareFunctionAlways;
+ }
+
+ if (hasStencilTarget) {
+ ds_state_desc.backFaceStencil.readMask =
+ this->pipeline_state.depth_stencil_state.stencil_read_mask;
+ ds_state_desc.backFaceStencil.writeMask =
+ this->pipeline_state.depth_stencil_state.stencil_write_mask;
+ ds_state_desc.backFaceStencil.stencilFailureOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_back_stencil_fail;
+ ds_state_desc.backFaceStencil.depthFailureOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_back_depth_fail;
+ ds_state_desc.backFaceStencil.depthStencilPassOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_back_depthstencil_pass;
+ ds_state_desc.backFaceStencil.stencilCompareFunction =
+ (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
+ this->pipeline_state.depth_stencil_state.stencil_func :
+ MTLCompareFunctionAlways;
+
+ ds_state_desc.frontFaceStencil.readMask =
+ this->pipeline_state.depth_stencil_state.stencil_read_mask;
+ ds_state_desc.frontFaceStencil.writeMask =
+ this->pipeline_state.depth_stencil_state.stencil_write_mask;
+ ds_state_desc.frontFaceStencil.stencilFailureOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_front_stencil_fail;
+ ds_state_desc.frontFaceStencil.depthFailureOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_front_depth_fail;
+ ds_state_desc.frontFaceStencil.depthStencilPassOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_front_depthstencil_pass;
+ ds_state_desc.frontFaceStencil.stencilCompareFunction =
+ (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
+ this->pipeline_state.depth_stencil_state.stencil_func :
+ MTLCompareFunctionAlways;
+ }
+
+ /* Bake new DS state. */
+ ds_state = [this->device newDepthStencilStateWithDescriptor:ds_state_desc];
+
+ /* Store state in cache. */
+ BLI_assert(ds_state != nil);
+ this->depth_stencil_state_cache.add_new(this->pipeline_state.depth_stencil_state, ds_state);
+ }
+ else {
+ ds_state = *depth_stencil_state_lookup;
+ BLI_assert(ds_state != nil);
+ }
+
+ /* Bind Depth Stencil State to render command encoder. */
+ BLI_assert(ds_state != nil);
+ if (ds_state != nil) {
+ if (rps.bound_ds_state != ds_state) {
+ [rec setDepthStencilState:ds_state];
+ rps.bound_ds_state = ds_state;
+ }
+ }
+
+ /* Apply dynamic depth-stencil state on encoder. */
+ if (hasStencilTarget) {
+ uint32_t stencil_ref_value =
+ (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
+ this->pipeline_state.depth_stencil_state.stencil_ref :
+ 0;
+ if (stencil_ref_value != rps.last_used_stencil_ref_value) {
+ [rec setStencilReferenceValue:stencil_ref_value];
+ rps.last_used_stencil_ref_value = stencil_ref_value;
+ }
+ }
+
+ if (hasDepthTarget) {
+ bool doBias = false;
+ switch (prim_type) {
+ case MTLPrimitiveTypeTriangle:
+ case MTLPrimitiveTypeTriangleStrip:
+ doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_tris;
+ break;
+ case MTLPrimitiveTypeLine:
+ case MTLPrimitiveTypeLineStrip:
+ doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_lines;
+ break;
+ case MTLPrimitiveTypePoint:
+ doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_points;
+ break;
+ }
+ [rec setDepthBias:(doBias) ? this->pipeline_state.depth_stencil_state.depth_bias : 0
+ slopeScale:(doBias) ? this->pipeline_state.depth_stencil_state.depth_slope_scale : 0
+ clamp:0];
+ }
+ }
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
/** \name Visibility buffer control for MTLQueryPool.
* \{ */
@@ -606,4 +1623,148 @@ id<MTLSamplerState> MTLContext::get_default_sampler_state()
/** \} */
+/* -------------------------------------------------------------------- */
+/** \name Swapchain management and Metal presentation.
+ * \{ */
+
+void present(MTLRenderPassDescriptor *blit_descriptor,
+ id<MTLRenderPipelineState> blit_pso,
+ id<MTLTexture> swapchain_texture,
+ id<CAMetalDrawable> drawable)
+{
+
+ MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
+ BLI_assert(ctx);
+
+  /* Flush any outstanding work. */
+ ctx->flush();
+
+ /* Always pace CPU to maximum of 3 drawables in flight.
+ * nextDrawable may have more in flight if backing swapchain
+   * textures are re-allocated, such as during resize events.
+ *
+ * Determine frames in flight based on current latency. If
+ * we are in a high-latency situation, limit frames in flight
+ * to increase app responsiveness and keep GPU execution under control.
+ * If latency improves, increase frames in flight to improve overall
+ * performance. */
+ int perf_max_drawables = MTL_MAX_DRAWABLES;
+ if (MTLContext::avg_drawable_latency_us > 185000) {
+ perf_max_drawables = 1;
+ }
+ else if (MTLContext::avg_drawable_latency_us > 85000) {
+ perf_max_drawables = 2;
+ }
+
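+  /* Busy-wait in 2 ms steps; the command buffer completion handler below
+   * decrements the in-flight drawable count. */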
+ while (MTLContext::max_drawables_in_flight > min_ii(perf_max_drawables, MTL_MAX_DRAWABLES)) {
+ PIL_sleep_ms(2);
+ }
+
+  /* Present is submitted in its own command buffer to ensure the drawable reference is released
+   * as early as possible. This command buffer is separate as it does not utilize the global
+   * state for rendering as the main context does. */
+ id<MTLCommandBuffer> cmdbuf = [ctx->queue commandBuffer];
+ MTLCommandBufferManager::num_active_cmd_bufs++;
+
+ if (MTLCommandBufferManager::sync_event != nil) {
+ /* Ensure command buffer ordering. */
+ [cmdbuf encodeWaitForEvent:MTLCommandBufferManager::sync_event
+ value:MTLCommandBufferManager::event_signal_val];
+ }
+
+ /* Do Present Call and final Blit to MTLDrawable. */
+ id<MTLRenderCommandEncoder> enc = [cmdbuf renderCommandEncoderWithDescriptor:blit_descriptor];
+ [enc setRenderPipelineState:blit_pso];
+ [enc setFragmentTexture:swapchain_texture atIndex:0];
+ [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3];
+ [enc endEncoding];
+
+ /* Present drawable. */
+ BLI_assert(drawable);
+ [cmdbuf presentDrawable:drawable];
+
+ /* Ensure freed buffers have usage tracked against active CommandBuffer submissions. */
+ MTLSafeFreeList *cmd_free_buffer_list =
+ MTLContext::get_global_memory_manager().get_current_safe_list();
+ BLI_assert(cmd_free_buffer_list);
+
+ id<MTLCommandBuffer> cmd_buffer_ref = cmdbuf;
+ [cmd_buffer_ref retain];
+
+ /* Increment drawables in flight limiter. */
+ MTLContext::max_drawables_in_flight++;
+ std::chrono::time_point submission_time = std::chrono::high_resolution_clock::now();
+
+ /* Increment free pool reference and decrement upon command buffer completion. */
+ cmd_free_buffer_list->increment_reference();
+ [cmdbuf addCompletedHandler:^(id<MTLCommandBuffer> cb) {
+ /* Flag freed buffers associated with this CMD buffer as ready to be freed. */
+ cmd_free_buffer_list->decrement_reference();
+ [cmd_buffer_ref release];
+
+ /* Decrement count */
+ MTLCommandBufferManager::num_active_cmd_bufs--;
+ MTL_LOG_INFO("[Metal] Active command buffers: %d\n",
+ MTLCommandBufferManager::num_active_cmd_bufs);
+
+ /* Drawable count and latency management. */
+ MTLContext::max_drawables_in_flight--;
+ std::chrono::time_point completion_time = std::chrono::high_resolution_clock::now();
+ int64_t microseconds_per_frame = std::chrono::duration_cast<std::chrono::microseconds>(
+ completion_time - submission_time)
+ .count();
+ MTLContext::latency_resolve_average(microseconds_per_frame);
+
+ MTL_LOG_INFO("Frame Latency: %f ms (Rolling avg: %f ms Drawables: %d)\n",
+ ((float)microseconds_per_frame) / 1000.0f,
+ ((float)MTLContext::avg_drawable_latency_us) / 1000.0f,
+ perf_max_drawables);
+ }];
+
+ if (MTLCommandBufferManager::sync_event == nil) {
+ MTLCommandBufferManager::sync_event = [ctx->device newEvent];
+ BLI_assert(MTLCommandBufferManager::sync_event);
+ [MTLCommandBufferManager::sync_event retain];
+ }
+ BLI_assert(MTLCommandBufferManager::sync_event != nil);
+
+ MTLCommandBufferManager::event_signal_val++;
+ [cmdbuf encodeSignalEvent:MTLCommandBufferManager::sync_event
+ value:MTLCommandBufferManager::event_signal_val];
+
+ [cmdbuf commit];
+
+ /* When debugging, fetch advanced command buffer errors. */
+ if (G.debug & G_DEBUG_GPU) {
+ [cmdbuf waitUntilCompleted];
+ NSError *error = [cmdbuf error];
+ if (error != nil) {
+ NSLog(@"%@", error);
+ BLI_assert(false);
+
+ @autoreleasepool {
+ const char *stringAsChar = [[NSString stringWithFormat:@"%@", error] UTF8String];
+
+ std::ofstream outfile;
+ outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
+ outfile << stringAsChar;
+ outfile.close();
+ }
+ }
+ else {
+ @autoreleasepool {
+ NSString *str = @"Command buffer completed successfully!\n";
+ const char *stringAsChar = [str UTF8String];
+
+ std::ofstream outfile;
+ outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
+ outfile << stringAsChar;
+ outfile.close();
+ }
+ }
+ }
+}
+
+/** \} */
+
} // blender::gpu
diff --git a/source/blender/gpu/metal/mtl_drawlist.hh b/source/blender/gpu/metal/mtl_drawlist.hh
new file mode 100644
index 00000000000..9eb465b26a0
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_drawlist.hh
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Implementation of Multi Draw Indirect for the Metal backend.
+ * Currently a temporary stub (see MTLDrawList below).
+ */
+
+#pragma once
+
+#include "gpu_drawlist_private.hh"
+
+namespace blender {
+namespace gpu {
+
+/**
+ * TODO(Metal): MTLDrawList Implementation. Included as temporary stub.
+ */
+class MTLDrawList : public DrawList {
+ public:
+ MTLDrawList(int length) {}
+ ~MTLDrawList() {}
+
+ void append(GPUBatch *batch, int i_first, int i_count) override {}
+ void submit() override {}
+
+ MEM_CXX_CLASS_ALLOC_FUNCS("MTLDrawList");
+};
+
+} // namespace gpu
+} // namespace blender
diff --git a/source/blender/gpu/metal/mtl_immediate.hh b/source/blender/gpu/metal/mtl_immediate.hh
new file mode 100644
index 00000000000..b743efb397d
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_immediate.hh
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Mimics old style OpenGL immediate mode drawing.
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+#include "gpu_immediate_private.hh"
+
+#include <Cocoa/Cocoa.h>
+#include <Metal/Metal.h>
+#include <QuartzCore/QuartzCore.h>
+
+namespace blender::gpu {
+
+class MTLImmediate : public Immediate {
+ private:
+ MTLContext *context_ = nullptr;
+ MTLTemporaryBuffer current_allocation_;
+ MTLPrimitiveTopologyClass metal_primitive_mode_;
+ MTLPrimitiveType metal_primitive_type_;
+ bool has_begun_ = false;
+
+ public:
+ MTLImmediate(MTLContext *ctx);
+ ~MTLImmediate();
+
+ uchar *begin() override;
+ void end() override;
+ bool imm_is_recording()
+ {
+ return has_begun_;
+ }
+};
+
+} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_immediate.mm b/source/blender/gpu/metal/mtl_immediate.mm
new file mode 100644
index 00000000000..41632e39092
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_immediate.mm
@@ -0,0 +1,397 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Mimics old style OpenGL immediate mode drawing.
+ */
+
+#include "BKE_global.h"
+
+#include "GPU_vertex_format.h"
+#include "gpu_context_private.hh"
+#include "gpu_shader_private.hh"
+#include "gpu_vertex_format_private.h"
+
+#include "mtl_context.hh"
+#include "mtl_debug.hh"
+#include "mtl_immediate.hh"
+#include "mtl_primitive.hh"
+#include "mtl_shader.hh"
+
+namespace blender::gpu {
+
+MTLImmediate::MTLImmediate(MTLContext *ctx)
+{
+ context_ = ctx;
+}
+
+MTLImmediate::~MTLImmediate()
+{
+}
+
+uchar *MTLImmediate::begin()
+{
+ BLI_assert(!has_begun_);
+
+ /* Determine primitive type. */
+ metal_primitive_type_ = gpu_prim_type_to_metal(this->prim_type);
+ metal_primitive_mode_ = mtl_prim_type_to_topology_class(metal_primitive_type_);
+ has_begun_ = true;
+
+ /* Allocate a range of data and return host-accessible pointer. */
+ const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_len);
+ current_allocation_ = context_->get_scratchbuffer_manager()
+ .scratch_buffer_allocate_range_aligned(bytes_needed, 256);
+ [current_allocation_.metal_buffer retain];
+ return reinterpret_cast<uchar *>(current_allocation_.data);
+}
+
+void MTLImmediate::end()
+{
+  /* Ensure we're between an imm::begin/imm::end pair. */
+ BLI_assert(has_begun_);
+ BLI_assert(prim_type != GPU_PRIM_NONE);
+
+ /* Verify context is valid, vertex data is written and a valid shader is bound. */
+ if (context_ && this->vertex_idx > 0 && this->shader) {
+
+ MTLShader *active_mtl_shader = static_cast<MTLShader *>(unwrap(shader));
+
+ /* Skip draw if Metal shader is not valid. */
+ if (active_mtl_shader == nullptr || !active_mtl_shader->is_valid() ||
+ active_mtl_shader->get_interface() == nullptr) {
+
+ const char *ptr = (active_mtl_shader) ? active_mtl_shader->name_get() : nullptr;
+ MTL_LOG_WARNING(
+ "MTLImmediate::end -- cannot perform draw as active shader is NULL or invalid (likely "
+ "unimplemented) (shader %p '%s')\n",
+ active_mtl_shader,
+ ptr);
+ return;
+ }
+
+ /* Ensure we are inside a render pass and fetch active RenderCommandEncoder. */
+ id<MTLRenderCommandEncoder> rec = context_->ensure_begin_render_pass();
+ BLI_assert(rec != nil);
+
+ /* Fetch active render pipeline state. */
+ MTLRenderPassState &rps = context_->main_command_buffer.get_render_pass_state();
+
+ /* Bind Shader. */
+ GPU_shader_bind(this->shader);
+
+ /* Debug markers for frame-capture and detailed error messages. */
+ if (G.debug & G_DEBUG_GPU) {
+ [rec pushDebugGroup:[NSString
+ stringWithFormat:@"immEnd(verts: %d, shader: %s)",
+ this->vertex_idx,
+ active_mtl_shader->get_interface()->get_name()]];
+ [rec insertDebugSignpost:[NSString stringWithFormat:@"immEnd(verts: %d, shader: %s)",
+ this->vertex_idx,
+ active_mtl_shader->get_interface()
+ ->get_name()]];
+ }
+
+ /* Populate pipeline state vertex descriptor. */
+ MTLStateManager *state_manager = static_cast<MTLStateManager *>(
+ MTLContext::get()->state_manager);
+ MTLRenderPipelineStateDescriptor &desc = state_manager->get_pipeline_descriptor();
+ const MTLShaderInterface *interface = active_mtl_shader->get_interface();
+
+ desc.vertex_descriptor.num_attributes = interface->get_total_attributes();
+ desc.vertex_descriptor.num_vert_buffers = 1;
+
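+    /* Reset attributes; any not matched below remain MTLVertexFormatInvalid. */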
+ for (int i = 0; i < desc.vertex_descriptor.num_attributes; i++) {
+ desc.vertex_descriptor.attributes[i].format = MTLVertexFormatInvalid;
+ }
+ desc.vertex_descriptor.uses_ssbo_vertex_fetch =
+ active_mtl_shader->get_uses_ssbo_vertex_fetch();
+ desc.vertex_descriptor.num_ssbo_attributes = 0;
+
+ /* SSBO Vertex Fetch -- Verify Attributes. */
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+ active_mtl_shader->ssbo_vertex_fetch_bind_attributes_begin();
+
+ /* Disable Indexed rendering in SSBO vertex fetch. */
+ int uniform_ssbo_use_indexed = active_mtl_shader->uni_ssbo_uses_indexed_rendering;
+      BLI_assert_msg(uniform_ssbo_use_indexed != -1,
+                     "Expected valid uniform location for ssbo_uses_indexed_rendering.");
+ int uses_indexed_rendering = 0;
+ active_mtl_shader->uniform_int(uniform_ssbo_use_indexed, 1, 1, &uses_indexed_rendering);
+ }
+
+ /* Populate Vertex descriptor and verify attributes.
+ * TODO(Metal): Cache this vertex state based on Vertex format and shaders. */
+ for (int i = 0; i < interface->get_total_attributes(); i++) {
+
+ /* Note: Attribute in VERTEX FORMAT does not necessarily share the same array index as
+ * attributes in shader interface. */
+ GPUVertAttr *attr = nullptr;
+ const MTLShaderInputAttribute &mtl_shader_attribute = interface->get_attribute(i);
+
+ /* Scan through vertex_format attributes until one with a name matching the shader interface
+ * is found. */
+ for (uint32_t a_idx = 0; a_idx < this->vertex_format.attr_len && attr == nullptr; a_idx++) {
+ GPUVertAttr *check_attribute = &this->vertex_format.attrs[a_idx];
+
+ /* Attributes can have multiple name aliases associated with them. */
+ for (uint32_t n_idx = 0; n_idx < check_attribute->name_len; n_idx++) {
+ const char *name = GPU_vertformat_attr_name_get(
+ &this->vertex_format, check_attribute, n_idx);
+
+ if (strcmp(name, interface->get_name_at_offset(mtl_shader_attribute.name_offset)) == 0) {
+ attr = check_attribute;
+ break;
+ }
+ }
+ }
+
+ BLI_assert_msg(attr != nullptr,
+ "Could not find expected attribute in immediate mode vertex format.");
+ if (attr == nullptr) {
+ MTL_LOG_ERROR(
+ "MTLImmediate::end Could not find matching attribute '%s' from Shader Interface in "
+ "Vertex Format! - TODO: Bind Dummy attribute\n",
+ interface->get_name_at_offset(mtl_shader_attribute.name_offset));
+ return;
+ }
+
+ /* Determine whether implicit type conversion between input vertex format
+ * and shader interface vertex format is supported. */
+ MTLVertexFormat convertedFormat;
+ bool can_use_implicit_conversion = mtl_convert_vertex_format(
+ mtl_shader_attribute.format,
+ (GPUVertCompType)attr->comp_type,
+ attr->comp_len,
+ (GPUVertFetchMode)attr->fetch_mode,
+ &convertedFormat);
+
+ if (can_use_implicit_conversion) {
+ /* Metal API can implicitly convert some formats during vertex assembly:
+ * - Converting from a normalized short2 format to float2
+ * - Type truncation e.g. Float4 to Float2.
+ * - Type expansion from Float3 to Float4.
+ * - Note: extra components are filled with the corresponding components of (0,0,0,1).
+ * (See
+ * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format)
+ */
+ bool is_floating_point_format = (attr->comp_type == GPU_COMP_F32);
+ desc.vertex_descriptor.attributes[i].format = convertedFormat;
+ desc.vertex_descriptor.attributes[i].format_conversion_mode =
+ (is_floating_point_format) ? (GPUVertFetchMode)GPU_FETCH_FLOAT :
+ (GPUVertFetchMode)GPU_FETCH_INT;
+ BLI_assert(convertedFormat != MTLVertexFormatInvalid);
+ }
+ else {
+ /* Some conversions are NOT valid, e.g. Int4 to Float4
+ * - In this case, we need to implement a conversion routine inside the shader.
+ * - This is handled using the format_conversion_mode flag
+       * - This flag is passed into the PSO as a function specialization,
+       * and will generate an appropriate conversion function when reading the vertex attribute
+       * value into local shader storage.
+       * (If no explicit conversion is needed, the function specializes to a pass-through). */
+ MTLVertexFormat converted_format;
+ bool can_convert = mtl_vertex_format_resize(
+ mtl_shader_attribute.format, attr->comp_len, &converted_format);
+ desc.vertex_descriptor.attributes[i].format = (can_convert) ? converted_format :
+ mtl_shader_attribute.format;
+ desc.vertex_descriptor.attributes[i].format_conversion_mode = (GPUVertFetchMode)
+ attr->fetch_mode;
+ BLI_assert(desc.vertex_descriptor.attributes[i].format != MTLVertexFormatInvalid);
+ }
+      /* Use the attribute offset from the vertex format, as attribute order in the format may
+       * differ from the order in the shader interface. */
+ desc.vertex_descriptor.attributes[i].offset = attr->offset;
+ desc.vertex_descriptor.attributes[i].buffer_index = mtl_shader_attribute.buffer_index;
+
+ /* SSBO Vertex Fetch Attribute bind. */
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+ BLI_assert_msg(mtl_shader_attribute.buffer_index == 0,
+ "All attributes should be in buffer index zero");
+ MTLSSBOAttribute ssbo_attr(
+ mtl_shader_attribute.index,
+ mtl_shader_attribute.buffer_index,
+ attr->offset,
+ this->vertex_format.stride,
+ MTLShader::ssbo_vertex_type_to_attr_type(desc.vertex_descriptor.attributes[i].format),
+ false);
+ desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] =
+ ssbo_attr;
+ desc.vertex_descriptor.num_ssbo_attributes++;
+ active_mtl_shader->ssbo_vertex_fetch_bind_attribute(ssbo_attr);
+ }
+ }
+
+ /* Buffer bindings for singular vertex buffer. */
+ desc.vertex_descriptor.buffer_layouts[0].step_function = MTLVertexStepFunctionPerVertex;
+ desc.vertex_descriptor.buffer_layouts[0].step_rate = 1;
+ desc.vertex_descriptor.buffer_layouts[0].stride = this->vertex_format.stride;
+ BLI_assert(this->vertex_format.stride > 0);
+
+    /* SSBO Vertex Fetch -- Finalize attribute bindings and set status uniforms. */
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+ active_mtl_shader->ssbo_vertex_fetch_bind_attributes_end(rec);
+
+ /* Set Status uniforms. */
+ BLI_assert_msg(active_mtl_shader->uni_ssbo_input_prim_type_loc != -1,
+ "ssbo_input_prim_type uniform location invalid!");
+ BLI_assert_msg(active_mtl_shader->uni_ssbo_input_vert_count_loc != -1,
+ "ssbo_input_vert_count uniform location invalid!");
+ GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)),
+ active_mtl_shader->uni_ssbo_input_prim_type_loc,
+ 1,
+ 1,
+ (const int *)(&this->prim_type));
+ GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)),
+ active_mtl_shader->uni_ssbo_input_vert_count_loc,
+ 1,
+ 1,
+ (const int *)(&this->vertex_idx));
+ }
+
+ MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);
+ if (context_->ensure_render_pipeline_state(mtl_prim_type)) {
+
+ /* Issue draw call. */
+ BLI_assert(this->vertex_idx > 0);
+
+ /* Metal API does not support triangle fan, so we can emulate this
+ * input data by generating an index buffer to re-map indices to
+ * a TriangleList.
+ *
+ * NOTE(Metal): Consider caching generated triangle fan index buffers.
+ * For immediate mode, generating these is currently very cheap, as we use
+ * fast scratch buffer allocations. Though we may benefit from caching of
+ * frequently used buffer sizes. */
+ if (mtl_needs_topology_emulation(this->prim_type)) {
+
+ /* Debug safety check for SSBO FETCH MODE. */
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+ BLI_assert(false && "Topology emulation not supported with SSBO Vertex Fetch mode");
+ }
+
+ /* Emulate Tri-fan. */
+ if (this->prim_type == GPU_PRIM_TRI_FAN) {
+ /* Prepare Triangle-Fan emulation index buffer on CPU based on number of input
+ * vertices. */
+ uint32_t base_vert_count = this->vertex_idx;
+ uint32_t num_triangles = max_ii(base_vert_count - 2, 0);
+ uint32_t fan_index_count = num_triangles * 3;
+ BLI_assert(num_triangles > 0);
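+            /* e.g. a 5-vertex fan (v0..v4) expands to triangles
+             * (v0, v1, v2), (v0, v2, v3), (v0, v3, v4). */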
+
+ uint32_t alloc_size = sizeof(uint32_t) * fan_index_count;
+ uint32_t *index_buffer = nullptr;
+
+ MTLTemporaryBuffer allocation =
+ context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(
+ alloc_size, 128);
+ index_buffer = (uint32_t *)allocation.data;
+
+ int a = 0;
+ for (int i = 0; i < num_triangles; i++) {
+ index_buffer[a++] = 0;
+ index_buffer[a++] = i + 1;
+ index_buffer[a++] = i + 2;
+ }
+
+ @autoreleasepool {
+
+ id<MTLBuffer> index_buffer_mtl = nil;
+ uint32_t index_buffer_offset = 0;
+
+ /* Region of scratch buffer used for topology emulation element data.
+ * NOTE(Metal): We do not need to manually flush as the entire scratch
+ * buffer for current command buffer is flushed upon submission. */
+ index_buffer_mtl = allocation.metal_buffer;
+ index_buffer_offset = allocation.buffer_offset;
+
+ /* Set depth stencil state (requires knowledge of primitive type). */
+ context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle);
+
+ /* Bind Vertex Buffer. */
+ rps.bind_vertex_buffer(
+ current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0);
+
+ /* Draw. */
+ [rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle
+ indexCount:fan_index_count
+ indexType:MTLIndexTypeUInt32
+ indexBuffer:index_buffer_mtl
+ indexBufferOffset:index_buffer_offset];
+ }
+ }
+ else {
+            /* TODO(Metal): Topology emulation for line loop.
+             * NOTE(Metal): This path is currently unused, as such cases are handled at the
+             * high level for efficiency. */
+ BLI_assert_msg(false, "LineLoop requires emulation support in immediate mode.");
+ }
+ }
+ else {
+ MTLPrimitiveType primitive_type = metal_primitive_type_;
+ int vertex_count = this->vertex_idx;
+
+ /* Bind Vertex Buffer. */
+ rps.bind_vertex_buffer(
+ current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0);
+
+ /* Set depth stencil state (requires knowledge of primitive type). */
+ context_->ensure_depth_stencil_state(primitive_type);
+
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+
+ /* Bind Null Buffers for empty/missing bind slots. */
+ id<MTLBuffer> null_buffer = context_->get_null_buffer();
+ BLI_assert(null_buffer != nil);
+ for (int i = 1; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
+
+ /* We only need to ensure a buffer is bound to the context, its contents do not matter
+ * as it will not be used. */
+ if (rps.cached_vertex_buffer_bindings[i].metal_buffer == nil) {
+ rps.bind_vertex_buffer(null_buffer, 0, i);
+ }
+ }
+
+ /* SSBO vertex fetch - Nullify elements buffer. */
+ if (rps.cached_vertex_buffer_bindings[MTL_SSBO_VERTEX_FETCH_IBO_INDEX].metal_buffer ==
+ nil) {
+ rps.bind_vertex_buffer(null_buffer, 0, MTL_SSBO_VERTEX_FETCH_IBO_INDEX);
+ }
+
+ /* Submit draw call with modified vertex count, which reflects vertices per primitive
+ * defined in the USE_SSBO_VERTEX_FETCH pragma. */
+ int num_input_primitives = gpu_get_prim_count_from_type(vertex_count, this->prim_type);
+ int output_num_verts = num_input_primitives *
+ active_mtl_shader->get_ssbo_vertex_fetch_output_num_verts();
+#ifndef NDEBUG
+ BLI_assert(
+ mtl_vertex_count_fits_primitive_type(
+ output_num_verts, active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type()) &&
+ "Output Vertex count is not compatible with the requested output vertex primitive "
+ "type");
+#endif
+ [rec drawPrimitives:active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type()
+ vertexStart:0
+ vertexCount:output_num_verts];
+ context_->main_command_buffer.register_draw_counters(output_num_verts);
+ }
+ else {
+ /* Regular draw. */
+ [rec drawPrimitives:primitive_type vertexStart:0 vertexCount:vertex_count];
+ context_->main_command_buffer.register_draw_counters(vertex_count);
+ }
+ }
+ }
+ if (G.debug & G_DEBUG_GPU) {
+ [rec popDebugGroup];
+ }
+ }
+
+ /* Reset allocation after draw submission. */
+ has_begun_ = false;
+ if (current_allocation_.metal_buffer) {
+ [current_allocation_.metal_buffer release];
+ current_allocation_.metal_buffer = nil;
+ }
+}
+
+} // blender::gpu
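
The triangle-fan path in mtl_immediate.mm above fills a CPU-side index buffer so the fan can be submitted as indexed triangles. A minimal sketch of just that index pattern in plain C++, with a hypothetical build_fan_indices helper standing in for the scratch-buffer allocation used by the backend:

    #include <cstdint>
    #include <vector>

    /* Build the index list that emulates a triangle fan with indexed triangles:
     * triangle i re-uses vertex 0 together with the pair (i + 1, i + 2). */
    static std::vector<uint32_t> build_fan_indices(uint32_t vertex_count)
    {
      std::vector<uint32_t> indices;
      if (vertex_count < 3) {
        return indices; /* Not enough vertices to form a triangle. */
      }
      const uint32_t num_triangles = vertex_count - 2;
      indices.reserve(size_t(num_triangles) * 3);
      for (uint32_t i = 0; i < num_triangles; i++) {
        indices.push_back(0);
        indices.push_back(i + 1);
        indices.push_back(i + 2);
      }
      return indices;
    }

    /* Example: 5 fan vertices produce triangles (0,1,2), (0,2,3), (0,3,4). */

Every emitted triangle shares vertex 0, which is why the hunk above sizes the allocation as fan_index_count = 3 * (base_vert_count - 2).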
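
The SSBO vertex-fetch branch above also re-derives the draw's vertex count: the number of input primitives is computed from the submitted vertex count, then multiplied by the shader's output vertices per primitive. A small sketch of that arithmetic, assuming plain list topologies (the backend uses gpu_get_prim_count_from_type and the shader's reported output count; strips and fans need different formulas):

    #include <cstdint>

    /* Expansion used when the vertex shader pulls its own attributes (SSBO vertex
     * fetch): the GPU draw is issued with one vertex per *output* vertex. */
    static uint32_t ssbo_output_vertex_count(uint32_t input_vertex_count,
                                             uint32_t verts_per_input_prim,
                                             uint32_t output_verts_per_prim)
    {
      const uint32_t num_input_prims =
          (verts_per_input_prim == 0) ? 0 : input_vertex_count / verts_per_input_prim;
      return num_input_prims * output_verts_per_prim;
    }

    /* Example: 300 vertices drawn as triangles (3 per primitive) with a shader
     * configured to emit 6 output vertices per primitive -> 100 * 6 = 600. */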
diff --git a/source/blender/gpu/metal/mtl_memory.hh b/source/blender/gpu/metal/mtl_memory.hh
index df80df6543f..bd354376b12 100644
--- a/source/blender/gpu/metal/mtl_memory.hh
+++ b/source/blender/gpu/metal/mtl_memory.hh
@@ -340,13 +340,13 @@ class MTLBufferPool {
private:
/* Memory statistics. */
- long long int total_allocation_bytes_ = 0;
+ int64_t total_allocation_bytes_ = 0;
#if MTL_DEBUG_MEMORY_STATISTICS == 1
/* Debug statistics. */
std::atomic<int> per_frame_allocation_count_;
- std::atomic<long long int> allocations_in_pool_;
- std::atomic<long long int> buffers_in_pool_;
+ std::atomic<int64_t> allocations_in_pool_;
+ std::atomic<int64_t> buffers_in_pool_;
#endif
/* Metal resources. */
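
The mtl_memory.hh hunk swaps "long long int" for the fixed-width int64_t in the pool's debug statistics. A tiny sketch of that kind of counter, kept as relaxed atomics so bookkeeping can be updated from multiple threads; the names and the relaxed-ordering choice are illustrative, not taken from the backend:

    #include <atomic>
    #include <cstdint>

    struct BufferPoolStats {
      std::atomic<int64_t> allocations_in_pool{0}; /* Bytes currently held by the pool. */
      std::atomic<int64_t> buffers_in_pool{0};     /* Number of pooled buffers. */

      void on_buffer_returned(int64_t size_in_bytes)
      {
        allocations_in_pool.fetch_add(size_in_bytes, std::memory_order_relaxed);
        buffers_in_pool.fetch_add(1, std::memory_order_relaxed);
      }

      void on_buffer_reused(int64_t size_in_bytes)
      {
        allocations_in_pool.fetch_sub(size_in_bytes, std::memory_order_relaxed);
        buffers_in_pool.fetch_sub(1, std::memory_order_relaxed);
      }
    };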
diff --git a/source/blender/gpu/metal/mtl_shader.hh b/source/blender/gpu/metal/mtl_shader.hh
index 64d9d1cf849..5485b32dd31 100644
--- a/source/blender/gpu/metal/mtl_shader.hh
+++ b/source/blender/gpu/metal/mtl_shader.hh
@@ -261,8 +261,6 @@ class MTLShader : public Shader {
bool get_push_constant_is_dirty();
void push_constant_bindstate_mark_dirty(bool is_dirty);
- void vertformat_from_shader(GPUVertFormat *format) const override;
-
/* DEPRECATED: Kept only because of BGL API. (Returning -1 in METAL). */
int program_handle_get() const override
{
diff --git a/source/blender/gpu/metal/mtl_shader.mm b/source/blender/gpu/metal/mtl_shader.mm
index 23097f312f0..3b27b60bca0 100644
--- a/source/blender/gpu/metal/mtl_shader.mm
+++ b/source/blender/gpu/metal/mtl_shader.mm
@@ -129,6 +129,7 @@ MTLShader::~MTLShader()
if (shd_builder_ != nullptr) {
delete shd_builder_;
+ shd_builder_ = nullptr;
}
}
@@ -209,6 +210,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
/* Release temporary compilation resources. */
delete shd_builder_;
+ shd_builder_ = nullptr;
return false;
}
}
@@ -279,6 +281,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
/* Release temporary compilation resources. */
delete shd_builder_;
+ shd_builder_ = nullptr;
return false;
}
}
@@ -324,6 +327,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
/* Release temporary compilation resources. */
delete shd_builder_;
+ shd_builder_ = nullptr;
return true;
}
@@ -535,28 +539,6 @@ void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty)
{
push_constant_modified_ = is_dirty;
}
-
-void MTLShader::vertformat_from_shader(GPUVertFormat *format) const
-{
- GPU_vertformat_clear(format);
-
- const MTLShaderInterface *mtl_interface = static_cast<const MTLShaderInterface *>(interface);
- for (const uint attr_id : IndexRange(mtl_interface->get_total_attributes())) {
- const MTLShaderInputAttribute &attr = mtl_interface->get_attribute(attr_id);
-
- /* Extract type parameters from Metal type. */
- GPUVertCompType comp_type = comp_type_from_vert_format(attr.format);
- uint comp_len = comp_count_from_vert_format(attr.format);
- GPUVertFetchMode fetch_mode = fetchmode_from_vert_format(attr.format);
-
- GPU_vertformat_attr_add(format,
- mtl_interface->get_name_at_offset(attr.name_offset),
- comp_type,
- comp_len,
- fetch_mode);
- }
-}
-
/** \} */
/* -------------------------------------------------------------------- */
@@ -1167,6 +1149,7 @@ void MTLShader::ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_at
MTLShaderInterface *mtl_interface = this->get_interface();
BLI_assert(ssbo_attr.mtl_attribute_index >= 0 &&
ssbo_attr.mtl_attribute_index < mtl_interface->get_total_attributes());
+ UNUSED_VARS_NDEBUG(mtl_interface);
/* Update bind-mask to verify this attribute has been used. */
BLI_assert((ssbo_vertex_attribute_bind_mask_ & (1 << ssbo_attr.mtl_attribute_index)) ==
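
Each finalize() hunk above follows "delete shd_builder_" with "shd_builder_ = nullptr". A small self-contained sketch of why: clearing the pointer makes the destructor's own delete a harmless no-op instead of a double free. The types here are stand-ins, not the real MTLShader classes:

    struct CompilationScratch {
      /* Temporary state that only exists while a shader is being built. */
    };

    struct ShaderStub {
      CompilationScratch *shd_builder_ = nullptr;

      bool finalize(bool compile_ok)
      {
        /* Release temporary compilation resources on both paths. */
        delete shd_builder_;
        shd_builder_ = nullptr; /* Destructor runs later; avoid a double delete. */
        return compile_ok;
      }

      ~ShaderStub()
      {
        delete shd_builder_; /* Safe: deleting a null pointer is a no-op. */
      }
    };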
diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm
index 977e97dbd82..4a2be0753bb 100644
--- a/source/blender/gpu/metal/mtl_shader_generator.mm
+++ b/source/blender/gpu/metal/mtl_shader_generator.mm
@@ -724,10 +724,6 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
}
if (msl_iface.uses_ssbo_vertex_fetch_mode) {
ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl;
- ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_MAX_VBOS " << MTL_SSBO_VERTEX_FETCH_MAX_VBOS
- << std::endl;
- ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_IBO_INDEX " << MTL_SSBO_VERTEX_FETCH_IBO_INDEX
- << std::endl;
for (const MSLVertexInputAttribute &attr : msl_iface.vertex_input_attributes) {
ss_vertex << "#define SSBO_ATTR_TYPE_" << attr.name << " " << attr.type << std::endl;
}
diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm
index 4af46c13751..b4e913e5be6 100644
--- a/source/blender/gpu/metal/mtl_texture.mm
+++ b/source/blender/gpu/metal/mtl_texture.mm
@@ -12,6 +12,7 @@
#include "GPU_batch_presets.h"
#include "GPU_capabilities.h"
#include "GPU_framebuffer.h"
+#include "GPU_immediate.h"
#include "GPU_platform.h"
#include "GPU_state.h"
@@ -303,7 +304,6 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst,
/* Execute graphics draw call to perform the blit. */
GPUBatch *quad = GPU_batch_preset_quad();
-
GPU_batch_set_shader(quad, shader);
float w = dst->width_get();
@@ -337,6 +337,20 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst,
GPU_batch_draw(quad);
+  /* TMP: draw with the immediate-mode API. TODO(Metal): Remove once GPUBatch is supported. */
+ GPUVertFormat *imm_format = immVertexFormat();
+ uint pos = GPU_vertformat_attr_add(imm_format, "pos", GPU_COMP_F32, 2, GPU_FETCH_FLOAT);
+
+ immBindShader(shader);
+ immBegin(GPU_PRIM_TRI_STRIP, 4);
+ immVertex2f(pos, 1, 0);
+ immVertex2f(pos, 0, 0);
+ immVertex2f(pos, 1, 1);
+ immVertex2f(pos, 0, 1);
+ immEnd();
+ immUnbindProgram();
+ /**********************/
+
/* restoring old pipeline state. */
GPU_depth_mask(depth_write_prev);
GPU_stencil_write_mask_set(stencil_mask_prev);
@@ -1463,79 +1477,6 @@ bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo)
BLI_assert_msg(this->format_ != GPU_DEPTH24_STENCIL8,
"Apple silicon does not support GPU_DEPTH24_S8");
- MTLPixelFormat mtl_format = gpu_texture_format_to_metal(this->format_);
- mtl_max_mips_ = 1;
- mipmaps_ = 0;
- this->mip_range_set(0, 0);
-
- /* Create texture from GPUVertBuf's buffer. */
- MTLVertBuf *mtl_vbo = static_cast<MTLVertBuf *>(unwrap(vbo));
- mtl_vbo->bind();
- mtl_vbo->flag_used();
-
- /* Get Metal Buffer. */
- id<MTLBuffer> source_buffer = mtl_vbo->get_metal_buffer();
- BLI_assert(source_buffer);
-
- /* Verify size. */
- if (w_ <= 0) {
- MTL_LOG_WARNING("Allocating texture buffer of width 0!\n");
- w_ = 1;
- }
-
- /* Verify Texture and vertex buffer alignment. */
- int bytes_per_pixel = get_mtl_format_bytesize(mtl_format);
- int bytes_per_row = bytes_per_pixel * w_;
-
- MTLContext *mtl_ctx = MTLContext::get();
- uint align_requirement = static_cast<uint>(
- [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]);
-
- /* Verify per-vertex size aligns with texture size. */
- const GPUVertFormat *format = GPU_vertbuf_get_format(vbo);
- BLI_assert(bytes_per_pixel == format->stride &&
- "Pixel format stride MUST match the texture format stride -- These being different "
- "is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex");
- UNUSED_VARS_NDEBUG(format);
-
- /* Create texture descriptor. */
- BLI_assert(type_ == GPU_TEXTURE_BUFFER);
- texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
- texture_descriptor_.pixelFormat = mtl_format;
- texture_descriptor_.textureType = MTLTextureTypeTextureBuffer;
- texture_descriptor_.width = w_;
- texture_descriptor_.height = 1;
- texture_descriptor_.depth = 1;
- texture_descriptor_.arrayLength = 1;
- texture_descriptor_.mipmapLevelCount = mtl_max_mips_;
- texture_descriptor_.usage =
- MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite |
- MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. */
- texture_descriptor_.storageMode = [source_buffer storageMode];
- texture_descriptor_.sampleCount = 1;
- texture_descriptor_.cpuCacheMode = [source_buffer cpuCacheMode];
- texture_descriptor_.hazardTrackingMode = [source_buffer hazardTrackingMode];
-
- texture_ = [source_buffer
- newTextureWithDescriptor:texture_descriptor_
- offset:0
- bytesPerRow:ceil_to_multiple_u(bytes_per_row, align_requirement)];
- aligned_w_ = bytes_per_row / bytes_per_pixel;
-
- BLI_assert(texture_);
- texture_.label = [NSString stringWithUTF8String:this->get_name()];
- is_baked_ = true;
- is_dirty_ = false;
- resource_mode_ = MTL_TEXTURE_MODE_VBO;
-
- /* Track Status. */
- vert_buffer_ = mtl_vbo;
- vert_buffer_mtl_ = source_buffer;
-
- /* Cleanup. */
- [texture_descriptor_ release];
- texture_descriptor_ = nullptr;
-
return true;
}
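
The removed init_internal(GPUVertBuf *) body creates the buffer-backed texture with bytesPerRow rounded up to the device's minimum linear-texture alignment (queried via minimumLinearTextureAlignmentForPixelFormat). A plain C++ sketch of just that rounding step, with an illustrative alignment value in the example:

    #include <cassert>
    #include <cstdint>

    /* Round value up to the next multiple of alignment (alignment must be > 0). */
    static uint32_t ceil_to_multiple(uint32_t value, uint32_t alignment)
    {
      assert(alignment > 0);
      return ((value + alignment - 1) / alignment) * alignment;
    }

    /* Example: a 4-byte-per-pixel buffer texture of width 33, assuming the device
     * reports a 256-byte row alignment:
     *   bytes_per_row         = 4 * 33 = 132
     *   aligned bytes per row = ceil_to_multiple(132, 256) = 256 */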
diff --git a/source/blender/gpu/metal/mtl_texture_util.mm b/source/blender/gpu/metal/mtl_texture_util.mm
index 928393fb39e..5ed7659f260 100644
--- a/source/blender/gpu/metal/mtl_texture_util.mm
+++ b/source/blender/gpu/metal/mtl_texture_util.mm
@@ -22,13 +22,7 @@
/* Utility file for secondary functionality which supports mtl_texture.mm. */
extern char datatoc_compute_texture_update_msl[];
-extern char datatoc_depth_2d_update_vert_glsl[];
-extern char datatoc_depth_2d_update_float_frag_glsl[];
-extern char datatoc_depth_2d_update_int24_frag_glsl[];
-extern char datatoc_depth_2d_update_int32_frag_glsl[];
extern char datatoc_compute_texture_read_msl[];
-extern char datatoc_gpu_shader_fullscreen_blit_vert_glsl[];
-extern char datatoc_gpu_shader_fullscreen_blit_frag_glsl[];
namespace blender::gpu {
@@ -447,42 +441,34 @@ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get(
return *result;
}
- const char *fragment_source = nullptr;
+ const char *depth_2d_info_variant = nullptr;
switch (specialization.data_mode) {
case MTL_DEPTH_UPDATE_MODE_FLOAT:
- fragment_source = datatoc_depth_2d_update_float_frag_glsl;
+ depth_2d_info_variant = "depth_2d_update_float";
break;
case MTL_DEPTH_UPDATE_MODE_INT24:
- fragment_source = datatoc_depth_2d_update_int24_frag_glsl;
+ depth_2d_info_variant = "depth_2d_update_int24";
break;
case MTL_DEPTH_UPDATE_MODE_INT32:
- fragment_source = datatoc_depth_2d_update_int32_frag_glsl;
+ depth_2d_info_variant = "depth_2d_update_int32";
break;
default:
BLI_assert(false && "Invalid format mode\n");
return nullptr;
}
- GPUShader *shader = GPU_shader_create(datatoc_depth_2d_update_vert_glsl,
- fragment_source,
- nullptr,
- nullptr,
- nullptr,
- "depth_2d_update_sh_get");
+ GPUShader *shader = GPU_shader_create_from_info_name(depth_2d_info_variant);
mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialization, shader);
return shader;
}
GPUShader *gpu::MTLTexture::fullscreen_blit_sh_get()
{
-
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
if (mtl_context->get_texture_utils().fullscreen_blit_shader == nullptr) {
- const char *vertex_source = datatoc_gpu_shader_fullscreen_blit_vert_glsl;
- const char *fragment_source = datatoc_gpu_shader_fullscreen_blit_frag_glsl;
- GPUShader *shader = GPU_shader_create(
- vertex_source, fragment_source, nullptr, nullptr, nullptr, "fullscreen_blit");
+ GPUShader *shader = GPU_shader_create_from_info_name("fullscreen_blit");
+
mtl_context->get_texture_utils().fullscreen_blit_shader = shader;
}
return mtl_context->get_texture_utils().fullscreen_blit_shader;
@@ -614,7 +600,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
stringWithUTF8String:datatoc_compute_texture_read_msl];
/* Defensive Debug Checks. */
- long long int depth_scale_factor = 1;
+ int64_t depth_scale_factor = 1;
if (specialization_params.depth_format_mode > 0) {
BLI_assert(specialization_params.component_count_input == 1);
BLI_assert(specialization_params.component_count_output == 1);
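
depth_2d_update_sh_get() above maps the specialization's data mode to a create-info name and caches the resulting shader so each variant is compiled only once. A sketch of that lookup-or-create pattern in plain C++; the variant strings match the hunk, while ShaderStub and the cache map stand in for GPUShader and the per-context container:

    #include <map>
    #include <string>

    enum class DepthUpdateMode { Float, Int24, Int32 };

    struct ShaderStub {
      std::string info_name; /* Stand-in for a compiled GPUShader. */
    };

    static ShaderStub *depth_2d_update_shader_get(DepthUpdateMode mode,
                                                  std::map<DepthUpdateMode, ShaderStub> &cache)
    {
      if (auto it = cache.find(mode); it != cache.end()) {
        return &it->second; /* Already compiled for this specialization. */
      }
      const char *variant = nullptr;
      switch (mode) {
        case DepthUpdateMode::Float:
          variant = "depth_2d_update_float";
          break;
        case DepthUpdateMode::Int24:
          variant = "depth_2d_update_int24";
          break;
        case DepthUpdateMode::Int32:
          variant = "depth_2d_update_int32";
          break;
      }
      /* The backend builds the shader here with GPU_shader_create_from_info_name(). */
      auto inserted = cache.emplace(mode, ShaderStub{variant});
      return &inserted.first->second;
    }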
diff --git a/source/blender/gpu/opengl/gl_backend.hh b/source/blender/gpu/opengl/gl_backend.hh
index 8646d94e2fd..14fca9f061d 100644
--- a/source/blender/gpu/opengl/gl_backend.hh
+++ b/source/blender/gpu/opengl/gl_backend.hh
@@ -61,7 +61,7 @@ class GLBackend : public GPUBackend {
GLTexture::samplers_update();
};
- Context *context_alloc(void *ghost_window) override
+ Context *context_alloc(void *ghost_window, void *ghost_context) override
{
return new GLContext(ghost_window, shared_orphan_list_);
};
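
context_alloc() now receives the GHOST context handle as well as the window, so a backend that needs the native context (Metal) can consume it while the GL backend keeps ignoring it. A minimal sketch of that widened factory signature, with stand-in types rather than the real GPUBackend/Context classes:

    struct ContextStub {
      void *ghost_window = nullptr;
      void *ghost_context = nullptr; /* Consumed only by backends that need it. */
    };

    struct BackendStub {
      virtual ~BackendStub() = default;
      virtual ContextStub *context_alloc(void *ghost_window, void *ghost_context) = 0;
    };

    struct GLBackendStub : public BackendStub {
      ContextStub *context_alloc(void *ghost_window, void * /*ghost_context*/) override
      {
        /* The GL path only needs the window; the extra handle is ignored. */
        return new ContextStub{ghost_window, nullptr};
      }
    };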
diff --git a/source/blender/gpu/tests/gpu_testing.cc b/source/blender/gpu/tests/gpu_testing.cc
index 224a9afcf59..67e296b11d5 100644
--- a/source/blender/gpu/tests/gpu_testing.cc
+++ b/source/blender/gpu/tests/gpu_testing.cc
@@ -19,7 +19,7 @@ void GPUTest::SetUp()
ghost_system = GHOST_CreateSystem();
ghost_context = GHOST_CreateOpenGLContext(ghost_system, glSettings);
GHOST_ActivateOpenGLContext(ghost_context);
- context = GPU_context_create(nullptr);
+ context = GPU_context_create(nullptr, ghost_context);
GPU_init();
}