Diffstat (limited to 'source/blender/gpu/metal/mtl_context.mm')
-rw-r--r-- | source/blender/gpu/metal/mtl_context.mm | 1342
1 file changed, 1283 insertions, 59 deletions
diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm index 26cfe6632ef..50576379f0d 100644 --- a/source/blender/gpu/metal/mtl_context.mm +++ b/source/blender/gpu/metal/mtl_context.mm @@ -5,11 +5,29 @@ */ #include "mtl_context.hh" #include "mtl_debug.hh" +#include "mtl_framebuffer.hh" +#include "mtl_immediate.hh" +#include "mtl_memory.hh" +#include "mtl_primitive.hh" +#include "mtl_shader.hh" +#include "mtl_shader_interface.hh" #include "mtl_state.hh" +#include "mtl_uniform_buffer.hh" #include "DNA_userdef_types.h" #include "GPU_capabilities.h" +#include "GPU_matrix.h" +#include "GPU_shader.h" +#include "GPU_texture.h" +#include "GPU_uniform_buffer.h" +#include "GPU_vertex_buffer.h" +#include "intern/gpu_matrix_private.h" + +#include "PIL_time.h" + +#include <fstream> +#include <string> using namespace blender; using namespace blender::gpu; @@ -19,29 +37,165 @@ namespace blender::gpu { /* Global memory manager. */ MTLBufferPool MTLContext::global_memory_manager; +/* Swap-chain and latency management. */ +std::atomic<int> MTLContext::max_drawables_in_flight = 0; +std::atomic<int64_t> MTLContext::avg_drawable_latency_us = 0; +int64_t MTLContext::frame_latency[MTL_FRAME_AVERAGE_COUNT] = {0}; + +/* -------------------------------------------------------------------- */ +/** \name GHOST Context interaction. + * \{ */ + +void MTLContext::set_ghost_context(GHOST_ContextHandle ghostCtxHandle) +{ + GHOST_Context *ghost_ctx = reinterpret_cast<GHOST_Context *>(ghostCtxHandle); + BLI_assert(ghost_ctx != nullptr); + + /* Release old MTLTexture handle */ + if (default_fbo_mtltexture_) { + [default_fbo_mtltexture_ release]; + default_fbo_mtltexture_ = nil; + } + + /* Release Framebuffer attachments */ + MTLFrameBuffer *mtl_front_left = static_cast<MTLFrameBuffer *>(this->front_left); + MTLFrameBuffer *mtl_back_left = static_cast<MTLFrameBuffer *>(this->back_left); + mtl_front_left->remove_all_attachments(); + mtl_back_left->remove_all_attachments(); + + GHOST_ContextCGL *ghost_cgl_ctx = dynamic_cast<GHOST_ContextCGL *>(ghost_ctx); + if (ghost_cgl_ctx != NULL) { + default_fbo_mtltexture_ = ghost_cgl_ctx->metalOverlayTexture(); + + MTL_LOG_INFO( + "Binding GHOST context CGL %p to GPU context %p. (Device: %p, queue: %p, texture: %p)\n", + ghost_cgl_ctx, + this, + this->device, + this->queue, + default_fbo_gputexture_); + + /* Check if the GHOST Context provides a default framebuffer: */ + if (default_fbo_mtltexture_) { + + /* Release old GPUTexture handle */ + if (default_fbo_gputexture_) { + GPU_texture_free(wrap(static_cast<Texture *>(default_fbo_gputexture_))); + default_fbo_gputexture_ = nullptr; + } + + /* Retain handle */ + [default_fbo_mtltexture_ retain]; + + /*** Create front and back-buffers ***/ + /* Create gpu::MTLTexture objects */ + default_fbo_gputexture_ = new gpu::MTLTexture( + "MTL_BACKBUFFER", GPU_RGBA16F, GPU_TEXTURE_2D, default_fbo_mtltexture_); + + /* Update frame-buffers with new texture attachments. 
*/ + mtl_front_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0); + mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0); +#ifndef NDEBUG + this->label = default_fbo_mtltexture_.label; +#endif + } + else { + + /* Add default texture for cases where no other framebuffer is bound */ + if (!default_fbo_gputexture_) { + default_fbo_gputexture_ = static_cast<gpu::MTLTexture *>( + unwrap(GPU_texture_create_2d(__func__, 16, 16, 1, GPU_RGBA16F, nullptr))); + } + mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0); + + MTL_LOG_INFO( + "-- Bound context %p for GPU context: %p is offscreen and does not have a default " + "framebuffer\n", + ghost_cgl_ctx, + this); +#ifndef NDEBUG + this->label = @"Offscreen Metal Context"; +#endif + } + } + else { + MTL_LOG_INFO( + "[ERROR] Failed to bind GHOST context to MTLContext -- GHOST_ContextCGL is null " + "(GhostContext: %p, GhostContext_CGL: %p)\n", + ghost_ctx, + ghost_cgl_ctx); + BLI_assert(false); + } +} + +void MTLContext::set_ghost_window(GHOST_WindowHandle ghostWinHandle) +{ + GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghostWinHandle); + this->set_ghost_context((GHOST_ContextHandle)(ghostWin ? ghostWin->getContext() : NULL)); +} + +/** \} */ + /* -------------------------------------------------------------------- */ /** \name MTLContext * \{ */ /* Placeholder functions */ -MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command_buffer(*this) +MTLContext::MTLContext(void *ghost_window, void *ghost_context) + : memory_manager(*this), main_command_buffer(*this) { /* Init debug. */ debug::mtl_debug_init(); + /* Initialize Render-pass and Frame-buffer State. */ + this->back_left = nullptr; + /* Initialize command buffer state. */ this->main_command_buffer.prepare(); + /* Initialize IMM and pipeline state */ + this->pipeline_state.initialised = false; + /* Frame management. */ is_inside_frame_ = false; current_frame_index_ = 0; + /* Prepare null data buffer. */ + null_buffer_ = nil; + null_attribute_buffer_ = nil; + + /* Zero-initialize MTL textures. */ + default_fbo_mtltexture_ = nil; + default_fbo_gputexture_ = nullptr; + + /** Fetch GHOSTContext and fetch Metal device/queue. */ + ghost_window_ = ghost_window; + if (ghost_window_ && ghost_context == NULL) { + /* NOTE(Metal): Fetch ghost_context from ghost_window if it is not provided. + * Regardless of whether windowed or not, we need access to the GhostContext + * for presentation, and device/queue access. */ + GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghost_window_); + ghost_context = (ghostWin ? ghostWin->getContext() : NULL); + } + BLI_assert(ghost_context); + this->ghost_context_ = static_cast<GHOST_ContextCGL *>(ghost_context); + this->queue = (id<MTLCommandQueue>)this->ghost_context_->metalCommandQueue(); + this->device = (id<MTLDevice>)this->ghost_context_->metalDevice(); + BLI_assert(this->queue); + BLI_assert(this->device); + [this->queue retain]; + [this->device retain]; + + /* Register present callback. */ + this->ghost_context_->metalRegisterPresentCallback(&present); + /* Create FrameBuffer handles. */ MTLFrameBuffer *mtl_front_left = new MTLFrameBuffer(this, "front_left"); MTLFrameBuffer *mtl_back_left = new MTLFrameBuffer(this, "back_left"); this->front_left = mtl_front_left; this->back_left = mtl_back_left; this->active_fb = this->back_left; + /* Prepare platform and capabilities. (NOTE: With METAL, this needs to be done after CTX * initialization). 
*/ MTLBackend::platform_init(this); @@ -50,6 +204,7 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command /* Initialize Metal modules. */ this->memory_manager.init(); this->state_manager = new MTLStateManager(this); + this->imm = new MTLImmediate(this); /* Ensure global memory manager is initialized. */ MTLContext::global_memory_manager.init(this->device); @@ -83,9 +238,29 @@ MTLContext::~MTLContext() this->end_frame(); } } + + /* Release Memory Manager */ + this->get_scratchbuffer_manager().free(); + /* Release update/blit shaders. */ this->get_texture_utils().cleanup(); + /* Detach resource references */ + GPU_texture_unbind_all(); + + /* Unbind UBOs */ + for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) { + if (this->pipeline_state.ubo_bindings[i].bound && + this->pipeline_state.ubo_bindings[i].ubo != nullptr) { + GPUUniformBuf *ubo = wrap( + static_cast<UniformBuf *>(this->pipeline_state.ubo_bindings[i].ubo)); + GPU_uniformbuf_unbind(ubo); + } + } + + /* Release Dummy resources */ + this->free_dummy_resources(); + /* Release Sampler States. */ for (int i = 0; i < GPU_SAMPLER_MAX; i++) { if (sampler_state_cache_[i] != nil) { @@ -93,6 +268,28 @@ MTLContext::~MTLContext() sampler_state_cache_[i] = nil; } } + + /* Empty cached sampler argument buffers. */ + for (auto entry : cached_sampler_buffers_.values()) { + entry->free(); + } + cached_sampler_buffers_.clear(); + + /* Free null buffers. */ + if (null_buffer_) { + [null_buffer_ release]; + } + if (null_attribute_buffer_) { + [null_attribute_buffer_ release]; + } + + /* Free Metal objects. */ + if (this->queue) { + [this->queue release]; + } + if (this->device) { + [this->device release]; + } } void MTLContext::begin_frame() @@ -124,20 +321,49 @@ void MTLContext::check_error(const char *info) void MTLContext::activate() { - /* TODO(Metal): Implement. */ + /* Make sure no other context is already bound to this thread. */ + BLI_assert(is_active_ == false); + is_active_ = true; + thread_ = pthread_self(); + + /* Re-apply ghost window/context for resizing */ + if (ghost_window_) { + this->set_ghost_window((GHOST_WindowHandle)ghost_window_); + } + else if (ghost_context_) { + this->set_ghost_context((GHOST_ContextHandle)ghost_context_); + } + + /* Reset UBO bind state. */ + for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) { + if (this->pipeline_state.ubo_bindings[i].bound && + this->pipeline_state.ubo_bindings[i].ubo != nullptr) { + this->pipeline_state.ubo_bindings[i].bound = false; + this->pipeline_state.ubo_bindings[i].ubo = nullptr; + } + } + + /* Ensure imm active. */ + immActivate(); } + void MTLContext::deactivate() { - /* TODO(Metal): Implement. */ + BLI_assert(this->is_active_on_thread()); + /* Flush context on deactivate. */ + this->flush(); + is_active_ = false; + immDeactivate(); } void MTLContext::flush() { - /* TODO(Metal): Implement. */ + this->main_command_buffer.submit(false); } + void MTLContext::finish() { - /* TODO(Metal): Implement. */ + this->main_command_buffer.submit(true); } void MTLContext::memory_statistics_get(int *total_mem, int *free_mem) @@ -177,10 +403,9 @@ id<MTLRenderCommandEncoder> MTLContext::ensure_begin_render_pass() } /* Ensure command buffer workload submissions are optimal -- - * Though do not split a batch mid-IMM recording */ - /* TODO(Metal): Add IMM Check once MTLImmediate has been implemented. 
*/ - if (this->main_command_buffer.do_break_submission()/*&& - !((MTLImmediate *)(this->imm))->imm_is_recording()*/) { + * Though do not split a batch mid-IMM recording. */ + if (this->main_command_buffer.do_break_submission() && + !((MTLImmediate *)(this->imm))->imm_is_recording()) { this->flush(); } @@ -227,6 +452,116 @@ MTLFrameBuffer *MTLContext::get_default_framebuffer() return static_cast<MTLFrameBuffer *>(this->back_left); } +MTLShader *MTLContext::get_active_shader() +{ + return this->pipeline_state.active_shader; +} + +id<MTLBuffer> MTLContext::get_null_buffer() +{ + if (null_buffer_ != nil) { + return null_buffer_; + } + + static const int null_buffer_size = 4096; + null_buffer_ = [this->device newBufferWithLength:null_buffer_size + options:MTLResourceStorageModeManaged]; + [null_buffer_ retain]; + uint32_t *null_data = (uint32_t *)calloc(1, null_buffer_size); + memcpy([null_buffer_ contents], null_data, null_buffer_size); + [null_buffer_ didModifyRange:NSMakeRange(0, null_buffer_size)]; + free(null_data); + + BLI_assert(null_buffer_ != nil); + return null_buffer_; +} + +id<MTLBuffer> MTLContext::get_null_attribute_buffer() +{ + if (null_attribute_buffer_ != nil) { + return null_attribute_buffer_; + } + + /* Allocate Null buffer if it has not yet been created. + * Min buffer size is 256 bytes -- though we only need 64 bytes of data. */ + static const int null_buffer_size = 256; + null_attribute_buffer_ = [this->device newBufferWithLength:null_buffer_size + options:MTLResourceStorageModeManaged]; + BLI_assert(null_attribute_buffer_ != nil); + [null_attribute_buffer_ retain]; + float data[4] = {0.0f, 0.0f, 0.0f, 1.0f}; + memcpy([null_attribute_buffer_ contents], data, sizeof(float) * 4); + [null_attribute_buffer_ didModifyRange:NSMakeRange(0, null_buffer_size)]; + + return null_attribute_buffer_; +} + +gpu::MTLTexture *MTLContext::get_dummy_texture(eGPUTextureType type) +{ + /* Decrement 1 from texture type as they start from 1 and go to 32 (inclusive).
Remap to 0..31 */ + gpu::MTLTexture *dummy_tex = dummy_textures_[type - 1]; + if (dummy_tex != nullptr) { + return dummy_tex; + } + else { + GPUTexture *tex = nullptr; + switch (type) { + case GPU_TEXTURE_1D: + tex = GPU_texture_create_1d("Dummy 1D", 128, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_1D_ARRAY: + tex = GPU_texture_create_1d_array("Dummy 1DArray", 128, 1, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_2D: + tex = GPU_texture_create_2d("Dummy 2D", 128, 128, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_2D_ARRAY: + tex = GPU_texture_create_2d_array("Dummy 2DArray", 128, 128, 1, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_3D: + tex = GPU_texture_create_3d( + "Dummy 3D", 128, 128, 1, 1, GPU_RGBA8, GPU_DATA_UBYTE, nullptr); + break; + case GPU_TEXTURE_CUBE: + tex = GPU_texture_create_cube("Dummy Cube", 128, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_CUBE_ARRAY: + tex = GPU_texture_create_cube_array("Dummy CubeArray", 128, 1, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_BUFFER: + if (!dummy_verts_) { + GPU_vertformat_clear(&dummy_vertformat_); + GPU_vertformat_attr_add(&dummy_vertformat_, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + dummy_verts_ = GPU_vertbuf_create_with_format_ex(&dummy_vertformat_, GPU_USAGE_STATIC); + GPU_vertbuf_data_alloc(dummy_verts_, 64); + } + tex = GPU_texture_create_from_vertbuf("Dummy TextureBuffer", dummy_verts_); + break; + default: + BLI_assert_msg(false, "Unrecognised texture type"); + return nullptr; + } + gpu::MTLTexture *metal_tex = static_cast<gpu::MTLTexture *>(reinterpret_cast<Texture *>(tex)); + dummy_textures_[type - 1] = metal_tex; + return metal_tex; + } + return nullptr; +} + +void MTLContext::free_dummy_resources() +{ + for (int tex = 0; tex < GPU_TEXTURE_BUFFER; tex++) { + if (dummy_textures_[tex]) { + GPU_texture_free( + reinterpret_cast<GPUTexture *>(static_cast<Texture *>(dummy_textures_[tex]))); + dummy_textures_[tex] = nullptr; + } + } + if (dummy_verts_) { + GPU_vertbuf_discard(dummy_verts_); + } +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -239,20 +574,20 @@ void MTLContext::pipeline_state_init() /*** Initialize state only once. ***/ if (!this->pipeline_state.initialised) { this->pipeline_state.initialised = true; - this->pipeline_state.active_shader = NULL; + this->pipeline_state.active_shader = nullptr; /* Clear bindings state. */ for (int t = 0; t < GPU_max_textures(); t++) { this->pipeline_state.texture_bindings[t].used = false; - this->pipeline_state.texture_bindings[t].texture_slot_index = t; - this->pipeline_state.texture_bindings[t].texture_resource = NULL; + this->pipeline_state.texture_bindings[t].slot_index = -1; + this->pipeline_state.texture_bindings[t].texture_resource = nullptr; } for (int s = 0; s < MTL_MAX_SAMPLER_SLOTS; s++) { this->pipeline_state.sampler_bindings[s].used = false; } for (int u = 0; u < MTL_MAX_UNIFORM_BUFFER_BINDINGS; u++) { this->pipeline_state.ubo_bindings[u].bound = false; - this->pipeline_state.ubo_bindings[u].ubo = NULL; + this->pipeline_state.ubo_bindings[u].ubo = nullptr; } } @@ -373,6 +708,757 @@ void MTLContext::set_scissor_enabled(bool scissor_enabled) /** \} */ /* -------------------------------------------------------------------- */ +/** \name Command Encoder and pipeline state + * These utilities ensure that all of the globally bound resources and state have been + * correctly encoded within the current RenderCommandEncoder. 
This involves managing + buffer bindings, texture bindings, depth stencil state and dynamic pipeline state. + * + * We will also trigger compilation of new PSOs where the input state has changed + * and is required. + * All of this setup is required in order to perform a valid draw call. + * \{ */ + +bool MTLContext::ensure_render_pipeline_state(MTLPrimitiveType mtl_prim_type) +{ + BLI_assert(this->pipeline_state.initialised); + + /* Check if an active shader is bound. */ + if (!this->pipeline_state.active_shader) { + MTL_LOG_WARNING("No Metal shader for bound GL shader\n"); + return false; + } + + /* Also ensure active shader is valid. */ + if (!this->pipeline_state.active_shader->is_valid()) { + MTL_LOG_WARNING( + "Bound active shader is not valid (Missing/invalid implementation for Metal).\n"); + return false; + } + + /* Apply global state. */ + this->state_manager->apply_state(); + + /* Main command buffer tracks the current state of the render pass, based on bound + * MTLFrameBuffer. */ + MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state(); + + /* Debug Check: Ensure Framebuffer instance is not dirty. */ + BLI_assert(!this->main_command_buffer.get_active_framebuffer()->get_dirty()); + + /* Fetch shader interface. */ + MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface(); + if (shader_interface == nullptr) { + MTL_LOG_WARNING("Bound active shader does not have a valid shader interface!\n"); + return false; + } + + /* Fetch shader and bake valid PipelineStateObject (PSO) based on current + * shader and state combination. This PSO represents the final GPU-executable + * permutation of the shader. */ + MTLRenderPipelineStateInstance *pipeline_state_instance = + this->pipeline_state.active_shader->bake_current_pipeline_state( + this, mtl_prim_type_to_topology_class(mtl_prim_type)); + if (!pipeline_state_instance) { + MTL_LOG_ERROR("Failed to bake Metal pipeline state for shader: %s\n", + shader_interface->get_name()); + return false; + } + + bool result = false; + if (pipeline_state_instance->pso) { + + /* Fetch render command encoder. A render pass should already be active. + * This will be NULL if invalid. */ + id<MTLRenderCommandEncoder> rec = + this->main_command_buffer.get_active_render_command_encoder(); + BLI_assert(rec); + if (rec == nil) { + MTL_LOG_ERROR("ensure_render_pipeline_state called while render pass is not active.\n"); + return false; + } + + /* Bind Render Pipeline State. */ + BLI_assert(pipeline_state_instance->pso); + if (rps.bound_pso != pipeline_state_instance->pso) { + [rec setRenderPipelineState:pipeline_state_instance->pso]; + rps.bound_pso = pipeline_state_instance->pso; + } + + /** Ensure resource bindings. */ + /* Texture Bindings. */ + /* We will iterate through all texture bindings on the context and determine if any of the + * active slots match those in our shader interface. If so, textures will be bound. */ + if (shader_interface->get_total_textures() > 0) { + this->ensure_texture_bindings(rec, shader_interface, pipeline_state_instance); + } + + /* Transform feedback buffer binding. */ + /* TODO(Metal): Include this code once MTLVertBuf is merged. We bind the vertex buffer to which + * transform feedback data will be written.
*/ + // GPUVertBuf *tf_vbo = + // this->pipeline_state.active_shader->get_transform_feedback_active_buffer(); + // if (tf_vbo != nullptr && pipeline_state_instance->transform_feedback_buffer_index >= 0) { + + // /* Ensure primitive type is either GPU_LINES, GPU_TRIANGLES or GPU_POINT */ + // BLI_assert(mtl_prim_type == MTLPrimitiveTypeLine || + // mtl_prim_type == MTLPrimitiveTypeTriangle || + // mtl_prim_type == MTLPrimitiveTypePoint); + + // /* Fetch active transform feedback buffer from vertbuf */ + // MTLVertBuf *tf_vbo_mtl = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(tf_vbo)); + // int tf_buffer_offset = 0; + // id<MTLBuffer> tf_buffer_mtl = tf_vbo_mtl->get_metal_buffer(&tf_buffer_offset); + + // if (tf_buffer_mtl != nil && tf_buffer_offset >= 0) { + // [rec setVertexBuffer:tf_buffer_mtl + // offset:tf_buffer_offset + // atIndex:pipeline_state_instance->transform_feedback_buffer_index]; + // printf("Successfully bound VBO: %p for transform feedback (MTL Buffer: %p)\n", + // tf_vbo_mtl, + // tf_buffer_mtl); + // } + // } + + /* Matrix Bindings. */ + /* This is now called upon shader bind. We may need to re-evaluate this though, + * as was done here to ensure uniform changes between draws were tracked. + * NOTE(Metal): We may be able to remove this. */ + GPU_matrix_bind(reinterpret_cast<struct GPUShader *>( + static_cast<Shader *>(this->pipeline_state.active_shader))); + + /* Bind Uniforms */ + this->ensure_uniform_buffer_bindings(rec, shader_interface, pipeline_state_instance); + + /* Bind Null attribute buffer, if needed. */ + if (pipeline_state_instance->null_attribute_buffer_index >= 0) { + if (G.debug & G_DEBUG_GPU) { + MTL_LOG_INFO("Binding null attribute buffer at index: %d\n", + pipeline_state_instance->null_attribute_buffer_index); + } + rps.bind_vertex_buffer(this->get_null_attribute_buffer(), + 0, + pipeline_state_instance->null_attribute_buffer_index); + } + + /** Dynamic Per-draw Render State on RenderCommandEncoder. */ + /* State: Viewport. */ + if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_VIEWPORT_FLAG) { + MTLViewport viewport; + viewport.originX = (double)this->pipeline_state.viewport_offset_x; + viewport.originY = (double)this->pipeline_state.viewport_offset_y; + viewport.width = (double)this->pipeline_state.viewport_width; + viewport.height = (double)this->pipeline_state.viewport_height; + viewport.znear = this->pipeline_state.depth_stencil_state.depth_range_near; + viewport.zfar = this->pipeline_state.depth_stencil_state.depth_range_far; + [rec setViewport:viewport]; + + this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags & + ~MTL_PIPELINE_STATE_VIEWPORT_FLAG); + } + + /* State: Scissor. */ + if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_SCISSOR_FLAG) { + + /* Get FrameBuffer associated with active RenderCommandEncoder. */ + MTLFrameBuffer *render_fb = this->main_command_buffer.get_active_framebuffer(); + + MTLScissorRect scissor; + if (this->pipeline_state.scissor_enabled) { + scissor.x = this->pipeline_state.scissor_x; + scissor.y = this->pipeline_state.scissor_y; + scissor.width = this->pipeline_state.scissor_width; + scissor.height = this->pipeline_state.scissor_height; + + /* Some scissor assignments exceed the bounds of the viewport due to implicitly added + * padding to the width/height - Clamp width/height. 
*/ + BLI_assert(scissor.x >= 0 && scissor.x < render_fb->get_width()); + BLI_assert(scissor.y >= 0 && scissor.y < render_fb->get_height()); + scissor.width = min_ii(scissor.width, render_fb->get_width() - scissor.x); + scissor.height = min_ii(scissor.height, render_fb->get_height() - scissor.y); + BLI_assert(scissor.width > 0 && (scissor.x + scissor.width <= render_fb->get_width())); + BLI_assert(scissor.height > 0 && (scissor.y + scissor.height <= render_fb->get_height())); + } + else { + /* Scissor is disabled, reset to default size as scissor state may have been previously + * assigned on this encoder. */ + scissor.x = 0; + scissor.y = 0; + scissor.width = render_fb->get_width(); + scissor.height = render_fb->get_height(); + } + + /* Scissor state can still be flagged as changed if it is toggled on and off, without + * parameters changing between draws. */ + if (memcmp(&scissor, &rps.last_scissor_rect, sizeof(MTLScissorRect))) { + [rec setScissorRect:scissor]; + rps.last_scissor_rect = scissor; + } + this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags & + ~MTL_PIPELINE_STATE_SCISSOR_FLAG); + } + + /* State: Face winding. */ + if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_FRONT_FACING_FLAG) { + /* We need to invert the face winding in Metal, to account for the inverted-Y coordinate + * system. */ + MTLWinding winding = (this->pipeline_state.front_face == GPU_CLOCKWISE) ? + MTLWindingClockwise : + MTLWindingCounterClockwise; + [rec setFrontFacingWinding:winding]; + this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags & + ~MTL_PIPELINE_STATE_FRONT_FACING_FLAG); + } + + /* State: cull-mode. */ + if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_CULLMODE_FLAG) { + + MTLCullMode mode = MTLCullModeNone; + if (this->pipeline_state.culling_enabled) { + switch (this->pipeline_state.cull_mode) { + case GPU_CULL_NONE: + mode = MTLCullModeNone; + break; + case GPU_CULL_FRONT: + mode = MTLCullModeFront; + break; + case GPU_CULL_BACK: + mode = MTLCullModeBack; + break; + default: + BLI_assert_unreachable(); + break; + } + } + [rec setCullMode:mode]; + this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags & + ~MTL_PIPELINE_STATE_CULLMODE_FLAG); + } + + /* Pipeline state is now good. */ + result = true; + } + return result; +} + +/* Bind uniform buffers to an active render command encoder using the rendering state of the + * current context (active shader, bound UBOs). */ +bool MTLContext::ensure_uniform_buffer_bindings( + id<MTLRenderCommandEncoder> rec, + const MTLShaderInterface *shader_interface, + const MTLRenderPipelineStateInstance *pipeline_state_instance) +{ + /* Fetch Render Pass state. */ + MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state(); + + /* Shader-owned push constant block for uniforms. */ + bool active_shader_changed = (rps.last_bound_shader_state.shader_ != + this->pipeline_state.active_shader || + rps.last_bound_shader_state.shader_ == nullptr || + rps.last_bound_shader_state.pso_index_ != + pipeline_state_instance->shader_pso_index); + + const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block(); + if (push_constant_block.size > 0) { + + /* Fetch uniform buffer base binding index from pipeline_state_instance - The buffer index + * will be offset by the number of bound VBOs.
*/ + uint32_t block_size = push_constant_block.size; + uint32_t buffer_index = pipeline_state_instance->base_uniform_buffer_index + + push_constant_block.buffer_index; + + /* Only need to rebind block if push constants have been modified -- or if no data is bound for + * the current RenderCommandEncoder. */ + if (this->pipeline_state.active_shader->get_push_constant_is_dirty() || + active_shader_changed || !rps.cached_vertex_buffer_bindings[buffer_index].is_bytes || + !rps.cached_fragment_buffer_bindings[buffer_index].is_bytes || true) { + + /* Bind push constant data. */ + BLI_assert(this->pipeline_state.active_shader->get_push_constant_data() != nullptr); + rps.bind_vertex_bytes( + this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index); + rps.bind_fragment_bytes( + this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index); + + /* Only need to rebind block if it has been modified. */ + this->pipeline_state.active_shader->push_constant_bindstate_mark_dirty(false); + } + } + rps.last_bound_shader_state.set(this->pipeline_state.active_shader, + pipeline_state_instance->shader_pso_index); + + /* Bind Global GPUUniformBuffers */ + /* Iterate through expected UBOs in the shader interface, and check if the globally bound ones + * match. This is used to support the gpu_uniformbuffer module, where the uniform data is global, + * and not owned by the shader instance. */ + for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) { + const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index); + + if (ubo.buffer_index >= 0) { + + /* Uniform Buffer index offset by 1 as the first shader buffer binding slot is reserved for + * the uniform PushConstantBlock. */ + const uint32_t buffer_index = ubo.buffer_index + 1; + int ubo_offset = 0; + id<MTLBuffer> ubo_buffer = nil; + int ubo_size = 0; + + bool bind_dummy_buffer = false; + if (this->pipeline_state.ubo_bindings[ubo_index].bound) { + + /* Fetch UBO global-binding properties from slot. */ + ubo_offset = 0; + ubo_buffer = this->pipeline_state.ubo_bindings[ubo_index].ubo->get_metal_buffer( + &ubo_offset); + ubo_size = this->pipeline_state.ubo_bindings[ubo_index].ubo->get_size(); + + /* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid + * allocating zero buffers. */ + if (ubo_buffer == nil) { + bind_dummy_buffer = true; + } + else { + BLI_assert(ubo_buffer != nil); + BLI_assert(ubo_size > 0); + + if (pipeline_state_instance->reflection_data_available) { + /* NOTE: While the vertex and fragment stages have different UBOs, the indices in each + * case will be the same for the same UBO. + * We also determine expected size and then ensure buffer of the correct size + * exists in one of the vertex/fragment shader binding tables. This path is used + * to verify that the size of the bound UBO matches what is expected in the shader. */ + uint32_t expected_size = + (buffer_index < + pipeline_state_instance->buffer_bindings_reflection_data_vert.size()) ? + pipeline_state_instance->buffer_bindings_reflection_data_vert[buffer_index] + .size : + 0; + if (expected_size == 0) { + expected_size = + (buffer_index < + pipeline_state_instance->buffer_bindings_reflection_data_frag.size()) ? 
+ pipeline_state_instance->buffer_bindings_reflection_data_frag[buffer_index] + .size : + 0; + } + BLI_assert_msg( + expected_size > 0, + "Shader interface expects UBO, but shader reflection data reports that it " + "is not present"); + + /* If ubo size is smaller than the size expected by the shader, we need to bind the + * dummy buffer, which will be big enough, to avoid an OOB error. */ + if (ubo_size < expected_size) { + MTL_LOG_INFO( + "[Error][UBO] UBO (UBO Name: %s) bound at index: %d with size %d (Expected size " + "%d) (Shader Name: %s) is too small -- binding NULL buffer. This is likely an " + "over-binding, which is not used, but we need this to avoid validation " + "issues\n", + shader_interface->get_name_at_offset(ubo.name_offset), + buffer_index, + ubo_size, + expected_size, + shader_interface->get_name()); + bind_dummy_buffer = true; + } + } + } + } + else { + MTL_LOG_INFO( + "[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- but " + "nothing was bound -- binding dummy buffer\n", + shader_interface->get_name(), + shader_interface->get_name_at_offset(ubo.name_offset), + buffer_index); + bind_dummy_buffer = true; + } + + if (bind_dummy_buffer) { + /* Perform Dummy binding. */ + ubo_offset = 0; + ubo_buffer = this->get_null_buffer(); + ubo_size = [ubo_buffer length]; + } + + if (ubo_buffer != nil) { + + uint32_t buffer_bind_index = pipeline_state_instance->base_uniform_buffer_index + + buffer_index; + + /* Bind Vertex UBO. */ + if (bool(ubo.stage_mask & ShaderStage::VERTEX)) { + BLI_assert(buffer_bind_index >= 0 && + buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + rps.bind_vertex_buffer(ubo_buffer, ubo_offset, buffer_bind_index); + } + + /* Bind Fragment UBOs. */ + if (bool(ubo.stage_mask & ShaderStage::FRAGMENT)) { + BLI_assert(buffer_bind_index >= 0 && + buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + rps.bind_fragment_buffer(ubo_buffer, ubo_offset, buffer_bind_index); + } + } + else { + MTL_LOG_WARNING( + "[UBO] Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer " + "is NULL!\n", + shader_interface->get_name(), + shader_interface->get_name_at_offset(ubo.name_offset), + buffer_index); + } + } + } + return true; +} + +/* Ensure texture bindings are correct and up to date for current draw call. */ +void MTLContext::ensure_texture_bindings( + id<MTLRenderCommandEncoder> rec, + MTLShaderInterface *shader_interface, + const MTLRenderPipelineStateInstance *pipeline_state_instance) +{ + BLI_assert(shader_interface != nil); + BLI_assert(rec != nil); + + /* Fetch Render Pass state. */ + MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state(); + + @autoreleasepool { + int vertex_arg_buffer_bind_index = -1; + int fragment_arg_buffer_bind_index = -1; + + /* Argument buffers are used for samplers, when the limit of 16 is exceeded. */ + bool use_argument_buffer_for_samplers = shader_interface->get_use_argument_buffer_for_samplers( + &vertex_arg_buffer_bind_index, &fragment_arg_buffer_bind_index); + + /* Loop through expected textures in shader interface and resolve bindings with currently + * bound textures.. */ + for (const uint t : IndexRange(shader_interface->get_max_texture_index() + 1)) { + /* Ensure the bound texture is compatible with the shader interface. If the + * shader does not expect a texture to be bound for the current slot, we skip + * binding. + * NOTE: Global texture bindings may be left over from prior draw calls. 
*/ + const MTLShaderTexture &shader_texture_info = shader_interface->get_texture(t); + if (!shader_texture_info.used) { + /* Skip unused binding points if explicit indices are specified. */ + continue; + } + + int slot = shader_texture_info.slot_index; + if (slot >= 0 && slot < GPU_max_textures()) { + bool bind_dummy_texture = true; + if (this->pipeline_state.texture_bindings[slot].used) { + gpu::MTLTexture *bound_texture = + this->pipeline_state.texture_bindings[slot].texture_resource; + MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot]; + BLI_assert(bound_texture); + BLI_assert(bound_sampler.used); + + if (shader_texture_info.type == bound_texture->type_) { + /* Bind texture and sampler if the bound texture matches the type expected by the + * shader. */ + id<MTLTexture> tex = bound_texture->get_metal_handle(); + + if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) { + rps.bind_vertex_texture(tex, slot); + rps.bind_vertex_sampler(bound_sampler, use_argument_buffer_for_samplers, slot); + } + + if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) { + rps.bind_fragment_texture(tex, slot); + rps.bind_fragment_sampler(bound_sampler, use_argument_buffer_for_samplers, slot); + } + + /* Texture state resolved, no need to bind dummy texture */ + bind_dummy_texture = false; + } + else { + /* Texture type for bound texture (e.g. Texture2DArray) does not match what was + * expected in the shader interface. This is a problem and we will need to bind + * a dummy texture to ensure correct API usage. */ + MTL_LOG_WARNING( + "(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong " + "texture target type. (Expecting type %d, actual type %d) (binding " + "name:'%s')(texture name:'%s')\n", + shader_interface->get_name(), + bound_texture, + slot, + shader_texture_info.type, + bound_texture->type_, + shader_interface->get_name_at_offset(shader_texture_info.name_offset), + bound_texture->get_name()); + } + } + else { + MTL_LOG_WARNING( + "Shader '%s' expected texture to be bound to slot %d -- No texture was " + "bound. (name:'%s')\n", + shader_interface->get_name(), + slot, + shader_interface->get_name_at_offset(shader_texture_info.name_offset)); + } + + /* Bind Dummy texture -- will temporarily resolve validation issues while incorrect formats + * are provided -- as certain configurations may not need any binding. These issues should + * be fixed in the high-level, if problems crop up. */ + if (bind_dummy_texture) { + if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) { + rps.bind_vertex_texture( + get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot); + + /* Bind default sampler state. */ + MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE}; + rps.bind_vertex_sampler(default_binding, use_argument_buffer_for_samplers, slot); + } + if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) { + rps.bind_fragment_texture( + get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot); + + /* Bind default sampler state. */ + MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE}; + rps.bind_fragment_sampler(default_binding, use_argument_buffer_for_samplers, slot); + } + } + } + else { + MTL_LOG_WARNING( + "Shader %p expected texture to be bound to slot %d -- Slot exceeds the " + "hardware/API limit of '%d'. 
(name:'%s')\n", + this->pipeline_state.active_shader, + slot, + GPU_max_textures(), + shader_interface->get_name_at_offset(shader_texture_info.name_offset)); + } + } + + /* Construct and Bind argument buffer. + * NOTE(Metal): Samplers use an argument buffer when the limit of 16 samplers is exceeded. */ + if (use_argument_buffer_for_samplers) { +#ifndef NDEBUG + /* Debug check to validate each expected texture in the shader interface has a valid + * sampler object bound to the context. We will need all of these to be valid + * when constructing the sampler argument buffer. */ + for (const uint i : IndexRange(shader_interface->get_max_texture_index() + 1)) { + const MTLShaderTexture &texture = shader_interface->get_texture(i); + if (texture.used) { + BLI_assert(this->samplers_.mtl_sampler[i] != nil); + } + } +#endif + + /* Check to ensure the buffer binding index for the argument buffer has been assigned. + * This PSO property will be set if we expect to use argument buffers, and the shader + * uses any amount of textures. */ + BLI_assert(vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0); + if (vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0) { + /* Offset binding index to be relative to the start of static uniform buffer binding slots. + * The first N slots, prior to `pipeline_state_instance->base_uniform_buffer_index` are + * used by vertex and index buffer bindings, and the number of buffers present will vary + * between PSOs. */ + int arg_buffer_idx = (pipeline_state_instance->base_uniform_buffer_index + + vertex_arg_buffer_bind_index); + assert(arg_buffer_idx < 32); + id<MTLArgumentEncoder> argument_encoder = shader_interface->find_argument_encoder( + arg_buffer_idx); + if (argument_encoder == nil) { + argument_encoder = [pipeline_state_instance->vert + newArgumentEncoderWithBufferIndex:arg_buffer_idx]; + shader_interface->insert_argument_encoder(arg_buffer_idx, argument_encoder); + } + + /* Generate or Fetch argument buffer sampler configuration. + * NOTE(Metal): we need to base sampler counts off of the maximal texture + * index. This is not the most optimal, but in practice, not a use-case + * when argument buffers are required. + * This is because with explicit texture indices, the binding indices + * should match across draws, to allow the high-level to optimize bind-points. */ + gpu::MTLBuffer *encoder_buffer = nullptr; + this->samplers_.num_samplers = shader_interface->get_max_texture_index() + 1; + + gpu::MTLBuffer **cached_smp_buffer_search = this->cached_sampler_buffers_.lookup_ptr( + this->samplers_); + if (cached_smp_buffer_search != nullptr) { + encoder_buffer = *cached_smp_buffer_search; + } + else { + /* Populate argument buffer with current global sampler bindings. */ + int size = [argument_encoder encodedLength]; + int alignment = max_uu([argument_encoder alignment], 256); + int size_align_delta = (size % alignment); + int aligned_alloc_size = ((alignment > 1) && (size_align_delta > 0)) ? + size + (alignment - (size % alignment)) : + size; + + /* Allocate buffer to store encoded sampler arguments. 
*/ + encoder_buffer = MTLContext::get_global_memory_manager().allocate(aligned_alloc_size, + true); + BLI_assert(encoder_buffer); + BLI_assert(encoder_buffer->get_metal_buffer()); + [argument_encoder setArgumentBuffer:encoder_buffer->get_metal_buffer() offset:0]; + [argument_encoder + setSamplerStates:this->samplers_.mtl_sampler + withRange:NSMakeRange(0, shader_interface->get_max_texture_index() + 1)]; + encoder_buffer->flush(); + + /* Insert into cache. */ + this->cached_sampler_buffers_.add_new(this->samplers_, encoder_buffer); + } + + BLI_assert(encoder_buffer != nullptr); + int vert_buffer_index = (pipeline_state_instance->base_uniform_buffer_index + + vertex_arg_buffer_bind_index); + rps.bind_vertex_buffer(encoder_buffer->get_metal_buffer(), 0, vert_buffer_index); + + /* Fragment shader shares its argument buffer binding with the vertex shader, So no need to + * re-encode. We can use the same argument buffer. */ + if (fragment_arg_buffer_bind_index >= 0) { + BLI_assert(fragment_arg_buffer_bind_index); + int frag_buffer_index = (pipeline_state_instance->base_uniform_buffer_index + + fragment_arg_buffer_bind_index); + rps.bind_fragment_buffer(encoder_buffer->get_metal_buffer(), 0, frag_buffer_index); + } + } + } + } +} + +/* Encode latest depth-stencil state. */ +void MTLContext::ensure_depth_stencil_state(MTLPrimitiveType prim_type) +{ + /* Check if we need to update state. */ + if (!(this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG)) { + return; + } + + /* Fetch render command encoder. */ + id<MTLRenderCommandEncoder> rec = this->main_command_buffer.get_active_render_command_encoder(); + BLI_assert(rec); + + /* Fetch Render Pass state. */ + MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state(); + + /** Prepare Depth-stencil state based on current global pipeline state. */ + MTLFrameBuffer *fb = this->get_current_framebuffer(); + bool hasDepthTarget = fb->has_depth_attachment(); + bool hasStencilTarget = fb->has_stencil_attachment(); + + if (hasDepthTarget || hasStencilTarget) { + /* Update FrameBuffer State. */ + this->pipeline_state.depth_stencil_state.has_depth_target = hasDepthTarget; + this->pipeline_state.depth_stencil_state.has_stencil_target = hasStencilTarget; + + /* Check if current MTLContextDepthStencilState maps to an existing state object in + * the Depth-stencil state cache. */ + id<MTLDepthStencilState> ds_state = nil; + id<MTLDepthStencilState> *depth_stencil_state_lookup = + this->depth_stencil_state_cache.lookup_ptr(this->pipeline_state.depth_stencil_state); + + /* If not, populate DepthStencil state descriptor. */ + if (depth_stencil_state_lookup == nullptr) { + + MTLDepthStencilDescriptor *ds_state_desc = [[[MTLDepthStencilDescriptor alloc] init] + autorelease]; + + if (hasDepthTarget) { + ds_state_desc.depthWriteEnabled = + this->pipeline_state.depth_stencil_state.depth_write_enable; + ds_state_desc.depthCompareFunction = + this->pipeline_state.depth_stencil_state.depth_test_enabled ? 
+ this->pipeline_state.depth_stencil_state.depth_function : + MTLCompareFunctionAlways; + } + + if (hasStencilTarget) { + ds_state_desc.backFaceStencil.readMask = + this->pipeline_state.depth_stencil_state.stencil_read_mask; + ds_state_desc.backFaceStencil.writeMask = + this->pipeline_state.depth_stencil_state.stencil_write_mask; + ds_state_desc.backFaceStencil.stencilFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_stencil_fail; + ds_state_desc.backFaceStencil.depthFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_depth_fail; + ds_state_desc.backFaceStencil.depthStencilPassOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_depthstencil_pass; + ds_state_desc.backFaceStencil.stencilCompareFunction = + (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ? + this->pipeline_state.depth_stencil_state.stencil_func : + MTLCompareFunctionAlways; + + ds_state_desc.frontFaceStencil.readMask = + this->pipeline_state.depth_stencil_state.stencil_read_mask; + ds_state_desc.frontFaceStencil.writeMask = + this->pipeline_state.depth_stencil_state.stencil_write_mask; + ds_state_desc.frontFaceStencil.stencilFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_stencil_fail; + ds_state_desc.frontFaceStencil.depthFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_depth_fail; + ds_state_desc.frontFaceStencil.depthStencilPassOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_depthstencil_pass; + ds_state_desc.frontFaceStencil.stencilCompareFunction = + (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ? + this->pipeline_state.depth_stencil_state.stencil_func : + MTLCompareFunctionAlways; + } + + /* Bake new DS state. */ + ds_state = [this->device newDepthStencilStateWithDescriptor:ds_state_desc]; + + /* Store state in cache. */ + BLI_assert(ds_state != nil); + this->depth_stencil_state_cache.add_new(this->pipeline_state.depth_stencil_state, ds_state); + } + else { + ds_state = *depth_stencil_state_lookup; + BLI_assert(ds_state != nil); + } + + /* Bind Depth Stencil State to render command encoder. */ + BLI_assert(ds_state != nil); + if (ds_state != nil) { + if (rps.bound_ds_state != ds_state) { + [rec setDepthStencilState:ds_state]; + rps.bound_ds_state = ds_state; + } + } + + /* Apply dynamic depth-stencil state on encoder. */ + if (hasStencilTarget) { + uint32_t stencil_ref_value = + (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ? + this->pipeline_state.depth_stencil_state.stencil_ref : + 0; + if (stencil_ref_value != rps.last_used_stencil_ref_value) { + [rec setStencilReferenceValue:stencil_ref_value]; + rps.last_used_stencil_ref_value = stencil_ref_value; + } + } + + if (hasDepthTarget) { + bool doBias = false; + switch (prim_type) { + case MTLPrimitiveTypeTriangle: + case MTLPrimitiveTypeTriangleStrip: + doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_tris; + break; + case MTLPrimitiveTypeLine: + case MTLPrimitiveTypeLineStrip: + doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_lines; + break; + case MTLPrimitiveTypePoint: + doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_points; + break; + } + [rec setDepthBias:(doBias) ? this->pipeline_state.depth_stencil_state.depth_bias : 0 + slopeScale:(doBias) ? 
this->pipeline_state.depth_stencil_state.depth_slope_scale : 0 + clamp:0]; + } + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name Visibility buffer control for MTLQueryPool. * \{ */ @@ -487,52 +1573,46 @@ id<MTLSamplerState> MTLContext::get_sampler_from_state(MTLSamplerState sampler_s id<MTLSamplerState> MTLContext::generate_sampler_from_state(MTLSamplerState sampler_state) { /* Check if sampler already exists for given state. */ - id<MTLSamplerState> st = sampler_state_cache_[(uint)sampler_state]; - if (st != nil) { - return st; - } - else { - MTLSamplerDescriptor *descriptor = [[MTLSamplerDescriptor alloc] init]; - descriptor.normalizedCoordinates = true; - - MTLSamplerAddressMode clamp_type = (sampler_state.state & GPU_SAMPLER_CLAMP_BORDER) ? - MTLSamplerAddressModeClampToBorderColor : - MTLSamplerAddressModeClampToEdge; - descriptor.rAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_R) ? - MTLSamplerAddressModeRepeat : - clamp_type; - descriptor.sAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_S) ? - MTLSamplerAddressModeRepeat : - clamp_type; - descriptor.tAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_T) ? - MTLSamplerAddressModeRepeat : - clamp_type; - descriptor.borderColor = MTLSamplerBorderColorTransparentBlack; - descriptor.minFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ? - MTLSamplerMinMagFilterLinear : - MTLSamplerMinMagFilterNearest; - descriptor.magFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ? - MTLSamplerMinMagFilterLinear : - MTLSamplerMinMagFilterNearest; - descriptor.mipFilter = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? - MTLSamplerMipFilterLinear : - MTLSamplerMipFilterNotMipmapped; - descriptor.lodMinClamp = -1000; - descriptor.lodMaxClamp = 1000; - float aniso_filter = max_ff(16, U.anisotropic_filter); - descriptor.maxAnisotropy = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? aniso_filter : 1; - descriptor.compareFunction = (sampler_state.state & GPU_SAMPLER_COMPARE) ? - MTLCompareFunctionLessEqual : - MTLCompareFunctionAlways; - descriptor.supportArgumentBuffers = true; - - id<MTLSamplerState> state = [this->device newSamplerStateWithDescriptor:descriptor]; - sampler_state_cache_[(uint)sampler_state] = state; - - BLI_assert(state != nil); - [descriptor autorelease]; - return state; - } + MTLSamplerDescriptor *descriptor = [[MTLSamplerDescriptor alloc] init]; + descriptor.normalizedCoordinates = true; + + MTLSamplerAddressMode clamp_type = (sampler_state.state & GPU_SAMPLER_CLAMP_BORDER) ? + MTLSamplerAddressModeClampToBorderColor : + MTLSamplerAddressModeClampToEdge; + descriptor.rAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_R) ? + MTLSamplerAddressModeRepeat : + clamp_type; + descriptor.sAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_S) ? + MTLSamplerAddressModeRepeat : + clamp_type; + descriptor.tAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_T) ? + MTLSamplerAddressModeRepeat : + clamp_type; + descriptor.borderColor = MTLSamplerBorderColorTransparentBlack; + descriptor.minFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ? + MTLSamplerMinMagFilterLinear : + MTLSamplerMinMagFilterNearest; + descriptor.magFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ? + MTLSamplerMinMagFilterLinear : + MTLSamplerMinMagFilterNearest; + descriptor.mipFilter = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? 
+ MTLSamplerMipFilterLinear : + MTLSamplerMipFilterNotMipmapped; + descriptor.lodMinClamp = -1000; + descriptor.lodMaxClamp = 1000; + float aniso_filter = max_ff(16, U.anisotropic_filter); + descriptor.maxAnisotropy = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? aniso_filter : 1; + descriptor.compareFunction = (sampler_state.state & GPU_SAMPLER_COMPARE) ? + MTLCompareFunctionLessEqual : + MTLCompareFunctionAlways; + descriptor.supportArgumentBuffers = true; + + id<MTLSamplerState> state = [this->device newSamplerStateWithDescriptor:descriptor]; + sampler_state_cache_[(uint)sampler_state] = state; + + BLI_assert(state != nil); + [descriptor autorelease]; + return state; } id<MTLSamplerState> MTLContext::get_default_sampler_state() @@ -545,4 +1625,148 @@ id<MTLSamplerState> MTLContext::get_default_sampler_state() /** \} */ +/* -------------------------------------------------------------------- */ +/** \name Swap-chain management and Metal presentation. + * \{ */ + +void present(MTLRenderPassDescriptor *blit_descriptor, + id<MTLRenderPipelineState> blit_pso, + id<MTLTexture> swapchain_texture, + id<CAMetalDrawable> drawable) +{ + + MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(ctx); + + /* Flush any outstanding work. */ + ctx->flush(); + + /* Always pace CPU to maximum of 3 drawables in flight. + * nextDrawable may have more in flight if backing swapchain + * textures are re-allocated, such as during resize events. + * + * Determine frames in flight based on current latency. If + * we are in a high-latency situation, limit frames in flight + * to increase app responsiveness and keep GPU execution under control. + * If latency improves, increase frames in flight to improve overall + * performance. */ + int perf_max_drawables = MTL_MAX_DRAWABLES; + if (MTLContext::avg_drawable_latency_us > 185000) { + perf_max_drawables = 1; + } + else if (MTLContext::avg_drawable_latency_us > 85000) { + perf_max_drawables = 2; + } + + while (MTLContext::max_drawables_in_flight > min_ii(perf_max_drawables, MTL_MAX_DRAWABLES)) { + PIL_sleep_ms(2); + } + + /* Present is submitted in its own CMD Buffer to ensure drawable reference released as early as + * possible. This command buffer is separate as it does not utilize the global state + * for rendering as the main context does. */ + id<MTLCommandBuffer> cmdbuf = [ctx->queue commandBuffer]; + MTLCommandBufferManager::num_active_cmd_bufs++; + + if (MTLCommandBufferManager::sync_event != nil) { + /* Ensure command buffer ordering. */ + [cmdbuf encodeWaitForEvent:MTLCommandBufferManager::sync_event + value:MTLCommandBufferManager::event_signal_val]; + } + + /* Do Present Call and final Blit to MTLDrawable. */ + id<MTLRenderCommandEncoder> enc = [cmdbuf renderCommandEncoderWithDescriptor:blit_descriptor]; + [enc setRenderPipelineState:blit_pso]; + [enc setFragmentTexture:swapchain_texture atIndex:0]; + [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3]; + [enc endEncoding]; + + /* Present drawable. */ + BLI_assert(drawable); + [cmdbuf presentDrawable:drawable]; + + /* Ensure freed buffers have usage tracked against active CommandBuffer submissions. */ + MTLSafeFreeList *cmd_free_buffer_list = + MTLContext::get_global_memory_manager().get_current_safe_list(); + BLI_assert(cmd_free_buffer_list); + + id<MTLCommandBuffer> cmd_buffer_ref = cmdbuf; + [cmd_buffer_ref retain]; + + /* Increment drawables in flight limiter.
*/ + MTLContext::max_drawables_in_flight++; + std::chrono::time_point submission_time = std::chrono::high_resolution_clock::now(); + + /* Increment free pool reference and decrement upon command buffer completion. */ + cmd_free_buffer_list->increment_reference(); + [cmdbuf addCompletedHandler:^(id<MTLCommandBuffer> cb) { + /* Flag freed buffers associated with this CMD buffer as ready to be freed. */ + cmd_free_buffer_list->decrement_reference(); + [cmd_buffer_ref release]; + + /* Decrement count */ + MTLCommandBufferManager::num_active_cmd_bufs--; + MTL_LOG_INFO("[Metal] Active command buffers: %d\n", + MTLCommandBufferManager::num_active_cmd_bufs); + + /* Drawable count and latency management. */ + MTLContext::max_drawables_in_flight--; + std::chrono::time_point completion_time = std::chrono::high_resolution_clock::now(); + int64_t microseconds_per_frame = std::chrono::duration_cast<std::chrono::microseconds>( + completion_time - submission_time) + .count(); + MTLContext::latency_resolve_average(microseconds_per_frame); + + MTL_LOG_INFO("Frame Latency: %f ms (Rolling avg: %f ms Drawables: %d)\n", + ((float)microseconds_per_frame) / 1000.0f, + ((float)MTLContext::avg_drawable_latency_us) / 1000.0f, + perf_max_drawables); + }]; + + if (MTLCommandBufferManager::sync_event == nil) { + MTLCommandBufferManager::sync_event = [ctx->device newEvent]; + BLI_assert(MTLCommandBufferManager::sync_event); + [MTLCommandBufferManager::sync_event retain]; + } + BLI_assert(MTLCommandBufferManager::sync_event != nil); + + MTLCommandBufferManager::event_signal_val++; + [cmdbuf encodeSignalEvent:MTLCommandBufferManager::sync_event + value:MTLCommandBufferManager::event_signal_val]; + + [cmdbuf commit]; + + /* When debugging, fetch advanced command buffer errors. */ + if (G.debug & G_DEBUG_GPU) { + [cmdbuf waitUntilCompleted]; + NSError *error = [cmdbuf error]; + if (error != nil) { + NSLog(@"%@", error); + BLI_assert(false); + + @autoreleasepool { + const char *stringAsChar = [[NSString stringWithFormat:@"%@", error] UTF8String]; + + std::ofstream outfile; + outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app); + outfile << stringAsChar; + outfile.close(); + } + } + else { + @autoreleasepool { + NSString *str = @"Command buffer completed successfully!\n"; + const char *stringAsChar = [str UTF8String]; + + std::ofstream outfile; + outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app); + outfile << stringAsChar; + outfile.close(); + } + } + } +} + +/** \} */ + } // blender::gpu
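
NOTE: In ensure_uniform_buffer_bindings() above, the 4096-byte zero-filled buffer from get_null_buffer() is substituted whenever a UBO slot has nothing valid bound, or the bound buffer is smaller than the size reported by shader reflection; binding an undersized buffer would otherwise trip Metal's validation layer at draw time. The decision logic in miniature -- needs_dummy_ubo is a hypothetical standalone helper for illustration, not part of this patch:

#include <cstdint>

/* Returns true when the zero-filled fallback buffer should replace the UBO binding. */
static bool needs_dummy_ubo(bool slot_bound,
                            const void *bound_buffer,
                            uint32_t bound_size,
                            uint32_t expected_size)
{
  if (!slot_bound || bound_buffer == nullptr) {
    /* Nothing (valid) is bound at the slot the shader interface expects. */
    return true;
  }
  /* An undersized buffer would produce an out-of-bounds access at draw time. */
  return bound_size < expected_size;
}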
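NOTE: The sampler argument-buffer allocation in ensure_texture_bindings() rounds the encoder's encodedLength up to its required alignment (clamped to a minimum of 256 bytes). The ternary used there is the usual align-up idiom; a self-contained restatement with compile-time checks:

constexpr int align_up(int size, int alignment)
{
  return ((alignment > 1) && (size % alignment) > 0) ? size + (alignment - (size % alignment)) :
                                                       size;
}

static_assert(align_up(72, 256) == 256, "a partial block rounds up to one alignment unit");
static_assert(align_up(512, 256) == 512, "already-aligned sizes are returned unchanged");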
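NOTE: The drawable pacing in present() is a back-pressure scheme: an atomic in-flight counter is incremented at submission, decremented in the command buffer's completion handler, and the submitting thread sleeps while the count exceeds the latency-derived budget. The same pattern in a minimal standalone form, with a detached thread standing in for the GPU completion callback (illustrative only, not backend code):

#include <atomic>
#include <chrono>
#include <thread>

static std::atomic<int> drawables_in_flight = 0;

static void submit_frame(int perf_max_drawables)
{
  /* Pace the CPU: stall while too many frames are still executing on the GPU. */
  while (drawables_in_flight >= perf_max_drawables) {
    std::this_thread::sleep_for(std::chrono::milliseconds(2)); /* Mirrors PIL_sleep_ms(2). */
  }

  drawables_in_flight++;
  /* Encoding and commit would happen here; the completion handler later runs: */
  std::thread([] {
    std::this_thread::sleep_for(std::chrono::milliseconds(16)); /* Stand-in for GPU work. */
    drawables_in_flight--;
  }).detach();
}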
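NOTE: MTLContext::latency_resolve_average() is called from the completion handler above, but its definition falls outside this file's hunks. A minimal sketch of a rolling mean consistent with the frame_latency[MTL_FRAME_AVERAGE_COUNT] ring and the avg_drawable_latency_us atomic declared at the top of the file -- the window size and free-function form here are assumptions, not the committed implementation:

#include <atomic>
#include <cstdint>

static constexpr int MTL_FRAME_AVERAGE_COUNT = 5; /* Assumed; the real constant lives elsewhere. */

static int64_t frame_latency[MTL_FRAME_AVERAGE_COUNT] = {0};
static std::atomic<int64_t> avg_drawable_latency_us = 0;

/* Record the latest frame time and refresh the rolling average that present()
 * compares against its 85 ms / 185 ms pacing thresholds. */
static void latency_resolve_average(int64_t frame_latency_us)
{
  static int cursor = 0;
  frame_latency[cursor] = frame_latency_us;
  cursor = (cursor + 1) % MTL_FRAME_AVERAGE_COUNT;

  int64_t sum = 0;
  for (int64_t sample : frame_latency) {
    sum += sample;
  }
  avg_drawable_latency_us = sum / MTL_FRAME_AVERAGE_COUNT;
}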