45 files changed, 3215 insertions, 561 deletions
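One API change threads through many of the files below: GPU_context_create() now takes the GHOST context handle in addition to the GHOST window. A minimal sketch of the two call patterns that appear in this patch (the variable names are illustrative only, not from the patch):

    /* Window-backed context: pass the GHOST window, no explicit GL context. */
    GPUContext *ctx_win = GPU_context_create(ghost_window, NULL);

    /* Off-screen context: no window, wrap an already-created GL context. */
    GPUContext *ctx_off = GPU_context_create(NULL, ghost_gl_context);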
diff --git a/intern/ghost/intern/GHOST_Context.h b/intern/ghost/intern/GHOST_Context.h index 3546fb6bbc7..04d445e7f85 100644 --- a/intern/ghost/intern/GHOST_Context.h +++ b/intern/ghost/intern/GHOST_Context.h @@ -36,19 +36,19 @@ class GHOST_Context : public GHOST_IContext { * Swaps front and back buffers of a window. * \return A boolean success indicator. */ - virtual GHOST_TSuccess swapBuffers() = 0; + virtual GHOST_TSuccess swapBuffers() override = 0; /** * Activates the drawing context of this window. * \return A boolean success indicator. */ - virtual GHOST_TSuccess activateDrawingContext() = 0; + virtual GHOST_TSuccess activateDrawingContext() override = 0; /** * Release the drawing context of the calling thread. * \return A boolean success indicator. */ - virtual GHOST_TSuccess releaseDrawingContext() = 0; + virtual GHOST_TSuccess releaseDrawingContext() override = 0; /** * Call immediately after new to initialize. If this fails then immediately delete the object. @@ -130,7 +130,7 @@ class GHOST_Context : public GHOST_IContext { * Gets the OpenGL frame-buffer associated with the OpenGL context * \return The ID of an OpenGL frame-buffer object. */ - virtual unsigned int getDefaultFramebuffer() + virtual unsigned int getDefaultFramebuffer() override { return 0; } diff --git a/intern/ghost/intern/GHOST_ContextCGL.h b/intern/ghost/intern/GHOST_ContextCGL.h index fa6d6fc6fa0..5caabb8ce00 100644 --- a/intern/ghost/intern/GHOST_ContextCGL.h +++ b/intern/ghost/intern/GHOST_ContextCGL.h @@ -9,8 +9,13 @@ #include "GHOST_Context.h" +#include <Cocoa/Cocoa.h> +#include <Metal/Metal.h> +#include <QuartzCore/QuartzCore.h> + @class CAMetalLayer; @class MTLCommandQueue; +@class MTLDevice; @class MTLRenderPipelineState; @class MTLTexture; @class NSOpenGLContext; @@ -36,62 +41,89 @@ class GHOST_ContextCGL : public GHOST_Context { * Swaps front and back buffers of a window. * \return A boolean success indicator. */ - GHOST_TSuccess swapBuffers(); + GHOST_TSuccess swapBuffers() override; /** * Activates the drawing context of this window. * \return A boolean success indicator. */ - GHOST_TSuccess activateDrawingContext(); + GHOST_TSuccess activateDrawingContext() override; /** * Release the drawing context of the calling thread. * \return A boolean success indicator. */ - GHOST_TSuccess releaseDrawingContext(); + GHOST_TSuccess releaseDrawingContext() override; - unsigned int getDefaultFramebuffer(); + unsigned int getDefaultFramebuffer() override; /** * Call immediately after new to initialize. If this fails then immediately delete the object. * \return Indication as to whether initialization has succeeded. */ - GHOST_TSuccess initializeDrawingContext(); + GHOST_TSuccess initializeDrawingContext() override; /** * Removes references to native handles from this context and then returns * \return GHOST_kSuccess if it is OK for the parent to release the handles and * GHOST_kFailure if releasing the handles will interfere with sharing */ - GHOST_TSuccess releaseNativeHandles(); + GHOST_TSuccess releaseNativeHandles() override; /** * Sets the swap interval for #swapBuffers. * \param interval: The swap interval to use. * \return A boolean success indicator. */ - GHOST_TSuccess setSwapInterval(int interval); + GHOST_TSuccess setSwapInterval(int interval) override; /** * Gets the current swap interval for #swapBuffers. * \param intervalOut: Variable to store the swap interval if it can be read. * \return Whether the swap interval can be read. 
*/ - GHOST_TSuccess getSwapInterval(int &); + GHOST_TSuccess getSwapInterval(int &) override; /** * Updates the drawing context of this window. * Needed whenever the window is changed. * \return Indication of success. */ - GHOST_TSuccess updateDrawingContext(); + GHOST_TSuccess updateDrawingContext() override; + + /** + * Returns a texture that Metal code can use as a render target. The current + * contents of this texture will be composited on top of the framebuffer + * each time `swapBuffers` is called. + */ + id<MTLTexture> metalOverlayTexture(); + + /** + * Return a pointer to the Metal command queue used by this context. + */ + MTLCommandQueue *metalCommandQueue(); + + /** + * Return a pointer to the Metal device associated with this context. + */ + MTLDevice *metalDevice(); + + /** + * Register present callback + */ + void metalRegisterPresentCallback(void (*callback)( + MTLRenderPassDescriptor *, id<MTLRenderPipelineState>, id<MTLTexture>, id<CAMetalDrawable>)); private: /** Metal state */ + /* Set this flag to `true` when rendering with Metal API for Viewport. + * TODO(Metal): This should be assigned to externally. */ + bool m_useMetalForRendering = false; NSView *m_metalView; CAMetalLayer *m_metalLayer; MTLCommandQueue *m_metalCmdQueue; MTLRenderPipelineState *m_metalRenderPipeline; + bool m_ownsMetalDevice; /** OpenGL state, for GPUs that don't support Metal */ NSOpenGLView *m_openGLView; @@ -102,9 +134,31 @@ class GHOST_ContextCGL : public GHOST_Context { /** The virtualized default frame-buffer. */ unsigned int m_defaultFramebuffer; - /** The virtualized default frame-buffer's texture. */ - MTLTexture *m_defaultFramebufferMetalTexture; - + /** The virtualized default framebuffer's texture */ + /** + * Texture that you can render into with Metal. The texture will be + * composited on top of `m_defaultFramebufferMetalTexture` whenever + * `swapBuffers` is called. + */ + static const int METAL_SWAPCHAIN_SIZE = 3; + struct MTLSwapchainTexture { + id<MTLTexture> texture; + unsigned int index; + }; + MTLSwapchainTexture m_defaultFramebufferMetalTexture[METAL_SWAPCHAIN_SIZE]; + unsigned int current_swapchain_index = 0; + + /* Present callback. + * We use this such that presentation can be controlled from within the Metal + * Context. This is required for optimal performance and clean control flow. + * Also helps ensure flickering does not occur by present being dependent + * on existing submissions. */ + void (*contextPresentCallback)(MTLRenderPassDescriptor *, + id<MTLRenderPipelineState>, + id<MTLTexture>, + id<CAMetalDrawable>); + + int mtl_SwapInterval; const bool m_debug; /** The first created OpenGL context (for sharing display lists) */ @@ -117,4 +171,5 @@ class GHOST_ContextCGL : public GHOST_Context { void metalInitFramebuffer(); void metalUpdateFramebuffer(); void metalSwapBuffers(); + void initClear(); }; diff --git a/intern/ghost/intern/GHOST_ContextCGL.mm b/intern/ghost/intern/GHOST_ContextCGL.mm index 488aa58aa59..6a0fed79fb0 100644 --- a/intern/ghost/intern/GHOST_ContextCGL.mm +++ b/intern/ghost/intern/GHOST_ContextCGL.mm @@ -55,139 +55,277 @@ GHOST_ContextCGL::GHOST_ContextCGL(bool stereoVisual, m_openGLView(openGLView), m_openGLContext(nil), m_defaultFramebuffer(0), - m_defaultFramebufferMetalTexture(nil), m_debug(false) { + /* Init Metal Swapchain. 
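 * A ring of METAL_SWAPCHAIN_SIZE (3) overlay textures is initialized here so
 * that a fresh texture can be handed out each frame while earlier ones may
 * still be in flight on the GPU (presumably conventional triple buffering).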
*/ + current_swapchain_index = 0; + for (int i = 0; i < METAL_SWAPCHAIN_SIZE; i++) { + m_defaultFramebufferMetalTexture[i].texture = nil; + m_defaultFramebufferMetalTexture[i].index = i; + } if (m_metalView) { + m_ownsMetalDevice = false; metalInit(); } + else { + /* Prepare offscreen GHOST Context Metal device. */ + id<MTLDevice> metalDevice = MTLCreateSystemDefaultDevice(); + + if (m_debug) { + printf("Selected Metal Device: %s\n", [metalDevice.name UTF8String]); + } + + m_ownsMetalDevice = true; + if (metalDevice) { + m_metalLayer = [[CAMetalLayer alloc] init]; + [m_metalLayer setEdgeAntialiasingMask:0]; + [m_metalLayer setMasksToBounds:NO]; + [m_metalLayer setOpaque:YES]; + [m_metalLayer setFramebufferOnly:YES]; + [m_metalLayer setPresentsWithTransaction:NO]; + [m_metalLayer removeAllAnimations]; + [m_metalLayer setDevice:metalDevice]; + m_metalLayer.allowsNextDrawableTimeout = NO; + metalInit(); + } + else { + ghost_fatal_error_dialog( + "[ERROR] Failed to create Metal device for offscreen GHOST Context.\n"); + } + } + + /* Initialise swapinterval. */ + mtl_SwapInterval = 60; } GHOST_ContextCGL::~GHOST_ContextCGL() { metalFree(); - if (m_openGLContext != nil) { - if (m_openGLContext == [NSOpenGLContext currentContext]) { - [NSOpenGLContext clearCurrentContext]; + if (!m_useMetalForRendering) { +#if WITH_OPENGL + if (m_openGLContext != nil) { + if (m_openGLContext == [NSOpenGLContext currentContext]) { + [NSOpenGLContext clearCurrentContext]; - if (m_openGLView) { - [m_openGLView clearGLContext]; + if (m_openGLView) { + [m_openGLView clearGLContext]; + } } - } - if (m_openGLContext != s_sharedOpenGLContext || s_sharedCount == 1) { - assert(s_sharedCount > 0); + if (m_openGLContext != s_sharedOpenGLContext || s_sharedCount == 1) { + assert(s_sharedCount > 0); - s_sharedCount--; + s_sharedCount--; - if (s_sharedCount == 0) - s_sharedOpenGLContext = nil; + if (s_sharedCount == 0) + s_sharedOpenGLContext = nil; - [m_openGLContext release]; + [m_openGLContext release]; + } + } +#endif + } + + if (m_ownsMetalDevice) { + if (m_metalLayer) { + [m_metalLayer release]; + m_metalLayer = nil; } } } GHOST_TSuccess GHOST_ContextCGL::swapBuffers() { - if (m_openGLContext != nil) { - if (m_metalView) { - metalSwapBuffers(); + GHOST_TSuccess return_value = GHOST_kFailure; + + if (!m_useMetalForRendering) { +#if WITH_OPENGL + if (m_openGLContext != nil) { + if (m_metalView) { + metalSwapBuffers(); + } + else if (m_openGLView) { + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + [m_openGLContext flushBuffer]; + [pool drain]; + } + return_value = GHOST_kSuccess; } - else if (m_openGLView) { - NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; - [m_openGLContext flushBuffer]; - [pool drain]; + else { + return_value = GHOST_kFailure; } - return GHOST_kSuccess; +#endif } else { - return GHOST_kFailure; + if (m_metalView) { + metalSwapBuffers(); + } + return_value = GHOST_kSuccess; } + return return_value; } GHOST_TSuccess GHOST_ContextCGL::setSwapInterval(int interval) { - if (m_openGLContext != nil) { - NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; - [m_openGLContext setValues:&interval forParameter:NSOpenGLCPSwapInterval]; - [pool drain]; - return GHOST_kSuccess; + + if (!m_useMetalForRendering) { +#if WITH_OPENGL + if (m_openGLContext != nil) { + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + [m_openGLContext setValues:&interval forParameter:NSOpenGLCPSwapInterval]; + [pool drain]; + return GHOST_kSuccess; + } + else { + return GHOST_kFailure; + } +#endif 
} else { - return GHOST_kFailure; + mtl_SwapInterval = interval; + return GHOST_kSuccess; } } GHOST_TSuccess GHOST_ContextCGL::getSwapInterval(int &intervalOut) { - if (m_openGLContext != nil) { - GLint interval; - NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + if (!m_useMetalForRendering) { +#if WITH_OPENGL + if (m_openGLContext != nil) { + GLint interval; - [m_openGLContext getValues:&interval forParameter:NSOpenGLCPSwapInterval]; + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; - [pool drain]; + [m_openGLContext getValues:&interval forParameter:NSOpenGLCPSwapInterval]; - intervalOut = static_cast<int>(interval); + [pool drain]; - return GHOST_kSuccess; + intervalOut = static_cast<int>(interval); + + return GHOST_kSuccess; + } + else { + return GHOST_kFailure; + } +#endif } else { - return GHOST_kFailure; + intervalOut = mtl_SwapInterval; + return GHOST_kSuccess; } } GHOST_TSuccess GHOST_ContextCGL::activateDrawingContext() { - if (m_openGLContext != nil) { - NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; - [m_openGLContext makeCurrentContext]; - [pool drain]; - return GHOST_kSuccess; + + if (!m_useMetalForRendering) { +#if WITH_OPENGL + if (m_openGLContext != nil) { + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + [m_openGLContext makeCurrentContext]; + [pool drain]; + return GHOST_kSuccess; + } + else { + return GHOST_kFailure; + } +#endif } else { - return GHOST_kFailure; + return GHOST_kSuccess; } } GHOST_TSuccess GHOST_ContextCGL::releaseDrawingContext() { - if (m_openGLContext != nil) { - NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; - [NSOpenGLContext clearCurrentContext]; - [pool drain]; - return GHOST_kSuccess; + + if (!m_useMetalForRendering) { +#if WITH_OPENGL + if (m_openGLContext != nil) { + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + [NSOpenGLContext clearCurrentContext]; + [pool drain]; + return GHOST_kSuccess; + } + else { + return GHOST_kFailure; + } +#endif } else { - return GHOST_kFailure; + return GHOST_kSuccess; } } unsigned int GHOST_ContextCGL::getDefaultFramebuffer() { - return m_defaultFramebuffer; + + if (!m_useMetalForRendering) { + return m_defaultFramebuffer; + } + /* NOTE(Metal): This is not valid. */ + return 0; } GHOST_TSuccess GHOST_ContextCGL::updateDrawingContext() { - if (m_openGLContext != nil) { - if (m_metalView) { - metalUpdateFramebuffer(); + + if (!m_useMetalForRendering) { +#if WITH_OPENGL + if (m_openGLContext != nil) { + if (m_metalView) { + metalUpdateFramebuffer(); + } + else if (m_openGLView) { + @autoreleasepool { + [m_openGLContext update]; + } + } + + return GHOST_kSuccess; } - else if (m_openGLView) { - NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; - [m_openGLContext update]; - [pool drain]; + else { + return GHOST_kFailure; } - - return GHOST_kSuccess; +#endif } else { - return GHOST_kFailure; + if (m_metalView) { + metalUpdateFramebuffer(); + return GHOST_kSuccess; + } } + return GHOST_kFailure; +} + +id<MTLTexture> GHOST_ContextCGL::metalOverlayTexture() +{ + /* Increment Swapchain - Only needed if context is requesting a new texture */ + current_swapchain_index = (current_swapchain_index + 1) % METAL_SWAPCHAIN_SIZE; + + /* Ensure backing texture is ready for current swapchain index */ + updateDrawingContext(); + + /* Return texture. 
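 * Each call advances the swapchain ring before returning, so callers
 * presumably fetch the overlay texture once per frame; the texture returned
 * here is the one the next metalSwapBuffers() will composite.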
*/ + return m_defaultFramebufferMetalTexture[current_swapchain_index].texture; +} + +MTLCommandQueue *GHOST_ContextCGL::metalCommandQueue() +{ + return m_metalCmdQueue; +} +MTLDevice *GHOST_ContextCGL::metalDevice() +{ + id<MTLDevice> device = m_metalLayer.device; + return (MTLDevice *)device; +} + +void GHOST_ContextCGL::metalRegisterPresentCallback(void (*callback)( + MTLRenderPassDescriptor *, id<MTLRenderPipelineState>, id<MTLTexture>, id<CAMetalDrawable>)) +{ + this->contextPresentCallback = callback; } static void makeAttribList(std::vector<NSOpenGLPixelFormatAttribute> &attribs, @@ -241,120 +379,134 @@ GHOST_TSuccess GHOST_ContextCGL::initializeDrawingContext() #endif /* Command-line argument would be better. */ - static bool softwareGL = getenv("BLENDER_SOFTWAREGL"); - - NSOpenGLPixelFormat *pixelFormat = nil; - std::vector<NSOpenGLPixelFormatAttribute> attribs; - bool increasedSamplerLimit = false; - - /* Attempt to initialize device with increased sampler limit. - * If this is unsupported and initialization fails, initialize GL Context as normal. - * - * NOTE: This is not available when using the SoftwareGL path, or for Intel-based - * platforms. */ - if (!softwareGL) { - if (@available(macos 11.0, *)) { - increasedSamplerLimit = true; + if (!m_useMetalForRendering) { +#if WITH_OPENGL + /* Command-line argument would be better. */ + static bool softwareGL = getenv("BLENDER_SOFTWAREGL"); + + NSOpenGLPixelFormat *pixelFormat = nil; + std::vector<NSOpenGLPixelFormatAttribute> attribs; + bool increasedSamplerLimit = false; + + /* Attempt to initialize device with increased sampler limit. + * If this is unsupported and initialization fails, initialize GL Context as normal. + * + * NOTE: This is not available when using the SoftwareGL path, or for Intel-based + * platforms. */ + if (!softwareGL) { + if (@available(macos 11.0, *)) { + increasedSamplerLimit = true; + } } - } - const int max_ctx_attempts = increasedSamplerLimit ? 2 : 1; - for (int ctx_create_attempt = 0; ctx_create_attempt < max_ctx_attempts; ctx_create_attempt++) { - - attribs.clear(); - attribs.reserve(40); - makeAttribList(attribs, m_stereoVisual, needAlpha, softwareGL, increasedSamplerLimit); + const int max_ctx_attempts = increasedSamplerLimit ? 2 : 1; + for (int ctx_create_attempt = 0; ctx_create_attempt < max_ctx_attempts; + ctx_create_attempt++) { + + attribs.clear(); + attribs.reserve(40); + makeAttribList(attribs, m_stereoVisual, needAlpha, softwareGL, increasedSamplerLimit); + + pixelFormat = [[NSOpenGLPixelFormat alloc] initWithAttributes:&attribs[0]]; + if (pixelFormat == nil) { + /* If pixel format creation fails when testing increased sampler limit, + * attempt initialization again with feature disabled, otherwise, fail. */ + if (increasedSamplerLimit) { + increasedSamplerLimit = false; + continue; + } + return GHOST_kFailure; + } - pixelFormat = [[NSOpenGLPixelFormat alloc] initWithAttributes:&attribs[0]]; - if (pixelFormat == nil) { - /* If pixel format creation fails when testing increased sampler limit, - * attempt initialization again with feature disabled, otherwise, fail. */ - if (increasedSamplerLimit) { - increasedSamplerLimit = false; - continue; + /* Attempt to create context. */ + m_openGLContext = [[NSOpenGLContext alloc] initWithFormat:pixelFormat + shareContext:s_sharedOpenGLContext]; + [pixelFormat release]; + + if (m_openGLContext == nil) { + /* If context creation fails when testing increased sampler limit, + * attempt re-creation with feature disabled. Otherwise, error. 
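 * (The enclosing loop makes at most two attempts: one with the raised
 * sampler limit and, if that fails, one more with the default limits.)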
*/ + if (increasedSamplerLimit) { + increasedSamplerLimit = false; + continue; + } + + /* Default context creation attempt failed. */ + return GHOST_kFailure; } - return GHOST_kFailure; - } - /* Attempt to create context. */ - m_openGLContext = [[NSOpenGLContext alloc] initWithFormat:pixelFormat - shareContext:s_sharedOpenGLContext]; - [pixelFormat release]; + /* Created GL context successfully, activate. */ + [m_openGLContext makeCurrentContext]; - if (m_openGLContext == nil) { - /* If context creation fails when testing increased sampler limit, - * attempt re-creation with feature disabled. Otherwise, error. */ + /* When increasing sampler limit, verify created context is a supported configuration. */ if (increasedSamplerLimit) { - increasedSamplerLimit = false; - continue; + const char *vendor = (const char *)glGetString(GL_VENDOR); + const char *renderer = (const char *)glGetString(GL_RENDERER); + + /* If generated context type is unsupported, release existing context and + * fallback to creating a normal context below. */ + if (strstr(vendor, "Intel") || strstr(renderer, "Software")) { + [m_openGLContext release]; + m_openGLContext = nil; + increasedSamplerLimit = false; + continue; + } } - - /* Default context creation attempt failed. */ - return GHOST_kFailure; } - /* Created GL context successfully, activate. */ - [m_openGLContext makeCurrentContext]; + if (m_debug) { + GLint major = 0, minor = 0; + glGetIntegerv(GL_MAJOR_VERSION, &major); + glGetIntegerv(GL_MINOR_VERSION, &minor); + fprintf(stderr, "OpenGL version %d.%d%s\n", major, minor, softwareGL ? " (software)" : ""); + fprintf(stderr, "Renderer: %s\n", glGetString(GL_RENDERER)); + } - /* When increasing sampler limit, verify created context is a supported configuration. */ - if (increasedSamplerLimit) { - const char *vendor = (const char *)glGetString(GL_VENDOR); - const char *renderer = (const char *)glGetString(GL_RENDERER); - - /* If generated context type is unsupported, release existing context and - * fallback to creating a normal context below. */ - if (strstr(vendor, "Intel") || strstr(renderer, "Software")) { - [m_openGLContext release]; - m_openGLContext = nil; - increasedSamplerLimit = false; - continue; +# ifdef GHOST_WAIT_FOR_VSYNC + { + GLint swapInt = 1; + /* Wait for vertical-sync, to avoid tearing artifacts. */ + [m_openGLContext setValues:&swapInt forParameter:NSOpenGLCPSwapInterval]; + } +# endif + + if (m_metalView) { + if (m_defaultFramebuffer == 0) { + /* Create a virtual frame-buffer. */ + [m_openGLContext makeCurrentContext]; + metalInitFramebuffer(); + initClearGL(); } } - } + else if (m_openGLView) { + [m_openGLView setOpenGLContext:m_openGLContext]; + [m_openGLContext setView:m_openGLView]; + initClearGL(); + } - if (m_debug) { - GLint major = 0, minor = 0; - glGetIntegerv(GL_MAJOR_VERSION, &major); - glGetIntegerv(GL_MINOR_VERSION, &minor); - fprintf(stderr, "OpenGL version %d.%d%s\n", major, minor, softwareGL ? " (software)" : ""); - fprintf(stderr, "Renderer: %s\n", glGetString(GL_RENDERER)); - } + [m_openGLContext flushBuffer]; -#ifdef GHOST_WAIT_FOR_VSYNC - { - GLint swapInt = 1; - /* Wait for vertical-sync, to avoid tearing artifacts. */ - [m_openGLContext setValues:&swapInt forParameter:NSOpenGLCPSwapInterval]; - } -#endif + if (s_sharedCount == 0) + s_sharedOpenGLContext = m_openGLContext; - if (m_metalView) { - if (m_defaultFramebuffer == 0) { - /* Create a virtual frame-buffer. 
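 * The "virtual" default frame-buffer is a GL FBO whose color attachment is
 * the CoreVideo-shared texture created in metalUpdateFramebuffer(), so GL
 * draws into memory that the Metal blit pass can sample directly.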
*/ - [m_openGLContext makeCurrentContext]; + s_sharedCount++; +#endif + } + else { + /* NOTE(Metal): Metal-only path. */ + if (m_metalView) { metalInitFramebuffer(); - initClearGL(); } } - else if (m_openGLView) { - [m_openGLView setOpenGLContext:m_openGLContext]; - [m_openGLContext setView:m_openGLView]; - initClearGL(); - } - - [m_openGLContext flushBuffer]; - - if (s_sharedCount == 0) - s_sharedOpenGLContext = m_openGLContext; - - s_sharedCount++; } return GHOST_kSuccess; } GHOST_TSuccess GHOST_ContextCGL::releaseNativeHandles() { +#if WITH_OPENGL m_openGLContext = nil; m_openGLView = nil; +#endif m_metalView = nil; return GHOST_kSuccess; @@ -404,10 +556,14 @@ void GHOST_ContextCGL::metalInit() fragment float4 fragment_shader(Vertex v [[stage_in]], texture2d<float> t [[texture(0)]]) { - return t.sample(s, v.texCoord); - } - )msl"; + /* Final blit should ensure alpha is 1.0. This resolves + * rendering artifacts for blitting of final backbuffer. */ + float4 out_tex = t.sample(s, v.texCoord); + out_tex.a = 1.0; + return out_tex; + } + )msl"; MTLCompileOptions *options = [[[MTLCompileOptions alloc] init] autorelease]; options.languageVersion = MTLLanguageVersion1_1; @@ -424,6 +580,8 @@ void GHOST_ContextCGL::metalInit() desc.fragmentFunction = [library newFunctionWithName:@"fragment_shader"]; desc.vertexFunction = [library newFunctionWithName:@"vertex_shader"]; + /* Ensure library is released. */ + [library autorelease]; [desc.colorAttachments objectAtIndexedSubscript:0].pixelFormat = METAL_FRAMEBUFFERPIXEL_FORMAT; @@ -434,6 +592,20 @@ void GHOST_ContextCGL::metalInit() ghost_fatal_error_dialog( "GHOST_ContextCGL::metalInit: newRenderPipelineStateWithDescriptor:error: failed!"); } + + /* Create a render pipeline to composite things rendered with Metal on top + * of the framebuffer contents. Uses the same vertex and fragment shader + * as the blit above, but with alpha blending enabled. 
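 * With MTLBlendFactorSourceAlpha / MTLBlendFactorOneMinusSourceAlpha this is
 * the standard "over" operator:
 *   out.rgb = src.rgb * src.a + dst.rgb * (1.0 - src.a)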
*/ + desc.label = @"Metal Overlay"; + desc.colorAttachments[0].blendingEnabled = YES; + desc.colorAttachments[0].sourceRGBBlendFactor = MTLBlendFactorSourceAlpha; + desc.colorAttachments[0].destinationRGBBlendFactor = MTLBlendFactorOneMinusSourceAlpha; + + if (error) { + ghost_fatal_error_dialog( + "GHOST_ContextCGL::metalInit: newRenderPipelineStateWithDescriptor:error: failed (when " + "creating the Metal overlay pipeline)!"); + } } } @@ -445,123 +617,206 @@ void GHOST_ContextCGL::metalFree() if (m_metalRenderPipeline) { [m_metalRenderPipeline release]; } - if (m_defaultFramebufferMetalTexture) { - [m_defaultFramebufferMetalTexture release]; + + for (int i = 0; i < METAL_SWAPCHAIN_SIZE; i++) { + if (m_defaultFramebufferMetalTexture[i].texture) { + [m_defaultFramebufferMetalTexture[i].texture release]; + } } } void GHOST_ContextCGL::metalInitFramebuffer() { - glGenFramebuffers(1, &m_defaultFramebuffer); + if (!m_useMetalForRendering) { +#if WITH_OPENGL + glGenFramebuffers(1, &m_defaultFramebuffer); +#endif + } updateDrawingContext(); - glBindFramebuffer(GL_FRAMEBUFFER, m_defaultFramebuffer); + + if (!m_useMetalForRendering) { +#if WITH_OPENGL + glBindFramebuffer(GL_FRAMEBUFFER, m_defaultFramebuffer); +#endif + } } void GHOST_ContextCGL::metalUpdateFramebuffer() { - assert(m_defaultFramebuffer != 0); + if (!m_useMetalForRendering) { +#if WITH_OPENGL + assert(m_defaultFramebuffer != 0); +#endif + } NSRect bounds = [m_metalView bounds]; NSSize backingSize = [m_metalView convertSizeToBacking:bounds.size]; size_t width = (size_t)backingSize.width; size_t height = (size_t)backingSize.height; - { - /* Test if there is anything to update */ - id<MTLTexture> tex = (id<MTLTexture>)m_defaultFramebufferMetalTexture; - if (tex && tex.width == width && tex.height == height) { - return; +#if WITH_OPENGL + unsigned int glTex; + CVPixelBufferRef cvPixelBuffer = nil; + CVOpenGLTextureCacheRef cvGLTexCache = nil; + CVOpenGLTextureRef cvGLTex = nil; + CVMetalTextureCacheRef cvMetalTexCache = nil; + CVMetalTextureRef cvMetalTex = nil; +#endif + + if (!m_useMetalForRendering) { +#if WITH_OPENGL + /* OPENGL path */ + { + /* Test if there is anything to update */ + id<MTLTexture> tex = m_defaultFramebufferMetalTexture[current_swapchain_index].texture; + if (tex && tex.width == width && tex.height == height) { + return; + } } - } - activateDrawingContext(); + activateDrawingContext(); + + NSDictionary *cvPixelBufferProps = @{ + (__bridge NSString *)kCVPixelBufferOpenGLCompatibilityKey : @YES, + (__bridge NSString *)kCVPixelBufferMetalCompatibilityKey : @YES, + }; + CVReturn cvret = CVPixelBufferCreate(kCFAllocatorDefault, + width, + height, + METAL_CORE_VIDEO_PIXEL_FORMAT, + (__bridge CFDictionaryRef)cvPixelBufferProps, + &cvPixelBuffer); + if (cvret != kCVReturnSuccess) { + ghost_fatal_error_dialog( + "GHOST_ContextCGL::metalUpdateFramebuffer: CVPixelBufferCreate failed!"); + } - NSDictionary *cvPixelBufferProps = @{ - (__bridge NSString *)kCVPixelBufferOpenGLCompatibilityKey : @YES, - (__bridge NSString *)kCVPixelBufferMetalCompatibilityKey : @YES, - }; - CVPixelBufferRef cvPixelBuffer = nil; - CVReturn cvret = CVPixelBufferCreate(kCFAllocatorDefault, - width, - height, - METAL_CORE_VIDEO_PIXEL_FORMAT, - (__bridge CFDictionaryRef)cvPixelBufferProps, - &cvPixelBuffer); - if (cvret != kCVReturnSuccess) { - ghost_fatal_error_dialog( - "GHOST_ContextCGL::metalUpdateFramebuffer: CVPixelBufferCreate failed!"); - } - - /* Create an OpenGL texture. 
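 * Both the GL texture here and the Metal texture below are created from the
 * same CVPixelBuffer (flagged above as both OpenGL- and Metal-compatible), so
 * the two APIs share one allocation instead of copying between them.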
*/ - CVOpenGLTextureCacheRef cvGLTexCache = nil; - cvret = CVOpenGLTextureCacheCreate(kCFAllocatorDefault, - nil, - m_openGLContext.CGLContextObj, - m_openGLContext.pixelFormat.CGLPixelFormatObj, - nil, - &cvGLTexCache); - if (cvret != kCVReturnSuccess) { - ghost_fatal_error_dialog( - "GHOST_ContextCGL::metalUpdateFramebuffer: CVOpenGLTextureCacheCreate failed!"); - } + /* Create an OpenGL texture. */ + cvret = CVOpenGLTextureCacheCreate(kCFAllocatorDefault, + nil, + m_openGLContext.CGLContextObj, + m_openGLContext.pixelFormat.CGLPixelFormatObj, + nil, + &cvGLTexCache); + if (cvret != kCVReturnSuccess) { + ghost_fatal_error_dialog( + "GHOST_ContextCGL::metalUpdateFramebuffer: CVOpenGLTextureCacheCreate failed!"); + } - CVOpenGLTextureRef cvGLTex = nil; - cvret = CVOpenGLTextureCacheCreateTextureFromImage( - kCFAllocatorDefault, cvGLTexCache, cvPixelBuffer, nil, &cvGLTex); - if (cvret != kCVReturnSuccess) { - ghost_fatal_error_dialog( - "GHOST_ContextCGL::metalUpdateFramebuffer: " - "CVOpenGLTextureCacheCreateTextureFromImage failed!"); - } + cvret = CVOpenGLTextureCacheCreateTextureFromImage( + kCFAllocatorDefault, cvGLTexCache, cvPixelBuffer, nil, &cvGLTex); + if (cvret != kCVReturnSuccess) { + ghost_fatal_error_dialog( + "GHOST_ContextCGL::metalUpdateFramebuffer: " + "CVOpenGLTextureCacheCreateTextureFromImage failed!"); + } - unsigned int glTex; - glTex = CVOpenGLTextureGetName(cvGLTex); + glTex = CVOpenGLTextureGetName(cvGLTex); - /* Create a Metal texture. */ - CVMetalTextureCacheRef cvMetalTexCache = nil; - cvret = CVMetalTextureCacheCreate( - kCFAllocatorDefault, nil, m_metalLayer.device, nil, &cvMetalTexCache); - if (cvret != kCVReturnSuccess) { - ghost_fatal_error_dialog( - "GHOST_ContextCGL::metalUpdateFramebuffer: CVMetalTextureCacheCreate failed!"); - } + /* Create a Metal texture. */ + cvret = CVMetalTextureCacheCreate( + kCFAllocatorDefault, nil, m_metalLayer.device, nil, &cvMetalTexCache); + if (cvret != kCVReturnSuccess) { + ghost_fatal_error_dialog( + "GHOST_ContextCGL::metalUpdateFramebuffer: CVMetalTextureCacheCreate failed!"); + } - CVMetalTextureRef cvMetalTex = nil; - cvret = CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault, - cvMetalTexCache, - cvPixelBuffer, - nil, - METAL_FRAMEBUFFERPIXEL_FORMAT, - width, - height, - 0, - &cvMetalTex); - if (cvret != kCVReturnSuccess) { - ghost_fatal_error_dialog( - "GHOST_ContextCGL::metalUpdateFramebuffer: " - "CVMetalTextureCacheCreateTextureFromImage failed!"); - } + cvret = CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault, + cvMetalTexCache, + cvPixelBuffer, + nil, + METAL_FRAMEBUFFERPIXEL_FORMAT, + width, + height, + 0, + &cvMetalTex); + if (cvret != kCVReturnSuccess) { + ghost_fatal_error_dialog( + "GHOST_ContextCGL::metalUpdateFramebuffer: " + "CVMetalTextureCacheCreateTextureFromImage failed!"); + } - MTLTexture *tex = (MTLTexture *)CVMetalTextureGetTexture(cvMetalTex); + id<MTLTexture> tex = CVMetalTextureGetTexture(cvMetalTex); - if (!tex) { - ghost_fatal_error_dialog( - "GHOST_ContextCGL::metalUpdateFramebuffer: CVMetalTextureGetTexture failed!"); + if (!tex) { + ghost_fatal_error_dialog( + "GHOST_ContextCGL::metalUpdateFramebuffer: CVMetalTextureGetTexture failed!"); + } + + [m_defaultFramebufferMetalTexture[current_swapchain_index].texture release]; + m_defaultFramebufferMetalTexture[current_swapchain_index].texture = [tex retain]; +#endif } + else { + /* NOTE(Metal): Metal API Path. 
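 * Unlike the interop path above, this path allocates the overlay texture
 * directly through Metal (no CoreVideo round-trip) and clears it once on
 * creation.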
*/ + if (m_defaultFramebufferMetalTexture[current_swapchain_index].texture && + m_defaultFramebufferMetalTexture[current_swapchain_index].texture.width == width && + m_defaultFramebufferMetalTexture[current_swapchain_index].texture.height == height) { + return; + } - [m_defaultFramebufferMetalTexture release]; - m_defaultFramebufferMetalTexture = [tex retain]; + /* Free old texture */ + [m_defaultFramebufferMetalTexture[current_swapchain_index].texture release]; - glBindFramebuffer(GL_FRAMEBUFFER, m_defaultFramebuffer); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_RECTANGLE, glTex, 0); + id<MTLDevice> device = m_metalLayer.device; + MTLTextureDescriptor *overlayDesc = [MTLTextureDescriptor + texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA16Float + width:width + height:height + mipmapped:NO]; + overlayDesc.storageMode = MTLStorageModePrivate; + overlayDesc.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead; + + id<MTLTexture> overlayTex = [device newTextureWithDescriptor:overlayDesc]; + if (!overlayTex) { + ghost_fatal_error_dialog( + "GHOST_ContextCGL::metalUpdateFramebuffer: failed to create Metal overlay texture!"); + } + else { + overlayTex.label = [NSString + stringWithFormat:@"Metal Overlay for GHOST Context %p", this]; //@""; - [m_metalLayer setDrawableSize:CGSizeMake((CGFloat)width, (CGFloat)height)]; + // NSLog(@"Created new Metal Overlay (backbuffer) for context %p\n", this); + } + + m_defaultFramebufferMetalTexture[current_swapchain_index].texture = + overlayTex; //[(MTLTexture *)overlayTex retain]; + + /* Clear texture on create */ + id<MTLCommandBuffer> cmdBuffer = [m_metalCmdQueue commandBuffer]; + MTLRenderPassDescriptor *passDescriptor = [MTLRenderPassDescriptor renderPassDescriptor]; + { + auto attachment = [passDescriptor.colorAttachments objectAtIndexedSubscript:0]; + attachment.texture = m_defaultFramebufferMetalTexture[current_swapchain_index].texture; + attachment.loadAction = MTLLoadActionClear; + attachment.clearColor = MTLClearColorMake(0.294, 0.294, 0.294, 1.000); + attachment.storeAction = MTLStoreActionStore; + } + { + id<MTLRenderCommandEncoder> enc = [cmdBuffer + renderCommandEncoderWithDescriptor:passDescriptor]; + [enc endEncoding]; + } + [cmdBuffer commit]; + } - CVPixelBufferRelease(cvPixelBuffer); - CVOpenGLTextureCacheRelease(cvGLTexCache); - CVOpenGLTextureRelease(cvGLTex); - CFRelease(cvMetalTexCache); - CFRelease(cvMetalTex); + if (!m_useMetalForRendering) { +#if WITH_OPENGL + glBindFramebuffer(GL_FRAMEBUFFER, m_defaultFramebuffer); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_RECTANGLE, glTex, 0); +#endif + } + + [m_metalLayer setDrawableSize:CGSizeMake((CGFloat)width, (CGFloat)height)]; + if (!m_useMetalForRendering) { +#if WITH_OPENGL + CVPixelBufferRelease(cvPixelBuffer); + CVOpenGLTextureCacheRelease(cvGLTexCache); + CVOpenGLTextureRelease(cvGLTex); + CFRelease(cvMetalTexCache); + CFRelease(cvMetalTex); +#endif + } } void GHOST_ContextCGL::metalSwapBuffers() @@ -570,40 +825,88 @@ void GHOST_ContextCGL::metalSwapBuffers() @autoreleasepool { /* clang-format on */ updateDrawingContext(); - glFlush(); - assert(m_defaultFramebufferMetalTexture != 0); + if (!m_useMetalForRendering) { +#if WITH_OPENGL + glFlush(); + assert(m_defaultFramebufferMetalTexture[current_swapchain_index].texture != nil); +#endif + } id<CAMetalDrawable> drawable = [m_metalLayer nextDrawable]; if (!drawable) { return; } - id<MTLCommandBuffer> cmdBuffer = [m_metalCmdQueue commandBuffer]; - 
MTLRenderPassDescriptor *passDescriptor = [MTLRenderPassDescriptor renderPassDescriptor]; { auto attachment = [passDescriptor.colorAttachments objectAtIndexedSubscript:0]; attachment.texture = drawable.texture; - attachment.loadAction = MTLLoadActionDontCare; + attachment.loadAction = MTLLoadActionClear; + attachment.clearColor = MTLClearColorMake(1.0, 0.294, 0.294, 1.000); attachment.storeAction = MTLStoreActionStore; } - id<MTLTexture> srcTexture = (id<MTLTexture>)m_defaultFramebufferMetalTexture; + if (!m_useMetalForRendering) { + id<MTLCommandBuffer> cmdBuffer = [m_metalCmdQueue commandBuffer]; + { + assert(m_defaultFramebufferMetalTexture[current_swapchain_index].texture != nil); + id<MTLRenderCommandEncoder> enc = [cmdBuffer + renderCommandEncoderWithDescriptor:passDescriptor]; + [enc setRenderPipelineState:(id<MTLRenderPipelineState>)m_metalRenderPipeline]; + [enc setFragmentTexture:m_defaultFramebufferMetalTexture[current_swapchain_index].texture + atIndex:0]; + [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3]; + [enc endEncoding]; + } + + [cmdBuffer presentDrawable:drawable]; + /* Submit command buffer */ + [cmdBuffer commit]; + } + else { + assert(contextPresentCallback); + assert(m_defaultFramebufferMetalTexture[current_swapchain_index].texture != nil); + (*contextPresentCallback)(passDescriptor, + (id<MTLRenderPipelineState>)m_metalRenderPipeline, + m_defaultFramebufferMetalTexture[current_swapchain_index].texture, + drawable); + } + } +} + +void GHOST_ContextCGL::initClear() +{ + + if (!m_useMetalForRendering) { +#if WITH_OPENGL + glClearColor(0.294, 0.294, 0.294, 0.000); + glClear(GL_COLOR_BUFFER_BIT); + glClearColor(0.000, 0.000, 0.000, 0.000); +#endif + } + else { +#if WITH_METAL + // TODO (mg_gpusw_apple) this path is never taken, this is legacy left from initial integration
 + // of metal and gl, the whole file should be cleaned up and stripped of the legacy path + id<MTLCommandBuffer> cmdBuffer = [m_metalCmdQueue commandBuffer]; + MTLRenderPassDescriptor *passDescriptor = [MTLRenderPassDescriptor renderPassDescriptor]; + { + auto attachment = [passDescriptor.colorAttachments objectAtIndexedSubscript:0]; + attachment.texture = m_defaultFramebufferMetalTexture[current_swapchain_index].texture; + attachment.loadAction = MTLLoadActionClear; + attachment.clearColor = MTLClearColorMake(0.294, 0.294, 0.294, 1.000); + attachment.storeAction = MTLStoreActionStore; + } + + // encoding { id<MTLRenderCommandEncoder> enc = [cmdBuffer renderCommandEncoderWithDescriptor:passDescriptor]; } - - [enc setRenderPipelineState:(id<MTLRenderPipelineState>)m_metalRenderPipeline]; - [enc setFragmentTexture:srcTexture atIndex:0]; - [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3]; - [enc endEncoding]; - } - - [cmdBuffer presentDrawable:drawable]; - [cmdBuffer commit]; +#endif } } diff --git a/intern/ghost/intern/GHOST_Window.cpp b/intern/ghost/intern/GHOST_Window.cpp index db4d6c3bb71..da292a90869 100644 --- a/intern/ghost/intern/GHOST_Window.cpp +++ b/intern/ghost/intern/GHOST_Window.cpp @@ -92,6 +92,11 @@ GHOST_TSuccess GHOST_Window::getSwapInterval(int &intervalOut) return m_context->getSwapInterval(intervalOut); } +GHOST_Context *GHOST_Window::getContext() +{ + return m_context; +} + unsigned int GHOST_Window::getDefaultFramebuffer() { return (m_context) ? 
m_context->getDefaultFramebuffer() : 0; diff --git a/intern/ghost/intern/GHOST_Window.h b/intern/ghost/intern/GHOST_Window.h index 2c2b75a6bd5..8e1f73d3430 100644 --- a/intern/ghost/intern/GHOST_Window.h +++ b/intern/ghost/intern/GHOST_Window.h @@ -72,7 +72,7 @@ class GHOST_Window : public GHOST_IWindow { * Returns indication as to whether the window is valid. * \return The validity of the window. */ - virtual bool getValid() const + virtual bool getValid() const override { return m_context != NULL; } @@ -81,15 +81,15 @@ class GHOST_Window : public GHOST_IWindow { * Returns the associated OS object/handle * \return The associated OS object/handle */ - virtual void *getOSWindow() const; + virtual void *getOSWindow() const override; /** * Returns the current cursor shape. * \return The current cursor shape. */ - inline GHOST_TStandardCursor getCursorShape() const; + inline GHOST_TStandardCursor getCursorShape() const override; - inline bool isDialog() const + inline bool isDialog() const override { return false; } @@ -99,7 +99,7 @@ class GHOST_Window : public GHOST_IWindow { * \param cursorShape: The new cursor shape type id. * \return Indication of success. */ - GHOST_TSuccess setCursorShape(GHOST_TStandardCursor cursorShape); + GHOST_TSuccess setCursorShape(GHOST_TStandardCursor cursorShape) override; /** * Set the shape of the cursor to a custom cursor. @@ -115,15 +115,15 @@ class GHOST_Window : public GHOST_IWindow { int sizey, int hotX, int hotY, - bool canInvertColor); + bool canInvertColor) override; - GHOST_TSuccess getCursorBitmap(GHOST_CursorBitmapRef *bitmap); + GHOST_TSuccess getCursorBitmap(GHOST_CursorBitmapRef *bitmap) override; /** * Returns the visibility state of the cursor. * \return The visibility state of the cursor. */ - inline bool getCursorVisibility() const; + inline bool getCursorVisibility() const override; inline GHOST_TGrabCursorMode getCursorGrabMode() const; inline bool getCursorGrabModeIsWarp() const; inline GHOST_TAxisFlag getCursorGrabAxis() const; @@ -136,7 +136,7 @@ class GHOST_Window : public GHOST_IWindow { * \param visible: The new visibility state of the cursor. * \return Indication of success. */ - GHOST_TSuccess setCursorVisibility(bool visible); + GHOST_TSuccess setCursorVisibility(bool visible) override; /** * Sets the cursor grab. @@ -146,28 +146,28 @@ class GHOST_Window : public GHOST_IWindow { GHOST_TSuccess setCursorGrab(GHOST_TGrabCursorMode mode, GHOST_TAxisFlag wrap_axis, GHOST_Rect *bounds, - int32_t mouse_ungrab_xy[2]); + int32_t mouse_ungrab_xy[2]) override; /** * Gets the cursor grab region, if unset the window is used. * reset when grab is disabled. */ - GHOST_TSuccess getCursorGrabBounds(GHOST_Rect &bounds); + GHOST_TSuccess getCursorGrabBounds(GHOST_Rect &bounds) override; void getCursorGrabState(GHOST_TGrabCursorMode &mode, GHOST_TAxisFlag &axis_flag, GHOST_Rect &bounds, - bool &use_software_cursor); + bool &use_software_cursor) override; /** * Return true when a software cursor should be used. */ - bool getCursorGrabUseSoftwareDisplay(); + bool getCursorGrabUseSoftwareDisplay() override; /** * Sets the progress bar value displayed in the window/application icon * \param progress: The progress percentage (0.0 to 1.0). 
*/ - virtual GHOST_TSuccess setProgressBar(float /*progress*/) + virtual GHOST_TSuccess setProgressBar(float /*progress*/) override { return GHOST_kFailure; } @@ -175,7 +175,7 @@ class GHOST_Window : public GHOST_IWindow { /** * Hides the progress bar in the icon */ - virtual GHOST_TSuccess endProgressBar() + virtual GHOST_TSuccess endProgressBar() override { return GHOST_kFailure; } @@ -185,43 +185,43 @@ class GHOST_Window : public GHOST_IWindow { * \param interval: The swap interval to use. * \return A boolean success indicator. */ - GHOST_TSuccess setSwapInterval(int interval); + GHOST_TSuccess setSwapInterval(int interval) override; /** * Gets the current swap interval for #swapBuffers. * \return An integer. */ - GHOST_TSuccess getSwapInterval(int &intervalOut); + GHOST_TSuccess getSwapInterval(int &intervalOut) override; /** * Tells if the ongoing drag'n'drop object can be accepted upon mouse drop */ - void setAcceptDragOperation(bool canAccept); + void setAcceptDragOperation(bool canAccept) override; /** * Returns acceptance of the dropped object * Usually called by the "object dropped" event handling function */ - bool canAcceptDragOperation() const; + bool canAcceptDragOperation() const override; /** * Sets the window "modified" status, indicating unsaved changes * \param isUnsavedChanges: Unsaved changes or not. * \return Indication of success. */ - virtual GHOST_TSuccess setModifiedState(bool isUnsavedChanges); + virtual GHOST_TSuccess setModifiedState(bool isUnsavedChanges) override; /** * Gets the window "modified" status, indicating unsaved changes * \return True if there are unsaved changes */ - virtual bool getModifiedState(); + virtual bool getModifiedState() override; /** * Returns the type of drawing context used in this window. * \return The current type of drawing context. */ - inline GHOST_TDrawingContextType getDrawingContextType(); + inline GHOST_TDrawingContextType getDrawingContextType() override; /** * Tries to install a rendering context in this window. @@ -230,19 +230,19 @@ class GHOST_Window : public GHOST_IWindow { * \param type: The type of rendering context installed. * \return Indication as to whether installation has succeeded. */ - GHOST_TSuccess setDrawingContextType(GHOST_TDrawingContextType type); + GHOST_TSuccess setDrawingContextType(GHOST_TDrawingContextType type) override; /** * Swaps front and back buffers of a window. * \return A boolean success indicator. */ - virtual GHOST_TSuccess swapBuffers(); + virtual GHOST_TSuccess swapBuffers() override; /** * Activates the drawing context of this window. * \return A boolean success indicator. */ - virtual GHOST_TSuccess activateDrawingContext(); + virtual GHOST_TSuccess activateDrawingContext() override; /** * Updates the drawing context of this window. Needed @@ -252,16 +252,22 @@ class GHOST_Window : public GHOST_IWindow { GHOST_TSuccess updateDrawingContext(); /** - * Gets the OpenGL frame-buffer associated with the window's contents. - * \return The ID of an OpenGL frame-buffer object. + * Get the drawing context associated with this window. + *\return Pointer to the context object. */ - virtual unsigned int getDefaultFramebuffer(); + GHOST_Context *getContext(); + + /** + * Gets the OpenGL framebuffer associated with the window's contents. + * \return The ID of an OpenGL framebuffer object. + */ + virtual unsigned int getDefaultFramebuffer() override; /** * Returns the window user data. * \return The window user data. 
*/ - inline GHOST_TUserDataPtr getUserData() const + inline GHOST_TUserDataPtr getUserData() const override { return m_userData; } @@ -270,12 +276,12 @@ class GHOST_Window : public GHOST_IWindow { * Changes the window user data. * \param userData: The window user data. */ - void setUserData(const GHOST_TUserDataPtr userData) + void setUserData(const GHOST_TUserDataPtr userData) override { m_userData = userData; } - float getNativePixelSize(void) + float getNativePixelSize(void) override { if (m_nativePixelSize > 0.0f) return m_nativePixelSize; @@ -286,18 +292,18 @@ class GHOST_Window : public GHOST_IWindow { * Returns the recommended DPI for this window. * \return The recommended DPI for this window. */ - virtual inline uint16_t getDPIHint() + virtual inline uint16_t getDPIHint() override { return 96; } #ifdef WITH_INPUT_IME - virtual void beginIME(int32_t x, int32_t y, int32_t w, int32_t h, bool completed) + virtual void beginIME(int32_t x, int32_t y, int32_t w, int32_t h, bool completed) override { /* do nothing temporarily if not in windows */ } - virtual void endIME() + virtual void endIME() override { /* do nothing temporarily if not in windows */ } diff --git a/intern/ghost/test/multitest/MultiTest.c b/intern/ghost/test/multitest/MultiTest.c index 99b88dfb525..6a6a042f4ac 100644 --- a/intern/ghost/test/multitest/MultiTest.c +++ b/intern/ghost/test/multitest/MultiTest.c @@ -323,7 +323,7 @@ MainWindow *mainwindow_new(MultiTestApp *app) if (win) { MainWindow *mw = MEM_callocN(sizeof(*mw), "mainwindow_new"); - mw->gpu_context = GPU_context_create(win); + mw->gpu_context = GPU_context_create(win, NULL); GPU_init(); mw->app = app; @@ -578,7 +578,7 @@ LoggerWindow *loggerwindow_new(MultiTestApp *app) if (win) { LoggerWindow *lw = MEM_callocN(sizeof(*lw), "loggerwindow_new"); - lw->gpu_context = GPU_context_create(win); + lw->gpu_context = GPU_context_create(win, NULL); GPU_init(); int bbox[2][2]; @@ -780,7 +780,7 @@ ExtraWindow *extrawindow_new(MultiTestApp *app) if (win) { ExtraWindow *ew = MEM_callocN(sizeof(*ew), "mainwindow_new"); - ew->gpu_context = GPU_context_create(win); + ew->gpu_context = GPU_context_create(win, NULL); GPU_init(); ew->app = app; diff --git a/source/blender/draw/DRW_engine.h b/source/blender/draw/DRW_engine.h index dec7a22aadb..04e3bddfb6c 100644 --- a/source/blender/draw/DRW_engine.h +++ b/source/blender/draw/DRW_engine.h @@ -201,6 +201,7 @@ void DRW_gpu_render_context_enable(void *re_gpu_context); void DRW_gpu_render_context_disable(void *re_gpu_context); void DRW_deferred_shader_remove(struct GPUMaterial *mat); +void DRW_deferred_shader_optimize_remove(struct GPUMaterial *mat); /** * Get DrawData from the given ID-block. 
In order for this to work, we assume that diff --git a/source/blender/draw/engines/eevee/eevee_lightcache.c b/source/blender/draw/engines/eevee/eevee_lightcache.c index 614ea0b0892..0fd87ef43f0 100644 --- a/source/blender/draw/engines/eevee/eevee_lightcache.c +++ b/source/blender/draw/engines/eevee/eevee_lightcache.c @@ -597,7 +597,7 @@ static void eevee_lightbake_context_enable(EEVEE_LightBake *lbake) if (lbake->gl_context) { DRW_opengl_render_context_enable(lbake->gl_context); if (lbake->gpu_context == NULL) { - lbake->gpu_context = GPU_context_create(NULL); + lbake->gpu_context = GPU_context_create(NULL, lbake->gl_context); } DRW_gpu_render_context_enable(lbake->gpu_context); } diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.cc b/source/blender/draw/engines/eevee_next/eevee_shader.cc index 64b1d4891a9..05ff06e7435 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader.cc +++ b/source/blender/draw/engines/eevee_next/eevee_shader.cc @@ -471,6 +471,8 @@ GPUMaterial *ShaderModule::material_shader_get(const char *name, this); GPU_material_status_set(gpumat, GPU_MAT_QUEUED); GPU_material_compile(gpumat); + /* Queue deferred material optimization. */ + DRW_shader_queue_optimize_material(gpumat); return gpumat; } diff --git a/source/blender/draw/intern/DRW_render.h b/source/blender/draw/intern/DRW_render.h index 7b80ffd2b88..4bdef577e44 100644 --- a/source/blender/draw/intern/DRW_render.h +++ b/source/blender/draw/intern/DRW_render.h @@ -251,6 +251,7 @@ struct GPUMaterial *DRW_shader_from_material(struct Material *ma, bool deferred, GPUCodegenCallbackFn callback, void *thunk); +void DRW_shader_queue_optimize_material(struct GPUMaterial *mat); void DRW_shader_free(struct GPUShader *shader); #define DRW_SHADER_FREE_SAFE(shader) \ do { \ diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c index e1bee89db60..eab79652762 100644 --- a/source/blender/draw/intern/draw_manager.c +++ b/source/blender/draw/intern/draw_manager.c @@ -3139,7 +3139,7 @@ void DRW_opengl_context_create(void) DST.gl_context = WM_opengl_context_create(); WM_opengl_context_activate(DST.gl_context); /* Be sure to create gpu_context too. */ - DST.gpu_context = GPU_context_create(NULL); + DST.gpu_context = GPU_context_create(0, DST.gl_context); /* So we activate the window's one afterwards. */ wm_window_reset_drawable(); } diff --git a/source/blender/draw/intern/draw_manager_shader.c b/source/blender/draw/intern/draw_manager_shader.c index 4bc3898c5e7..6f8df54ead3 100644 --- a/source/blender/draw/intern/draw_manager_shader.c +++ b/source/blender/draw/intern/draw_manager_shader.c @@ -51,9 +51,13 @@ extern char datatoc_common_fullscreen_vert_glsl[]; * \{ */ typedef struct DRWShaderCompiler { + /** Default compilation queue. */ ListBase queue; /* GPUMaterial */ SpinLock list_lock; + /** Optimization queue. */ + ListBase optimize_queue; /* GPUMaterial */ + void *gl_context; GPUContext *gpu_context; bool own_context; @@ -109,7 +113,29 @@ static void drw_deferred_shader_compilation_exec( MEM_freeN(link); } else { - break; + /* Check for Material Optimization job once there are no more + * shaders to compile. */ + BLI_spin_lock(&comp->list_lock); + /* Pop tail because it will be less likely to lock the main thread + * if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */ + LinkData *link = (LinkData *)BLI_poptail(&comp->optimize_queue); + GPUMaterial *optimize_mat = link ? 
(GPUMaterial *)link->data : NULL; + if (optimize_mat) { + /* Avoid another thread freeing the material during optimization. */ + GPU_material_acquire(optimize_mat); + } + BLI_spin_unlock(&comp->list_lock); + + if (optimize_mat) { + /* Compile optimized material shader. */ + GPU_material_optimize(optimize_mat); + GPU_material_release(optimize_mat); + MEM_freeN(link); + } + else { + /* No more materials to optimize, or shaders to compile. */ + break; + } } if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) { @@ -131,6 +157,7 @@ static void drw_deferred_shader_compilation_free(void *custom_data) BLI_spin_lock(&comp->list_lock); BLI_freelistN(&comp->queue); + BLI_freelistN(&comp->optimize_queue); BLI_spin_unlock(&comp->list_lock); if (comp->own_context) { @@ -146,34 +173,13 @@ static void drw_deferred_shader_compilation_free(void *custom_data) MEM_freeN(comp); } -static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred) +/** + * Append either shader compilation or optimization job to deferred queue and + * ensure shader compilation worker is active. + * We keep two separate queues to ensure core compilations always complete before optimization. + */ +static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job) { - if (ELEM(GPU_material_status(mat), GPU_MAT_SUCCESS, GPU_MAT_FAILED)) { - return; - } - /* Do not defer the compilation if we are rendering for image. - * deferred rendering is only possible when `evil_C` is available */ - if (DST.draw_ctx.evil_C == NULL || DRW_state_is_image_render() || !USE_DEFERRED_COMPILATION) { - deferred = false; - } - - if (!deferred) { - DRW_deferred_shader_remove(mat); - /* Shaders could already be compiling. Have to wait for compilation to finish. */ - while (GPU_material_status(mat) == GPU_MAT_QUEUED) { - PIL_sleep_ms(20); - } - if (GPU_material_status(mat) == GPU_MAT_CREATED) { - GPU_material_compile(mat); - } - return; - } - - /* Don't add material to the queue twice. */ - if (GPU_material_status(mat) == GPU_MAT_QUEUED) { - return; - } - const bool use_main_context = GPU_use_main_context_workaround(); const bool job_own_context = !use_main_context; @@ -194,6 +200,7 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred) if (old_comp) { BLI_spin_lock(&old_comp->list_lock); BLI_movelisttolist(&comp->queue, &old_comp->queue); + BLI_movelisttolist(&comp->optimize_queue, &old_comp->optimize_queue); BLI_spin_unlock(&old_comp->list_lock); /* Do not recreate context, just pass ownership. */ if (old_comp->gl_context) { @@ -204,9 +211,18 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred) } } - GPU_material_status_set(mat, GPU_MAT_QUEUED); - LinkData *node = BLI_genericNodeN(mat); - BLI_addtail(&comp->queue, node); + /* Add to either compilation or optimization queue. */ + if (is_optimization_job) { + BLI_assert(GPU_material_optimization_status(mat) != GPU_MAT_OPTIMIZATION_QUEUED); + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_QUEUED); + LinkData *node = BLI_genericNodeN(mat); + BLI_addtail(&comp->optimize_queue, node); + } + else { + GPU_material_status_set(mat, GPU_MAT_QUEUED); + LinkData *node = BLI_genericNodeN(mat); + BLI_addtail(&comp->queue, node); + } /* Create only one context. 
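 * (One GL + GPU context pair serves both queues. Because the exec loop above
 * only pops optimize_queue once the compile queue is empty, base shaders
 * always finish compiling before optimization work starts.)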
*/ if (comp->gl_context == NULL) { @@ -216,7 +232,7 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred) } else { comp->gl_context = WM_opengl_context_create(); - comp->gpu_context = GPU_context_create(NULL); + comp->gpu_context = GPU_context_create(NULL, comp->gl_context); GPU_context_active_set(NULL); WM_opengl_context_activate(DST.gl_context); @@ -235,6 +251,39 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred) WM_jobs_start(wm, wm_job); } +static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred) +{ + if (ELEM(GPU_material_status(mat), GPU_MAT_SUCCESS, GPU_MAT_FAILED)) { + return; + } + + /* Do not defer the compilation if we are rendering for image. + * deferred rendering is only possible when `evil_C` is available */ + if (DST.draw_ctx.evil_C == NULL || DRW_state_is_image_render() || !USE_DEFERRED_COMPILATION) { + deferred = false; + } + + if (!deferred) { + DRW_deferred_shader_remove(mat); + /* Shaders could already be compiling. Have to wait for compilation to finish. */ + while (GPU_material_status(mat) == GPU_MAT_QUEUED) { + PIL_sleep_ms(20); + } + if (GPU_material_status(mat) == GPU_MAT_CREATED) { + GPU_material_compile(mat); + } + return; + } + + /* Don't add material to the queue twice. */ + if (GPU_material_status(mat) == GPU_MAT_QUEUED) { + return; + } + + /* Add deferred shader compilation to queue. */ + drw_deferred_queue_append(mat, false); +} + void DRW_deferred_shader_remove(GPUMaterial *mat) { LISTBASE_FOREACH (wmWindowManager *, wm, &G_MAIN->wm) { @@ -243,14 +292,49 @@ void DRW_deferred_shader_remove(GPUMaterial *mat) wm, wm, WM_JOB_TYPE_SHADER_COMPILATION); if (comp != NULL) { BLI_spin_lock(&comp->list_lock); + + /* Search for compilation job in queue. */ LinkData *link = (LinkData *)BLI_findptr(&comp->queue, mat, offsetof(LinkData, data)); if (link) { BLI_remlink(&comp->queue, link); GPU_material_status_set(link->data, GPU_MAT_CREATED); } - BLI_spin_unlock(&comp->list_lock); MEM_SAFE_FREE(link); + + /* Search for optimization job in queue. */ + LinkData *opti_link = (LinkData *)BLI_findptr( + &comp->optimize_queue, mat, offsetof(LinkData, data)); + if (opti_link) { + BLI_remlink(&comp->optimize_queue, opti_link); + GPU_material_optimization_status_set(opti_link->data, GPU_MAT_OPTIMIZATION_READY); + } + BLI_spin_unlock(&comp->list_lock); + + MEM_SAFE_FREE(opti_link); + } + } + } +} + +void DRW_deferred_shader_optimize_remove(GPUMaterial *mat) +{ + LISTBASE_FOREACH (wmWindowManager *, wm, &G_MAIN->wm) { + LISTBASE_FOREACH (wmWindow *, win, &wm->windows) { + DRWShaderCompiler *comp = (DRWShaderCompiler *)WM_jobs_customdata_from_type( + wm, wm, WM_JOB_TYPE_SHADER_COMPILATION); + if (comp != NULL) { + BLI_spin_lock(&comp->list_lock); + /* Search for optimization job in queue. 
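 * (Mirrors the optimize-queue removal in DRW_deferred_shader_remove() above;
 * a de-queued material is reset to GPU_MAT_OPTIMIZATION_READY so it can be
 * queued again later.)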
*/ + LinkData *opti_link = (LinkData *)BLI_findptr( + &comp->optimize_queue, mat, offsetof(LinkData, data)); + if (opti_link) { + BLI_remlink(&comp->optimize_queue, opti_link); + GPU_material_optimization_status_set(opti_link->data, GPU_MAT_OPTIMIZATION_READY); + } + BLI_spin_unlock(&comp->list_lock); + + MEM_SAFE_FREE(opti_link); } } } @@ -384,6 +468,7 @@ GPUMaterial *DRW_shader_from_world(World *wo, } drw_deferred_shader_add(mat, deferred); + DRW_shader_queue_optimize_material(mat); return mat; } @@ -413,9 +498,52 @@ GPUMaterial *DRW_shader_from_material(Material *ma, } drw_deferred_shader_add(mat, deferred); + DRW_shader_queue_optimize_material(mat); return mat; } +void DRW_shader_queue_optimize_material(GPUMaterial *mat) +{ + /* Do not perform deferred optimization if performing render. + * De-queue any queued optimization jobs. */ + if (DRW_state_is_image_render()) { + if (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) { + /* Remove from pending optimization job queue. */ + DRW_deferred_shader_optimize_remove(mat); + /* If optimization job had already started, wait for it to complete. */ + while (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) { + PIL_sleep_ms(20); + } + } + return; + } + + /* We do not need to perform optimization on the material if it is already compiled or in the + * optimization queue. If optimization is not required, the status will be flagged as + * `GPU_MAT_OPTIMIZATION_SKIP`. + * We can also skip cases which have already been queued up. */ + if (ELEM(GPU_material_optimization_status(mat), + GPU_MAT_OPTIMIZATION_SKIP, + GPU_MAT_OPTIMIZATION_SUCCESS, + GPU_MAT_OPTIMIZATION_QUEUED)) { + return; + } + + /* Only queue optimization once the original shader has been successfully compiled. */ + if (GPU_material_status(mat) != GPU_MAT_SUCCESS) { + return; + } + + /* Defer optimization until sufficient time has passed beyond creation. This avoids excessive + * recompilation for shaders which are being actively modified. */ + if (!GPU_material_optimization_ready(mat)) { + return; + } + + /* Add deferred shader compilation to queue. */ + drw_deferred_queue_append(mat, true); +} + void DRW_shader_free(GPUShader *shader) { GPU_shader_free(shader); diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 18da5169620..0ce4011b2b4 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -192,6 +192,7 @@ set(METAL_SRC metal/mtl_context.mm metal/mtl_debug.mm metal/mtl_framebuffer.mm + metal/mtl_immediate.mm metal/mtl_index_buffer.mm metal/mtl_memory.mm metal/mtl_query.mm @@ -205,11 +206,14 @@ set(METAL_SRC metal/mtl_vertex_buffer.mm metal/mtl_backend.hh + metal/mtl_batch.hh metal/mtl_capabilities.hh metal/mtl_common.hh metal/mtl_context.hh metal/mtl_debug.hh + metal/mtl_drawlist.hh metal/mtl_framebuffer.hh + metal/mtl_immediate.hh metal/mtl_index_buffer.hh metal/mtl_memory.hh metal/mtl_primitive.hh diff --git a/source/blender/gpu/GPU_context.h b/source/blender/gpu/GPU_context.h index a242bb7cc94..b59ea9e55d2 100644 --- a/source/blender/gpu/GPU_context.h +++ b/source/blender/gpu/GPU_context.h @@ -26,7 +26,7 @@ eGPUBackendType GPU_backend_get_type(void); /** Opaque type hiding blender::gpu::Context. */ typedef struct GPUContext GPUContext; -GPUContext *GPU_context_create(void *ghost_window); +GPUContext *GPU_context_create(void *ghost_window, void *ghost_context); /** * To be called after #GPU_context_active_set(ctx_to_destroy). 
*/
diff --git a/source/blender/gpu/GPU_material.h b/source/blender/gpu/GPU_material.h
index 922988bf95a..11500f5af60 100644
--- a/source/blender/gpu/GPU_material.h
+++ b/source/blender/gpu/GPU_material.h
@@ -117,6 +117,15 @@ typedef enum eGPUMaterialStatus {
   GPU_MAT_SUCCESS,
 } eGPUMaterialStatus;

+/* GPU_MAT_OPTIMIZATION_SKIP for cases where we do not
+ * plan to perform optimization on a given material. */
+typedef enum eGPUMaterialOptimizationStatus {
+  GPU_MAT_OPTIMIZATION_SKIP = 0,
+  GPU_MAT_OPTIMIZATION_READY,
+  GPU_MAT_OPTIMIZATION_QUEUED,
+  GPU_MAT_OPTIMIZATION_SUCCESS,
+} eGPUMaterialOptimizationStatus;
+
 typedef enum eGPUDefaultValue {
   GPU_DEFAULT_0 = 0,
   GPU_DEFAULT_1,
@@ -246,6 +255,15 @@ struct Scene *GPU_material_scene(GPUMaterial *material);
 struct GPUPass *GPU_material_get_pass(GPUMaterial *material);
 struct GPUShader *GPU_material_get_shader(GPUMaterial *material);
 const char *GPU_material_get_name(GPUMaterial *material);
+
+/**
+ * Material Optimization.
+ * \note Compiles optimal version of shader graph, populating mat->optimized_pass.
+ * This operation should always be deferred until existing compilations have completed.
+ * Default un-optimized materials will still exist for interactive material editing performance.
+ */
+void GPU_material_optimize(GPUMaterial *mat);
+
 /**
  * Return can be NULL if it's a world material.
  */
@@ -256,6 +274,13 @@ struct Material *GPU_material_get_material(GPUMaterial *material);
 eGPUMaterialStatus GPU_material_status(GPUMaterial *mat);
 void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status);

+/**
+ * Return status for async optimization jobs.
+ */
+eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat);
+void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status);
+bool GPU_material_optimization_ready(GPUMaterial *mat);
+
 struct GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material);
 /**
  * Create dynamic UBO from parameters
diff --git a/source/blender/gpu/intern/gpu_backend.hh b/source/blender/gpu/intern/gpu_backend.hh
index d2890efee72..2a545c8114e 100644
--- a/source/blender/gpu/intern/gpu_backend.hh
+++ b/source/blender/gpu/intern/gpu_backend.hh
@@ -38,7 +38,7 @@ class GPUBackend {
   virtual void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) = 0;
   virtual void compute_dispatch_indirect(StorageBuf *indirect_buf) = 0;

-  virtual Context *context_alloc(void *ghost_window) = 0;
+  virtual Context *context_alloc(void *ghost_window, void *ghost_context) = 0;

   virtual Batch *batch_alloc() = 0;
   virtual DrawList *drawlist_alloc(int list_length) = 0;
diff --git a/source/blender/gpu/intern/gpu_codegen.cc b/source/blender/gpu/intern/gpu_codegen.cc
index 2241bcf9f9b..85cfa9749fa 100644
--- a/source/blender/gpu/intern/gpu_codegen.cc
+++ b/source/blender/gpu/intern/gpu_codegen.cc
@@ -95,6 +95,9 @@ struct GPUPass {
   uint32_t hash;
   /** Did we already tried to compile the attached GPUShader. */
   bool compiled;
+  /** Hint that an optimized variant of this pass should be created based on a complexity heuristic
+   * during pass code generation. */
+  bool should_optimize;
 };

 /* -------------------------------------------------------------------- */
@@ -242,6 +245,11 @@ class GPUCodegen {
   ListBase ubo_inputs_ = {nullptr, nullptr};
   GPUInput *cryptomatte_input_ = nullptr;

+  /** Cache parameters for complexity heuristic. 
*/
+  uint nodes_total_ = 0;
+  uint textures_total_ = 0;
+  uint uniforms_total_ = 0;
+
  public:
   GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_) : mat(*mat_), graph(*graph_)
   {
@@ -282,6 +290,14 @@ class GPUCodegen {
     return hash_;
   }

+  /* Heuristic determined during pass codegen for whether a
+   * more optimal variant of this material should be compiled. */
+  bool should_optimize_heuristic() const
+  {
+    bool do_optimize = (nodes_total_ >= 100 || textures_total_ >= 4 || uniforms_total_ >= 64);
+    return do_optimize;
+  }
+
  private:
   void set_unique_ids();
@@ -403,6 +419,9 @@ void GPUCodegen::generate_resources()
     }
   }

+  /* Increment heuristic. */
+  textures_total_ = slot;
+
   if (!BLI_listbase_is_empty(&ubo_inputs_)) {
     /* NOTE: generate_uniform_buffer() should have sorted the inputs before this. */
     ss << "struct NodeTree {\n";
@@ -440,11 +459,16 @@ void GPUCodegen::generate_library()
   GPUCodegenCreateInfo &info = *create_info;

   void *value;
-  GSetIterState pop_state = {};
-  while (BLI_gset_pop(graph.used_libraries, &pop_state, &value)) {
+  /* Iterate over libraries. We need to keep this struct intact in case
+   * it is required for the optimization pass. */
+  GHashIterator *ihash = BLI_ghashIterator_new((GHash *)graph.used_libraries);
+  while (!BLI_ghashIterator_done(ihash)) {
+    value = BLI_ghashIterator_getKey(ihash);
     auto deps = gpu_shader_dependency_get_resolved_source((const char *)value);
     info.dependencies_generated.extend_non_duplicates(deps);
+    BLI_ghashIterator_step(ihash);
   }
+  BLI_ghashIterator_free(ihash);
 }

 void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
@@ -512,6 +536,9 @@ void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
     }
   }
   eval_ss << ");\n\n";
+
+  /* Increment heuristic. */
+  nodes_total_++;
 }

 char *GPUCodegen::graph_serialize(eGPUNodeTag tree_tag, GPUNodeLink *output_link)
@@ -575,6 +602,7 @@ void GPUCodegen::generate_uniform_buffer()
       if (input->source == GPU_SOURCE_UNIFORM && !input->link) {
         /* We handle the UBO uniforms separately. */
         BLI_addtail(&ubo_inputs_, BLI_genericNodeN(input));
+        uniforms_total_++;
       }
     }
   }
@@ -602,6 +630,7 @@ void GPUCodegen::generate_graphs()
 {
   set_unique_ids();

+  /* Serialize graph. */
   output.surface = graph_serialize(GPU_NODE_TAG_SURFACE | GPU_NODE_TAG_AOV, graph.outlink_surface);
   output.volume = graph_serialize(GPU_NODE_TAG_VOLUME, graph.outlink_volume);
   output.displacement = graph_serialize(GPU_NODE_TAG_DISPLACEMENT, graph.outlink_displacement);
@@ -637,10 +666,17 @@ void GPUCodegen::generate_graphs()
 GPUPass *GPU_generate_pass(GPUMaterial *material,
                            GPUNodeGraph *graph,
                            GPUCodegenCallbackFn finalize_source_cb,
-                           void *thunk)
+                           void *thunk,
+                           bool optimize_graph)
 {
   gpu_node_graph_prune_unused(graph);

+  /* If the optimize flag is passed in, we are generating an optimized
+   * variant of the GPUMaterial's GPUPass. */
+  if (optimize_graph) {
+    gpu_node_graph_optimize(graph);
+  }
+
   /* Extract attributes before compiling so the generated VBOs are ready to accept the future
    * shader. */
   gpu_node_graph_finalize_uniform_attrs(graph);
@@ -648,23 +684,33 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
   GPUCodegen codegen(material, graph);
   codegen.generate_graphs();
   codegen.generate_cryptomatte();
-  codegen.generate_uniform_buffer();

-  /* Cache lookup: Reuse shaders already compiled. */
-  GPUPass *pass_hash = gpu_pass_cache_lookup(codegen.hash_get());
-
-  /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
-   * there is no way to have a collision currently. 
Some advocated to only use a bigger hash. */ - if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) { - if (!gpu_pass_is_valid(pass_hash)) { - /* Shader has already been created but failed to compile. */ - return nullptr; + GPUPass *pass_hash = nullptr; + + if (!optimize_graph) { + /* The optimized version of the shader should not re-generate a UBO. + * The UBO will not be used for this variant. */ + codegen.generate_uniform_buffer(); + + /** Cache lookup: Reuse shaders already compiled. + * NOTE: We only perform cache look-up for non-optimized shader + * graphs, as baked constant data amongst other optimizations will generate too many + * shader source permutations, with minimal re-usability. */ + pass_hash = gpu_pass_cache_lookup(codegen.hash_get()); + + /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source + * there is no way to have a collision currently. Some advocated to only use a bigger hash. */ + if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) { + if (!gpu_pass_is_valid(pass_hash)) { + /* Shader has already been created but failed to compile. */ + return nullptr; + } + /* No collision, just return the pass. */ + BLI_spin_lock(&pass_cache_spin); + pass_hash->refcount += 1; + BLI_spin_unlock(&pass_cache_spin); + return pass_hash; } - /* No collision, just return the pass. */ - BLI_spin_lock(&pass_cache_spin); - pass_hash->refcount += 1; - BLI_spin_unlock(&pass_cache_spin); - return pass_hash; } /* Either the shader is not compiled or there is a hash collision... @@ -702,14 +748,31 @@ GPUPass *GPU_generate_pass(GPUMaterial *material, pass->create_info = codegen.create_info; pass->hash = codegen.hash_get(); pass->compiled = false; + /* Only flag pass optimization hint if this is the first generated pass for a material. + * Optimized passes cannot be optimized further, even if the heuristic is still not + * favourable. */ + pass->should_optimize = (!optimize_graph) && codegen.should_optimize_heuristic(); codegen.create_info = nullptr; - gpu_pass_cache_insert_after(pass_hash, pass); + /* Only insert non-optimized graphs into cache. + * Optimized graphs will continuously be recompiled with new unique source during material + * editing, and thus causing the cache to fill up quickly with materials offering minimal + * re-use. */ + if (!optimize_graph) { + gpu_pass_cache_insert_after(pass_hash, pass); + } } return pass; } +bool GPU_pass_should_optimize(GPUPass *pass) +{ + /* Returns optimization heuristic prepared during + * initial codegen. 
*/
+  return pass->should_optimize;
+}
+
 /** \} */

 /* -------------------------------------------------------------------- */
diff --git a/source/blender/gpu/intern/gpu_codegen.h b/source/blender/gpu/intern/gpu_codegen.h
index 95a672c0400..aabdf1ac003 100644
--- a/source/blender/gpu/intern/gpu_codegen.h
+++ b/source/blender/gpu/intern/gpu_codegen.h
@@ -25,10 +25,12 @@ typedef struct GPUPass GPUPass;
 GPUPass *GPU_generate_pass(GPUMaterial *material,
                            struct GPUNodeGraph *graph,
                            GPUCodegenCallbackFn finalize_source_cb,
-                           void *thunk);
+                           void *thunk,
+                           bool optimize_graph);
 GPUShader *GPU_pass_shader_get(GPUPass *pass);

 bool GPU_pass_compile(GPUPass *pass, const char *shname);
 void GPU_pass_release(GPUPass *pass);
+bool GPU_pass_should_optimize(GPUPass *pass);

 /* Module */
diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc
index bcc418169b7..92cbbc5b4b0 100644
--- a/source/blender/gpu/intern/gpu_context.cc
+++ b/source/blender/gpu/intern/gpu_context.cc
@@ -94,7 +94,7 @@ Context *Context::get()

 /* -------------------------------------------------------------------- */

-GPUContext *GPU_context_create(void *ghost_window)
+GPUContext *GPU_context_create(void *ghost_window, void *ghost_context)
 {
   {
     std::scoped_lock lock(backend_users_mutex);
@@ -105,7 +105,7 @@ GPUContext *GPU_context_create(void *ghost_window)
     num_backend_users++;
   }

-  Context *ctx = GPUBackend::get()->context_alloc(ghost_window);
+  Context *ctx = GPUBackend::get()->context_alloc(ghost_window, ghost_context);

   GPU_context_active_set(wrap(ctx));
   return wrap(ctx);
@@ -216,6 +216,9 @@ void GPU_render_step()
 /** \name Backend selection
  * \{ */

+/* NOTE: To enable Metal API, we need to temporarily change this to `GPU_BACKEND_METAL`.
+ * Until a global switch is added, Metal also needs to be enabled in GHOST_ContextCGL:
+ * `m_useMetalForRendering = true`. */
 static const eGPUBackendType g_backend_type = GPU_BACKEND_OPENGL;
 static GPUBackend *g_backend = nullptr;

diff --git a/source/blender/gpu/intern/gpu_material.c b/source/blender/gpu/intern/gpu_material.c
index 96809db1587..991cb229eda 100644
--- a/source/blender/gpu/intern/gpu_material.c
+++ b/source/blender/gpu/intern/gpu_material.c
@@ -34,6 +34,8 @@

 #include "DRW_engine.h"

+#include "PIL_time.h"
+
 #include "gpu_codegen.h"
 #include "gpu_node_graph.h"

@@ -43,6 +45,17 @@
 #define MAX_COLOR_BAND 128
 #define MAX_GPU_SKIES 8

+/** Whether the optimized variant of the GPUPass should be created asynchronously.
+ * Usage of this depends on whether there are possible threading challenges of doing so.
+ * Currently, the overhead of GPU_generate_pass is relatively small in comparison to shader
+ * compilation, though this option exists in case any potential scenarios for material graph
+ * optimization cause a slow down on the main thread.
+ *
+ * NOTE: The actual shader program for the optimized pass will always be compiled asynchronously;
+ * this flag controls whether shader node graph source serialization happens on the compilation
+ * worker thread. */
+#define ASYNC_OPTIMIZED_PASS_CREATION 0
+
 typedef struct GPUColorBandBuilder {
   float pixels[MAX_COLOR_BAND][CM_TABLE + 1][4];
   int current_layer;
@@ -57,6 +70,27 @@ struct GPUMaterial {

   /* Contains GPUShader and source code for deferred compilation.
    * Can be shared between similar material (i.e: sharing same nodetree topology). */
   GPUPass *pass;
+  /* Optimized GPUPass, situationally compiled after initial pass for optimal realtime performance. 
+ * This shader variant bakes dynamic uniform data as constant. This variant will not use + * the ubo, and instead bake constants directly into the shader source. */ + GPUPass *optimized_pass; + /* Optimization status. + * We also use this status to determine whether this material should be considered for + * optimization. Only sufficiently complex shaders benefit from constant-folding optimizations. + * `GPU_MAT_OPTIMIZATION_READY` -> shader should be optimized and is ready for optimization. + * `GPU_MAT_OPTIMIZATION_SKIP` -> Shader should not be optimized as it would not benefit + * performance to do so, based on the heuristic. + */ + eGPUMaterialOptimizationStatus optimization_status; + double creation_time; +#if ASYNC_OPTIMIZED_PASS_CREATION == 1 + struct DeferredOptimizePass { + GPUCodegenCallbackFn callback; + void *thunk; + } DeferredOptimizePass; + struct DeferredOptimizePass optimize_pass_info; +#endif + /** UBOs for this material parameters. */ GPUUniformBuf *ubo; /** Compilation status. Do not use if shader is not GPU_MAT_SUCCESS. */ @@ -209,6 +243,9 @@ void GPU_material_free_single(GPUMaterial *material) gpu_node_graph_free(&material->graph); + if (material->optimized_pass != NULL) { + GPU_pass_release(material->optimized_pass); + } if (material->pass != NULL) { GPU_pass_release(material->pass); } @@ -247,12 +284,15 @@ Scene *GPU_material_scene(GPUMaterial *material) GPUPass *GPU_material_get_pass(GPUMaterial *material) { - return material->pass; + return (material->optimized_pass) ? material->optimized_pass : material->pass; } GPUShader *GPU_material_get_shader(GPUMaterial *material) { - return material->pass ? GPU_pass_shader_get(material->pass) : NULL; + /* First attempt to select optimized shader. If not available, fetch original. */ + GPUShader *shader = (material->optimized_pass) ? GPU_pass_shader_get(material->optimized_pass) : + NULL; + return (shader) ? shader : ((material->pass) ? GPU_pass_shader_get(material->pass) : NULL); } const char *GPU_material_get_name(GPUMaterial *material) @@ -665,6 +705,29 @@ void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status) mat->status = status; } +eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat) +{ + return mat->optimization_status; +} + +void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status) +{ + mat->optimization_status = status; + if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) { + /* Reset creation timer to delay optimization pass. */ + mat->creation_time = PIL_check_seconds_timer(); + } +} + +bool GPU_material_optimization_ready(GPUMaterial *mat) +{ + /* Timer threshold before optimizations will be queued. + * When materials are frequently being modified, optimization + * can incur CPU overhead from excessive compilation. 
*/ + const double optimization_time_threshold_s = 5.0; + return ((PIL_check_seconds_timer() - mat->creation_time) >= optimization_time_threshold_s); +} + /* Code generation */ bool GPU_material_has_surface_output(GPUMaterial *mat) @@ -730,6 +793,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene, mat->uuid = shader_uuid; mat->flag = GPU_MATFLAG_UPDATED; mat->status = GPU_MAT_CREATED; + mat->optimization_status = GPU_MAT_OPTIMIZATION_SKIP; mat->is_volume_shader = is_volume_shader; mat->graph.used_libraries = BLI_gset_new( BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries"); @@ -748,7 +812,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene, { /* Create source code and search pass cache for an already compiled version. */ - mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk); + mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, false); if (mat->pass == NULL) { /* We had a cache hit and the shader has already failed to compile. */ @@ -756,11 +820,44 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene, gpu_node_graph_free(&mat->graph); } else { + /* Determine whether we should generate an optimized variant of the graph. + * Heuristic is based on complexity of default material pass and shader node graph. */ + if (GPU_pass_should_optimize(mat->pass)) { + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY); + } + GPUShader *sh = GPU_pass_shader_get(mat->pass); if (sh != NULL) { /* We had a cache hit and the shader is already compiled. */ mat->status = GPU_MAT_SUCCESS; - gpu_node_graph_free_nodes(&mat->graph); + + if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) { + gpu_node_graph_free_nodes(&mat->graph); + } + } + + /* Generate optimized pass. */ + if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) { +#if ASYNC_OPTIMIZED_PASS_CREATION == 1 + mat->optimized_pass = NULL; + mat->optimize_pass_info.callback = callback; + mat->optimize_pass_info.thunk = thunk; +#else + mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, true); + if (mat->optimized_pass == NULL) { + /* Failed to create optimized pass. */ + gpu_node_graph_free_nodes(&mat->graph); + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); + } + else { + GPUShader *optimized_sh = GPU_pass_shader_get(mat->optimized_pass); + if (optimized_sh != NULL) { + /* Optimized shader already available. */ + gpu_node_graph_free_nodes(&mat->graph); + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS); + } + } +#endif } } } @@ -811,7 +908,11 @@ void GPU_material_compile(GPUMaterial *mat) GPUShader *sh = GPU_pass_shader_get(mat->pass); if (sh != NULL) { mat->status = GPU_MAT_SUCCESS; - gpu_node_graph_free_nodes(&mat->graph); + + if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) { + /* Only free node graph nodes if not required by secondary optimization pass. */ + gpu_node_graph_free_nodes(&mat->graph); + } } else { mat->status = GPU_MAT_FAILED; @@ -825,6 +926,71 @@ void GPU_material_compile(GPUMaterial *mat) } } +void GPU_material_optimize(GPUMaterial *mat) +{ + /* If shader is flagged for skipping optimization or has already been successfully + * optimized, skip. */ + if (ELEM(mat->optimization_status, GPU_MAT_OPTIMIZATION_SKIP, GPU_MAT_OPTIMIZATION_SUCCESS)) { + return; + } + + /* If original shader has not been fully compiled, we are not + * ready to perform optimization. */ + if (mat->status != GPU_MAT_SUCCESS) { + /* Reset optimization status. 
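
The 5-second threshold above acts as a debounce: re-flagging a material `GPU_MAT_OPTIMIZATION_READY` on every edit re-arms `creation_time`, so optimization only starts once the user has stopped touching the material. A standalone sketch of the same pattern (the `Debounce` type and helper names are hypothetical; only `PIL_check_seconds_timer()` is real):

  typedef struct Debounce {
    double armed_at;
  } Debounce;

  static void debounce_arm(Debounce *d)
  {
    // Called whenever the guarded work is requested again.
    d->armed_at = PIL_check_seconds_timer();
  }

  static bool debounce_elapsed(const Debounce *d, double threshold_s)
  {
    // True only once no re-arm has happened for `threshold_s` seconds.
    return (PIL_check_seconds_timer() - d->armed_at) >= threshold_s;
  }
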
*/ + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY); + return; + } + +#if ASYNC_OPTIMIZED_PASS_CREATION == 1 + /* If the optimized pass is not valid, first generate optimized pass. + * NOTE(Threading): Need to verify if GPU_generate_pass can cause side-effects, especially when + * used with "thunk". So far, this appears to work, and deferring optimized pass creation is more + * optimal, as these do not benefit from caching, due to baked constants. However, this could + * possibly be cause for concern for certain cases. */ + if (!mat->optimized_pass) { + mat->optimized_pass = GPU_generate_pass( + mat, &mat->graph, mat->optimize_pass_info.callback, mat->optimize_pass_info.thunk, true); + BLI_assert(mat->optimized_pass); + } +#else + if (!mat->optimized_pass) { + /* Optimized pass has not been created, skip future optimization attempts. */ + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); + return; + } +#endif + + bool success; + /* NOTE: The shader may have already been compiled here since we are + * sharing GPUShader across GPUMaterials. In this case it's a no-op. */ +#ifndef NDEBUG + success = GPU_pass_compile(mat->optimized_pass, mat->name); +#else + success = GPU_pass_compile(mat->optimized_pass, __func__); +#endif + + if (success) { + GPUShader *sh = GPU_pass_shader_get(mat->optimized_pass); + if (sh != NULL) { + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS); + } + else { + /* Optimized pass failed to compile. Disable any future optimization attempts. */ + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); + } + } + else { + /* Optimization pass generation failed. Disable future attempts to optimize. */ + GPU_pass_release(mat->optimized_pass); + mat->optimized_pass = NULL; + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); + } + + /* Release node graph as no longer needed. */ + gpu_node_graph_free_nodes(&mat->graph); +} + void GPU_materials_free(Main *bmain) { LISTBASE_FOREACH (Material *, ma, &bmain->materials) { @@ -848,6 +1014,8 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi material->graph.used_libraries = BLI_gset_new( BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries"); material->refcount = 1; + material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP; + material->optimized_pass = NULL; /* Construct the material graph by adding and linking the necessary GPU material nodes. */ construct_function_cb(thunk, material); @@ -856,7 +1024,9 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi gpu_material_ramp_texture_build(material); /* Lookup an existing pass in the cache or generate a new one. */ - material->pass = GPU_generate_pass(material, &material->graph, generate_code_function_cb, thunk); + material->pass = GPU_generate_pass( + material, &material->graph, generate_code_function_cb, thunk, false); + material->optimized_pass = NULL; /* The pass already exists in the pass cache but its shader already failed to compile. */ if (material->pass == NULL) { @@ -865,11 +1035,42 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi return material; } + /* Generate optimized pass. 
*/
+  if (GPU_pass_should_optimize(material->pass)) {
+
+#if ASYNC_OPTIMIZED_PASS_CREATION == 1
+    material->optimized_pass = NULL;
+    material->optimize_pass_info.callback = generate_code_function_cb;
+    material->optimize_pass_info.thunk = thunk;
+    GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_READY);
+#else
+    material->optimized_pass = GPU_generate_pass(
+        material, &material->graph, generate_code_function_cb, thunk, true);
+
+    if (material->optimized_pass == NULL) {
+      /* Failed to create optimized pass. */
+      gpu_node_graph_free_nodes(&material->graph);
+      GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SKIP);
+    }
+    else {
+      GPUShader *optimized_sh = GPU_pass_shader_get(material->optimized_pass);
+      if (optimized_sh != NULL) {
+        /* Optimized shader already available. */
+        gpu_node_graph_free_nodes(&material->graph);
+        GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SUCCESS);
+      }
+    }
+#endif
+  }
+
   /* The pass already exists in the pass cache and its shader is already compiled. */
   GPUShader *shader = GPU_pass_shader_get(material->pass);
   if (shader != NULL) {
     material->status = GPU_MAT_SUCCESS;
-    gpu_node_graph_free_nodes(&material->graph);
+    if (material->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
+      /* Only free node graph if not required by secondary optimization pass. */
+      gpu_node_graph_free_nodes(&material->graph);
+    }
     return material;
   }

diff --git a/source/blender/gpu/intern/gpu_node_graph.c b/source/blender/gpu/intern/gpu_node_graph.c
index a305413905b..3ca2399a547 100644
--- a/source/blender/gpu/intern/gpu_node_graph.c
+++ b/source/blender/gpu/intern/gpu_node_graph.c
@@ -914,3 +914,22 @@ void gpu_node_graph_prune_unused(GPUNodeGraph *graph)
     }
   }
 }
+
+void gpu_node_graph_optimize(GPUNodeGraph *graph)
+{
+  /* Replace all uniform node links with constant. */
+  LISTBASE_FOREACH (GPUNode *, node, &graph->nodes) {
+    LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
+      if (input->link) {
+        if (input->link->link_type == GPU_NODE_LINK_UNIFORM) {
+          input->link->link_type = GPU_NODE_LINK_CONSTANT;
+        }
+      }
+      if (input->source == GPU_SOURCE_UNIFORM) {
+        input->source = (input->type == GPU_CLOSURE) ? GPU_SOURCE_STRUCT : GPU_SOURCE_CONSTANT;
+      }
+    }
+  }
+
+  /* TODO: Consider performing other node graph optimizations here. */
+}
diff --git a/source/blender/gpu/intern/gpu_node_graph.h b/source/blender/gpu/intern/gpu_node_graph.h
index 085620b30e4..75ca05ffaea 100644
--- a/source/blender/gpu/intern/gpu_node_graph.h
+++ b/source/blender/gpu/intern/gpu_node_graph.h
@@ -179,6 +179,21 @@ typedef struct GPUNodeGraph {

 void gpu_node_graph_prune_unused(GPUNodeGraph *graph);
 void gpu_node_graph_finalize_uniform_attrs(GPUNodeGraph *graph);
+
+/**
+ * Optimize node graph for optimized material shader path.
+ * Once the base material has been generated, we can modify the shader
+ * node graph to create one which will produce an optimally performing shader.
+ * This currently involves baking uniform data into constant data to enable
+ * aggressive constant folding by the compiler in order to reduce complexity and
+ * shader core memory pressure.
+ *
+ * NOTE: Graph optimizations will produce a shader which needs to be re-compiled
+ * more frequently; however, the default material pass will always exist to fall
+ * back on.
+ */
+void gpu_node_graph_optimize(GPUNodeGraph *graph);
+
 /**
  * Free intermediate node graph. 
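
To make the effect of `gpu_node_graph_optimize()` concrete, an illustrative before/after at the shader-source level (not taken from this patch; the uniform name and baked value are invented, and the exact literal depends on the material's uniform state at generation time):

  // Default pass: the value stays editable without a recompile.
  uniform vec4 u_roughness;
  // Optimized pass: the uniform is folded to a literal constant, so the
  // compiler can constant-fold every subexpression that depends on it.
  const vec4 u_roughness = vec4(0.25, 0.25, 0.25, 1.0);

This is the complexity and register/memory-pressure win the note above describes, traded against recompiling whenever the baked values change.
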
*/ diff --git a/source/blender/gpu/intern/gpu_shader_builder.cc b/source/blender/gpu/intern/gpu_shader_builder.cc index 9b699c60126..3aa2963ecd0 100644 --- a/source/blender/gpu/intern/gpu_shader_builder.cc +++ b/source/blender/gpu/intern/gpu_shader_builder.cc @@ -45,7 +45,7 @@ void ShaderBuilder::init() ghost_context_ = GHOST_CreateOpenGLContext(ghost_system_, glSettings); GHOST_ActivateOpenGLContext(ghost_context_); - gpu_context_ = GPU_context_create(nullptr); + gpu_context_ = GPU_context_create(nullptr, ghost_context_); GPU_init(); } diff --git a/source/blender/gpu/intern/gpu_shader_interface.cc b/source/blender/gpu/intern/gpu_shader_interface.cc index 6f43b379d31..d9e5e066fea 100644 --- a/source/blender/gpu/intern/gpu_shader_interface.cc +++ b/source/blender/gpu/intern/gpu_shader_interface.cc @@ -22,8 +22,8 @@ ShaderInterface::ShaderInterface() = default; ShaderInterface::~ShaderInterface() { /* Free memory used by name_buffer. */ - MEM_freeN(name_buffer_); - MEM_freeN(inputs_); + MEM_SAFE_FREE(name_buffer_); + MEM_SAFE_FREE(inputs_); } static void sort_input_list(MutableSpan<ShaderInput> dst) diff --git a/source/blender/gpu/metal/mtl_backend.hh b/source/blender/gpu/metal/mtl_backend.hh index 214a5d738a9..082fab24ba4 100644 --- a/source/blender/gpu/metal/mtl_backend.hh +++ b/source/blender/gpu/metal/mtl_backend.hh @@ -63,7 +63,7 @@ class MTLBackend : public GPUBackend { /* MTL Allocators need to be implemented in separate .mm files, due to allocation of Objective-C * objects. */ - Context *context_alloc(void *ghost_window) override; + Context *context_alloc(void *ghost_window, void *ghost_context) override; Batch *batch_alloc() override; DrawList *drawlist_alloc(int list_length) override; FrameBuffer *framebuffer_alloc(const char *name) override; diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm index ec9e8ab4d15..2ca1fd3f3d0 100644 --- a/source/blender/gpu/metal/mtl_backend.mm +++ b/source/blender/gpu/metal/mtl_backend.mm @@ -8,8 +8,11 @@ #include "gpu_backend.hh" #include "mtl_backend.hh" +#include "mtl_batch.hh" #include "mtl_context.hh" +#include "mtl_drawlist.hh" #include "mtl_framebuffer.hh" +#include "mtl_immediate.hh" #include "mtl_index_buffer.hh" #include "mtl_query.hh" #include "mtl_shader.hh" @@ -37,21 +40,21 @@ void MTLBackend::samplers_update(){ /* Placeholder -- Handled in MTLContext. */ }; -Context *MTLBackend::context_alloc(void *ghost_window) +Context *MTLBackend::context_alloc(void *ghost_window, void *ghost_context) { - return new MTLContext(ghost_window); + return new MTLContext(ghost_window, ghost_context); }; Batch *MTLBackend::batch_alloc() { - /* TODO(Metal): Implement MTLBatch. */ - return nullptr; + /* TODO(Metal): Full MTLBatch implementation. */ + return new MTLBatch(); }; DrawList *MTLBackend::drawlist_alloc(int list_length) { - /* TODO(Metal): Implement MTLDrawList. */ - return nullptr; + /* TODO(Metal): Full MTLDrawList implementation. */ + return new MTLDrawList(list_length); }; FrameBuffer *MTLBackend::framebuffer_alloc(const char *name) diff --git a/source/blender/gpu/metal/mtl_batch.hh b/source/blender/gpu/metal/mtl_batch.hh new file mode 100644 index 00000000000..66603dabd15 --- /dev/null +++ b/source/blender/gpu/metal/mtl_batch.hh @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * GPU geometry batch + * Contains VAOs + VBOs + Shader representing a drawable entity. 
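
On the `ShaderInterface` destructor change above: `MEM_SAFE_FREE` guards against both NULL pointers and accidental reuse, which matters if an interface is destroyed before `name_buffer_`/`inputs_` were ever allocated. Roughly what the macro does (a simplified sketch; see `MEM_guardedalloc.h` for the actual definition):

  #define MEM_SAFE_FREE_SKETCH(v) \
    do { \
      if (v) { \
        MEM_freeN(v); \
        (v) = NULL; \
      } \
    } while (0)
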
+ */ + +#pragma once + +#include "MEM_guardedalloc.h" + +#include "gpu_batch_private.hh" + +namespace blender { +namespace gpu { + + +/* Pass-through MTLBatch. TODO(Metal): Implement. */ +class MTLBatch : public Batch { + public: + void draw(int v_first, int v_count, int i_first, int i_count) override { + + } + + void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override { + + } + + void multi_draw_indirect(GPUStorageBuf *indirect_buf, + int count, + intptr_t offset, + intptr_t stride) override { + + } + MEM_CXX_CLASS_ALLOC_FUNCS("MTLBatch"); +}; + +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/metal/mtl_command_buffer.mm b/source/blender/gpu/metal/mtl_command_buffer.mm index d2936e8e91f..a9cabbb111f 100644 --- a/source/blender/gpu/metal/mtl_command_buffer.mm +++ b/source/blender/gpu/metal/mtl_command_buffer.mm @@ -54,6 +54,7 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin() MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init]; desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus; desc.retainedReferences = YES; + BLI_assert(context_.queue != nil); active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc]; } else { @@ -611,40 +612,187 @@ void MTLRenderPassState::bind_vertex_sampler(MTLSamplerBinding &sampler_binding, bool use_argument_buffer_for_samplers, uint slot) { - /* TODO(Metal): Implement RenderCommandEncoder vertex sampler binding utility. This will be - * implemented alongside MTLShader. */ + /* Range check. */ + const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface(); + BLI_assert(slot >= 0); + BLI_assert(slot <= shader_interface->get_max_texture_index()); + BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS); + UNUSED_VARS_NDEBUG(shader_interface); + + /* If sampler state has not changed for the given slot, we do not need to fetch. */ + if (this->cached_vertex_sampler_state_bindings[slot].sampler_state == nil || + !(this->cached_vertex_sampler_state_bindings[slot].binding_state == sampler_binding.state) || + use_argument_buffer_for_samplers) { + + id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ? + ctx.get_default_sampler_state() : + ctx.get_sampler_from_state(sampler_binding.state); + if (!use_argument_buffer_for_samplers) { + /* Update binding and cached state. */ + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + BLI_assert(rec != nil); + [rec setVertexSamplerState:sampler_state atIndex:slot]; + this->cached_vertex_sampler_state_bindings[slot].binding_state = sampler_binding.state; + this->cached_vertex_sampler_state_bindings[slot].sampler_state = sampler_state; + } + + /* Flag last binding type. */ + this->cached_vertex_sampler_state_bindings[slot].is_arg_buffer_binding = + use_argument_buffer_for_samplers; + + /* Always assign to argument buffer samplers binding array - Efficiently ensures the value in + * the samplers array is always up to date. */ + ctx.samplers_.mtl_sampler[slot] = sampler_state; + ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state; + } } void MTLRenderPassState::bind_fragment_sampler(MTLSamplerBinding &sampler_binding, bool use_argument_buffer_for_samplers, uint slot) { - /* TODO(Metal): Implement RenderCommandEncoder fragment sampler binding utility. This will be - * implemented alongside MTLShader. */ + /* Range check. 
*/
+  const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface();
+  BLI_assert(slot >= 0);
+  BLI_assert(slot <= shader_interface->get_max_texture_index());
+  BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS);
+  UNUSED_VARS_NDEBUG(shader_interface);
+
+  /* If sampler state has not changed for the given slot, we do not need to fetch. */
+  if (this->cached_fragment_sampler_state_bindings[slot].sampler_state == nil ||
+      !(this->cached_fragment_sampler_state_bindings[slot].binding_state ==
+        sampler_binding.state) ||
+      use_argument_buffer_for_samplers) {
+
+    id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ?
+                                            ctx.get_default_sampler_state() :
+                                            ctx.get_sampler_from_state(sampler_binding.state);
+    if (!use_argument_buffer_for_samplers) {
+      /* Update binding and cached state. */
+      id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+      BLI_assert(rec != nil);
+      [rec setFragmentSamplerState:sampler_state atIndex:slot];
+      this->cached_fragment_sampler_state_bindings[slot].binding_state = sampler_binding.state;
+      this->cached_fragment_sampler_state_bindings[slot].sampler_state = sampler_state;
+    }
+
+    /* Flag last binding type. */
+    this->cached_fragment_sampler_state_bindings[slot].is_arg_buffer_binding =
+        use_argument_buffer_for_samplers;
+
+    /* Always assign to argument buffer samplers binding array - Efficiently ensures the value in
+     * the samplers array is always up to date. */
+    ctx.samplers_.mtl_sampler[slot] = sampler_state;
+    ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state;
+  }
 }

 void MTLRenderPassState::bind_vertex_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index)
 {
-  /* TODO(Metal): Implement RenderCommandEncoder vertex buffer binding utility. This will be
-   * implemented alongside the full MTLMemoryManager. */
+  BLI_assert(index >= 0);
+  BLI_assert(buffer_offset >= 0);
+  BLI_assert(buffer != nil);
+
+  BufferBindingCached &current_vert_ubo_binding = this->cached_vertex_buffer_bindings[index];
+  if (current_vert_ubo_binding.offset != buffer_offset ||
+      current_vert_ubo_binding.metal_buffer != buffer || current_vert_ubo_binding.is_bytes) {
+
+    id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+    BLI_assert(rec != nil);
+
+    if (current_vert_ubo_binding.metal_buffer == buffer) {
+      /* If buffer is the same, but offset has changed. */
+      [rec setVertexBufferOffset:buffer_offset atIndex:index];
+    }
+    else {
+      /* Bind Vertex Buffer. */
+      [rec setVertexBuffer:buffer offset:buffer_offset atIndex:index];
+    }
+
+    /* Update Bind-state cache. */
+    this->cached_vertex_buffer_bindings[index].is_bytes = false;
+    this->cached_vertex_buffer_bindings[index].metal_buffer = buffer;
+    this->cached_vertex_buffer_bindings[index].offset = buffer_offset;
+  }
 }

 void MTLRenderPassState::bind_fragment_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index)
 {
-  /* TODO(Metal): Implement RenderCommandEncoder fragment buffer binding utility. This will be
-   * implemented alongside the full MTLMemoryManager. 
*/ + BLI_assert(index >= 0); + BLI_assert(buffer_offset >= 0); + BLI_assert(buffer != nil); + + BufferBindingCached ¤t_frag_ubo_binding = this->cached_fragment_buffer_bindings[index]; + if (current_frag_ubo_binding.offset != buffer_offset || + current_frag_ubo_binding.metal_buffer != buffer || current_frag_ubo_binding.is_bytes) { + + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + BLI_assert(rec != nil); + + if (current_frag_ubo_binding.metal_buffer == buffer) { + /* If buffer is the same, but offset has changed. */ + [rec setFragmentBufferOffset:buffer_offset atIndex:index]; + } + else { + /* Bind Fragment Buffer */ + [rec setFragmentBuffer:buffer offset:buffer_offset atIndex:index]; + } + + /* Update Bind-state cache */ + this->cached_fragment_buffer_bindings[index].is_bytes = false; + this->cached_fragment_buffer_bindings[index].metal_buffer = buffer; + this->cached_fragment_buffer_bindings[index].offset = buffer_offset; + } } void MTLRenderPassState::bind_vertex_bytes(void *bytes, uint length, uint index) { - /* TODO(Metal): Implement RenderCommandEncoder vertex bytes binding utility. This will be - * implemented alongside the full MTLMemoryManager. */ + /* Bytes always updated as source data may have changed. */ + BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + BLI_assert(length > 0); + BLI_assert(bytes != nullptr); + + if (length < MTL_MAX_SET_BYTES_SIZE) { + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + [rec setVertexBytes:bytes length:length atIndex:index]; + } + else { + /* We have run over the setBytes limit, bind buffer instead. */ + MTLTemporaryBuffer range = + ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256); + memcpy(range.data, bytes, length); + this->bind_vertex_buffer(range.metal_buffer, range.buffer_offset, index); + } + + /* Update Bind-state cache */ + this->cached_vertex_buffer_bindings[index].is_bytes = true; + this->cached_vertex_buffer_bindings[index].metal_buffer = nil; + this->cached_vertex_buffer_bindings[index].offset = -1; } void MTLRenderPassState::bind_fragment_bytes(void *bytes, uint length, uint index) { - /* TODO(Metal): Implement RenderCommandEncoder fragment bytes binding utility. This will be - * implemented alongside the full MTLMemoryManager. */ + /* Bytes always updated as source data may have changed. */ + BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + BLI_assert(length > 0); + BLI_assert(bytes != nullptr); + + if (length < MTL_MAX_SET_BYTES_SIZE) { + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + [rec setFragmentBytes:bytes length:length atIndex:index]; + } + else { + /* We have run over the setBytes limit, bind buffer instead. */ + MTLTemporaryBuffer range = + ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256); + memcpy(range.data, bytes, length); + this->bind_fragment_buffer(range.metal_buffer, range.buffer_offset, index); + } + + /* Update Bind-state cache. 
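
A worked pass through the size split in `bind_vertex_bytes`/`bind_fragment_bytes` above, using the constants from `mtl_common.hh` (the two byte counts are invented for illustration):

  // 64 bytes of push-constant-style data: 64 < MTL_MAX_SET_BYTES_SIZE (4096),
  // so it travels inline via [rec setVertexBytes:...].
  // 16384 bytes of palette data: over the limit, so it is memcpy'd into a
  // 256-byte-aligned scratch-buffer range and bound with setVertexBuffer instead.
  static bool use_inline_bytes(uint length)
  {
    return length < MTL_MAX_SET_BYTES_SIZE;
  }

Either way the cache entry is marked `is_bytes = true` with a nil buffer, so the redundant-bind check in `bind_vertex_buffer`/`bind_fragment_buffer` can never skip the next real buffer binding at that index.
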
*/
+  this->cached_fragment_buffer_bindings[index].is_bytes = true;
+  this->cached_fragment_buffer_bindings[index].metal_buffer = nil;
+  this->cached_fragment_buffer_bindings[index].offset = -1;
 }

 /** \} */
diff --git a/source/blender/gpu/metal/mtl_common.hh b/source/blender/gpu/metal/mtl_common.hh
index b6f9c0050a9..5c322efa3f9 100644
--- a/source/blender/gpu/metal/mtl_common.hh
+++ b/source/blender/gpu/metal/mtl_common.hh
@@ -3,7 +3,9 @@
 #ifndef __MTL_COMMON
 #define __MTL_COMMON

-// -- Renderer Options --
+/** -- Renderer Options -- */
+/* Number of frames over which rolling averages are taken. */
+#define MTL_FRAME_AVERAGE_COUNT 5
 #define MTL_MAX_DRAWABLES 3
 #define MTL_MAX_SET_BYTES_SIZE 4096
 #define MTL_FORCE_WAIT_IDLE 0
diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh
index 577438667d6..5991fe2bc3e 100644
--- a/source/blender/gpu/metal/mtl_context.hh
+++ b/source/blender/gpu/metal/mtl_context.hh
@@ -12,6 +12,10 @@
 #include "GPU_common_types.h"
 #include "GPU_context.h"

+#include "intern/GHOST_Context.h"
+#include "intern/GHOST_ContextCGL.h"
+#include "intern/GHOST_Window.h"
+
 #include "mtl_backend.hh"
 #include "mtl_capabilities.hh"
 #include "mtl_common.hh"
@@ -570,12 +574,44 @@ class MTLCommandBufferManager {
 class MTLContext : public Context {
   friend class MTLBackend;
+  friend class MTLRenderPassState;
+
+ public:
+  /* Swapchain and latency management. */
+  static std::atomic<int> max_drawables_in_flight;
+  static std::atomic<int64_t> avg_drawable_latency_us;
+  static int64_t frame_latency[MTL_FRAME_AVERAGE_COUNT];
+
+ public:
+  /* Shaders and Pipeline state. */
+  MTLContextGlobalShaderPipelineState pipeline_state;
+
+  /* Metal API Resource Handles. */
+  id<MTLCommandQueue> queue = nil;
+  id<MTLDevice> device = nil;
+
+#ifndef NDEBUG
+  /* Label for Context debug name assignment. */
+  NSString *label = nil;
+#endif
+
+  /* Memory Management. */
+  MTLScratchBufferManager memory_manager;
+  static MTLBufferPool global_memory_manager;
+
+  /* CommandBuffer managers. */
+  MTLCommandBufferManager main_command_buffer;

  private:
-  /* Null buffers for empty/uninitialized bindings.
-   * Null attribute buffer follows default attribute format of OpenGL Back-end. */
-  id<MTLBuffer> null_buffer_;           /* All zero's. */
-  id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */
+  /* Parent Context. */
+  GHOST_ContextCGL *ghost_context_;
+
+  /* Render Passes and Framebuffers. */
+  id<MTLTexture> default_fbo_mtltexture_ = nil;
+  gpu::MTLTexture *default_fbo_gputexture_ = nullptr;
+
+  /* Depth-stencil state cache. */
+  blender::Map<MTLContextDepthStencilState, id<MTLDepthStencilState>> depth_stencil_state_cache;

   /* Compute and specialization caches. */
   MTLContextTextureUtils texture_utils_;
@@ -601,23 +637,20 @@ class MTLContext : public Context {
   gpu::MTLBuffer *visibility_buffer_ = nullptr;
   bool visibility_is_dirty_ = false;

- public:
-  /* Shaders and Pipeline state. */
-  MTLContextGlobalShaderPipelineState pipeline_state;
-
-  /* Metal API Resource Handles. */
-  id<MTLCommandQueue> queue = nil;
-  id<MTLDevice> device = nil;
-
-  /* Memory Management */
-  MTLScratchBufferManager memory_manager;
-  static MTLBufferPool global_memory_manager;
+  /* Null buffers for empty/uninitialized bindings.
+   * Null attribute buffer follows default attribute format of OpenGL Backend. */
+  id<MTLBuffer> null_buffer_;           /* All zero's. */
+  id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */

-  /* CommandBuffer managers. 
*/ - MTLCommandBufferManager main_command_buffer; + /** Dummy Resources */ + /* Maximum of 32 texture types. Though most combinations invalid. */ + gpu::MTLTexture *dummy_textures_[GPU_TEXTURE_BUFFER] = {nullptr}; + GPUVertFormat dummy_vertformat_; + GPUVertBuf *dummy_verts_ = nullptr; + public: /* GPUContext interface. */ - MTLContext(void *ghost_window); + MTLContext(void *ghost_window, void *ghost_context); ~MTLContext(); static void check_error(const char *info); @@ -673,6 +706,35 @@ class MTLContext : public Context { void pipeline_state_init(); MTLShader *get_active_shader(); + /* These functions ensure that the current RenderCommandEncoder has + * the correct global state assigned. This should be called prior + * to every draw call, to ensure that all state is applied and up + * to date. We handle: + * + * - Buffer bindings (Vertex buffers, Uniforms, UBOs, transform feedback) + * - Texture bindings + * - Sampler bindings (+ argument buffer bindings) + * - Dynamic Render pipeline state (on encoder) + * - Baking Pipeline State Objects (PSOs) for current shader, based + * on final pipeline state. + * + * `ensure_render_pipeline_state` will return false if the state is + * invalid and cannot be applied. This should cancel a draw call. */ + bool ensure_render_pipeline_state(MTLPrimitiveType prim_type); + bool ensure_uniform_buffer_bindings( + id<MTLRenderCommandEncoder> rec, + const MTLShaderInterface *shader_interface, + const MTLRenderPipelineStateInstance *pipeline_state_instance); + void ensure_texture_bindings(id<MTLRenderCommandEncoder> rec, + MTLShaderInterface *shader_interface, + const MTLRenderPipelineStateInstance *pipeline_state_instance); + void ensure_depth_stencil_state(MTLPrimitiveType prim_type); + + id<MTLBuffer> get_null_buffer(); + id<MTLBuffer> get_null_attribute_buffer(); + gpu::MTLTexture *get_dummy_texture(eGPUTextureType type); + void free_dummy_resources(); + /* State assignment. */ void set_viewport(int origin_x, int origin_y, int width, int height); void set_scissor(int scissor_x, int scissor_y, int scissor_width, int scissor_height); @@ -720,9 +782,37 @@ class MTLContext : public Context { { return MTLContext::global_memory_manager; } - /* Uniform Buffer Bindings to command encoders. */ - id<MTLBuffer> get_null_buffer(); - id<MTLBuffer> get_null_attribute_buffer(); + + /* Swapchain and latency management. */ + static void latency_resolve_average(int64_t frame_latency_us) + { + int64_t avg = 0; + int64_t frame_c = 0; + for (int i = MTL_FRAME_AVERAGE_COUNT - 1; i > 0; i--) { + MTLContext::frame_latency[i] = MTLContext::frame_latency[i - 1]; + avg += MTLContext::frame_latency[i]; + frame_c += (MTLContext::frame_latency[i] > 0) ? 1 : 0; + } + MTLContext::frame_latency[0] = frame_latency_us; + avg += MTLContext::frame_latency[0]; + if (frame_c > 0) { + avg /= frame_c; + } + else { + avg = 0; + } + MTLContext::avg_drawable_latency_us = avg; + } + + private: + void set_ghost_context(GHOST_ContextHandle ghostCtxHandle); + void set_ghost_window(GHOST_WindowHandle ghostWinHandle); }; +/* GHOST Context callback and present. 
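
A worked pass through `latency_resolve_average()` above, with `MTL_FRAME_AVERAGE_COUNT = 5` and invented sample values in microseconds:

  // Before: frame_latency = {16000, 17000, 15000, 0, 0}; new sample = 18000.
  // Shift:  frame_latency = {18000, 16000, 17000, 15000, 0}
  // Sum:    18000 + 16000 + 17000 + 15000 + 0 = 66000
  // Count:  frame_c = 3 -- only the shifted, non-zero previous samples are counted;
  //         the incoming sample is added to the sum but not to the count.
  // Result: avg_drawable_latency_us = 66000 / 3 = 22000

Note the divisor quirk this exposes: until the window fills, the rolling average is biased slightly upward because the newest sample contributes to the sum but never to `frame_c`.
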
*/ +void present(MTLRenderPassDescriptor *blit_descriptor, + id<MTLRenderPipelineState> blit_pso, + id<MTLTexture> swapchain_texture, + id<CAMetalDrawable> drawable); + } // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm index 1302cf0dabd..a89339d0d14 100644 --- a/source/blender/gpu/metal/mtl_context.mm +++ b/source/blender/gpu/metal/mtl_context.mm @@ -5,13 +5,29 @@ */ #include "mtl_context.hh" #include "mtl_debug.hh" +#include "mtl_framebuffer.hh" +#include "mtl_immediate.hh" +#include "mtl_memory.hh" +#include "mtl_primitive.hh" #include "mtl_shader.hh" #include "mtl_shader_interface.hh" #include "mtl_state.hh" +#include "mtl_uniform_buffer.hh" #include "DNA_userdef_types.h" #include "GPU_capabilities.h" +#include "GPU_matrix.h" +#include "GPU_shader.h" +#include "GPU_texture.h" +#include "GPU_uniform_buffer.h" +#include "GPU_vertex_buffer.h" +#include "intern/gpu_matrix_private.h" + +#include "PIL_time.h" + +#include <fstream> +#include <string> using namespace blender; using namespace blender::gpu; @@ -21,21 +37,118 @@ namespace blender::gpu { /* Global memory manager. */ MTLBufferPool MTLContext::global_memory_manager; +/* Swapchain and latency management. */ +std::atomic<int> MTLContext::max_drawables_in_flight = 0; +std::atomic<int64_t> MTLContext::avg_drawable_latency_us = 0; +int64_t MTLContext::frame_latency[MTL_FRAME_AVERAGE_COUNT] = {0}; + +/* -------------------------------------------------------------------- */ +/** \name GHOST Context interaction. + * \{ */ + +void MTLContext::set_ghost_context(GHOST_ContextHandle ghostCtxHandle) +{ + GHOST_Context *ghost_ctx = reinterpret_cast<GHOST_Context *>(ghostCtxHandle); + BLI_assert(ghost_ctx != nullptr); + + /* Release old MTLTexture handle */ + if (default_fbo_mtltexture_) { + [default_fbo_mtltexture_ release]; + default_fbo_mtltexture_ = nil; + } + + /* Release Framebuffer attachments */ + MTLFrameBuffer *mtl_front_left = static_cast<MTLFrameBuffer *>(this->front_left); + MTLFrameBuffer *mtl_back_left = static_cast<MTLFrameBuffer *>(this->back_left); + mtl_front_left->remove_all_attachments(); + mtl_back_left->remove_all_attachments(); + + GHOST_ContextCGL *ghost_cgl_ctx = dynamic_cast<GHOST_ContextCGL *>(ghost_ctx); + if (ghost_cgl_ctx != NULL) { + default_fbo_mtltexture_ = ghost_cgl_ctx->metalOverlayTexture(); + + MTL_LOG_INFO( + "Binding GHOST context CGL %p to GPU context %p. 
(Device: %p, queue: %p, texture: %p)\n", + ghost_cgl_ctx, + this, + this->device, + this->queue, + default_fbo_gputexture_); + + /* Check if the GHOST Context provides a default framebuffer: */ + if (default_fbo_mtltexture_) { + + /* Release old GPUTexture handle */ + if (default_fbo_gputexture_) { + GPU_texture_free(wrap(static_cast<Texture *>(default_fbo_gputexture_))); + default_fbo_gputexture_ = nullptr; + } + + /* Retain handle */ + [default_fbo_mtltexture_ retain]; + + /*** Create front and back-buffers ***/ + /* Create gpu::MTLTexture objects */ + default_fbo_gputexture_ = new gpu::MTLTexture( + "MTL_BACKBUFFER", GPU_RGBA16F, GPU_TEXTURE_2D, default_fbo_mtltexture_); + + /* Update framebuffers with new texture attachments */ + mtl_front_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0); + mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0); +#ifndef NDEBUG + this->label = default_fbo_mtltexture_.label; +#endif + } + else { + + /* Add default texture for cases where no other framebuffer is bound */ + if (!default_fbo_gputexture_) { + default_fbo_gputexture_ = static_cast<gpu::MTLTexture *>( + unwrap(GPU_texture_create_2d(__func__, 16, 16, 1, GPU_RGBA16F, nullptr))); + } + mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0); + + MTL_LOG_INFO( + "-- Bound context %p for GPU context: %p is offscreen and does not have a default " + "framebuffer\n", + ghost_cgl_ctx, + this); +#ifndef NDEBUG + this->label = @"Offscreen Metal Context"; +#endif + } + } + else { + MTL_LOG_INFO( + "[ERROR] Failed to bind GHOST context to MTLContext -- GHOST_ContextCGL is null " + "(GhostContext: %p, GhostContext_CGL: %p)\n", + ghost_ctx, + ghost_cgl_ctx); + BLI_assert(false); + } +} + +void MTLContext::set_ghost_window(GHOST_WindowHandle ghostWinHandle) +{ + GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghostWinHandle); + this->set_ghost_context((GHOST_ContextHandle)(ghostWin ? ghostWin->getContext() : NULL)); +} + +/** \} */ + /* -------------------------------------------------------------------- */ /** \name MTLContext * \{ */ /* Placeholder functions */ -MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command_buffer(*this) +MTLContext::MTLContext(void *ghost_window, void *ghost_context) + : memory_manager(*this), main_command_buffer(*this) { /* Init debug. */ debug::mtl_debug_init(); - /* Device creation. - * TODO(Metal): This is a temporary initialization path to enable testing of features - * and shader compilation tests. Future functionality should fetch the existing device - * from GHOST_ContextCGL.mm. Plumbing to be updated in future. */ - this->device = MTLCreateSystemDefaultDevice(); + /* Initialise Renderpass and Framebuffer State */ + this->back_left = nullptr; /* Initialize command buffer state. */ this->main_command_buffer.prepare(); @@ -47,10 +160,35 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command is_inside_frame_ = false; current_frame_index_ = 0; - /* Prepare null data buffer */ + /* Prepare null data buffer. */ null_buffer_ = nil; null_attribute_buffer_ = nil; + /* Zero-initialise MTL Textures. */ + default_fbo_mtltexture_ = nil; + default_fbo_gputexture_ = nullptr; + + /** Fetch GHOSTContext and fetch Metal device/queue. */ + ghost_window_ = ghost_window; + if (ghost_window_ && ghost_context == NULL) { + /* NOTE(Metal): Fetch ghost_context from ghost_window if it is not provided. 
+ * Regardless of whether windowed or not, we need access to the GhostContext + * for presentation, and device/queue access. */ + GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghost_window_); + ghost_context = (ghostWin ? ghostWin->getContext() : NULL); + } + BLI_assert(ghost_context); + this->ghost_context_ = static_cast<GHOST_ContextCGL *>(ghost_context); + this->queue = (id<MTLCommandQueue>)this->ghost_context_->metalCommandQueue(); + this->device = (id<MTLDevice>)this->ghost_context_->metalDevice(); + BLI_assert(this->queue); + BLI_assert(this->device); + [this->queue retain]; + [this->device retain]; + + /* Register present callback. */ + this->ghost_context_->metalRegisterPresentCallback(&present); + /* Create FrameBuffer handles. */ MTLFrameBuffer *mtl_front_left = new MTLFrameBuffer(this, "front_left"); MTLFrameBuffer *mtl_back_left = new MTLFrameBuffer(this, "back_left"); @@ -66,6 +204,7 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command /* Initialize Metal modules. */ this->memory_manager.init(); this->state_manager = new MTLStateManager(this); + this->imm = new MTLImmediate(this); /* Ensure global memory manager is initialized. */ MTLContext::global_memory_manager.init(this->device); @@ -99,9 +238,29 @@ MTLContext::~MTLContext() this->end_frame(); } } + + /* Release Memory Manager */ + this->get_scratchbuffer_manager().free(); + /* Release update/blit shaders. */ this->get_texture_utils().cleanup(); + /* Detach resource references */ + GPU_texture_unbind_all(); + + /* Unbind UBOs */ + for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) { + if (this->pipeline_state.ubo_bindings[i].bound && + this->pipeline_state.ubo_bindings[i].ubo != nullptr) { + GPUUniformBuf *ubo = wrap( + static_cast<UniformBuf *>(this->pipeline_state.ubo_bindings[i].ubo)); + GPU_uniformbuf_unbind(ubo); + } + } + + /* Release Dummy resources */ + this->free_dummy_resources(); + /* Release Sampler States. */ for (int i = 0; i < GPU_SAMPLER_MAX; i++) { if (sampler_state_cache_[i] != nil) { @@ -109,12 +268,28 @@ MTLContext::~MTLContext() sampler_state_cache_[i] = nil; } } + + /* Empty cached sampler argument buffers. */ + for (auto entry : cached_sampler_buffers_.values()) { + entry->free(); + } + cached_sampler_buffers_.clear(); + + /* Free null buffers. */ if (null_buffer_) { [null_buffer_ release]; } if (null_attribute_buffer_) { [null_attribute_buffer_ release]; } + + /* Free Metal objects. */ + if (this->queue) { + [this->queue release]; + } + if (this->device) { + [this->device release]; + } } void MTLContext::begin_frame() @@ -146,20 +321,49 @@ void MTLContext::check_error(const char *info) void MTLContext::activate() { - /* TODO(Metal): Implement. */ + /* Make sure no other context is already bound to this thread. */ + BLI_assert(is_active_ == false); + is_active_ = true; + thread_ = pthread_self(); + + /* Re-apply ghost window/context for resizing */ + if (ghost_window_) { + this->set_ghost_window((GHOST_WindowHandle)ghost_window_); + } + else if (ghost_context_) { + this->set_ghost_context((GHOST_ContextHandle)ghost_context_); + } + + /* Reset UBO bind state. */ + for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) { + if (this->pipeline_state.ubo_bindings[i].bound && + this->pipeline_state.ubo_bindings[i].ubo != nullptr) { + this->pipeline_state.ubo_bindings[i].bound = false; + this->pipeline_state.ubo_bindings[i].ubo = nullptr; + } + } + + /* Ensure imm active. */ + immActivate(); } + void MTLContext::deactivate() { - /* TODO(Metal): Implement. 
*/ + BLI_assert(this->is_active_on_thread()); + /* Flush context on deactivate. */ + this->flush(); + is_active_ = false; + immDeactivate(); } void MTLContext::flush() { - /* TODO(Metal): Implement. */ + this->main_command_buffer.submit(false); } + void MTLContext::finish() { - /* TODO(Metal): Implement. */ + this->main_command_buffer.submit(true); } void MTLContext::memory_statistics_get(int *total_mem, int *free_mem) @@ -200,10 +404,8 @@ id<MTLRenderCommandEncoder> MTLContext::ensure_begin_render_pass() /* Ensure command buffer workload submissions are optimal -- * Though do not split a batch mid-IMM recording. */ - /* TODO(Metal): Add IMM Check once MTLImmediate has been implemented. */ - if (this->main_command_buffer.do_break_submission() - // && !((MTLImmediate *)(this->imm))->imm_is_recording() - ) { + if (this->main_command_buffer.do_break_submission() && + !((MTLImmediate *)(this->imm))->imm_is_recording()) { this->flush(); } @@ -294,6 +496,72 @@ id<MTLBuffer> MTLContext::get_null_attribute_buffer() return null_attribute_buffer_; } +gpu::MTLTexture *MTLContext::get_dummy_texture(eGPUTextureType type) +{ + /* Decrement 1 from texture type as they start from 1 and go to 32 (inclusive). Remap to 0..31 */ + gpu::MTLTexture *dummy_tex = dummy_textures_[type - 1]; + if (dummy_tex != nullptr) { + return dummy_tex; + } + else { + GPUTexture *tex = nullptr; + switch (type) { + case GPU_TEXTURE_1D: + tex = GPU_texture_create_1d("Dummy 1D", 128, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_1D_ARRAY: + tex = GPU_texture_create_1d_array("Dummy 1DArray", 128, 1, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_2D: + tex = GPU_texture_create_2d("Dummy 2D", 128, 128, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_2D_ARRAY: + tex = GPU_texture_create_2d_array("Dummy 2DArray", 128, 128, 1, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_3D: + tex = GPU_texture_create_3d( + "Dummy 3D", 128, 128, 1, 1, GPU_RGBA8, GPU_DATA_UBYTE, nullptr); + break; + case GPU_TEXTURE_CUBE: + tex = GPU_texture_create_cube("Dummy Cube", 128, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_CUBE_ARRAY: + tex = GPU_texture_create_cube_array("Dummy CubeArray", 128, 1, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_BUFFER: + if (!dummy_verts_) { + GPU_vertformat_clear(&dummy_vertformat_); + GPU_vertformat_attr_add(&dummy_vertformat_, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + dummy_verts_ = GPU_vertbuf_create_with_format_ex(&dummy_vertformat_, GPU_USAGE_STATIC); + GPU_vertbuf_data_alloc(dummy_verts_, 64); + } + tex = GPU_texture_create_from_vertbuf("Dummy TextureBuffer", dummy_verts_); + break; + default: + BLI_assert_msg(false, "Unrecognised texture type"); + return nullptr; + } + gpu::MTLTexture *metal_tex = static_cast<gpu::MTLTexture *>(reinterpret_cast<Texture *>(tex)); + dummy_textures_[type - 1] = metal_tex; + return metal_tex; + } + return nullptr; +} + +void MTLContext::free_dummy_resources() +{ + for (int tex = 0; tex < GPU_TEXTURE_BUFFER; tex++) { + if (dummy_textures_[tex]) { + GPU_texture_free( + reinterpret_cast<GPUTexture *>(static_cast<Texture *>(dummy_textures_[tex]))); + dummy_textures_[tex] = nullptr; + } + } + if (dummy_verts_) { + GPU_vertbuf_discard(dummy_verts_); + } +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -440,6 +708,755 @@ void MTLContext::set_scissor_enabled(bool scissor_enabled) /** \} */ /* -------------------------------------------------------------------- */ +/** \name Command Encoder and pipeline state + * These 
utilities ensure that all of the globally bound resources and state have been
+ * correctly encoded within the current RenderCommandEncoder. This involves managing
+ * buffer bindings, texture bindings, depth stencil state and dynamic pipeline state.
+ *
+ * We will also trigger compilation of new PSOs where the input state has changed
+ * and is required.
+ * All of this setup is required in order to perform a valid draw call.
+ * \{ */
+
+bool MTLContext::ensure_render_pipeline_state(MTLPrimitiveType mtl_prim_type)
+{
+  BLI_assert(this->pipeline_state.initialised);
+
+  /* Check if an active shader is bound. */
+  if (!this->pipeline_state.active_shader) {
+    MTL_LOG_WARNING("No Metal shader for bound GL shader\n");
+    return false;
+  }
+
+  /* Also ensure active shader is valid. */
+  if (!this->pipeline_state.active_shader->is_valid()) {
+    MTL_LOG_WARNING(
+        "Bound active shader is not valid (Missing/invalid implementation for Metal).\n");
+    return false;
+  }
+
+  /* Apply global state. */
+  this->state_manager->apply_state();
+
+  /* Main command buffer tracks the current state of the render pass, based on bound
+   * MTLFrameBuffer. */
+  MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  /* Debug Check: Ensure Framebuffer instance is not dirty. */
+  BLI_assert(!this->main_command_buffer.get_active_framebuffer()->get_dirty());
+
+  /* Fetch shader interface. */
+  MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
+  if (shader_interface == nullptr) {
+    MTL_LOG_WARNING("Bound active shader does not have a valid shader interface!\n");
+    return false;
+  }
+
+  /* Fetch shader and bake valid PipelineStateObject (PSO) based on current
+   * shader and state combination. This PSO represents the final GPU-executable
+   * permutation of the shader. */
+  MTLRenderPipelineStateInstance *pipeline_state_instance =
+      this->pipeline_state.active_shader->bake_current_pipeline_state(
+          this, mtl_prim_type_to_topology_class(mtl_prim_type));
+  if (!pipeline_state_instance) {
+    MTL_LOG_ERROR("Failed to bake Metal pipeline state for shader: %s\n",
+                  shader_interface->get_name());
+    return false;
+  }
+
+  bool result = false;
+  if (pipeline_state_instance->pso) {
+
+    /* Fetch render command encoder. A render pass should already be active.
+     * This will be NULL if invalid. */
+    id<MTLRenderCommandEncoder> rec =
+        this->main_command_buffer.get_active_render_command_encoder();
+    BLI_assert(rec);
+    if (rec == nil) {
+      MTL_LOG_ERROR("ensure_render_pipeline_state called while render pass is not active.\n");
+      return false;
+    }
+
+    /* Bind Render Pipeline State. */
+    BLI_assert(pipeline_state_instance->pso);
+    if (rps.bound_pso != pipeline_state_instance->pso) {
+      [rec setRenderPipelineState:pipeline_state_instance->pso];
+      rps.bound_pso = pipeline_state_instance->pso;
+    }
+
+    /** Ensure resource bindings. */
+    /* Texture Bindings. */
+    /* We will iterate through all texture bindings on the context and determine if any of the
+     * active slots match those in our shader interface. If so, textures will be bound. */
+    if (shader_interface->get_total_textures() > 0) {
+      this->ensure_texture_bindings(rec, shader_interface, pipeline_state_instance);
+    }
+
+    /* Transform feedback buffer binding. */
+    /* TODO(Metal): Include this code once MTLVertBuf is merged. We bind the vertex buffer to
+     * which transform feedback data will be written.
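 *
 * [Editor's note] Metal has no native transform-feedback primitive; the sketch below
 * shows the intended shape of the binding once `MTLVertBuf` lands, mirroring the
 * commented-out block that follows (names are taken from that block, not a final API):
 *
 *   int tf_offset = 0;
 *   id<MTLBuffer> tf_buf = tf_vbo_mtl->get_metal_buffer(&tf_offset);
 *   [rec setVertexBuffer:tf_buf
 *                 offset:tf_offset
 *                atIndex:pipeline_state_instance->transform_feedback_buffer_index];
 *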
*/
+    // GPUVertBuf *tf_vbo =
+    //     this->pipeline_state.active_shader->get_transform_feedback_active_buffer();
+    // if (tf_vbo != nullptr && pipeline_state_instance->transform_feedback_buffer_index >= 0) {
+
+    //   /* Ensure primitive type is either GPU_LINES, GPU_TRIANGLES or GPU_POINT */
+    //   BLI_assert(mtl_prim_type == MTLPrimitiveTypeLine ||
+    //              mtl_prim_type == MTLPrimitiveTypeTriangle ||
+    //              mtl_prim_type == MTLPrimitiveTypePoint);
+
+    //   /* Fetch active transform feedback buffer from vertbuf */
+    //   MTLVertBuf *tf_vbo_mtl = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(tf_vbo));
+    //   int tf_buffer_offset = 0;
+    //   id<MTLBuffer> tf_buffer_mtl = tf_vbo_mtl->get_metal_buffer(&tf_buffer_offset);
+
+    //   if (tf_buffer_mtl != nil && tf_buffer_offset >= 0) {
+    //     [rec setVertexBuffer:tf_buffer_mtl
+    //                   offset:tf_buffer_offset
+    //                  atIndex:pipeline_state_instance->transform_feedback_buffer_index];
+    //     printf("Successfully bound VBO: %p for transform feedback (MTL Buffer: %p)\n",
+    //            tf_vbo_mtl,
+    //            tf_buffer_mtl);
+    //   }
+    // }
+
+    /* Matrix Bindings. */
+    /* This is now called upon shader bind. We may need to re-evaluate this though,
+     * as was done here to ensure uniform changes between draws were tracked.
+     * NOTE(Metal): We may be able to remove this. */
+    GPU_matrix_bind(reinterpret_cast<struct GPUShader *>(
+        static_cast<Shader *>(this->pipeline_state.active_shader)));
+
+    /* Bind Uniforms */
+    this->ensure_uniform_buffer_bindings(rec, shader_interface, pipeline_state_instance);
+
+    /* Bind Null attribute buffer, if needed. */
+    if (pipeline_state_instance->null_attribute_buffer_index >= 0) {
+      if (G.debug & G_DEBUG_GPU) {
+        MTL_LOG_INFO("Binding null attribute buffer at index: %d\n",
+                     pipeline_state_instance->null_attribute_buffer_index);
+      }
+      rps.bind_vertex_buffer(this->get_null_attribute_buffer(),
+                             0,
+                             pipeline_state_instance->null_attribute_buffer_index);
+    }
+
+    /** Dynamic Per-draw Render State on RenderCommandEncoder. */
+    /* State: Viewport. */
+    if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_VIEWPORT_FLAG) {
+      MTLViewport viewport;
+      viewport.originX = (double)this->pipeline_state.viewport_offset_x;
+      viewport.originY = (double)this->pipeline_state.viewport_offset_y;
+      viewport.width = (double)this->pipeline_state.viewport_width;
+      viewport.height = (double)this->pipeline_state.viewport_height;
+      viewport.znear = this->pipeline_state.depth_stencil_state.depth_range_near;
+      viewport.zfar = this->pipeline_state.depth_stencil_state.depth_range_far;
+      [rec setViewport:viewport];
+
+      this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+                                          ~MTL_PIPELINE_STATE_VIEWPORT_FLAG);
+    }
+
+    /* State: Scissor. */
+    if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_SCISSOR_FLAG) {
+
+      /* Get FrameBuffer associated with active RenderCommandEncoder. */
+      MTLFrameBuffer *render_fb = this->main_command_buffer.get_active_framebuffer();
+
+      MTLScissorRect scissor;
+      if (this->pipeline_state.scissor_enabled) {
+        scissor.x = this->pipeline_state.scissor_x;
+        scissor.y = this->pipeline_state.scissor_y;
+        scissor.width = this->pipeline_state.scissor_width;
+        scissor.height = this->pipeline_state.scissor_height;
+
+        /* Some scissor assignments exceed the bounds of the viewport due to implicitly added
+         * padding to the width/height - Clamp width/height.
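 *
 * [Editor's note] A worked example of the clamp below: with a 1920x1080 framebuffer
 * and a requested scissor of x = 1900, width = 40, the clamped width becomes
 * min_ii(40, 1920 - 1900) = 20, keeping x + width within the framebuffer bounds.
 *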
*/
+        BLI_assert(scissor.x >= 0 && scissor.x < render_fb->get_width());
+        BLI_assert(scissor.y >= 0 && scissor.y < render_fb->get_height());
+        scissor.width = min_ii(scissor.width, render_fb->get_width() - scissor.x);
+        scissor.height = min_ii(scissor.height, render_fb->get_height() - scissor.y);
+        BLI_assert(scissor.width > 0 && (scissor.x + scissor.width <= render_fb->get_width()));
+        BLI_assert(scissor.height > 0 && (scissor.height <= render_fb->get_height()));
+      }
+      else {
+        /* Scissor is disabled, reset to default size as scissor state may have been previously
+         * assigned on this encoder. */
+        scissor.x = 0;
+        scissor.y = 0;
+        scissor.width = render_fb->get_width();
+        scissor.height = render_fb->get_height();
+      }
+
+      /* Scissor state can still be flagged as changed if it is toggled on and off, without
+       * parameters changing between draws. */
+      if (memcmp(&scissor, &rps.last_scissor_rect, sizeof(MTLScissorRect))) {
+        [rec setScissorRect:scissor];
+        rps.last_scissor_rect = scissor;
+      }
+      this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+                                          ~MTL_PIPELINE_STATE_SCISSOR_FLAG);
+    }
+
+    /* State: Face winding. */
+    if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_FRONT_FACING_FLAG) {
+      /* We need to invert the face winding in Metal, to account for the inverted-Y coordinate
+       * system. */
+      MTLWinding winding = (this->pipeline_state.front_face == GPU_CLOCKWISE) ?
+                               MTLWindingClockwise :
+                               MTLWindingCounterClockwise;
+      [rec setFrontFacingWinding:winding];
+      this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+                                          ~MTL_PIPELINE_STATE_FRONT_FACING_FLAG);
+    }
+
+    /* State: cullmode. */
+    if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_CULLMODE_FLAG) {
+
+      MTLCullMode mode = MTLCullModeNone;
+      if (this->pipeline_state.culling_enabled) {
+        switch (this->pipeline_state.cull_mode) {
+          case GPU_CULL_NONE:
+            mode = MTLCullModeNone;
+            break;
+          case GPU_CULL_FRONT:
+            mode = MTLCullModeFront;
+            break;
+          case GPU_CULL_BACK:
+            mode = MTLCullModeBack;
+            break;
+          default:
+            BLI_assert_unreachable();
+            break;
+        }
+      }
+      [rec setCullMode:mode];
+      this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+                                          ~MTL_PIPELINE_STATE_CULLMODE_FLAG);
+    }
+
+    /* Pipeline state is now good. */
+    result = true;
+  }
+  return result;
+}
+
+/* Bind uniform buffers to an active render command encoder using the rendering state of the
+ * current context (active shader, bound UBOs). */
+bool MTLContext::ensure_uniform_buffer_bindings(
+    id<MTLRenderCommandEncoder> rec,
+    const MTLShaderInterface *shader_interface,
+    const MTLRenderPipelineStateInstance *pipeline_state_instance)
+{
+  /* Fetch Render Pass state. */
+  MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  /* Shader owned push constant block for uniforms. */
+  bool active_shader_changed = (rps.last_bound_shader_state.shader_ !=
+                                    this->pipeline_state.active_shader ||
+                                rps.last_bound_shader_state.shader_ == nullptr ||
+                                rps.last_bound_shader_state.pso_index_ !=
+                                    pipeline_state_instance->shader_pso_index);
+
+  const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
+  if (push_constant_block.size > 0) {
+
+    /* Fetch uniform buffer base binding index from pipeline_state_instance - The buffer index
+     * will be offset by the number of bound VBOs.
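 *
 * [Editor's note] Illustrative layout of the shared Metal buffer argument table that
 * this offsetting produces (a sketch, indices vary per PSO):
 *
 *   [0 .. N-1]                                -> vertex buffer bindings (N = bound VBOs)
 *   [base + ubo.buffer_index]                 -> uniform blocks
 *   [base + push_constant_block.buffer_index] -> push-constant bytes
 *
 * where base = pipeline_state_instance->base_uniform_buffer_index.
 *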
*/ + uint32_t block_size = push_constant_block.size; + uint32_t buffer_index = pipeline_state_instance->base_uniform_buffer_index + + push_constant_block.buffer_index; + + /* Only need to rebind block if push constants have been modified -- or if no data is bound for + * the current RenderCommandEncoder. */ + if (this->pipeline_state.active_shader->get_push_constant_is_dirty() || + active_shader_changed || !rps.cached_vertex_buffer_bindings[buffer_index].is_bytes || + !rps.cached_fragment_buffer_bindings[buffer_index].is_bytes || true) { + + /* Bind push constant data. */ + BLI_assert(this->pipeline_state.active_shader->get_push_constant_data() != nullptr); + rps.bind_vertex_bytes( + this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index); + rps.bind_fragment_bytes( + this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index); + + /* Only need to rebind block if it has been modified. */ + this->pipeline_state.active_shader->push_constant_bindstate_mark_dirty(false); + } + } + rps.last_bound_shader_state.set(this->pipeline_state.active_shader, + pipeline_state_instance->shader_pso_index); + + /* Bind Global GPUUniformBuffers */ + /* Iterate through expected UBOs in the shader interface, and check if the globally bound ones + * match. This is used to support the gpu_uniformbuffer module, where the uniform data is global, + * and not owned by the shader instance. */ + for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) { + const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index); + + if (ubo.buffer_index >= 0) { + + const uint32_t buffer_index = ubo.buffer_index; + int ubo_offset = 0; + id<MTLBuffer> ubo_buffer = nil; + int ubo_size = 0; + + bool bind_dummy_buffer = false; + if (this->pipeline_state.ubo_bindings[buffer_index].bound) { + + /* Fetch UBO global-binding properties from slot. */ + ubo_offset = 0; + ubo_buffer = this->pipeline_state.ubo_bindings[buffer_index].ubo->get_metal_buffer( + &ubo_offset); + ubo_size = this->pipeline_state.ubo_bindings[buffer_index].ubo->get_size(); + + /* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid + * allocating zero buffers. */ + if (ubo_buffer == nil) { + bind_dummy_buffer = true; + } + else { + BLI_assert(ubo_buffer != nil); + BLI_assert(ubo_size > 0); + + if (pipeline_state_instance->reflection_data_available) { + /* NOTE: While the vertex and fragment stages have different UBOs, the indices in each + * case will be the same for the same UBO. + * We also determine expected size and then ensure buffer of the correct size + * exists in one of the vertex/fragment shader binding tables. This path is used + * to verify that the size of the bound UBO matches what is expected in the shader. */ + uint32_t expected_size = + (buffer_index < + pipeline_state_instance->buffer_bindings_reflection_data_vert.size()) ? + pipeline_state_instance->buffer_bindings_reflection_data_vert[buffer_index] + .size : + 0; + if (expected_size == 0) { + expected_size = + (buffer_index < + pipeline_state_instance->buffer_bindings_reflection_data_frag.size()) ? 
+ pipeline_state_instance->buffer_bindings_reflection_data_frag[buffer_index] + .size : + 0; + } + BLI_assert_msg( + expected_size > 0, + "Shader interface expects UBO, but shader reflection data reports that it " + "is not present"); + + /* If ubo size is smaller than the size expected by the shader, we need to bind the + * dummy buffer, which will be big enough, to avoid an OOB error. */ + if (ubo_size < expected_size) { + MTL_LOG_INFO( + "[Error][UBO] UBO (UBO Name: %s) bound at index: %d with size %d (Expected size " + "%d) (Shader Name: %s) is too small -- binding NULL buffer. This is likely an " + "over-binding, which is not used, but we need this to avoid validation " + "issues\n", + shader_interface->get_name_at_offset(ubo.name_offset), + buffer_index, + ubo_size, + expected_size, + shader_interface->get_name()); + bind_dummy_buffer = true; + } + } + } + } + else { + MTL_LOG_INFO( + "[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- but " + "nothing was bound -- binding dummy buffer\n", + shader_interface->get_name(), + shader_interface->get_name_at_offset(ubo.name_offset), + buffer_index); + bind_dummy_buffer = true; + } + + if (bind_dummy_buffer) { + /* Perform Dummy binding. */ + ubo_offset = 0; + ubo_buffer = this->get_null_buffer(); + ubo_size = [ubo_buffer length]; + } + + if (ubo_buffer != nil) { + + uint32_t buffer_bind_index = pipeline_state_instance->base_uniform_buffer_index + + buffer_index; + + /* Bind Vertex UBO. */ + if (bool(ubo.stage_mask & ShaderStage::VERTEX)) { + BLI_assert(buffer_bind_index >= 0 && + buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + rps.bind_vertex_buffer(ubo_buffer, ubo_offset, buffer_bind_index); + } + + /* Bind Fragment UBOs. */ + if (bool(ubo.stage_mask & ShaderStage::FRAGMENT)) { + BLI_assert(buffer_bind_index >= 0 && + buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + rps.bind_fragment_buffer(ubo_buffer, ubo_offset, buffer_bind_index); + } + } + else { + MTL_LOG_WARNING( + "[UBO] Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer " + "is NULL!\n", + shader_interface->get_name(), + shader_interface->get_name_at_offset(ubo.name_offset), + buffer_index); + } + } + } + return true; +} + +/* Ensure texture bindings are correct and up to date for current draw call. */ +void MTLContext::ensure_texture_bindings( + id<MTLRenderCommandEncoder> rec, + MTLShaderInterface *shader_interface, + const MTLRenderPipelineStateInstance *pipeline_state_instance) +{ + BLI_assert(shader_interface != nil); + BLI_assert(rec != nil); + + /* Fetch Render Pass state. */ + MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state(); + + @autoreleasepool { + int vertex_arg_buffer_bind_index = -1; + int fragment_arg_buffer_bind_index = -1; + + /* Argument buffers are used for samplers, when the limit of 16 is exceeded. */ + bool use_argument_buffer_for_samplers = shader_interface->get_use_argument_buffer_for_samplers( + &vertex_arg_buffer_bind_index, &fragment_arg_buffer_bind_index); + + /* Loop through expected textures in shader interface and resolve bindings with currently + * bound textures.. */ + for (const uint t : IndexRange(shader_interface->get_max_texture_index() + 1)) { + /* Ensure the bound texture is compatible with the shader interface. If the + * shader does not expect a texture to be bound for the current slot, we skip + * binding. + * NOTE: Global texture bindings may be left over from prior draw calls. 
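 *
 * [Editor's note] Example of the stale-binding case handled here: draw A binds a
 * texture to slot 0, then draw B uses a shader that only samples slot 1. Slot 0 still
 * holds A's texture on the context, but because B's interface marks slot 0 as unused,
 * the slot is skipped rather than validated against B's shader.
 *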
*/ + const MTLShaderTexture &shader_texture_info = shader_interface->get_texture(t); + if (!shader_texture_info.used) { + /* Skip unused binding points if explicit indices are specified. */ + continue; + } + + int slot = shader_texture_info.slot_index; + if (slot >= 0 && slot < GPU_max_textures()) { + bool bind_dummy_texture = true; + if (this->pipeline_state.texture_bindings[slot].used) { + gpu::MTLTexture *bound_texture = + this->pipeline_state.texture_bindings[slot].texture_resource; + MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot]; + BLI_assert(bound_texture); + BLI_assert(bound_sampler.used); + + if (shader_texture_info.type == bound_texture->type_) { + /* Bind texture and sampler if the bound texture matches the type expected by the + * shader. */ + id<MTLTexture> tex = bound_texture->get_metal_handle(); + + if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) { + rps.bind_vertex_texture(tex, slot); + rps.bind_vertex_sampler(bound_sampler, use_argument_buffer_for_samplers, slot); + } + + if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) { + rps.bind_fragment_texture(tex, slot); + rps.bind_fragment_sampler(bound_sampler, use_argument_buffer_for_samplers, slot); + } + + /* Texture state resolved, no need to bind dummy texture */ + bind_dummy_texture = false; + } + else { + /* Texture type for bound texture (e.g. Texture2DArray) does not match what was + * expected in the shader interface. This is a problem and we will need to bind + * a dummy texture to ensure correct API usage. */ + MTL_LOG_WARNING( + "(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong " + "texture target type. (Expecting type %d, actual type %d) (binding " + "name:'%s')(texture name:'%s')\n", + shader_interface->get_name(), + bound_texture, + slot, + shader_texture_info.type, + bound_texture->type_, + shader_interface->get_name_at_offset(shader_texture_info.name_offset), + bound_texture->get_name()); + } + } + else { + MTL_LOG_WARNING( + "Shader '%s' expected texture to be bound to slot %d -- No texture was " + "bound. (name:'%s')\n", + shader_interface->get_name(), + slot, + shader_interface->get_name_at_offset(shader_texture_info.name_offset)); + } + + /* Bind Dummy texture -- will temporarily resolve validation issues while incorrect formats + * are provided -- as certain configurations may not need any binding. These issues should + * be fixed in the high-level, if problems crop up. */ + if (bind_dummy_texture) { + if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) { + rps.bind_vertex_texture( + get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot); + + /* Bind default sampler state. */ + MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE}; + rps.bind_vertex_sampler(default_binding, use_argument_buffer_for_samplers, slot); + } + if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) { + rps.bind_fragment_texture( + get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot); + + /* Bind default sampler state. */ + MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE}; + rps.bind_fragment_sampler(default_binding, use_argument_buffer_for_samplers, slot); + } + } + } + else { + MTL_LOG_WARNING( + "Shader %p expected texture to be bound to slot %d -- Slot exceeds the " + "hardware/API limit of '%d'. 
(name:'%s')\n",
+          this->pipeline_state.active_shader,
+          slot,
+          GPU_max_textures(),
+          shader_interface->get_name_at_offset(shader_texture_info.name_offset));
+      }
+    }
+
+    /* Construct and Bind argument buffer.
+     * NOTE(Metal): Samplers use an argument buffer when the limit of 16 samplers is exceeded. */
+    if (use_argument_buffer_for_samplers) {
+#ifndef NDEBUG
+      /* Debug check to validate each expected texture in the shader interface has a valid
+       * sampler object bound to the context. We will need all of these to be valid
+       * when constructing the sampler argument buffer. */
+      for (const uint i : IndexRange(shader_interface->get_max_texture_index() + 1)) {
+        const MTLShaderTexture &texture = shader_interface->get_texture(i);
+        if (texture.used) {
+          BLI_assert(this->samplers_.mtl_sampler[i] != nil);
+        }
+      }
+#endif
+
+      /* Check to ensure the buffer binding index for the argument buffer has been assigned.
+       * This PSO property will be set if we expect to use argument buffers, and the shader
+       * uses any amount of textures. */
+      BLI_assert(vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0);
+      if (vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0) {
+        /* Offset binding index to be relative to the start of static uniform buffer binding
+         * slots. The first N slots, prior to `pipeline_state_instance->base_uniform_buffer_index`
+         * are used by vertex and index buffer bindings, and the number of buffers present will
+         * vary between PSOs. */
+        int arg_buffer_idx = (pipeline_state_instance->base_uniform_buffer_index +
+                              vertex_arg_buffer_bind_index);
+        assert(arg_buffer_idx < 32);
+        id<MTLArgumentEncoder> argument_encoder = shader_interface->find_argument_encoder(
+            arg_buffer_idx);
+        if (argument_encoder == nil) {
+          argument_encoder = [pipeline_state_instance->vert
+              newArgumentEncoderWithBufferIndex:arg_buffer_idx];
+          shader_interface->insert_argument_encoder(arg_buffer_idx, argument_encoder);
+        }
+
+        /* Generate or Fetch argument buffer sampler configuration.
+         * NOTE(Metal): we need to base sampler counts off of the maximal texture
+         * index. This is not the most optimal approach, but in practice this is not a
+         * use-case when argument buffers are required.
+         * This is because with explicit texture indices, the binding indices
+         * should match across draws, to allow the high-level to optimize bind-points. */
+        gpu::MTLBuffer *encoder_buffer = nullptr;
+        this->samplers_.num_samplers = shader_interface->get_max_texture_index() + 1;
+
+        gpu::MTLBuffer **cached_smp_buffer_search = this->cached_sampler_buffers_.lookup_ptr(
+            this->samplers_);
+        if (cached_smp_buffer_search != nullptr) {
+          encoder_buffer = *cached_smp_buffer_search;
+        }
+        else {
+          /* Populate argument buffer with current global sampler bindings. */
+          int size = [argument_encoder encodedLength];
+          int alignment = max_uu([argument_encoder alignment], 256);
+          int size_align_delta = (size % alignment);
+          int aligned_alloc_size = ((alignment > 1) && (size_align_delta > 0)) ?
+                                       size + (alignment - (size % alignment)) :
+                                       size;
+
+          /* Allocate buffer to store encoded sampler arguments.
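 *
 * [Editor's note] A worked example of the rounding above: with size = 520 and
 * alignment = 256,
 *
 *   size_align_delta   = 520 % 256 = 8
 *   aligned_alloc_size = 520 + (256 - 8) = 768
 *
 * i.e. the allocation is padded up to the next multiple of the encoder alignment.
 *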
*/
+          encoder_buffer = MTLContext::get_global_memory_manager().allocate(aligned_alloc_size,
+                                                                            true);
+          BLI_assert(encoder_buffer);
+          BLI_assert(encoder_buffer->get_metal_buffer());
+          [argument_encoder setArgumentBuffer:encoder_buffer->get_metal_buffer() offset:0];
+          [argument_encoder
+              setSamplerStates:this->samplers_.mtl_sampler
+                     withRange:NSMakeRange(0, shader_interface->get_max_texture_index() + 1)];
+          encoder_buffer->flush();
+
+          /* Insert into cache. */
+          this->cached_sampler_buffers_.add_new(this->samplers_, encoder_buffer);
+        }
+
+        BLI_assert(encoder_buffer != nullptr);
+        int vert_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
+                                 vertex_arg_buffer_bind_index);
+        rps.bind_vertex_buffer(encoder_buffer->get_metal_buffer(), 0, vert_buffer_index);
+
+        /* The fragment shader shares its argument buffer binding with the vertex shader, so no
+         * need to re-encode. We can use the same argument buffer. */
+        if (fragment_arg_buffer_bind_index >= 0) {
+          BLI_assert(fragment_arg_buffer_bind_index >= 0);
+          int frag_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
+                                   fragment_arg_buffer_bind_index);
+          rps.bind_fragment_buffer(encoder_buffer->get_metal_buffer(), 0, frag_buffer_index);
+        }
+      }
+    }
+  }
+}
+
+/* Encode latest depth-stencil state. */
+void MTLContext::ensure_depth_stencil_state(MTLPrimitiveType prim_type)
+{
+  /* Check if we need to update state. */
+  if (!(this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG)) {
+    return;
+  }
+
+  /* Fetch render command encoder. */
+  id<MTLRenderCommandEncoder> rec = this->main_command_buffer.get_active_render_command_encoder();
+  BLI_assert(rec);
+
+  /* Fetch Render Pass state. */
+  MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  /** Prepare Depth-stencil state based on current global pipeline state. */
+  MTLFrameBuffer *fb = this->get_current_framebuffer();
+  bool hasDepthTarget = fb->has_depth_attachment();
+  bool hasStencilTarget = fb->has_stencil_attachment();
+
+  if (hasDepthTarget || hasStencilTarget) {
+    /* Update FrameBuffer State. */
+    this->pipeline_state.depth_stencil_state.has_depth_target = hasDepthTarget;
+    this->pipeline_state.depth_stencil_state.has_stencil_target = hasStencilTarget;
+
+    /* Check if current MTLContextDepthStencilState maps to an existing state object in
+     * the Depth-stencil state cache. */
+    id<MTLDepthStencilState> ds_state = nil;
+    id<MTLDepthStencilState> *depth_stencil_state_lookup =
+        this->depth_stencil_state_cache.lookup_ptr(this->pipeline_state.depth_stencil_state);
+
+    /* If not, populate DepthStencil state descriptor. */
+    if (depth_stencil_state_lookup == nullptr) {
+
+      MTLDepthStencilDescriptor *ds_state_desc = [[[MTLDepthStencilDescriptor alloc] init]
+          autorelease];
+
+      if (hasDepthTarget) {
+        ds_state_desc.depthWriteEnabled =
+            this->pipeline_state.depth_stencil_state.depth_write_enable;
+        ds_state_desc.depthCompareFunction =
+            this->pipeline_state.depth_stencil_state.depth_test_enabled ?
+ this->pipeline_state.depth_stencil_state.depth_function : + MTLCompareFunctionAlways; + } + + if (hasStencilTarget) { + ds_state_desc.backFaceStencil.readMask = + this->pipeline_state.depth_stencil_state.stencil_read_mask; + ds_state_desc.backFaceStencil.writeMask = + this->pipeline_state.depth_stencil_state.stencil_write_mask; + ds_state_desc.backFaceStencil.stencilFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_stencil_fail; + ds_state_desc.backFaceStencil.depthFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_depth_fail; + ds_state_desc.backFaceStencil.depthStencilPassOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_depthstencil_pass; + ds_state_desc.backFaceStencil.stencilCompareFunction = + (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ? + this->pipeline_state.depth_stencil_state.stencil_func : + MTLCompareFunctionAlways; + + ds_state_desc.frontFaceStencil.readMask = + this->pipeline_state.depth_stencil_state.stencil_read_mask; + ds_state_desc.frontFaceStencil.writeMask = + this->pipeline_state.depth_stencil_state.stencil_write_mask; + ds_state_desc.frontFaceStencil.stencilFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_stencil_fail; + ds_state_desc.frontFaceStencil.depthFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_depth_fail; + ds_state_desc.frontFaceStencil.depthStencilPassOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_depthstencil_pass; + ds_state_desc.frontFaceStencil.stencilCompareFunction = + (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ? + this->pipeline_state.depth_stencil_state.stencil_func : + MTLCompareFunctionAlways; + } + + /* Bake new DS state. */ + ds_state = [this->device newDepthStencilStateWithDescriptor:ds_state_desc]; + + /* Store state in cache. */ + BLI_assert(ds_state != nil); + this->depth_stencil_state_cache.add_new(this->pipeline_state.depth_stencil_state, ds_state); + } + else { + ds_state = *depth_stencil_state_lookup; + BLI_assert(ds_state != nil); + } + + /* Bind Depth Stencil State to render command encoder. */ + BLI_assert(ds_state != nil); + if (ds_state != nil) { + if (rps.bound_ds_state != ds_state) { + [rec setDepthStencilState:ds_state]; + rps.bound_ds_state = ds_state; + } + } + + /* Apply dynamic depth-stencil state on encoder. */ + if (hasStencilTarget) { + uint32_t stencil_ref_value = + (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ? + this->pipeline_state.depth_stencil_state.stencil_ref : + 0; + if (stencil_ref_value != rps.last_used_stencil_ref_value) { + [rec setStencilReferenceValue:stencil_ref_value]; + rps.last_used_stencil_ref_value = stencil_ref_value; + } + } + + if (hasDepthTarget) { + bool doBias = false; + switch (prim_type) { + case MTLPrimitiveTypeTriangle: + case MTLPrimitiveTypeTriangleStrip: + doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_tris; + break; + case MTLPrimitiveTypeLine: + case MTLPrimitiveTypeLineStrip: + doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_lines; + break; + case MTLPrimitiveTypePoint: + doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_points; + break; + } + [rec setDepthBias:(doBias) ? this->pipeline_state.depth_stencil_state.depth_bias : 0 + slopeScale:(doBias) ? 
this->pipeline_state.depth_stencil_state.depth_slope_scale : 0
+               clamp:0];
+    }
+  }
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
 /** \name Visibility buffer control for MTLQueryPool.
  * \{ */

@@ -606,4 +1623,148 @@ id<MTLSamplerState> MTLContext::get_default_sampler_state()

 /** \} */

+/* -------------------------------------------------------------------- */
+/** \name Swapchain management and Metal presentation.
+ * \{ */
+
+void present(MTLRenderPassDescriptor *blit_descriptor,
+             id<MTLRenderPipelineState> blit_pso,
+             id<MTLTexture> swapchain_texture,
+             id<CAMetalDrawable> drawable)
+{
+
+  MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
+  BLI_assert(ctx);
+
+  /* Flush any outstanding work. */
+  ctx->flush();
+
+  /* Always pace CPU to maximum of 3 drawables in flight.
+   * nextDrawable may have more in flight if backing swapchain
+   * textures are re-allocated, such as during resize events.
+   *
+   * Determine frames in flight based on current latency. If
+   * we are in a high-latency situation, limit frames in flight
+   * to increase app responsiveness and keep GPU execution under control.
+   * If latency improves, increase frames in flight to improve overall
+   * performance. */
+  int perf_max_drawables = MTL_MAX_DRAWABLES;
+  if (MTLContext::avg_drawable_latency_us > 185000) {
+    perf_max_drawables = 1;
+  }
+  else if (MTLContext::avg_drawable_latency_us > 85000) {
+    perf_max_drawables = 2;
+  }
+
+  while (MTLContext::max_drawables_in_flight > min_ii(perf_max_drawables, MTL_MAX_DRAWABLES)) {
+    PIL_sleep_ms(2);
+  }
+
+  /* Present is submitted in its own command buffer to ensure the drawable reference is released
+   * as early as possible. This command buffer is separate as it does not utilize the global
+   * state for rendering as the main context does. */
+  id<MTLCommandBuffer> cmdbuf = [ctx->queue commandBuffer];
+  MTLCommandBufferManager::num_active_cmd_bufs++;
+
+  if (MTLCommandBufferManager::sync_event != nil) {
+    /* Ensure command buffer ordering. */
+    [cmdbuf encodeWaitForEvent:MTLCommandBufferManager::sync_event
+                         value:MTLCommandBufferManager::event_signal_val];
+  }
+
+  /* Do Present Call and final Blit to MTLDrawable. */
+  id<MTLRenderCommandEncoder> enc = [cmdbuf renderCommandEncoderWithDescriptor:blit_descriptor];
+  [enc setRenderPipelineState:blit_pso];
+  [enc setFragmentTexture:swapchain_texture atIndex:0];
+  [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3];
+  [enc endEncoding];
+
+  /* Present drawable. */
+  BLI_assert(drawable);
+  [cmdbuf presentDrawable:drawable];
+
+  /* Ensure freed buffers have usage tracked against active CommandBuffer submissions. */
+  MTLSafeFreeList *cmd_free_buffer_list =
+      MTLContext::get_global_memory_manager().get_current_safe_list();
+  BLI_assert(cmd_free_buffer_list);
+
+  id<MTLCommandBuffer> cmd_buffer_ref = cmdbuf;
+  [cmd_buffer_ref retain];
+
+  /* Increment drawables in flight limiter. */
+  MTLContext::max_drawables_in_flight++;
+  std::chrono::time_point submission_time = std::chrono::high_resolution_clock::now();
+
+  /* Increment free pool reference and decrement upon command buffer completion. */
+  cmd_free_buffer_list->increment_reference();
+  [cmdbuf addCompletedHandler:^(id<MTLCommandBuffer> cb) {
+    /* Flag freed buffers associated with this CMD buffer as ready to be freed.
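 *
 * [Editor's note] The lifetime pattern used here, in sketch form: each submission pins
 * the current safe-free list before commit, and pooled buffers are only truly released
 * once every pinned command buffer has completed:
 *
 *   list->increment_reference();    // before [cmdbuf commit]
 *   [cmdbuf addCompletedHandler:^(id<MTLCommandBuffer> cb) {
 *     list->decrement_reference();  // final decrement frees the flagged buffers
 *   }];
 *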
*/
+    cmd_free_buffer_list->decrement_reference();
+    [cmd_buffer_ref release];
+
+    /* Decrement count. */
+    MTLCommandBufferManager::num_active_cmd_bufs--;
+    MTL_LOG_INFO("[Metal] Active command buffers: %d\n",
+                 MTLCommandBufferManager::num_active_cmd_bufs);
+
+    /* Drawable count and latency management. */
+    MTLContext::max_drawables_in_flight--;
+    std::chrono::time_point completion_time = std::chrono::high_resolution_clock::now();
+    int64_t microseconds_per_frame = std::chrono::duration_cast<std::chrono::microseconds>(
+                                         completion_time - submission_time)
+                                         .count();
+    MTLContext::latency_resolve_average(microseconds_per_frame);
+
+    MTL_LOG_INFO("Frame Latency: %f ms (Rolling avg: %f ms Drawables: %d)\n",
+                 ((float)microseconds_per_frame) / 1000.0f,
+                 ((float)MTLContext::avg_drawable_latency_us) / 1000.0f,
+                 perf_max_drawables);
+  }];
+
+  if (MTLCommandBufferManager::sync_event == nil) {
+    MTLCommandBufferManager::sync_event = [ctx->device newEvent];
+    BLI_assert(MTLCommandBufferManager::sync_event);
+    [MTLCommandBufferManager::sync_event retain];
+  }
+  BLI_assert(MTLCommandBufferManager::sync_event != nil);
+
+  MTLCommandBufferManager::event_signal_val++;
+  [cmdbuf encodeSignalEvent:MTLCommandBufferManager::sync_event
+                      value:MTLCommandBufferManager::event_signal_val];
+
+  [cmdbuf commit];
+
+  /* When debugging, fetch advanced command buffer errors. */
+  if (G.debug & G_DEBUG_GPU) {
+    [cmdbuf waitUntilCompleted];
+    NSError *error = [cmdbuf error];
+    if (error != nil) {
+      NSLog(@"%@", error);
+      BLI_assert(false);
+
+      @autoreleasepool {
+        const char *stringAsChar = [[NSString stringWithFormat:@"%@", error] UTF8String];
+
+        std::ofstream outfile;
+        outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
+        outfile << stringAsChar;
+        outfile.close();
+      }
+    }
+    else {
+      @autoreleasepool {
+        NSString *str = @"Command buffer completed successfully!\n";
+        const char *stringAsChar = [str UTF8String];
+
+        std::ofstream outfile;
+        outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
+        outfile << stringAsChar;
+        outfile.close();
+      }
+    }
+  }
+}
+
+/** \} */
+
 } // blender::gpu
diff --git a/source/blender/gpu/metal/mtl_drawlist.hh b/source/blender/gpu/metal/mtl_drawlist.hh
new file mode 100644
index 00000000000..9eb465b26a0
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_drawlist.hh
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Implementation of Multi Draw Indirect for the Metal backend.
+ * Currently a stub, pending the full MTLDrawList implementation.
+ */
+
+#pragma once
+
+#include "gpu_drawlist_private.hh"
+
+namespace blender {
+namespace gpu {
+
+/**
+ * TODO(Metal): MTLDrawList Implementation. Included as temporary stub.
+ */
+class MTLDrawList : public DrawList {
+ public:
+  MTLDrawList(int length) {}
+  ~MTLDrawList() {}
+
+  void append(GPUBatch *batch, int i_first, int i_count) override {}
+  void submit() override {}
+
+  MEM_CXX_CLASS_ALLOC_FUNCS("MTLDrawList");
+};
+
+} // namespace gpu
+} // namespace blender
diff --git a/source/blender/gpu/metal/mtl_immediate.hh b/source/blender/gpu/metal/mtl_immediate.hh
new file mode 100644
index 00000000000..b743efb397d
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_immediate.hh
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Mimics old style OpenGL immediate mode drawing.
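 *
 * [Editor's note] For context, the high-level usage this class backs looks like the
 * following (standard GPU-module immediate API, cf. the temporary blit fallback added
 * in mtl_texture.mm further below):
 *
 *   immBindShader(shader);
 *   immBegin(GPU_PRIM_TRI_STRIP, 4);
 *   immVertex2f(pos, 1, 0);
 *   // ... remaining vertices ...
 *   immEnd();  // MTLImmediate::end() encodes and submits the draw
 *   immUnbindProgram();
 *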
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+#include "gpu_immediate_private.hh"
+
+#include <Cocoa/Cocoa.h>
+#include <Metal/Metal.h>
+#include <QuartzCore/QuartzCore.h>
+
+namespace blender::gpu {
+
+class MTLImmediate : public Immediate {
+ private:
+  MTLContext *context_ = nullptr;
+  MTLTemporaryBuffer current_allocation_;
+  MTLPrimitiveTopologyClass metal_primitive_mode_;
+  MTLPrimitiveType metal_primitive_type_;
+  bool has_begun_ = false;
+
+ public:
+  MTLImmediate(MTLContext *ctx);
+  ~MTLImmediate();
+
+  uchar *begin() override;
+  void end() override;
+  bool imm_is_recording()
+  {
+    return has_begun_;
+  }
+};
+
+} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_immediate.mm b/source/blender/gpu/metal/mtl_immediate.mm
new file mode 100644
index 00000000000..41632e39092
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_immediate.mm
@@ -0,0 +1,397 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Mimics old style OpenGL immediate mode drawing.
+ */
+
+#include "BKE_global.h"
+
+#include "GPU_vertex_format.h"
+#include "gpu_context_private.hh"
+#include "gpu_shader_private.hh"
+#include "gpu_vertex_format_private.h"
+
+#include "mtl_context.hh"
+#include "mtl_debug.hh"
+#include "mtl_immediate.hh"
+#include "mtl_primitive.hh"
+#include "mtl_shader.hh"
+
+namespace blender::gpu {
+
+MTLImmediate::MTLImmediate(MTLContext *ctx)
+{
+  context_ = ctx;
+}
+
+MTLImmediate::~MTLImmediate()
+{
+}
+
+uchar *MTLImmediate::begin()
+{
+  BLI_assert(!has_begun_);
+
+  /* Determine primitive type. */
+  metal_primitive_type_ = gpu_prim_type_to_metal(this->prim_type);
+  metal_primitive_mode_ = mtl_prim_type_to_topology_class(metal_primitive_type_);
+  has_begun_ = true;
+
+  /* Allocate a range of data and return host-accessible pointer. */
+  const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_len);
+  current_allocation_ = context_->get_scratchbuffer_manager()
+                            .scratch_buffer_allocate_range_aligned(bytes_needed, 256);
+  [current_allocation_.metal_buffer retain];
+  return reinterpret_cast<uchar *>(current_allocation_.data);
+}
+
+void MTLImmediate::end()
+{
+  /* Ensure we are between an imm::begin/imm::end pair. */
+  BLI_assert(has_begun_);
+  BLI_assert(prim_type != GPU_PRIM_NONE);
+
+  /* Verify context is valid, vertex data is written and a valid shader is bound. */
+  if (context_ && this->vertex_idx > 0 && this->shader) {
+
+    MTLShader *active_mtl_shader = static_cast<MTLShader *>(unwrap(shader));
+
+    /* Skip draw if Metal shader is not valid. */
+    if (active_mtl_shader == nullptr || !active_mtl_shader->is_valid() ||
+        active_mtl_shader->get_interface() == nullptr) {
+
+      const char *ptr = (active_mtl_shader) ? active_mtl_shader->name_get() : nullptr;
+      MTL_LOG_WARNING(
+          "MTLImmediate::end -- cannot perform draw as active shader is NULL or invalid (likely "
+          "unimplemented) (shader %p '%s')\n",
+          active_mtl_shader,
+          ptr);
+      return;
+    }
+
+    /* Ensure we are inside a render pass and fetch active RenderCommandEncoder. */
+    id<MTLRenderCommandEncoder> rec = context_->ensure_begin_render_pass();
+    BLI_assert(rec != nil);
+
+    /* Fetch active render pipeline state. */
+    MTLRenderPassState &rps = context_->main_command_buffer.get_render_pass_state();
+
+    /* Bind Shader. */
+    GPU_shader_bind(this->shader);
+
+    /* Debug markers for frame-capture and detailed error messages.
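 *
 * [Editor's note] These labels show up as collapsible scopes in Xcode's GPU frame
 * capture, e.g. (shader name illustrative):
 *
 *   immEnd(verts: 4, shader: gpu_shader_2D_uniform_color)
 *     |- setRenderPipelineState
 *     |- drawPrimitives
 *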
*/ + if (G.debug & G_DEBUG_GPU) { + [rec pushDebugGroup:[NSString + stringWithFormat:@"immEnd(verts: %d, shader: %s)", + this->vertex_idx, + active_mtl_shader->get_interface()->get_name()]]; + [rec insertDebugSignpost:[NSString stringWithFormat:@"immEnd(verts: %d, shader: %s)", + this->vertex_idx, + active_mtl_shader->get_interface() + ->get_name()]]; + } + + /* Populate pipeline state vertex descriptor. */ + MTLStateManager *state_manager = static_cast<MTLStateManager *>( + MTLContext::get()->state_manager); + MTLRenderPipelineStateDescriptor &desc = state_manager->get_pipeline_descriptor(); + const MTLShaderInterface *interface = active_mtl_shader->get_interface(); + + desc.vertex_descriptor.num_attributes = interface->get_total_attributes(); + desc.vertex_descriptor.num_vert_buffers = 1; + + for (int i = 0; i < desc.vertex_descriptor.num_attributes; i++) { + desc.vertex_descriptor.attributes[i].format = MTLVertexFormatInvalid; + } + desc.vertex_descriptor.uses_ssbo_vertex_fetch = + active_mtl_shader->get_uses_ssbo_vertex_fetch(); + desc.vertex_descriptor.num_ssbo_attributes = 0; + + /* SSBO Vertex Fetch -- Verify Attributes. */ + if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) { + active_mtl_shader->ssbo_vertex_fetch_bind_attributes_begin(); + + /* Disable Indexed rendering in SSBO vertex fetch. */ + int uniform_ssbo_use_indexed = active_mtl_shader->uni_ssbo_uses_indexed_rendering; + BLI_assert_msg(uniform_ssbo_use_indexed != -1, "Expected valid uniform location for ssbo_uses_indexed_rendering."); + int uses_indexed_rendering = 0; + active_mtl_shader->uniform_int(uniform_ssbo_use_indexed, 1, 1, &uses_indexed_rendering); + } + + /* Populate Vertex descriptor and verify attributes. + * TODO(Metal): Cache this vertex state based on Vertex format and shaders. */ + for (int i = 0; i < interface->get_total_attributes(); i++) { + + /* Note: Attribute in VERTEX FORMAT does not necessarily share the same array index as + * attributes in shader interface. */ + GPUVertAttr *attr = nullptr; + const MTLShaderInputAttribute &mtl_shader_attribute = interface->get_attribute(i); + + /* Scan through vertex_format attributes until one with a name matching the shader interface + * is found. */ + for (uint32_t a_idx = 0; a_idx < this->vertex_format.attr_len && attr == nullptr; a_idx++) { + GPUVertAttr *check_attribute = &this->vertex_format.attrs[a_idx]; + + /* Attributes can have multiple name aliases associated with them. */ + for (uint32_t n_idx = 0; n_idx < check_attribute->name_len; n_idx++) { + const char *name = GPU_vertformat_attr_name_get( + &this->vertex_format, check_attribute, n_idx); + + if (strcmp(name, interface->get_name_at_offset(mtl_shader_attribute.name_offset)) == 0) { + attr = check_attribute; + break; + } + } + } + + BLI_assert_msg(attr != nullptr, + "Could not find expected attribute in immediate mode vertex format."); + if (attr == nullptr) { + MTL_LOG_ERROR( + "MTLImmediate::end Could not find matching attribute '%s' from Shader Interface in " + "Vertex Format! - TODO: Bind Dummy attribute\n", + interface->get_name_at_offset(mtl_shader_attribute.name_offset)); + return; + } + + /* Determine whether implicit type conversion between input vertex format + * and shader interface vertex format is supported. 
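 *
 * [Editor's note] Example of the distinction made below: a normalized ushort2 stream
 * feeding a float2 shader input can use Metal's implicit
 * MTLVertexFormatUShort2Normalized -> float conversion, whereas an int4 stream feeding
 * a float4 input has no implicit path and falls through to the shader-side
 * format_conversion_mode specialization in the else-branch.
 *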
*/
+      MTLVertexFormat convertedFormat;
+      bool can_use_implicit_conversion = mtl_convert_vertex_format(
+          mtl_shader_attribute.format,
+          (GPUVertCompType)attr->comp_type,
+          attr->comp_len,
+          (GPUVertFetchMode)attr->fetch_mode,
+          &convertedFormat);
+
+      if (can_use_implicit_conversion) {
+        /* Metal API can implicitly convert some formats during vertex assembly:
+         * - Converting from a normalized short2 format to float2
+         * - Type truncation e.g. Float4 to Float2.
+         * - Type expansion from Float3 to Float4.
+         * - NOTE: extra components are filled with the corresponding components of (0,0,0,1).
+         * (See
+         * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format)
+         */
+        bool is_floating_point_format = (attr->comp_type == GPU_COMP_F32);
+        desc.vertex_descriptor.attributes[i].format = convertedFormat;
+        desc.vertex_descriptor.attributes[i].format_conversion_mode =
+            (is_floating_point_format) ? (GPUVertFetchMode)GPU_FETCH_FLOAT :
+                                         (GPUVertFetchMode)GPU_FETCH_INT;
+        BLI_assert(convertedFormat != MTLVertexFormatInvalid);
+      }
+      else {
+        /* Some conversions are NOT valid, e.g. Int4 to Float4
+         * - In this case, we need to implement a conversion routine inside the shader.
+         * - This is handled using the format_conversion_mode flag
+         * - This flag is passed into the PSO as a function specialization,
+         *   and will generate an appropriate conversion function when reading the vertex
+         *   attribute value into local shader storage.
+         *   (If no explicit conversion is needed, the function specializes to a pass-through.) */
+        MTLVertexFormat converted_format;
+        bool can_convert = mtl_vertex_format_resize(
+            mtl_shader_attribute.format, attr->comp_len, &converted_format);
+        desc.vertex_descriptor.attributes[i].format = (can_convert) ? converted_format :
+                                                                      mtl_shader_attribute.format;
+        desc.vertex_descriptor.attributes[i].format_conversion_mode = (GPUVertFetchMode)
+                                                                          attr->fetch_mode;
+        BLI_assert(desc.vertex_descriptor.attributes[i].format != MTLVertexFormatInvalid);
+      }
+      /* Use the attribute offset from the vertex format, as this will be correct. */
+      desc.vertex_descriptor.attributes[i].offset = attr->offset;
+      desc.vertex_descriptor.attributes[i].buffer_index = mtl_shader_attribute.buffer_index;
+
+      /* SSBO Vertex Fetch Attribute bind. */
+      if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+        BLI_assert_msg(mtl_shader_attribute.buffer_index == 0,
+                       "All attributes should be in buffer index zero");
+        MTLSSBOAttribute ssbo_attr(
+            mtl_shader_attribute.index,
+            mtl_shader_attribute.buffer_index,
+            attr->offset,
+            this->vertex_format.stride,
+            MTLShader::ssbo_vertex_type_to_attr_type(desc.vertex_descriptor.attributes[i].format),
+            false);
+        desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] =
+            ssbo_attr;
+        desc.vertex_descriptor.num_ssbo_attributes++;
+        active_mtl_shader->ssbo_vertex_fetch_bind_attribute(ssbo_attr);
+      }
+    }
+
+    /* Buffer bindings for singular vertex buffer. */
+    desc.vertex_descriptor.buffer_layouts[0].step_function = MTLVertexStepFunctionPerVertex;
+    desc.vertex_descriptor.buffer_layouts[0].step_rate = 1;
+    desc.vertex_descriptor.buffer_layouts[0].stride = this->vertex_format.stride;
+    BLI_assert(this->vertex_format.stride > 0);
+
+    /* SSBO Vertex Fetch -- Verify Attributes. */
+    if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+      active_mtl_shader->ssbo_vertex_fetch_bind_attributes_end(rec);
+
+      /* Set Status uniforms.
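 *
 * [Editor's note] These uniforms tell the SSBO vertex-fetch shader how to expand the
 * raw vertex stream. Illustrative arithmetic (see the draw submission further below):
 * drawing GPU_PRIM_LINES with 6 input vertices yields 3 input primitives; if the
 * shader declares 6 output vertices per primitive, the emitted draw uses 3 * 6 = 18
 * vertices.
 *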
*/ + BLI_assert_msg(active_mtl_shader->uni_ssbo_input_prim_type_loc != -1, + "ssbo_input_prim_type uniform location invalid!"); + BLI_assert_msg(active_mtl_shader->uni_ssbo_input_vert_count_loc != -1, + "ssbo_input_vert_count uniform location invalid!"); + GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)), + active_mtl_shader->uni_ssbo_input_prim_type_loc, + 1, + 1, + (const int *)(&this->prim_type)); + GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)), + active_mtl_shader->uni_ssbo_input_vert_count_loc, + 1, + 1, + (const int *)(&this->vertex_idx)); + } + + MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type); + if (context_->ensure_render_pipeline_state(mtl_prim_type)) { + + /* Issue draw call. */ + BLI_assert(this->vertex_idx > 0); + + /* Metal API does not support triangle fan, so we can emulate this + * input data by generating an index buffer to re-map indices to + * a TriangleList. + * + * NOTE(Metal): Consider caching generated triangle fan index buffers. + * For immediate mode, generating these is currently very cheap, as we use + * fast scratch buffer allocations. Though we may benefit from caching of + * frequently used buffer sizes. */ + if (mtl_needs_topology_emulation(this->prim_type)) { + + /* Debug safety check for SSBO FETCH MODE. */ + if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) { + BLI_assert(false && "Topology emulation not supported with SSBO Vertex Fetch mode"); + } + + /* Emulate Tri-fan. */ + if (this->prim_type == GPU_PRIM_TRI_FAN) { + /* Prepare Triangle-Fan emulation index buffer on CPU based on number of input + * vertices. */ + uint32_t base_vert_count = this->vertex_idx; + uint32_t num_triangles = max_ii(base_vert_count - 2, 0); + uint32_t fan_index_count = num_triangles * 3; + BLI_assert(num_triangles > 0); + + uint32_t alloc_size = sizeof(uint32_t) * fan_index_count; + uint32_t *index_buffer = nullptr; + + MTLTemporaryBuffer allocation = + context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned( + alloc_size, 128); + index_buffer = (uint32_t *)allocation.data; + + int a = 0; + for (int i = 0; i < num_triangles; i++) { + index_buffer[a++] = 0; + index_buffer[a++] = i + 1; + index_buffer[a++] = i + 2; + } + + @autoreleasepool { + + id<MTLBuffer> index_buffer_mtl = nil; + uint32_t index_buffer_offset = 0; + + /* Region of scratch buffer used for topology emulation element data. + * NOTE(Metal): We do not need to manually flush as the entire scratch + * buffer for current command buffer is flushed upon submission. */ + index_buffer_mtl = allocation.metal_buffer; + index_buffer_offset = allocation.buffer_offset; + + /* Set depth stencil state (requires knowledge of primitive type). */ + context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle); + + /* Bind Vertex Buffer. */ + rps.bind_vertex_buffer( + current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0); + + /* Draw. */ + [rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle + indexCount:fan_index_count + indexType:MTLIndexTypeUInt32 + indexBuffer:index_buffer_mtl + indexBufferOffset:index_buffer_offset]; + } + } + else { + /* TODO(Metal): Topology emulation for line loop. + * NOTE(Metal): This is currently not used anywhere and modified at the high + * level for efficiency in such cases. 
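 *
 * [Editor's note] A minimal sketch of such emulation, were it ever needed here
 * (hypothetical, mirroring the triangle-fan path above): a line loop over N vertices
 * becomes an indexed line strip with one extra index closing the loop:
 *
 *   for (uint32_t i = 0; i < vert_count; i++) {
 *     index_buffer[i] = i;
 *   }
 *   index_buffer[vert_count] = 0;  // close the loop back to the first vertex
 *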
*/ + BLI_assert_msg(false, "LineLoop requires emulation support in immediate mode."); + } + } + else { + MTLPrimitiveType primitive_type = metal_primitive_type_; + int vertex_count = this->vertex_idx; + + /* Bind Vertex Buffer. */ + rps.bind_vertex_buffer( + current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0); + + /* Set depth stencil state (requires knowledge of primitive type). */ + context_->ensure_depth_stencil_state(primitive_type); + + if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) { + + /* Bind Null Buffers for empty/missing bind slots. */ + id<MTLBuffer> null_buffer = context_->get_null_buffer(); + BLI_assert(null_buffer != nil); + for (int i = 1; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) { + + /* We only need to ensure a buffer is bound to the context, its contents do not matter + * as it will not be used. */ + if (rps.cached_vertex_buffer_bindings[i].metal_buffer == nil) { + rps.bind_vertex_buffer(null_buffer, 0, i); + } + } + + /* SSBO vertex fetch - Nullify elements buffer. */ + if (rps.cached_vertex_buffer_bindings[MTL_SSBO_VERTEX_FETCH_IBO_INDEX].metal_buffer == + nil) { + rps.bind_vertex_buffer(null_buffer, 0, MTL_SSBO_VERTEX_FETCH_IBO_INDEX); + } + + /* Submit draw call with modified vertex count, which reflects vertices per primitive + * defined in the USE_SSBO_VERTEX_FETCH pragma. */ + int num_input_primitives = gpu_get_prim_count_from_type(vertex_count, this->prim_type); + int output_num_verts = num_input_primitives * + active_mtl_shader->get_ssbo_vertex_fetch_output_num_verts(); +#ifndef NDEBUG + BLI_assert( + mtl_vertex_count_fits_primitive_type( + output_num_verts, active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type()) && + "Output Vertex count is not compatible with the requested output vertex primitive " + "type"); +#endif + [rec drawPrimitives:active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type() + vertexStart:0 + vertexCount:output_num_verts]; + context_->main_command_buffer.register_draw_counters(output_num_verts); + } + else { + /* Regular draw. */ + [rec drawPrimitives:primitive_type vertexStart:0 vertexCount:vertex_count]; + context_->main_command_buffer.register_draw_counters(vertex_count); + } + } + } + if (G.debug & G_DEBUG_GPU) { + [rec popDebugGroup]; + } + } + + /* Reset allocation after draw submission. */ + has_begun_ = false; + if (current_allocation_.metal_buffer) { + [current_allocation_.metal_buffer release]; + current_allocation_.metal_buffer = nil; + } +} + +} // blender::gpu diff --git a/source/blender/gpu/metal/mtl_memory.hh b/source/blender/gpu/metal/mtl_memory.hh index df80df6543f..bd354376b12 100644 --- a/source/blender/gpu/metal/mtl_memory.hh +++ b/source/blender/gpu/metal/mtl_memory.hh @@ -340,13 +340,13 @@ class MTLBufferPool { private: /* Memory statistics. */ - long long int total_allocation_bytes_ = 0; + int64_t total_allocation_bytes_ = 0; #if MTL_DEBUG_MEMORY_STATISTICS == 1 /* Debug statistics. */ std::atomic<int> per_frame_allocation_count_; - std::atomic<long long int> allocations_in_pool_; - std::atomic<long long int> buffers_in_pool_; + std::atomic<int64_t> allocations_in_pool_; + std::atomic<int64_t> buffers_in_pool_; #endif /* Metal resources. 
*/ diff --git a/source/blender/gpu/metal/mtl_shader.hh b/source/blender/gpu/metal/mtl_shader.hh index 64d9d1cf849..5485b32dd31 100644 --- a/source/blender/gpu/metal/mtl_shader.hh +++ b/source/blender/gpu/metal/mtl_shader.hh @@ -261,8 +261,6 @@ class MTLShader : public Shader { bool get_push_constant_is_dirty(); void push_constant_bindstate_mark_dirty(bool is_dirty); - void vertformat_from_shader(GPUVertFormat *format) const override; - /* DEPRECATED: Kept only because of BGL API. (Returning -1 in METAL). */ int program_handle_get() const override { diff --git a/source/blender/gpu/metal/mtl_shader.mm b/source/blender/gpu/metal/mtl_shader.mm index 23097f312f0..3b27b60bca0 100644 --- a/source/blender/gpu/metal/mtl_shader.mm +++ b/source/blender/gpu/metal/mtl_shader.mm @@ -129,6 +129,7 @@ MTLShader::~MTLShader() if (shd_builder_ != nullptr) { delete shd_builder_; + shd_builder_ = nullptr; } } @@ -209,6 +210,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info) /* Release temporary compilation resources. */ delete shd_builder_; + shd_builder_ = nullptr; return false; } } @@ -279,6 +281,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info) /* Release temporary compilation resources. */ delete shd_builder_; + shd_builder_ = nullptr; return false; } } @@ -324,6 +327,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info) /* Release temporary compilation resources. */ delete shd_builder_; + shd_builder_ = nullptr; return true; } @@ -535,28 +539,6 @@ void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty) { push_constant_modified_ = is_dirty; } - -void MTLShader::vertformat_from_shader(GPUVertFormat *format) const -{ - GPU_vertformat_clear(format); - - const MTLShaderInterface *mtl_interface = static_cast<const MTLShaderInterface *>(interface); - for (const uint attr_id : IndexRange(mtl_interface->get_total_attributes())) { - const MTLShaderInputAttribute &attr = mtl_interface->get_attribute(attr_id); - - /* Extract type parameters from Metal type. */ - GPUVertCompType comp_type = comp_type_from_vert_format(attr.format); - uint comp_len = comp_count_from_vert_format(attr.format); - GPUVertFetchMode fetch_mode = fetchmode_from_vert_format(attr.format); - - GPU_vertformat_attr_add(format, - mtl_interface->get_name_at_offset(attr.name_offset), - comp_type, - comp_len, - fetch_mode); - } -} - /** \} */ /* -------------------------------------------------------------------- */ @@ -1167,6 +1149,7 @@ void MTLShader::ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_at MTLShaderInterface *mtl_interface = this->get_interface(); BLI_assert(ssbo_attr.mtl_attribute_index >= 0 && ssbo_attr.mtl_attribute_index < mtl_interface->get_total_attributes()); + UNUSED_VARS_NDEBUG(mtl_interface); /* Update bind-mask to verify this attribute has been used. 
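 *
 * [Editor's note] The bind-mask is a bitfield with one bit per shader attribute; e.g.
 * binding attributes 0, 1 and 2 in turn takes it 0b000 -> 0b001 -> 0b011 -> 0b111,
 * and the assert below fires if an attribute's bit is already set (a double bind).
 *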
*/ BLI_assert((ssbo_vertex_attribute_bind_mask_ & (1 << ssbo_attr.mtl_attribute_index)) == diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm index 977e97dbd82..4a2be0753bb 100644 --- a/source/blender/gpu/metal/mtl_shader_generator.mm +++ b/source/blender/gpu/metal/mtl_shader_generator.mm @@ -724,10 +724,6 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info) } if (msl_iface.uses_ssbo_vertex_fetch_mode) { ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl; - ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_MAX_VBOS " << MTL_SSBO_VERTEX_FETCH_MAX_VBOS - << std::endl; - ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_IBO_INDEX " << MTL_SSBO_VERTEX_FETCH_IBO_INDEX - << std::endl; for (const MSLVertexInputAttribute &attr : msl_iface.vertex_input_attributes) { ss_vertex << "#define SSBO_ATTR_TYPE_" << attr.name << " " << attr.type << std::endl; } diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm index 4af46c13751..b4e913e5be6 100644 --- a/source/blender/gpu/metal/mtl_texture.mm +++ b/source/blender/gpu/metal/mtl_texture.mm @@ -12,6 +12,7 @@ #include "GPU_batch_presets.h" #include "GPU_capabilities.h" #include "GPU_framebuffer.h" +#include "GPU_immediate.h" #include "GPU_platform.h" #include "GPU_state.h" @@ -303,7 +304,6 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst, /* Execute graphics draw call to perform the blit. */ GPUBatch *quad = GPU_batch_preset_quad(); - GPU_batch_set_shader(quad, shader); float w = dst->width_get(); @@ -337,6 +337,20 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst, GPU_batch_draw(quad); + /* TMP draw with IMM TODO(Metal): Remove this once GPUBatch is supported. */ + GPUVertFormat *imm_format = immVertexFormat(); + uint pos = GPU_vertformat_attr_add(imm_format, "pos", GPU_COMP_F32, 2, GPU_FETCH_FLOAT); + + immBindShader(shader); + immBegin(GPU_PRIM_TRI_STRIP, 4); + immVertex2f(pos, 1, 0); + immVertex2f(pos, 0, 0); + immVertex2f(pos, 1, 1); + immVertex2f(pos, 0, 1); + immEnd(); + immUnbindProgram(); + /**********************/ + /* restoring old pipeline state. */ GPU_depth_mask(depth_write_prev); GPU_stencil_write_mask_set(stencil_mask_prev); @@ -1463,79 +1477,6 @@ bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo) BLI_assert_msg(this->format_ != GPU_DEPTH24_STENCIL8, "Apple silicon does not support GPU_DEPTH24_S8"); - MTLPixelFormat mtl_format = gpu_texture_format_to_metal(this->format_); - mtl_max_mips_ = 1; - mipmaps_ = 0; - this->mip_range_set(0, 0); - - /* Create texture from GPUVertBuf's buffer. */ - MTLVertBuf *mtl_vbo = static_cast<MTLVertBuf *>(unwrap(vbo)); - mtl_vbo->bind(); - mtl_vbo->flag_used(); - - /* Get Metal Buffer. */ - id<MTLBuffer> source_buffer = mtl_vbo->get_metal_buffer(); - BLI_assert(source_buffer); - - /* Verify size. */ - if (w_ <= 0) { - MTL_LOG_WARNING("Allocating texture buffer of width 0!\n"); - w_ = 1; - } - - /* Verify Texture and vertex buffer alignment. */ - int bytes_per_pixel = get_mtl_format_bytesize(mtl_format); - int bytes_per_row = bytes_per_pixel * w_; - - MTLContext *mtl_ctx = MTLContext::get(); - uint align_requirement = static_cast<uint>( - [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]); - - /* Verify per-vertex size aligns with texture size. 
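 *
 * [Editor's note] Context for the block removed here: a GPU_RGBA8 buffer-texture is
 * 4 bytes per pixel, so the backing vertex buffer had to use a matching 4-byte stride;
 * Metal's minimum 4-byte per-vertex padding is what could make a smaller declared
 * stride diverge from the pixel stride and trip the assert.
 *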
@@ -1463,79 +1477,6 @@ bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo)
   BLI_assert_msg(this->format_ != GPU_DEPTH24_STENCIL8,
                  "Apple silicon does not support GPU_DEPTH24_S8");
 
-  MTLPixelFormat mtl_format = gpu_texture_format_to_metal(this->format_);
-  mtl_max_mips_ = 1;
-  mipmaps_ = 0;
-  this->mip_range_set(0, 0);
-
-  /* Create texture from GPUVertBuf's buffer. */
-  MTLVertBuf *mtl_vbo = static_cast<MTLVertBuf *>(unwrap(vbo));
-  mtl_vbo->bind();
-  mtl_vbo->flag_used();
-
-  /* Get Metal Buffer. */
-  id<MTLBuffer> source_buffer = mtl_vbo->get_metal_buffer();
-  BLI_assert(source_buffer);
-
-  /* Verify size. */
-  if (w_ <= 0) {
-    MTL_LOG_WARNING("Allocating texture buffer of width 0!\n");
-    w_ = 1;
-  }
-
-  /* Verify Texture and vertex buffer alignment. */
-  int bytes_per_pixel = get_mtl_format_bytesize(mtl_format);
-  int bytes_per_row = bytes_per_pixel * w_;
-
-  MTLContext *mtl_ctx = MTLContext::get();
-  uint align_requirement = static_cast<uint>(
-      [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]);
-
-  /* Verify per-vertex size aligns with texture size. */
-  const GPUVertFormat *format = GPU_vertbuf_get_format(vbo);
-  BLI_assert(bytes_per_pixel == format->stride &&
-             "Pixel format stride MUST match the texture format stride -- These being different "
-             "is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex");
-  UNUSED_VARS_NDEBUG(format);
-
-  /* Create texture descriptor. */
-  BLI_assert(type_ == GPU_TEXTURE_BUFFER);
-  texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
-  texture_descriptor_.pixelFormat = mtl_format;
-  texture_descriptor_.textureType = MTLTextureTypeTextureBuffer;
-  texture_descriptor_.width = w_;
-  texture_descriptor_.height = 1;
-  texture_descriptor_.depth = 1;
-  texture_descriptor_.arrayLength = 1;
-  texture_descriptor_.mipmapLevelCount = mtl_max_mips_;
-  texture_descriptor_.usage =
-      MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite |
-      MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. */
-  texture_descriptor_.storageMode = [source_buffer storageMode];
-  texture_descriptor_.sampleCount = 1;
-  texture_descriptor_.cpuCacheMode = [source_buffer cpuCacheMode];
-  texture_descriptor_.hazardTrackingMode = [source_buffer hazardTrackingMode];
-
-  texture_ = [source_buffer
-      newTextureWithDescriptor:texture_descriptor_
-                         offset:0
-                    bytesPerRow:ceil_to_multiple_u(bytes_per_row, align_requirement)];
-  aligned_w_ = bytes_per_row / bytes_per_pixel;
-
-  BLI_assert(texture_);
-  texture_.label = [NSString stringWithUTF8String:this->get_name()];
-  is_baked_ = true;
-  is_dirty_ = false;
-  resource_mode_ = MTL_TEXTURE_MODE_VBO;
-
-  /* Track Status. */
-  vert_buffer_ = mtl_vbo;
-  vert_buffer_mtl_ = source_buffer;
-
-  /* Cleanup. */
-  [texture_descriptor_ release];
-  texture_descriptor_ = nullptr;
-
   return true;
 }
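The removed `init_internal(GPUVertBuf *)` path wrapped a vertex buffer in a linear texture, which is why it rounded the row stride up with `ceil_to_multiple_u()`: Metal requires the `bytesPerRow` of a buffer-backed texture to be a multiple of the device's linear-texture alignment. A sketch of that rounding, assuming a valid `id<MTLDevice>` (names mirror the removed locals; this is illustrative, not part of the patch):

    #import <Metal/Metal.h>
    #include <cstdint>

    /* Equivalent of ceil_to_multiple_u(bytes_per_row, align_requirement)
     * as used by the removed code. */
    static uint32_t aligned_row_stride(id<MTLDevice> device,
                                       MTLPixelFormat mtl_format,
                                       uint32_t bytes_per_row)
    {
      const uint32_t align_requirement = static_cast<uint32_t>(
          [device minimumLinearTextureAlignmentForPixelFormat:mtl_format]);
      /* Round up to the next multiple, e.g. a 12-byte row with a 16-byte
       * alignment requirement is padded to 16 bytes. */
      return ((bytes_per_row + align_requirement - 1) / align_requirement) * align_requirement;
    }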
diff --git a/source/blender/gpu/metal/mtl_texture_util.mm b/source/blender/gpu/metal/mtl_texture_util.mm
index 928393fb39e..5ed7659f260 100644
--- a/source/blender/gpu/metal/mtl_texture_util.mm
+++ b/source/blender/gpu/metal/mtl_texture_util.mm
@@ -22,13 +22,7 @@
 /* Utility file for secondary functionality which supports mtl_texture.mm. */
 
 extern char datatoc_compute_texture_update_msl[];
-extern char datatoc_depth_2d_update_vert_glsl[];
-extern char datatoc_depth_2d_update_float_frag_glsl[];
-extern char datatoc_depth_2d_update_int24_frag_glsl[];
-extern char datatoc_depth_2d_update_int32_frag_glsl[];
 extern char datatoc_compute_texture_read_msl[];
-extern char datatoc_gpu_shader_fullscreen_blit_vert_glsl[];
-extern char datatoc_gpu_shader_fullscreen_blit_frag_glsl[];
 
 namespace blender::gpu {
 
@@ -447,42 +441,34 @@ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get(
     return *result;
   }
 
-  const char *fragment_source = nullptr;
+  const char *depth_2d_info_variant = nullptr;
   switch (specialization.data_mode) {
     case MTL_DEPTH_UPDATE_MODE_FLOAT:
-      fragment_source = datatoc_depth_2d_update_float_frag_glsl;
+      depth_2d_info_variant = "depth_2d_update_float";
       break;
     case MTL_DEPTH_UPDATE_MODE_INT24:
-      fragment_source = datatoc_depth_2d_update_int24_frag_glsl;
+      depth_2d_info_variant = "depth_2d_update_int24";
      break;
    case MTL_DEPTH_UPDATE_MODE_INT32:
-      fragment_source = datatoc_depth_2d_update_int32_frag_glsl;
+      depth_2d_info_variant = "depth_2d_update_int32";
      break;
    default:
      BLI_assert(false && "Invalid format mode\n");
      return nullptr;
  }
 
-  GPUShader *shader = GPU_shader_create(datatoc_depth_2d_update_vert_glsl,
-                                        fragment_source,
-                                        nullptr,
-                                        nullptr,
-                                        nullptr,
-                                        "depth_2d_update_sh_get");
+  GPUShader *shader = GPU_shader_create_from_info_name(depth_2d_info_variant);
   mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialization, shader);
   return shader;
 }
 
 GPUShader *gpu::MTLTexture::fullscreen_blit_sh_get()
 {
-
   MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
   BLI_assert(mtl_context != nullptr);
   if (mtl_context->get_texture_utils().fullscreen_blit_shader == nullptr) {
-    const char *vertex_source = datatoc_gpu_shader_fullscreen_blit_vert_glsl;
-    const char *fragment_source = datatoc_gpu_shader_fullscreen_blit_frag_glsl;
-    GPUShader *shader = GPU_shader_create(
-        vertex_source, fragment_source, nullptr, nullptr, nullptr, "fullscreen_blit");
+    GPUShader *shader = GPU_shader_create_from_info_name("fullscreen_blit");
+    mtl_context->get_texture_utils().fullscreen_blit_shader = shader;
   }
 
   return mtl_context->get_texture_utils().fullscreen_blit_shader;
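`GPU_shader_create_from_info_name()` looks a shader up by the name of a registered `ShaderCreateInfo` descriptor instead of compiling raw GLSL strings, which lets each backend generate its own source (GLSL or MSL) from one declaration. The actual `fullscreen_blit` and `depth_2d_update_*` infos live in files not shown in this patch; a hypothetical sketch of what such a declaration looks like, with illustrative fields:

    #include "gpu_shader_create_info.hh"

    /* Hypothetical: the real "fullscreen_blit" info is defined elsewhere in
     * the tree; attribute slots and source file names here are assumptions. */
    GPU_SHADER_CREATE_INFO(fullscreen_blit)
        .vertex_in(0, Type::VEC2, "pos")
        .fragment_out(0, Type::VEC4, "fragColor")
        .vertex_source("gpu_shader_fullscreen_blit_vert.glsl")
        .fragment_source("gpu_shader_fullscreen_blit_frag.glsl")
        .do_static_compilation(true);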
@@ -614,7 +600,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
         stringWithUTF8String:datatoc_compute_texture_read_msl];
 
     /* Defensive Debug Checks. */
-    long long int depth_scale_factor = 1;
+    int64_t depth_scale_factor = 1;
     if (specialization_params.depth_format_mode > 0) {
       BLI_assert(specialization_params.component_count_input == 1);
       BLI_assert(specialization_params.component_count_output == 1);
diff --git a/source/blender/gpu/opengl/gl_backend.hh b/source/blender/gpu/opengl/gl_backend.hh
index 8646d94e2fd..14fca9f061d 100644
--- a/source/blender/gpu/opengl/gl_backend.hh
+++ b/source/blender/gpu/opengl/gl_backend.hh
@@ -61,7 +61,7 @@ class GLBackend : public GPUBackend {
     GLTexture::samplers_update();
   };
 
-  Context *context_alloc(void *ghost_window) override
+  Context *context_alloc(void *ghost_window, void *ghost_context) override
   {
     return new GLContext(ghost_window, shared_orphan_list_);
   };
diff --git a/source/blender/gpu/tests/gpu_testing.cc b/source/blender/gpu/tests/gpu_testing.cc
index 224a9afcf59..67e296b11d5 100644
--- a/source/blender/gpu/tests/gpu_testing.cc
+++ b/source/blender/gpu/tests/gpu_testing.cc
@@ -19,7 +19,7 @@ void GPUTest::SetUp()
 
   ghost_system = GHOST_CreateSystem();
   ghost_context = GHOST_CreateOpenGLContext(ghost_system, glSettings);
   GHOST_ActivateOpenGLContext(ghost_context);
-  context = GPU_context_create(nullptr);
+  context = GPU_context_create(nullptr, ghost_context);
   GPU_init();
 }
diff --git a/source/blender/render/intern/pipeline.cc b/source/blender/render/intern/pipeline.cc
index 4b52fb62bee..3cac9063b8e 100644
--- a/source/blender/render/intern/pipeline.cc
+++ b/source/blender/render/intern/pipeline.cc
@@ -926,7 +926,7 @@ void *RE_gl_context_get(Render *re)
 void *RE_gpu_context_get(Render *re)
 {
   if (re->gpu_context == nullptr) {
-    re->gpu_context = GPU_context_create(nullptr);
+    re->gpu_context = GPU_context_create(NULL, re->gl_context);
   }
   return re->gpu_context;
 }
diff --git a/source/blender/windowmanager/intern/wm_playanim.c b/source/blender/windowmanager/intern/wm_playanim.c
index 7c6650922a4..e768d18960b 100644
--- a/source/blender/windowmanager/intern/wm_playanim.c
+++ b/source/blender/windowmanager/intern/wm_playanim.c
@@ -1549,7 +1549,7 @@ static char *wm_main_playanim_intern(int argc, const char **argv)
   //  GHOST_ActivateWindowDrawingContext(g_WS.ghost_window);
 
   /* initialize OpenGL immediate mode */
-  g_WS.gpu_context = GPU_context_create(g_WS.ghost_window);
+  g_WS.gpu_context = GPU_context_create(g_WS.ghost_window, NULL);
   GPU_init();
 
   /* initialize the font */
diff --git a/source/blender/windowmanager/intern/wm_window.c b/source/blender/windowmanager/intern/wm_window.c
index b61ebdd11be..dea875becb1 100644
--- a/source/blender/windowmanager/intern/wm_window.c
+++ b/source/blender/windowmanager/intern/wm_window.c
@@ -579,7 +579,7 @@ static void wm_window_ghostwindow_add(wmWindowManager *wm,
                                       glSettings);
 
   if (ghostwin) {
-    win->gpuctx = GPU_context_create(ghostwin);
+    win->gpuctx = GPU_context_create(ghostwin, NULL);
 
     /* needed so we can detect the graphics card below */
     GPU_init();
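After this change `GPU_context_create()` takes both a GHOST window and an explicit GHOST context, and every call site above passes exactly one of the two: window-backed contexts derive their drawing context from the window, while windowless users (the render pipeline, GPU tests) supply a GHOST context directly. A sketch of the two patterns, assuming the caller already owns the relevant GHOST handles (the wrapper function is illustrative, not from this patch):

    #include "GPU_context.h"

    static GPUContext *create_gpu_context(void *ghost_window, void *ghost_context)
    {
      if (ghost_window != NULL) {
        /* Window-backed: the GPU context is derived from the window's own
         * drawing context, so no explicit GHOST context is needed. */
        return GPU_context_create(ghost_window, NULL);
      }
      /* Windowless: an explicit GHOST context must be supplied. */
      return GPU_context_create(NULL, ghost_context);
    }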