git.blender.org/blender.git
author     Thomas Dinges <dingto>  2022-09-22 18:27:51 +0300
committer  Clément Foucault <foucault.clem@gmail.com>  2022-09-22 18:32:43 +0300
commit     697b447c2069bbbbaa9929aab0ea1f66ef8bf4d0 (patch)
tree       f5c78b102b5c1478fb1dbd262b23508f5f072e33
parent     bb63b98d1ff5acfd24dff9b5e72175f82f5bca26 (diff)
Metal: MTLContext implementation and immediate mode rendering support.
MTLContext provides functionality for command encoding, binding management and graphics device management. MTLImmediate provides simple draw enablement with dynamically encoded data. These draws utilise temporary scratch buffer memory to provide minimal bandwidth overhead during workload submission.

This patch also contains empty placeholders for MTLBatch and MTLDrawList to enable testing of first pixels on-screen without failure.

The Metal API also requires access to the GHOST_Context to ensure the same pre-initialized Metal GPU device is used by the viewport. Given the explicit nature of Metal, explicit control is also needed over presentation, to ensure correct work scheduling and rendering pipeline state.

Authored by Apple: Michael Parkin-White

Ref T96261

(The diff is based on 043f59cb3b5835ba1a0bbf6f1cbad080b527f7f6)

Reviewed By: fclem

Differential Revision: https://developer.blender.org/D15953
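Given the explicit presentation model described above, the GPU backend drives the final present itself through a callback registered on the GHOST context. Below is a minimal sketch of such a hook, using the metalRegisterPresentCallback() and metalCommandQueue() APIs this patch adds to GHOST_ContextCGL; the callback body mirrors the blit encoded in metalSwapBuffers() further down, but the wiring function and the file-static queue are illustrative assumptions, not part of the patch.

#import <Metal/Metal.h>
#import <QuartzCore/QuartzCore.h>
#include "GHOST_ContextCGL.h"

/* Queue shared with GHOST so presentation is ordered against viewport work. */
static id<MTLCommandQueue> g_queue = nil;

static void present_cb(MTLRenderPassDescriptor *passDescriptor,
                       id<MTLRenderPipelineState> blitPipeline,
                       id<MTLTexture> overlayTexture,
                       id<CAMetalDrawable> drawable)
{
  /* Composite the overlay texture into the drawable, then schedule the
   * present on the same command buffer so it waits on this submission. */
  id<MTLCommandBuffer> cmd = [g_queue commandBuffer];
  id<MTLRenderCommandEncoder> enc = [cmd renderCommandEncoderWithDescriptor:passDescriptor];
  [enc setRenderPipelineState:blitPipeline];
  [enc setFragmentTexture:overlayTexture atIndex:0];
  [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3];
  [enc endEncoding];
  [cmd presentDrawable:drawable];
  [cmd commit];
}

/* Hypothetical wiring: reuse the queue GHOST created, then register. */
void hook_presentation(GHOST_ContextCGL *ctx)
{
  g_queue = (id<MTLCommandQueue>)ctx->metalCommandQueue();
  ctx->metalRegisterPresentCallback(&present_cb);
}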
-rw-r--r--  intern/ghost/intern/GHOST_Context.h | 8
-rw-r--r--  intern/ghost/intern/GHOST_ContextCGL.h | 79
-rw-r--r--  intern/ghost/intern/GHOST_ContextCGL.mm | 805
-rw-r--r--  intern/ghost/intern/GHOST_Window.cpp | 5
-rw-r--r--  intern/ghost/intern/GHOST_Window.h | 74
-rw-r--r--  intern/ghost/test/multitest/MultiTest.c | 6
-rw-r--r--  source/blender/draw/DRW_engine.h | 1
-rw-r--r--  source/blender/draw/engines/eevee/eevee_lightcache.c | 2
-rw-r--r--  source/blender/draw/engines/eevee_next/eevee_shader.cc | 2
-rw-r--r--  source/blender/draw/intern/DRW_render.h | 1
-rw-r--r--  source/blender/draw/intern/draw_manager.c | 2
-rw-r--r--  source/blender/draw/intern/draw_manager_shader.c | 194
-rw-r--r--  source/blender/gpu/CMakeLists.txt | 4
-rw-r--r--  source/blender/gpu/GPU_context.h | 2
-rw-r--r--  source/blender/gpu/GPU_material.h | 25
-rw-r--r--  source/blender/gpu/intern/gpu_backend.hh | 2
-rw-r--r--  source/blender/gpu/intern/gpu_codegen.cc | 101
-rw-r--r--  source/blender/gpu/intern/gpu_codegen.h | 4
-rw-r--r--  source/blender/gpu/intern/gpu_context.cc | 7
-rw-r--r--  source/blender/gpu/intern/gpu_material.c | 215
-rw-r--r--  source/blender/gpu/intern/gpu_node_graph.c | 19
-rw-r--r--  source/blender/gpu/intern/gpu_node_graph.h | 15
-rw-r--r--  source/blender/gpu/intern/gpu_shader_builder.cc | 2
-rw-r--r--  source/blender/gpu/intern/gpu_shader_interface.cc | 4
-rw-r--r--  source/blender/gpu/metal/mtl_backend.hh | 2
-rw-r--r--  source/blender/gpu/metal/mtl_backend.mm | 15
-rw-r--r--  source/blender/gpu/metal/mtl_batch.hh | 41
-rw-r--r--  source/blender/gpu/metal/mtl_command_buffer.mm | 172
-rw-r--r--  source/blender/gpu/metal/mtl_common.hh | 4
-rw-r--r--  source/blender/gpu/metal/mtl_context.hh | 132
-rw-r--r--  source/blender/gpu/metal/mtl_context.mm | 1191
-rw-r--r--  source/blender/gpu/metal/mtl_drawlist.hh | 34
-rw-r--r--  source/blender/gpu/metal/mtl_immediate.hh | 41
-rw-r--r--  source/blender/gpu/metal/mtl_immediate.mm | 397
-rw-r--r--  source/blender/gpu/metal/mtl_memory.hh | 6
-rw-r--r--  source/blender/gpu/metal/mtl_shader.hh | 2
-rw-r--r--  source/blender/gpu/metal/mtl_shader.mm | 27
-rw-r--r--  source/blender/gpu/metal/mtl_shader_generator.mm | 4
-rw-r--r--  source/blender/gpu/metal/mtl_texture.mm | 89
-rw-r--r--  source/blender/gpu/metal/mtl_texture_util.mm | 30
-rw-r--r--  source/blender/gpu/opengl/gl_backend.hh | 2
-rw-r--r--  source/blender/gpu/tests/gpu_testing.cc | 2
-rw-r--r--  source/blender/render/intern/pipeline.cc | 2
-rw-r--r--  source/blender/windowmanager/intern/wm_playanim.c | 2
-rw-r--r--  source/blender/windowmanager/intern/wm_window.c | 2
45 files changed, 3215 insertions, 561 deletions
diff --git a/intern/ghost/intern/GHOST_Context.h b/intern/ghost/intern/GHOST_Context.h
index 3546fb6bbc7..04d445e7f85 100644
--- a/intern/ghost/intern/GHOST_Context.h
+++ b/intern/ghost/intern/GHOST_Context.h
@@ -36,19 +36,19 @@ class GHOST_Context : public GHOST_IContext {
* Swaps front and back buffers of a window.
* \return A boolean success indicator.
*/
- virtual GHOST_TSuccess swapBuffers() = 0;
+ virtual GHOST_TSuccess swapBuffers() override = 0;
/**
* Activates the drawing context of this window.
* \return A boolean success indicator.
*/
- virtual GHOST_TSuccess activateDrawingContext() = 0;
+ virtual GHOST_TSuccess activateDrawingContext() override = 0;
/**
* Release the drawing context of the calling thread.
* \return A boolean success indicator.
*/
- virtual GHOST_TSuccess releaseDrawingContext() = 0;
+ virtual GHOST_TSuccess releaseDrawingContext() override = 0;
/**
* Call immediately after new to initialize. If this fails then immediately delete the object.
@@ -130,7 +130,7 @@ class GHOST_Context : public GHOST_IContext {
* Gets the OpenGL frame-buffer associated with the OpenGL context
* \return The ID of an OpenGL frame-buffer object.
*/
- virtual unsigned int getDefaultFramebuffer()
+ virtual unsigned int getDefaultFramebuffer() override
{
return 0;
}
diff --git a/intern/ghost/intern/GHOST_ContextCGL.h b/intern/ghost/intern/GHOST_ContextCGL.h
index fa6d6fc6fa0..5caabb8ce00 100644
--- a/intern/ghost/intern/GHOST_ContextCGL.h
+++ b/intern/ghost/intern/GHOST_ContextCGL.h
@@ -9,8 +9,13 @@
#include "GHOST_Context.h"
+#include <Cocoa/Cocoa.h>
+#include <Metal/Metal.h>
+#include <QuartzCore/QuartzCore.h>
+
@class CAMetalLayer;
@class MTLCommandQueue;
+@class MTLDevice;
@class MTLRenderPipelineState;
@class MTLTexture;
@class NSOpenGLContext;
@@ -36,62 +41,89 @@ class GHOST_ContextCGL : public GHOST_Context {
* Swaps front and back buffers of a window.
* \return A boolean success indicator.
*/
- GHOST_TSuccess swapBuffers();
+ GHOST_TSuccess swapBuffers() override;
/**
* Activates the drawing context of this window.
* \return A boolean success indicator.
*/
- GHOST_TSuccess activateDrawingContext();
+ GHOST_TSuccess activateDrawingContext() override;
/**
* Release the drawing context of the calling thread.
* \return A boolean success indicator.
*/
- GHOST_TSuccess releaseDrawingContext();
+ GHOST_TSuccess releaseDrawingContext() override;
- unsigned int getDefaultFramebuffer();
+ unsigned int getDefaultFramebuffer() override;
/**
* Call immediately after new to initialize. If this fails then immediately delete the object.
* \return Indication as to whether initialization has succeeded.
*/
- GHOST_TSuccess initializeDrawingContext();
+ GHOST_TSuccess initializeDrawingContext() override;
/**
* Removes references to native handles from this context and then returns
* \return GHOST_kSuccess if it is OK for the parent to release the handles and
* GHOST_kFailure if releasing the handles will interfere with sharing
*/
- GHOST_TSuccess releaseNativeHandles();
+ GHOST_TSuccess releaseNativeHandles() override;
/**
* Sets the swap interval for #swapBuffers.
* \param interval: The swap interval to use.
* \return A boolean success indicator.
*/
- GHOST_TSuccess setSwapInterval(int interval);
+ GHOST_TSuccess setSwapInterval(int interval) override;
/**
* Gets the current swap interval for #swapBuffers.
* \param intervalOut: Variable to store the swap interval if it can be read.
* \return Whether the swap interval can be read.
*/
- GHOST_TSuccess getSwapInterval(int &);
+ GHOST_TSuccess getSwapInterval(int &) override;
/**
* Updates the drawing context of this window.
* Needed whenever the window is changed.
* \return Indication of success.
*/
- GHOST_TSuccess updateDrawingContext();
+ GHOST_TSuccess updateDrawingContext() override;
+
+ /**
+ * Returns a texture that Metal code can use as a render target. The current
+ * contents of this texture will be composited on top of the framebuffer
+ * each time `swapBuffers` is called.
+ */
+ id<MTLTexture> metalOverlayTexture();
+
+ /**
+ * Return a pointer to the Metal command queue used by this context.
+ */
+ MTLCommandQueue *metalCommandQueue();
+
+ /**
+ * Return a pointer to the Metal device associated with this context.
+ */
+ MTLDevice *metalDevice();
+
+ /**
+ * Register present callback
+ */
+ void metalRegisterPresentCallback(void (*callback)(
+ MTLRenderPassDescriptor *, id<MTLRenderPipelineState>, id<MTLTexture>, id<CAMetalDrawable>));
private:
/** Metal state */
+  /* Set this flag to `true` when rendering with the Metal API for the viewport.
+   * TODO(Metal): This should be assigned externally. */
+ bool m_useMetalForRendering = false;
NSView *m_metalView;
CAMetalLayer *m_metalLayer;
MTLCommandQueue *m_metalCmdQueue;
MTLRenderPipelineState *m_metalRenderPipeline;
+ bool m_ownsMetalDevice;
/** OpenGL state, for GPUs that don't support Metal */
NSOpenGLView *m_openGLView;
@@ -102,9 +134,31 @@ class GHOST_ContextCGL : public GHOST_Context {
/** The virtualized default frame-buffer. */
unsigned int m_defaultFramebuffer;
- /** The virtualized default frame-buffer's texture. */
- MTLTexture *m_defaultFramebufferMetalTexture;
-
+ /** The virtualized default framebuffer's texture */
+ /**
+ * Texture that you can render into with Metal. The texture will be
+ * composited on top of `m_defaultFramebufferMetalTexture` whenever
+ * `swapBuffers` is called.
+ */
+ static const int METAL_SWAPCHAIN_SIZE = 3;
+ struct MTLSwapchainTexture {
+ id<MTLTexture> texture;
+ unsigned int index;
+ };
+ MTLSwapchainTexture m_defaultFramebufferMetalTexture[METAL_SWAPCHAIN_SIZE];
+ unsigned int current_swapchain_index = 0;
+
+ /* Present callback.
+ * We use this such that presentation can be controlled from within the Metal
+ * Context. This is required for optimal performance and clean control flow.
+ * Also helps ensure flickering does not occur by present being dependent
+ * on existing submissions. */
+ void (*contextPresentCallback)(MTLRenderPassDescriptor *,
+ id<MTLRenderPipelineState>,
+ id<MTLTexture>,
+ id<CAMetalDrawable>);
+
+ int mtl_SwapInterval;
const bool m_debug;
/** The first created OpenGL context (for sharing display lists) */
@@ -117,4 +171,5 @@ class GHOST_ContextCGL : public GHOST_Context {
void metalInitFramebuffer();
void metalUpdateFramebuffer();
void metalSwapBuffers();
+ void initClear();
};
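The header above replaces the single default-framebuffer texture with a small swap-chain of METAL_SWAPCHAIN_SIZE entries. A distilled sketch of the round-robin acquisition this enables (the standalone struct is illustrative only; the real logic lives in metalOverlayTexture() in the implementation below):

#import <Metal/Metal.h>

struct OverlaySwapchain {
  static const int SIZE = 3; /* METAL_SWAPCHAIN_SIZE in the patch. */
  id<MTLTexture> textures[SIZE] = {nil, nil, nil};
  unsigned int current = 0;

  /* Advance to the next backing texture so the CPU can encode a new frame
   * while earlier frames are still queued or on screen. */
  id<MTLTexture> acquire()
  {
    current = (current + 1) % SIZE;
    return textures[current];
  }
};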
diff --git a/intern/ghost/intern/GHOST_ContextCGL.mm b/intern/ghost/intern/GHOST_ContextCGL.mm
index 488aa58aa59..6a0fed79fb0 100644
--- a/intern/ghost/intern/GHOST_ContextCGL.mm
+++ b/intern/ghost/intern/GHOST_ContextCGL.mm
@@ -55,139 +55,277 @@ GHOST_ContextCGL::GHOST_ContextCGL(bool stereoVisual,
m_openGLView(openGLView),
m_openGLContext(nil),
m_defaultFramebuffer(0),
- m_defaultFramebufferMetalTexture(nil),
m_debug(false)
{
+ /* Init Metal Swapchain. */
+ current_swapchain_index = 0;
+ for (int i = 0; i < METAL_SWAPCHAIN_SIZE; i++) {
+ m_defaultFramebufferMetalTexture[i].texture = nil;
+ m_defaultFramebufferMetalTexture[i].index = i;
+ }
if (m_metalView) {
+ m_ownsMetalDevice = false;
metalInit();
}
+ else {
+ /* Prepare offscreen GHOST Context Metal device. */
+ id<MTLDevice> metalDevice = MTLCreateSystemDefaultDevice();
+
+ if (m_debug) {
+ printf("Selected Metal Device: %s\n", [metalDevice.name UTF8String]);
+ }
+
+ m_ownsMetalDevice = true;
+ if (metalDevice) {
+ m_metalLayer = [[CAMetalLayer alloc] init];
+ [m_metalLayer setEdgeAntialiasingMask:0];
+ [m_metalLayer setMasksToBounds:NO];
+ [m_metalLayer setOpaque:YES];
+ [m_metalLayer setFramebufferOnly:YES];
+ [m_metalLayer setPresentsWithTransaction:NO];
+ [m_metalLayer removeAllAnimations];
+ [m_metalLayer setDevice:metalDevice];
+ m_metalLayer.allowsNextDrawableTimeout = NO;
+ metalInit();
+ }
+ else {
+ ghost_fatal_error_dialog(
+ "[ERROR] Failed to create Metal device for offscreen GHOST Context.\n");
+ }
+ }
+
+ /* Initialise swapinterval. */
+ mtl_SwapInterval = 60;
}
GHOST_ContextCGL::~GHOST_ContextCGL()
{
metalFree();
- if (m_openGLContext != nil) {
- if (m_openGLContext == [NSOpenGLContext currentContext]) {
- [NSOpenGLContext clearCurrentContext];
+ if (!m_useMetalForRendering) {
+#if WITH_OPENGL
+ if (m_openGLContext != nil) {
+ if (m_openGLContext == [NSOpenGLContext currentContext]) {
+ [NSOpenGLContext clearCurrentContext];
- if (m_openGLView) {
- [m_openGLView clearGLContext];
+ if (m_openGLView) {
+ [m_openGLView clearGLContext];
+ }
}
- }
- if (m_openGLContext != s_sharedOpenGLContext || s_sharedCount == 1) {
- assert(s_sharedCount > 0);
+ if (m_openGLContext != s_sharedOpenGLContext || s_sharedCount == 1) {
+ assert(s_sharedCount > 0);
- s_sharedCount--;
+ s_sharedCount--;
- if (s_sharedCount == 0)
- s_sharedOpenGLContext = nil;
+ if (s_sharedCount == 0)
+ s_sharedOpenGLContext = nil;
- [m_openGLContext release];
+ [m_openGLContext release];
+ }
+ }
+#endif
+ }
+
+ if (m_ownsMetalDevice) {
+ if (m_metalLayer) {
+ [m_metalLayer release];
+ m_metalLayer = nil;
}
}
}
GHOST_TSuccess GHOST_ContextCGL::swapBuffers()
{
- if (m_openGLContext != nil) {
- if (m_metalView) {
- metalSwapBuffers();
+ GHOST_TSuccess return_value = GHOST_kFailure;
+
+ if (!m_useMetalForRendering) {
+#if WITH_OPENGL
+ if (m_openGLContext != nil) {
+ if (m_metalView) {
+ metalSwapBuffers();
+ }
+ else if (m_openGLView) {
+ NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
+ [m_openGLContext flushBuffer];
+ [pool drain];
+ }
+ return_value = GHOST_kSuccess;
}
- else if (m_openGLView) {
- NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
- [m_openGLContext flushBuffer];
- [pool drain];
+ else {
+ return_value = GHOST_kFailure;
}
- return GHOST_kSuccess;
+#endif
}
else {
- return GHOST_kFailure;
+ if (m_metalView) {
+ metalSwapBuffers();
+ }
+ return_value = GHOST_kSuccess;
}
+ return return_value;
}
GHOST_TSuccess GHOST_ContextCGL::setSwapInterval(int interval)
{
- if (m_openGLContext != nil) {
- NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
- [m_openGLContext setValues:&interval forParameter:NSOpenGLCPSwapInterval];
- [pool drain];
- return GHOST_kSuccess;
+
+ if (!m_useMetalForRendering) {
+#if WITH_OPENGL
+ if (m_openGLContext != nil) {
+ NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
+ [m_openGLContext setValues:&interval forParameter:NSOpenGLCPSwapInterval];
+ [pool drain];
+ return GHOST_kSuccess;
+ }
+ else {
+ return GHOST_kFailure;
+ }
+#endif
}
else {
- return GHOST_kFailure;
+ mtl_SwapInterval = interval;
+ return GHOST_kSuccess;
}
}
GHOST_TSuccess GHOST_ContextCGL::getSwapInterval(int &intervalOut)
{
- if (m_openGLContext != nil) {
- GLint interval;
- NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
+ if (!m_useMetalForRendering) {
+#if WITH_OPENGL
+ if (m_openGLContext != nil) {
+ GLint interval;
- [m_openGLContext getValues:&interval forParameter:NSOpenGLCPSwapInterval];
+ NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
- [pool drain];
+ [m_openGLContext getValues:&interval forParameter:NSOpenGLCPSwapInterval];
- intervalOut = static_cast<int>(interval);
+ [pool drain];
- return GHOST_kSuccess;
+ intervalOut = static_cast<int>(interval);
+
+ return GHOST_kSuccess;
+ }
+ else {
+ return GHOST_kFailure;
+ }
+#endif
}
else {
- return GHOST_kFailure;
+ intervalOut = mtl_SwapInterval;
+ return GHOST_kSuccess;
}
}
GHOST_TSuccess GHOST_ContextCGL::activateDrawingContext()
{
- if (m_openGLContext != nil) {
- NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
- [m_openGLContext makeCurrentContext];
- [pool drain];
- return GHOST_kSuccess;
+
+ if (!m_useMetalForRendering) {
+#if WITH_OPENGL
+ if (m_openGLContext != nil) {
+ NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
+ [m_openGLContext makeCurrentContext];
+ [pool drain];
+ return GHOST_kSuccess;
+ }
+ else {
+ return GHOST_kFailure;
+ }
+#endif
}
else {
- return GHOST_kFailure;
+ return GHOST_kSuccess;
}
}
GHOST_TSuccess GHOST_ContextCGL::releaseDrawingContext()
{
- if (m_openGLContext != nil) {
- NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
- [NSOpenGLContext clearCurrentContext];
- [pool drain];
- return GHOST_kSuccess;
+
+ if (!m_useMetalForRendering) {
+#if WITH_OPENGL
+ if (m_openGLContext != nil) {
+ NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
+ [NSOpenGLContext clearCurrentContext];
+ [pool drain];
+ return GHOST_kSuccess;
+ }
+ else {
+ return GHOST_kFailure;
+ }
+#endif
}
else {
- return GHOST_kFailure;
+ return GHOST_kSuccess;
}
}
unsigned int GHOST_ContextCGL::getDefaultFramebuffer()
{
- return m_defaultFramebuffer;
+
+ if (!m_useMetalForRendering) {
+ return m_defaultFramebuffer;
+ }
+ /* NOTE(Metal): This is not valid. */
+ return 0;
}
GHOST_TSuccess GHOST_ContextCGL::updateDrawingContext()
{
- if (m_openGLContext != nil) {
- if (m_metalView) {
- metalUpdateFramebuffer();
+
+ if (!m_useMetalForRendering) {
+#if WITH_OPENGL
+ if (m_openGLContext != nil) {
+ if (m_metalView) {
+ metalUpdateFramebuffer();
+ }
+ else if (m_openGLView) {
+ @autoreleasepool {
+ [m_openGLContext update];
+ }
+ }
+
+ return GHOST_kSuccess;
}
- else if (m_openGLView) {
- NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
- [m_openGLContext update];
- [pool drain];
+ else {
+ return GHOST_kFailure;
}
-
- return GHOST_kSuccess;
+#endif
}
else {
- return GHOST_kFailure;
+ if (m_metalView) {
+ metalUpdateFramebuffer();
+ return GHOST_kSuccess;
+ }
}
+ return GHOST_kFailure;
+}
+
+id<MTLTexture> GHOST_ContextCGL::metalOverlayTexture()
+{
+  /* Increment the swap-chain index; only needed when the context requests a new texture. */
+ current_swapchain_index = (current_swapchain_index + 1) % METAL_SWAPCHAIN_SIZE;
+
+ /* Ensure backing texture is ready for current swapchain index */
+ updateDrawingContext();
+
+ /* Return texture. */
+ return m_defaultFramebufferMetalTexture[current_swapchain_index].texture;
+}
+
+MTLCommandQueue *GHOST_ContextCGL::metalCommandQueue()
+{
+ return m_metalCmdQueue;
+}
+MTLDevice *GHOST_ContextCGL::metalDevice()
+{
+ id<MTLDevice> device = m_metalLayer.device;
+ return (MTLDevice *)device;
+}
+
+void GHOST_ContextCGL::metalRegisterPresentCallback(void (*callback)(
+ MTLRenderPassDescriptor *, id<MTLRenderPipelineState>, id<MTLTexture>, id<CAMetalDrawable>))
+{
+ this->contextPresentCallback = callback;
}
static void makeAttribList(std::vector<NSOpenGLPixelFormatAttribute> &attribs,
@@ -241,120 +379,134 @@ GHOST_TSuccess GHOST_ContextCGL::initializeDrawingContext()
#endif
/* Command-line argument would be better. */
- static bool softwareGL = getenv("BLENDER_SOFTWAREGL");
-
- NSOpenGLPixelFormat *pixelFormat = nil;
- std::vector<NSOpenGLPixelFormatAttribute> attribs;
- bool increasedSamplerLimit = false;
-
- /* Attempt to initialize device with increased sampler limit.
- * If this is unsupported and initialization fails, initialize GL Context as normal.
- *
- * NOTE: This is not available when using the SoftwareGL path, or for Intel-based
- * platforms. */
- if (!softwareGL) {
- if (@available(macos 11.0, *)) {
- increasedSamplerLimit = true;
+ if (!m_useMetalForRendering) {
+#if WITH_OPENGL
+ /* Command-line argument would be better. */
+ static bool softwareGL = getenv("BLENDER_SOFTWAREGL");
+
+ NSOpenGLPixelFormat *pixelFormat = nil;
+ std::vector<NSOpenGLPixelFormatAttribute> attribs;
+ bool increasedSamplerLimit = false;
+
+ /* Attempt to initialize device with increased sampler limit.
+ * If this is unsupported and initialization fails, initialize GL Context as normal.
+ *
+ * NOTE: This is not available when using the SoftwareGL path, or for Intel-based
+ * platforms. */
+ if (!softwareGL) {
+ if (@available(macos 11.0, *)) {
+ increasedSamplerLimit = true;
+ }
}
- }
- const int max_ctx_attempts = increasedSamplerLimit ? 2 : 1;
- for (int ctx_create_attempt = 0; ctx_create_attempt < max_ctx_attempts; ctx_create_attempt++) {
-
- attribs.clear();
- attribs.reserve(40);
- makeAttribList(attribs, m_stereoVisual, needAlpha, softwareGL, increasedSamplerLimit);
+ const int max_ctx_attempts = increasedSamplerLimit ? 2 : 1;
+ for (int ctx_create_attempt = 0; ctx_create_attempt < max_ctx_attempts;
+ ctx_create_attempt++) {
+
+ attribs.clear();
+ attribs.reserve(40);
+ makeAttribList(attribs, m_stereoVisual, needAlpha, softwareGL, increasedSamplerLimit);
+
+ pixelFormat = [[NSOpenGLPixelFormat alloc] initWithAttributes:&attribs[0]];
+ if (pixelFormat == nil) {
+ /* If pixel format creation fails when testing increased sampler limit,
+         * attempt initialization again with feature disabled, otherwise, fail. */
+ if (increasedSamplerLimit) {
+ increasedSamplerLimit = false;
+ continue;
+ }
+ return GHOST_kFailure;
+ }
- pixelFormat = [[NSOpenGLPixelFormat alloc] initWithAttributes:&attribs[0]];
- if (pixelFormat == nil) {
- /* If pixel format creation fails when testing increased sampler limit,
- * attempt initialization again with feature disabled, otherwise, fail. */
- if (increasedSamplerLimit) {
- increasedSamplerLimit = false;
- continue;
+ /* Attempt to create context. */
+ m_openGLContext = [[NSOpenGLContext alloc] initWithFormat:pixelFormat
+ shareContext:s_sharedOpenGLContext];
+ [pixelFormat release];
+
+ if (m_openGLContext == nil) {
+ /* If context creation fails when testing increased sampler limit,
+ * attempt re-creation with feature disabled. Otherwise, error. */
+ if (increasedSamplerLimit) {
+ increasedSamplerLimit = false;
+ continue;
+ }
+
+ /* Default context creation attempt failed. */
+ return GHOST_kFailure;
}
- return GHOST_kFailure;
- }
- /* Attempt to create context. */
- m_openGLContext = [[NSOpenGLContext alloc] initWithFormat:pixelFormat
- shareContext:s_sharedOpenGLContext];
- [pixelFormat release];
+ /* Created GL context successfully, activate. */
+ [m_openGLContext makeCurrentContext];
- if (m_openGLContext == nil) {
- /* If context creation fails when testing increased sampler limit,
- * attempt re-creation with feature disabled. Otherwise, error. */
+ /* When increasing sampler limit, verify created context is a supported configuration. */
if (increasedSamplerLimit) {
- increasedSamplerLimit = false;
- continue;
+ const char *vendor = (const char *)glGetString(GL_VENDOR);
+ const char *renderer = (const char *)glGetString(GL_RENDERER);
+
+ /* If generated context type is unsupported, release existing context and
+ * fallback to creating a normal context below. */
+ if (strstr(vendor, "Intel") || strstr(renderer, "Software")) {
+ [m_openGLContext release];
+ m_openGLContext = nil;
+ increasedSamplerLimit = false;
+ continue;
+ }
}
-
- /* Default context creation attempt failed. */
- return GHOST_kFailure;
}
- /* Created GL context successfully, activate. */
- [m_openGLContext makeCurrentContext];
+ if (m_debug) {
+ GLint major = 0, minor = 0;
+ glGetIntegerv(GL_MAJOR_VERSION, &major);
+ glGetIntegerv(GL_MINOR_VERSION, &minor);
+ fprintf(stderr, "OpenGL version %d.%d%s\n", major, minor, softwareGL ? " (software)" : "");
+ fprintf(stderr, "Renderer: %s\n", glGetString(GL_RENDERER));
+ }
- /* When increasing sampler limit, verify created context is a supported configuration. */
- if (increasedSamplerLimit) {
- const char *vendor = (const char *)glGetString(GL_VENDOR);
- const char *renderer = (const char *)glGetString(GL_RENDERER);
-
- /* If generated context type is unsupported, release existing context and
- * fallback to creating a normal context below. */
- if (strstr(vendor, "Intel") || strstr(renderer, "Software")) {
- [m_openGLContext release];
- m_openGLContext = nil;
- increasedSamplerLimit = false;
- continue;
+# ifdef GHOST_WAIT_FOR_VSYNC
+ {
+ GLint swapInt = 1;
+ /* Wait for vertical-sync, to avoid tearing artifacts. */
+ [m_openGLContext setValues:&swapInt forParameter:NSOpenGLCPSwapInterval];
+ }
+# endif
+
+ if (m_metalView) {
+ if (m_defaultFramebuffer == 0) {
+ /* Create a virtual frame-buffer. */
+ [m_openGLContext makeCurrentContext];
+ metalInitFramebuffer();
+ initClearGL();
}
}
- }
+ else if (m_openGLView) {
+ [m_openGLView setOpenGLContext:m_openGLContext];
+ [m_openGLContext setView:m_openGLView];
+ initClearGL();
+ }
- if (m_debug) {
- GLint major = 0, minor = 0;
- glGetIntegerv(GL_MAJOR_VERSION, &major);
- glGetIntegerv(GL_MINOR_VERSION, &minor);
- fprintf(stderr, "OpenGL version %d.%d%s\n", major, minor, softwareGL ? " (software)" : "");
- fprintf(stderr, "Renderer: %s\n", glGetString(GL_RENDERER));
- }
+ [m_openGLContext flushBuffer];
-#ifdef GHOST_WAIT_FOR_VSYNC
- {
- GLint swapInt = 1;
- /* Wait for vertical-sync, to avoid tearing artifacts. */
- [m_openGLContext setValues:&swapInt forParameter:NSOpenGLCPSwapInterval];
- }
-#endif
+ if (s_sharedCount == 0)
+ s_sharedOpenGLContext = m_openGLContext;
- if (m_metalView) {
- if (m_defaultFramebuffer == 0) {
- /* Create a virtual frame-buffer. */
- [m_openGLContext makeCurrentContext];
+ s_sharedCount++;
+#endif
+ }
+ else {
+ /* NOTE(Metal): Metal-only path. */
+ if (m_metalView) {
metalInitFramebuffer();
- initClearGL();
}
}
- else if (m_openGLView) {
- [m_openGLView setOpenGLContext:m_openGLContext];
- [m_openGLContext setView:m_openGLView];
- initClearGL();
- }
-
- [m_openGLContext flushBuffer];
-
- if (s_sharedCount == 0)
- s_sharedOpenGLContext = m_openGLContext;
-
- s_sharedCount++;
}
return GHOST_kSuccess;
}
GHOST_TSuccess GHOST_ContextCGL::releaseNativeHandles()
{
+#if WITH_OPENGL
m_openGLContext = nil;
m_openGLView = nil;
+#endif
m_metalView = nil;
return GHOST_kSuccess;
@@ -404,10 +556,14 @@ void GHOST_ContextCGL::metalInit()
fragment float4 fragment_shader(Vertex v [[stage_in]],
texture2d<float> t [[texture(0)]]) {
- return t.sample(s, v.texCoord);
- }
- )msl";
+        /* The final blit should ensure alpha is 1.0. This resolves
+         * rendering artifacts when blitting the final backbuffer. */
+ float4 out_tex = t.sample(s, v.texCoord);
+ out_tex.a = 1.0;
+ return out_tex;
+ }
+ )msl";
MTLCompileOptions *options = [[[MTLCompileOptions alloc] init] autorelease];
options.languageVersion = MTLLanguageVersion1_1;
@@ -424,6 +580,8 @@ void GHOST_ContextCGL::metalInit()
desc.fragmentFunction = [library newFunctionWithName:@"fragment_shader"];
desc.vertexFunction = [library newFunctionWithName:@"vertex_shader"];
+ /* Ensure library is released. */
+ [library autorelease];
[desc.colorAttachments objectAtIndexedSubscript:0].pixelFormat = METAL_FRAMEBUFFERPIXEL_FORMAT;
@@ -434,6 +592,20 @@ void GHOST_ContextCGL::metalInit()
ghost_fatal_error_dialog(
"GHOST_ContextCGL::metalInit: newRenderPipelineStateWithDescriptor:error: failed!");
}
+
+ /* Create a render pipeline to composite things rendered with Metal on top
+ * of the framebuffer contents. Uses the same vertex and fragment shader
+ * as the blit above, but with alpha blending enabled. */
+ desc.label = @"Metal Overlay";
+ desc.colorAttachments[0].blendingEnabled = YES;
+ desc.colorAttachments[0].sourceRGBBlendFactor = MTLBlendFactorSourceAlpha;
+ desc.colorAttachments[0].destinationRGBBlendFactor = MTLBlendFactorOneMinusSourceAlpha;
+
+ if (error) {
+ ghost_fatal_error_dialog(
+ "GHOST_ContextCGL::metalInit: newRenderPipelineStateWithDescriptor:error: failed (when "
+ "creating the Metal overlay pipeline)!");
+ }
}
}
@@ -445,123 +617,206 @@ void GHOST_ContextCGL::metalFree()
if (m_metalRenderPipeline) {
[m_metalRenderPipeline release];
}
- if (m_defaultFramebufferMetalTexture) {
- [m_defaultFramebufferMetalTexture release];
+
+ for (int i = 0; i < METAL_SWAPCHAIN_SIZE; i++) {
+ if (m_defaultFramebufferMetalTexture[i].texture) {
+ [m_defaultFramebufferMetalTexture[i].texture release];
+ }
}
}
void GHOST_ContextCGL::metalInitFramebuffer()
{
- glGenFramebuffers(1, &m_defaultFramebuffer);
+ if (!m_useMetalForRendering) {
+#if WITH_OPENGL
+ glGenFramebuffers(1, &m_defaultFramebuffer);
+#endif
+ }
updateDrawingContext();
- glBindFramebuffer(GL_FRAMEBUFFER, m_defaultFramebuffer);
+
+ if (!m_useMetalForRendering) {
+#if WITH_OPENGL
+ glBindFramebuffer(GL_FRAMEBUFFER, m_defaultFramebuffer);
+#endif
+ }
}
void GHOST_ContextCGL::metalUpdateFramebuffer()
{
- assert(m_defaultFramebuffer != 0);
+ if (!m_useMetalForRendering) {
+#if WITH_OPENGL
+ assert(m_defaultFramebuffer != 0);
+#endif
+ }
NSRect bounds = [m_metalView bounds];
NSSize backingSize = [m_metalView convertSizeToBacking:bounds.size];
size_t width = (size_t)backingSize.width;
size_t height = (size_t)backingSize.height;
- {
- /* Test if there is anything to update */
- id<MTLTexture> tex = (id<MTLTexture>)m_defaultFramebufferMetalTexture;
- if (tex && tex.width == width && tex.height == height) {
- return;
+#if WITH_OPENGL
+ unsigned int glTex;
+ CVPixelBufferRef cvPixelBuffer = nil;
+ CVOpenGLTextureCacheRef cvGLTexCache = nil;
+ CVOpenGLTextureRef cvGLTex = nil;
+ CVMetalTextureCacheRef cvMetalTexCache = nil;
+ CVMetalTextureRef cvMetalTex = nil;
+#endif
+
+ if (!m_useMetalForRendering) {
+#if WITH_OPENGL
+ /* OPENGL path */
+ {
+ /* Test if there is anything to update */
+ id<MTLTexture> tex = m_defaultFramebufferMetalTexture[current_swapchain_index].texture;
+ if (tex && tex.width == width && tex.height == height) {
+ return;
+ }
}
- }
- activateDrawingContext();
+ activateDrawingContext();
+
+ NSDictionary *cvPixelBufferProps = @{
+ (__bridge NSString *)kCVPixelBufferOpenGLCompatibilityKey : @YES,
+ (__bridge NSString *)kCVPixelBufferMetalCompatibilityKey : @YES,
+ };
+ CVReturn cvret = CVPixelBufferCreate(kCFAllocatorDefault,
+ width,
+ height,
+ METAL_CORE_VIDEO_PIXEL_FORMAT,
+ (__bridge CFDictionaryRef)cvPixelBufferProps,
+ &cvPixelBuffer);
+ if (cvret != kCVReturnSuccess) {
+ ghost_fatal_error_dialog(
+ "GHOST_ContextCGL::metalUpdateFramebuffer: CVPixelBufferCreate failed!");
+ }
- NSDictionary *cvPixelBufferProps = @{
- (__bridge NSString *)kCVPixelBufferOpenGLCompatibilityKey : @YES,
- (__bridge NSString *)kCVPixelBufferMetalCompatibilityKey : @YES,
- };
- CVPixelBufferRef cvPixelBuffer = nil;
- CVReturn cvret = CVPixelBufferCreate(kCFAllocatorDefault,
- width,
- height,
- METAL_CORE_VIDEO_PIXEL_FORMAT,
- (__bridge CFDictionaryRef)cvPixelBufferProps,
- &cvPixelBuffer);
- if (cvret != kCVReturnSuccess) {
- ghost_fatal_error_dialog(
- "GHOST_ContextCGL::metalUpdateFramebuffer: CVPixelBufferCreate failed!");
- }
-
- /* Create an OpenGL texture. */
- CVOpenGLTextureCacheRef cvGLTexCache = nil;
- cvret = CVOpenGLTextureCacheCreate(kCFAllocatorDefault,
- nil,
- m_openGLContext.CGLContextObj,
- m_openGLContext.pixelFormat.CGLPixelFormatObj,
- nil,
- &cvGLTexCache);
- if (cvret != kCVReturnSuccess) {
- ghost_fatal_error_dialog(
- "GHOST_ContextCGL::metalUpdateFramebuffer: CVOpenGLTextureCacheCreate failed!");
- }
+ /* Create an OpenGL texture. */
+ cvret = CVOpenGLTextureCacheCreate(kCFAllocatorDefault,
+ nil,
+ m_openGLContext.CGLContextObj,
+ m_openGLContext.pixelFormat.CGLPixelFormatObj,
+ nil,
+ &cvGLTexCache);
+ if (cvret != kCVReturnSuccess) {
+ ghost_fatal_error_dialog(
+ "GHOST_ContextCGL::metalUpdateFramebuffer: CVOpenGLTextureCacheCreate failed!");
+ }
- CVOpenGLTextureRef cvGLTex = nil;
- cvret = CVOpenGLTextureCacheCreateTextureFromImage(
- kCFAllocatorDefault, cvGLTexCache, cvPixelBuffer, nil, &cvGLTex);
- if (cvret != kCVReturnSuccess) {
- ghost_fatal_error_dialog(
- "GHOST_ContextCGL::metalUpdateFramebuffer: "
- "CVOpenGLTextureCacheCreateTextureFromImage failed!");
- }
+ cvret = CVOpenGLTextureCacheCreateTextureFromImage(
+ kCFAllocatorDefault, cvGLTexCache, cvPixelBuffer, nil, &cvGLTex);
+ if (cvret != kCVReturnSuccess) {
+ ghost_fatal_error_dialog(
+ "GHOST_ContextCGL::metalUpdateFramebuffer: "
+ "CVOpenGLTextureCacheCreateTextureFromImage failed!");
+ }
- unsigned int glTex;
- glTex = CVOpenGLTextureGetName(cvGLTex);
+ glTex = CVOpenGLTextureGetName(cvGLTex);
- /* Create a Metal texture. */
- CVMetalTextureCacheRef cvMetalTexCache = nil;
- cvret = CVMetalTextureCacheCreate(
- kCFAllocatorDefault, nil, m_metalLayer.device, nil, &cvMetalTexCache);
- if (cvret != kCVReturnSuccess) {
- ghost_fatal_error_dialog(
- "GHOST_ContextCGL::metalUpdateFramebuffer: CVMetalTextureCacheCreate failed!");
- }
+ /* Create a Metal texture. */
+ cvret = CVMetalTextureCacheCreate(
+ kCFAllocatorDefault, nil, m_metalLayer.device, nil, &cvMetalTexCache);
+ if (cvret != kCVReturnSuccess) {
+ ghost_fatal_error_dialog(
+ "GHOST_ContextCGL::metalUpdateFramebuffer: CVMetalTextureCacheCreate failed!");
+ }
- CVMetalTextureRef cvMetalTex = nil;
- cvret = CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault,
- cvMetalTexCache,
- cvPixelBuffer,
- nil,
- METAL_FRAMEBUFFERPIXEL_FORMAT,
- width,
- height,
- 0,
- &cvMetalTex);
- if (cvret != kCVReturnSuccess) {
- ghost_fatal_error_dialog(
- "GHOST_ContextCGL::metalUpdateFramebuffer: "
- "CVMetalTextureCacheCreateTextureFromImage failed!");
- }
+ cvret = CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault,
+ cvMetalTexCache,
+ cvPixelBuffer,
+ nil,
+ METAL_FRAMEBUFFERPIXEL_FORMAT,
+ width,
+ height,
+ 0,
+ &cvMetalTex);
+ if (cvret != kCVReturnSuccess) {
+ ghost_fatal_error_dialog(
+ "GHOST_ContextCGL::metalUpdateFramebuffer: "
+ "CVMetalTextureCacheCreateTextureFromImage failed!");
+ }
- MTLTexture *tex = (MTLTexture *)CVMetalTextureGetTexture(cvMetalTex);
+ id<MTLTexture> tex = CVMetalTextureGetTexture(cvMetalTex);
- if (!tex) {
- ghost_fatal_error_dialog(
- "GHOST_ContextCGL::metalUpdateFramebuffer: CVMetalTextureGetTexture failed!");
+ if (!tex) {
+ ghost_fatal_error_dialog(
+ "GHOST_ContextCGL::metalUpdateFramebuffer: CVMetalTextureGetTexture failed!");
+ }
+
+ [m_defaultFramebufferMetalTexture[current_swapchain_index].texture release];
+ m_defaultFramebufferMetalTexture[current_swapchain_index].texture = [tex retain];
+#endif
}
+ else {
+ /* NOTE(Metal): Metal API Path. */
+ if (m_defaultFramebufferMetalTexture[current_swapchain_index].texture &&
+ m_defaultFramebufferMetalTexture[current_swapchain_index].texture.width == width &&
+ m_defaultFramebufferMetalTexture[current_swapchain_index].texture.height == height) {
+ return;
+ }
- [m_defaultFramebufferMetalTexture release];
- m_defaultFramebufferMetalTexture = [tex retain];
+ /* Free old texture */
+ [m_defaultFramebufferMetalTexture[current_swapchain_index].texture release];
- glBindFramebuffer(GL_FRAMEBUFFER, m_defaultFramebuffer);
- glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_RECTANGLE, glTex, 0);
+ id<MTLDevice> device = m_metalLayer.device;
+ MTLTextureDescriptor *overlayDesc = [MTLTextureDescriptor
+ texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA16Float
+ width:width
+ height:height
+ mipmapped:NO];
+ overlayDesc.storageMode = MTLStorageModePrivate;
+ overlayDesc.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead;
+
+ id<MTLTexture> overlayTex = [device newTextureWithDescriptor:overlayDesc];
+ if (!overlayTex) {
+ ghost_fatal_error_dialog(
+ "GHOST_ContextCGL::metalUpdateFramebuffer: failed to create Metal overlay texture!");
+ }
+ else {
+ overlayTex.label = [NSString
+          stringWithFormat:@"Metal Overlay for GHOST Context %p", this];
- [m_metalLayer setDrawableSize:CGSizeMake((CGFloat)width, (CGFloat)height)];
+ // NSLog(@"Created new Metal Overlay (backbuffer) for context %p\n", this);
+ }
+
+ m_defaultFramebufferMetalTexture[current_swapchain_index].texture =
+        overlayTex;
+
+ /* Clear texture on create */
+ id<MTLCommandBuffer> cmdBuffer = [m_metalCmdQueue commandBuffer];
+ MTLRenderPassDescriptor *passDescriptor = [MTLRenderPassDescriptor renderPassDescriptor];
+ {
+ auto attachment = [passDescriptor.colorAttachments objectAtIndexedSubscript:0];
+ attachment.texture = m_defaultFramebufferMetalTexture[current_swapchain_index].texture;
+ attachment.loadAction = MTLLoadActionClear;
+ attachment.clearColor = MTLClearColorMake(0.294, 0.294, 0.294, 1.000);
+ attachment.storeAction = MTLStoreActionStore;
+ }
+ {
+ id<MTLRenderCommandEncoder> enc = [cmdBuffer
+ renderCommandEncoderWithDescriptor:passDescriptor];
+ [enc endEncoding];
+ }
+ [cmdBuffer commit];
+ }
- CVPixelBufferRelease(cvPixelBuffer);
- CVOpenGLTextureCacheRelease(cvGLTexCache);
- CVOpenGLTextureRelease(cvGLTex);
- CFRelease(cvMetalTexCache);
- CFRelease(cvMetalTex);
+ if (!m_useMetalForRendering) {
+#if WITH_OPENGL
+ glBindFramebuffer(GL_FRAMEBUFFER, m_defaultFramebuffer);
+ glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_RECTANGLE, glTex, 0);
+#endif
+ }
+
+ [m_metalLayer setDrawableSize:CGSizeMake((CGFloat)width, (CGFloat)height)];
+ if (!m_useMetalForRendering) {
+#if WITH_OPENGL
+ CVPixelBufferRelease(cvPixelBuffer);
+ CVOpenGLTextureCacheRelease(cvGLTexCache);
+ CVOpenGLTextureRelease(cvGLTex);
+ CFRelease(cvMetalTexCache);
+ CFRelease(cvMetalTex);
+#endif
+ }
}
void GHOST_ContextCGL::metalSwapBuffers()
@@ -570,40 +825,88 @@ void GHOST_ContextCGL::metalSwapBuffers()
@autoreleasepool {
/* clang-format on */
updateDrawingContext();
- glFlush();
- assert(m_defaultFramebufferMetalTexture != 0);
+ if (!m_useMetalForRendering) {
+#if WITH_OPENGL
+ glFlush();
+ assert(m_defaultFramebufferMetalTexture[current_swapchain_index].texture != nil);
+#endif
+ }
id<CAMetalDrawable> drawable = [m_metalLayer nextDrawable];
if (!drawable) {
return;
}
- id<MTLCommandBuffer> cmdBuffer = [m_metalCmdQueue commandBuffer];
-
MTLRenderPassDescriptor *passDescriptor = [MTLRenderPassDescriptor renderPassDescriptor];
{
auto attachment = [passDescriptor.colorAttachments objectAtIndexedSubscript:0];
attachment.texture = drawable.texture;
- attachment.loadAction = MTLLoadActionDontCare;
+ attachment.loadAction = MTLLoadActionClear;
+ attachment.clearColor = MTLClearColorMake(1.0, 0.294, 0.294, 1.000);
attachment.storeAction = MTLStoreActionStore;
}
- id<MTLTexture> srcTexture = (id<MTLTexture>)m_defaultFramebufferMetalTexture;
+ if (!m_useMetalForRendering) {
+ id<MTLCommandBuffer> cmdBuffer = [m_metalCmdQueue commandBuffer];
+ {
+ assert(m_defaultFramebufferMetalTexture[current_swapchain_index].texture != nil);
+ id<MTLRenderCommandEncoder> enc = [cmdBuffer
+ renderCommandEncoderWithDescriptor:passDescriptor];
+ [enc setRenderPipelineState:(id<MTLRenderPipelineState>)m_metalRenderPipeline];
+ [enc setFragmentTexture:m_defaultFramebufferMetalTexture[current_swapchain_index].texture
+ atIndex:0];
+ [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3];
+ [enc endEncoding];
+ }
+
+ [cmdBuffer presentDrawable:drawable];
+ /* Submit command buffer */
+ [cmdBuffer commit];
+ }
+ else {
+ assert(contextPresentCallback);
+ assert(m_defaultFramebufferMetalTexture[current_swapchain_index].texture != nil);
+ (*contextPresentCallback)(passDescriptor,
+ (id<MTLRenderPipelineState>)m_metalRenderPipeline,
+ m_defaultFramebufferMetalTexture[current_swapchain_index].texture,
+ drawable);
+ }
+ }
+}
+
+void GHOST_ContextCGL::initClear()
+{
+
+ if (!m_useMetalForRendering) {
+#if WITH_OPENGL
+ glClearColor(0.294, 0.294, 0.294, 0.000);
+ glClear(GL_COLOR_BUFFER_BIT);
+ glClearColor(0.000, 0.000, 0.000, 0.000);
+#endif
+ }
+ else {
+#if WITH_METAL
+    // TODO(mg_gpusw_apple): this path is never taken; it is legacy code left from the initial
+    // integration of Metal and GL. The whole file should be cleaned up and stripped of the legacy path.
+ id<MTLCommandBuffer> cmdBuffer = [m_metalCmdQueue commandBuffer];
+ MTLRenderPassDescriptor *passDescriptor = [MTLRenderPassDescriptor renderPassDescriptor];
+ {
+ auto attachment = [passDescriptor.colorAttachments objectAtIndexedSubscript:0];
+ attachment.texture = m_defaultFramebufferMetalTexture[current_swapchain_index].texture;
+ attachment.loadAction = MTLLoadActionClear;
+ attachment.clearColor = MTLClearColorMake(0.294, 0.294, 0.294, 1.000);
+ attachment.storeAction = MTLStoreActionStore;
+ }
+
+ // encoding
{
id<MTLRenderCommandEncoder> enc = [cmdBuffer
renderCommandEncoderWithDescriptor:passDescriptor];
-
- [enc setRenderPipelineState:(id<MTLRenderPipelineState>)m_metalRenderPipeline];
- [enc setFragmentTexture:srcTexture atIndex:0];
- [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3];
-
[enc endEncoding];
}
-
- [cmdBuffer presentDrawable:drawable];
-
[cmdBuffer commit];
+#endif
}
}
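The blit in metalSwapBuffers() above draws three vertices with no vertex buffer bound, i.e. a full-screen triangle, and the fragment shader in metalInit() forces alpha to 1.0 on the final blit. For reference, a typical full-screen-triangle vertex shader of the kind this relies on; the actual shader string lives in metalInit() and is not shown in this hunk, so this version is an assumption:

static const char *fullscreen_triangle_vert = R"msl(
  using namespace metal;

  struct Vertex {
    float4 position [[position]];
    float2 texCoord;
  };

  /* Three vertex IDs expand into one oversized triangle covering the
   * viewport, avoiding the diagonal seam of a two-triangle quad. */
  vertex Vertex vertex_shader(uint v_id [[vertex_id]]) {
    Vertex v;
    float2 uv = float2((v_id << 1) & 2, v_id & 2);
    v.texCoord = uv;
    v.position = float4(uv * 2.0 - 1.0, 0.5, 1.0);
    return v;
  }
)msl";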
diff --git a/intern/ghost/intern/GHOST_Window.cpp b/intern/ghost/intern/GHOST_Window.cpp
index db4d6c3bb71..da292a90869 100644
--- a/intern/ghost/intern/GHOST_Window.cpp
+++ b/intern/ghost/intern/GHOST_Window.cpp
@@ -92,6 +92,11 @@ GHOST_TSuccess GHOST_Window::getSwapInterval(int &intervalOut)
return m_context->getSwapInterval(intervalOut);
}
+GHOST_Context *GHOST_Window::getContext()
+{
+ return m_context;
+}
+
unsigned int GHOST_Window::getDefaultFramebuffer()
{
return (m_context) ? m_context->getDefaultFramebuffer() : 0;
diff --git a/intern/ghost/intern/GHOST_Window.h b/intern/ghost/intern/GHOST_Window.h
index 2c2b75a6bd5..8e1f73d3430 100644
--- a/intern/ghost/intern/GHOST_Window.h
+++ b/intern/ghost/intern/GHOST_Window.h
@@ -72,7 +72,7 @@ class GHOST_Window : public GHOST_IWindow {
* Returns indication as to whether the window is valid.
* \return The validity of the window.
*/
- virtual bool getValid() const
+ virtual bool getValid() const override
{
return m_context != NULL;
}
@@ -81,15 +81,15 @@ class GHOST_Window : public GHOST_IWindow {
* Returns the associated OS object/handle
* \return The associated OS object/handle
*/
- virtual void *getOSWindow() const;
+ virtual void *getOSWindow() const override;
/**
* Returns the current cursor shape.
* \return The current cursor shape.
*/
- inline GHOST_TStandardCursor getCursorShape() const;
+ inline GHOST_TStandardCursor getCursorShape() const override;
- inline bool isDialog() const
+ inline bool isDialog() const override
{
return false;
}
@@ -99,7 +99,7 @@ class GHOST_Window : public GHOST_IWindow {
* \param cursorShape: The new cursor shape type id.
* \return Indication of success.
*/
- GHOST_TSuccess setCursorShape(GHOST_TStandardCursor cursorShape);
+ GHOST_TSuccess setCursorShape(GHOST_TStandardCursor cursorShape) override;
/**
* Set the shape of the cursor to a custom cursor.
@@ -115,15 +115,15 @@ class GHOST_Window : public GHOST_IWindow {
int sizey,
int hotX,
int hotY,
- bool canInvertColor);
+ bool canInvertColor) override;
- GHOST_TSuccess getCursorBitmap(GHOST_CursorBitmapRef *bitmap);
+ GHOST_TSuccess getCursorBitmap(GHOST_CursorBitmapRef *bitmap) override;
/**
* Returns the visibility state of the cursor.
* \return The visibility state of the cursor.
*/
- inline bool getCursorVisibility() const;
+ inline bool getCursorVisibility() const override;
inline GHOST_TGrabCursorMode getCursorGrabMode() const;
inline bool getCursorGrabModeIsWarp() const;
inline GHOST_TAxisFlag getCursorGrabAxis() const;
@@ -136,7 +136,7 @@ class GHOST_Window : public GHOST_IWindow {
* \param visible: The new visibility state of the cursor.
* \return Indication of success.
*/
- GHOST_TSuccess setCursorVisibility(bool visible);
+ GHOST_TSuccess setCursorVisibility(bool visible) override;
/**
* Sets the cursor grab.
@@ -146,28 +146,28 @@ class GHOST_Window : public GHOST_IWindow {
GHOST_TSuccess setCursorGrab(GHOST_TGrabCursorMode mode,
GHOST_TAxisFlag wrap_axis,
GHOST_Rect *bounds,
- int32_t mouse_ungrab_xy[2]);
+ int32_t mouse_ungrab_xy[2]) override;
/**
* Gets the cursor grab region, if unset the window is used.
* reset when grab is disabled.
*/
- GHOST_TSuccess getCursorGrabBounds(GHOST_Rect &bounds);
+ GHOST_TSuccess getCursorGrabBounds(GHOST_Rect &bounds) override;
void getCursorGrabState(GHOST_TGrabCursorMode &mode,
GHOST_TAxisFlag &axis_flag,
GHOST_Rect &bounds,
- bool &use_software_cursor);
+ bool &use_software_cursor) override;
/**
* Return true when a software cursor should be used.
*/
- bool getCursorGrabUseSoftwareDisplay();
+ bool getCursorGrabUseSoftwareDisplay() override;
/**
* Sets the progress bar value displayed in the window/application icon
* \param progress: The progress percentage (0.0 to 1.0).
*/
- virtual GHOST_TSuccess setProgressBar(float /*progress*/)
+ virtual GHOST_TSuccess setProgressBar(float /*progress*/) override
{
return GHOST_kFailure;
}
@@ -175,7 +175,7 @@ class GHOST_Window : public GHOST_IWindow {
/**
* Hides the progress bar in the icon
*/
- virtual GHOST_TSuccess endProgressBar()
+ virtual GHOST_TSuccess endProgressBar() override
{
return GHOST_kFailure;
}
@@ -185,43 +185,43 @@ class GHOST_Window : public GHOST_IWindow {
* \param interval: The swap interval to use.
* \return A boolean success indicator.
*/
- GHOST_TSuccess setSwapInterval(int interval);
+ GHOST_TSuccess setSwapInterval(int interval) override;
/**
* Gets the current swap interval for #swapBuffers.
* \return An integer.
*/
- GHOST_TSuccess getSwapInterval(int &intervalOut);
+ GHOST_TSuccess getSwapInterval(int &intervalOut) override;
/**
* Tells if the ongoing drag'n'drop object can be accepted upon mouse drop
*/
- void setAcceptDragOperation(bool canAccept);
+ void setAcceptDragOperation(bool canAccept) override;
/**
* Returns acceptance of the dropped object
* Usually called by the "object dropped" event handling function
*/
- bool canAcceptDragOperation() const;
+ bool canAcceptDragOperation() const override;
/**
* Sets the window "modified" status, indicating unsaved changes
* \param isUnsavedChanges: Unsaved changes or not.
* \return Indication of success.
*/
- virtual GHOST_TSuccess setModifiedState(bool isUnsavedChanges);
+ virtual GHOST_TSuccess setModifiedState(bool isUnsavedChanges) override;
/**
* Gets the window "modified" status, indicating unsaved changes
* \return True if there are unsaved changes
*/
- virtual bool getModifiedState();
+ virtual bool getModifiedState() override;
/**
* Returns the type of drawing context used in this window.
* \return The current type of drawing context.
*/
- inline GHOST_TDrawingContextType getDrawingContextType();
+ inline GHOST_TDrawingContextType getDrawingContextType() override;
/**
* Tries to install a rendering context in this window.
@@ -230,19 +230,19 @@ class GHOST_Window : public GHOST_IWindow {
* \param type: The type of rendering context installed.
* \return Indication as to whether installation has succeeded.
*/
- GHOST_TSuccess setDrawingContextType(GHOST_TDrawingContextType type);
+ GHOST_TSuccess setDrawingContextType(GHOST_TDrawingContextType type) override;
/**
* Swaps front and back buffers of a window.
* \return A boolean success indicator.
*/
- virtual GHOST_TSuccess swapBuffers();
+ virtual GHOST_TSuccess swapBuffers() override;
/**
* Activates the drawing context of this window.
* \return A boolean success indicator.
*/
- virtual GHOST_TSuccess activateDrawingContext();
+ virtual GHOST_TSuccess activateDrawingContext() override;
/**
* Updates the drawing context of this window. Needed
@@ -252,16 +252,22 @@ class GHOST_Window : public GHOST_IWindow {
GHOST_TSuccess updateDrawingContext();
/**
- * Gets the OpenGL frame-buffer associated with the window's contents.
- * \return The ID of an OpenGL frame-buffer object.
+ * Get the drawing context associated with this window.
+   * \return Pointer to the context object.
*/
- virtual unsigned int getDefaultFramebuffer();
+ GHOST_Context *getContext();
+
+ /**
+ * Gets the OpenGL framebuffer associated with the window's contents.
+ * \return The ID of an OpenGL framebuffer object.
+ */
+ virtual unsigned int getDefaultFramebuffer() override;
/**
* Returns the window user data.
* \return The window user data.
*/
- inline GHOST_TUserDataPtr getUserData() const
+ inline GHOST_TUserDataPtr getUserData() const override
{
return m_userData;
}
@@ -270,12 +276,12 @@ class GHOST_Window : public GHOST_IWindow {
* Changes the window user data.
* \param userData: The window user data.
*/
- void setUserData(const GHOST_TUserDataPtr userData)
+ void setUserData(const GHOST_TUserDataPtr userData) override
{
m_userData = userData;
}
- float getNativePixelSize(void)
+ float getNativePixelSize(void) override
{
if (m_nativePixelSize > 0.0f)
return m_nativePixelSize;
@@ -286,18 +292,18 @@ class GHOST_Window : public GHOST_IWindow {
* Returns the recommended DPI for this window.
* \return The recommended DPI for this window.
*/
- virtual inline uint16_t getDPIHint()
+ virtual inline uint16_t getDPIHint() override
{
return 96;
}
#ifdef WITH_INPUT_IME
- virtual void beginIME(int32_t x, int32_t y, int32_t w, int32_t h, bool completed)
+ virtual void beginIME(int32_t x, int32_t y, int32_t w, int32_t h, bool completed) override
{
/* do nothing temporarily if not in windows */
}
- virtual void endIME()
+ virtual void endIME() override
{
/* do nothing temporarily if not in windows */
}
diff --git a/intern/ghost/test/multitest/MultiTest.c b/intern/ghost/test/multitest/MultiTest.c
index 99b88dfb525..6a6a042f4ac 100644
--- a/intern/ghost/test/multitest/MultiTest.c
+++ b/intern/ghost/test/multitest/MultiTest.c
@@ -323,7 +323,7 @@ MainWindow *mainwindow_new(MultiTestApp *app)
if (win) {
MainWindow *mw = MEM_callocN(sizeof(*mw), "mainwindow_new");
- mw->gpu_context = GPU_context_create(win);
+ mw->gpu_context = GPU_context_create(win, NULL);
GPU_init();
mw->app = app;
@@ -578,7 +578,7 @@ LoggerWindow *loggerwindow_new(MultiTestApp *app)
if (win) {
LoggerWindow *lw = MEM_callocN(sizeof(*lw), "loggerwindow_new");
- lw->gpu_context = GPU_context_create(win);
+ lw->gpu_context = GPU_context_create(win, NULL);
GPU_init();
int bbox[2][2];
@@ -780,7 +780,7 @@ ExtraWindow *extrawindow_new(MultiTestApp *app)
if (win) {
ExtraWindow *ew = MEM_callocN(sizeof(*ew), "mainwindow_new");
- ew->gpu_context = GPU_context_create(win);
+ ew->gpu_context = GPU_context_create(win, NULL);
GPU_init();
ew->app = app;
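GPU_context_create() now takes a second argument so callers can hand an existing GHOST context to the GPU module, which the Metal backend needs in order to reuse the pre-initialized GPU device. A sketch of the two call shapes used throughout this patch (parameter names are assumptions; compare the eevee_lightcache.c and draw_manager.c hunks below):

#include "GPU_context.h"

static GPUContext *create_gpu_context(void *ghost_window, void *ghost_context)
{
  if (ghost_window) {
    /* Window-backed: the backend derives drawable and device from the window. */
    return GPU_context_create(ghost_window, NULL);
  }
  /* Offscreen: reuse a GHOST context created up front so the same device is
   * shared with the viewport. */
  return GPU_context_create(NULL, ghost_context);
}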
diff --git a/source/blender/draw/DRW_engine.h b/source/blender/draw/DRW_engine.h
index dec7a22aadb..04e3bddfb6c 100644
--- a/source/blender/draw/DRW_engine.h
+++ b/source/blender/draw/DRW_engine.h
@@ -201,6 +201,7 @@ void DRW_gpu_render_context_enable(void *re_gpu_context);
void DRW_gpu_render_context_disable(void *re_gpu_context);
void DRW_deferred_shader_remove(struct GPUMaterial *mat);
+void DRW_deferred_shader_optimize_remove(struct GPUMaterial *mat);
/**
* Get DrawData from the given ID-block. In order for this to work, we assume that
diff --git a/source/blender/draw/engines/eevee/eevee_lightcache.c b/source/blender/draw/engines/eevee/eevee_lightcache.c
index 614ea0b0892..0fd87ef43f0 100644
--- a/source/blender/draw/engines/eevee/eevee_lightcache.c
+++ b/source/blender/draw/engines/eevee/eevee_lightcache.c
@@ -597,7 +597,7 @@ static void eevee_lightbake_context_enable(EEVEE_LightBake *lbake)
if (lbake->gl_context) {
DRW_opengl_render_context_enable(lbake->gl_context);
if (lbake->gpu_context == NULL) {
- lbake->gpu_context = GPU_context_create(NULL);
+ lbake->gpu_context = GPU_context_create(NULL, lbake->gl_context);
}
DRW_gpu_render_context_enable(lbake->gpu_context);
}
diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.cc b/source/blender/draw/engines/eevee_next/eevee_shader.cc
index 64b1d4891a9..05ff06e7435 100644
--- a/source/blender/draw/engines/eevee_next/eevee_shader.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_shader.cc
@@ -471,6 +471,8 @@ GPUMaterial *ShaderModule::material_shader_get(const char *name,
this);
GPU_material_status_set(gpumat, GPU_MAT_QUEUED);
GPU_material_compile(gpumat);
+ /* Queue deferred material optimization. */
+ DRW_shader_queue_optimize_material(gpumat);
return gpumat;
}
diff --git a/source/blender/draw/intern/DRW_render.h b/source/blender/draw/intern/DRW_render.h
index 7b80ffd2b88..4bdef577e44 100644
--- a/source/blender/draw/intern/DRW_render.h
+++ b/source/blender/draw/intern/DRW_render.h
@@ -251,6 +251,7 @@ struct GPUMaterial *DRW_shader_from_material(struct Material *ma,
bool deferred,
GPUCodegenCallbackFn callback,
void *thunk);
+void DRW_shader_queue_optimize_material(struct GPUMaterial *mat);
void DRW_shader_free(struct GPUShader *shader);
#define DRW_SHADER_FREE_SAFE(shader) \
do { \
diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c
index e1bee89db60..eab79652762 100644
--- a/source/blender/draw/intern/draw_manager.c
+++ b/source/blender/draw/intern/draw_manager.c
@@ -3139,7 +3139,7 @@ void DRW_opengl_context_create(void)
DST.gl_context = WM_opengl_context_create();
WM_opengl_context_activate(DST.gl_context);
/* Be sure to create gpu_context too. */
- DST.gpu_context = GPU_context_create(NULL);
+ DST.gpu_context = GPU_context_create(0, DST.gl_context);
/* So we activate the window's one afterwards. */
wm_window_reset_drawable();
}
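The draw_manager_shader.c changes below split deferred work into two queues so core compilations always finish before optimization passes run. A distilled sketch of the worker loop they implement; queue_pop() is a hypothetical helper standing in for the locked BLI_poptail() calls in the real code:

static void worker_loop(DRWShaderCompiler *comp)
{
  for (;;) {
    /* Compilation jobs always drain first. */
    GPUMaterial *mat = queue_pop(&comp->queue);
    if (mat) {
      GPU_material_compile(mat);
      continue;
    }
    /* Only when no compilations remain, take a single optimization job,
     * then loop back in case new compilations arrived meanwhile. */
    mat = queue_pop(&comp->optimize_queue);
    if (mat == NULL) {
      break; /* Both queues empty: the worker exits. */
    }
    GPU_material_optimize(mat);
  }
}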
diff --git a/source/blender/draw/intern/draw_manager_shader.c b/source/blender/draw/intern/draw_manager_shader.c
index 4bc3898c5e7..6f8df54ead3 100644
--- a/source/blender/draw/intern/draw_manager_shader.c
+++ b/source/blender/draw/intern/draw_manager_shader.c
@@ -51,9 +51,13 @@ extern char datatoc_common_fullscreen_vert_glsl[];
* \{ */
typedef struct DRWShaderCompiler {
+ /** Default compilation queue. */
ListBase queue; /* GPUMaterial */
SpinLock list_lock;
+ /** Optimization queue. */
+ ListBase optimize_queue; /* GPUMaterial */
+
void *gl_context;
GPUContext *gpu_context;
bool own_context;
@@ -109,7 +113,29 @@ static void drw_deferred_shader_compilation_exec(
MEM_freeN(link);
}
else {
- break;
+ /* Check for Material Optimization job once there are no more
+ * shaders to compile. */
+ BLI_spin_lock(&comp->list_lock);
+ /* Pop tail because it will be less likely to lock the main thread
+ * if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */
+ LinkData *link = (LinkData *)BLI_poptail(&comp->optimize_queue);
+ GPUMaterial *optimize_mat = link ? (GPUMaterial *)link->data : NULL;
+ if (optimize_mat) {
+ /* Avoid another thread freeing the material during optimization. */
+ GPU_material_acquire(optimize_mat);
+ }
+ BLI_spin_unlock(&comp->list_lock);
+
+ if (optimize_mat) {
+ /* Compile optimized material shader. */
+ GPU_material_optimize(optimize_mat);
+ GPU_material_release(optimize_mat);
+ MEM_freeN(link);
+ }
+ else {
+ /* No more materials to optimize, or shaders to compile. */
+ break;
+ }
}
if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
@@ -131,6 +157,7 @@ static void drw_deferred_shader_compilation_free(void *custom_data)
BLI_spin_lock(&comp->list_lock);
BLI_freelistN(&comp->queue);
+ BLI_freelistN(&comp->optimize_queue);
BLI_spin_unlock(&comp->list_lock);
if (comp->own_context) {
@@ -146,34 +173,13 @@ static void drw_deferred_shader_compilation_free(void *custom_data)
MEM_freeN(comp);
}
-static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
+/**
+ * Append either a shader compilation or an optimization job to the deferred queue and
+ * ensure the shader compilation worker is active.
+ * We keep two separate queues to ensure core compilations always complete before optimization.
+ */
+static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job)
{
- if (ELEM(GPU_material_status(mat), GPU_MAT_SUCCESS, GPU_MAT_FAILED)) {
- return;
- }
- /* Do not defer the compilation if we are rendering for image.
- * deferred rendering is only possible when `evil_C` is available */
- if (DST.draw_ctx.evil_C == NULL || DRW_state_is_image_render() || !USE_DEFERRED_COMPILATION) {
- deferred = false;
- }
-
- if (!deferred) {
- DRW_deferred_shader_remove(mat);
- /* Shaders could already be compiling. Have to wait for compilation to finish. */
- while (GPU_material_status(mat) == GPU_MAT_QUEUED) {
- PIL_sleep_ms(20);
- }
- if (GPU_material_status(mat) == GPU_MAT_CREATED) {
- GPU_material_compile(mat);
- }
- return;
- }
-
- /* Don't add material to the queue twice. */
- if (GPU_material_status(mat) == GPU_MAT_QUEUED) {
- return;
- }
-
const bool use_main_context = GPU_use_main_context_workaround();
const bool job_own_context = !use_main_context;
@@ -194,6 +200,7 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
if (old_comp) {
BLI_spin_lock(&old_comp->list_lock);
BLI_movelisttolist(&comp->queue, &old_comp->queue);
+ BLI_movelisttolist(&comp->optimize_queue, &old_comp->optimize_queue);
BLI_spin_unlock(&old_comp->list_lock);
/* Do not recreate context, just pass ownership. */
if (old_comp->gl_context) {
@@ -204,9 +211,18 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
}
}
- GPU_material_status_set(mat, GPU_MAT_QUEUED);
- LinkData *node = BLI_genericNodeN(mat);
- BLI_addtail(&comp->queue, node);
+ /* Add to either compilation or optimization queue. */
+ if (is_optimization_job) {
+ BLI_assert(GPU_material_optimization_status(mat) != GPU_MAT_OPTIMIZATION_QUEUED);
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_QUEUED);
+ LinkData *node = BLI_genericNodeN(mat);
+ BLI_addtail(&comp->optimize_queue, node);
+ }
+ else {
+ GPU_material_status_set(mat, GPU_MAT_QUEUED);
+ LinkData *node = BLI_genericNodeN(mat);
+ BLI_addtail(&comp->queue, node);
+ }
/* Create only one context. */
if (comp->gl_context == NULL) {
@@ -216,7 +232,7 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
}
else {
comp->gl_context = WM_opengl_context_create();
- comp->gpu_context = GPU_context_create(NULL);
+ comp->gpu_context = GPU_context_create(NULL, comp->gl_context);
GPU_context_active_set(NULL);
WM_opengl_context_activate(DST.gl_context);
@@ -235,6 +251,39 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
WM_jobs_start(wm, wm_job);
}
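For context, the worker thread shown above drains the two queues in strict priority order. The following condensed sketch (illustrative only; it simplifies the locking and material acquisition from the hunk above, and assumes the DRWShaderCompiler fields introduced by this patch) restates that pop logic:

static void worker_drain_sketch(DRWShaderCompiler *comp)
{
  while (true) {
    BLI_spin_lock(&comp->list_lock);
    /* Always prefer a pending compilation job. */
    LinkData *link = (LinkData *)BLI_pophead(&comp->queue);
    if (link == NULL) {
      /* Only when no compilations remain, take an optimization job. */
      link = (LinkData *)BLI_pophead(&comp->optimize_queue);
    }
    BLI_spin_unlock(&comp->list_lock);
    if (link == NULL) {
      break; /* Both queues are empty. */
    }
    /* ... compile or optimize link->data, then MEM_freeN(link). */
  }
}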
+static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
+{
+ if (ELEM(GPU_material_status(mat), GPU_MAT_SUCCESS, GPU_MAT_FAILED)) {
+ return;
+ }
+
+ /* Do not defer compilation if we are rendering an image.
+ * Deferred compilation is only possible when `evil_C` is available. */
+ if (DST.draw_ctx.evil_C == NULL || DRW_state_is_image_render() || !USE_DEFERRED_COMPILATION) {
+ deferred = false;
+ }
+
+ if (!deferred) {
+ DRW_deferred_shader_remove(mat);
+ /* Shaders could already be compiling. Have to wait for compilation to finish. */
+ while (GPU_material_status(mat) == GPU_MAT_QUEUED) {
+ PIL_sleep_ms(20);
+ }
+ if (GPU_material_status(mat) == GPU_MAT_CREATED) {
+ GPU_material_compile(mat);
+ }
+ return;
+ }
+
+ /* Don't add material to the queue twice. */
+ if (GPU_material_status(mat) == GPU_MAT_QUEUED) {
+ return;
+ }
+
+ /* Add deferred shader compilation to queue. */
+ drw_deferred_queue_append(mat, false);
+}
+
void DRW_deferred_shader_remove(GPUMaterial *mat)
{
LISTBASE_FOREACH (wmWindowManager *, wm, &G_MAIN->wm) {
@@ -243,14 +292,49 @@ void DRW_deferred_shader_remove(GPUMaterial *mat)
wm, wm, WM_JOB_TYPE_SHADER_COMPILATION);
if (comp != NULL) {
BLI_spin_lock(&comp->list_lock);
+
+ /* Search for compilation job in queue. */
LinkData *link = (LinkData *)BLI_findptr(&comp->queue, mat, offsetof(LinkData, data));
if (link) {
BLI_remlink(&comp->queue, link);
GPU_material_status_set(link->data, GPU_MAT_CREATED);
}
- BLI_spin_unlock(&comp->list_lock);
MEM_SAFE_FREE(link);
+
+ /* Search for optimization job in queue. */
+ LinkData *opti_link = (LinkData *)BLI_findptr(
+ &comp->optimize_queue, mat, offsetof(LinkData, data));
+ if (opti_link) {
+ BLI_remlink(&comp->optimize_queue, opti_link);
+ GPU_material_optimization_status_set(opti_link->data, GPU_MAT_OPTIMIZATION_READY);
+ }
+ BLI_spin_unlock(&comp->list_lock);
+
+ MEM_SAFE_FREE(opti_link);
+ }
+ }
+ }
+}
+
+void DRW_deferred_shader_optimize_remove(GPUMaterial *mat)
+{
+ LISTBASE_FOREACH (wmWindowManager *, wm, &G_MAIN->wm) {
+ LISTBASE_FOREACH (wmWindow *, win, &wm->windows) {
+ DRWShaderCompiler *comp = (DRWShaderCompiler *)WM_jobs_customdata_from_type(
+ wm, wm, WM_JOB_TYPE_SHADER_COMPILATION);
+ if (comp != NULL) {
+ BLI_spin_lock(&comp->list_lock);
+ /* Search for optimization job in queue. */
+ LinkData *opti_link = (LinkData *)BLI_findptr(
+ &comp->optimize_queue, mat, offsetof(LinkData, data));
+ if (opti_link) {
+ BLI_remlink(&comp->optimize_queue, opti_link);
+ GPU_material_optimization_status_set(opti_link->data, GPU_MAT_OPTIMIZATION_READY);
+ }
+ BLI_spin_unlock(&comp->list_lock);
+
+ MEM_SAFE_FREE(opti_link);
}
}
}
@@ -384,6 +468,7 @@ GPUMaterial *DRW_shader_from_world(World *wo,
}
drw_deferred_shader_add(mat, deferred);
+ DRW_shader_queue_optimize_material(mat);
return mat;
}
@@ -413,9 +498,52 @@ GPUMaterial *DRW_shader_from_material(Material *ma,
}
drw_deferred_shader_add(mat, deferred);
+ DRW_shader_queue_optimize_material(mat);
return mat;
}
+void DRW_shader_queue_optimize_material(GPUMaterial *mat)
+{
+ /* Do not perform deferred optimization when performing an image render.
+ * De-queue any queued optimization jobs. */
+ if (DRW_state_is_image_render()) {
+ if (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) {
+ /* Remove from pending optimization job queue. */
+ DRW_deferred_shader_optimize_remove(mat);
+ /* If optimization job had already started, wait for it to complete. */
+ while (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) {
+ PIL_sleep_ms(20);
+ }
+ }
+ return;
+ }
+
+ /* Skip optimization if the material is flagged `GPU_MAT_OPTIMIZATION_SKIP`
+ * (optimization not required), has already been optimized successfully, or
+ * is already in the optimization queue. */
+ if (ELEM(GPU_material_optimization_status(mat),
+ GPU_MAT_OPTIMIZATION_SKIP,
+ GPU_MAT_OPTIMIZATION_SUCCESS,
+ GPU_MAT_OPTIMIZATION_QUEUED)) {
+ return;
+ }
+
+ /* Only queue optimization once the original shader has been successfully compiled. */
+ if (GPU_material_status(mat) != GPU_MAT_SUCCESS) {
+ return;
+ }
+
+ /* Defer optimization until sufficient time has passed beyond creation. This avoids excessive
+ * recompilation for shaders which are being actively modified. */
+ if (!GPU_material_optimization_ready(mat)) {
+ return;
+ }
+
+ /* Add deferred shader optimization to queue. */
+ drw_deferred_queue_append(mat, true);
+}
+
void DRW_shader_free(GPUShader *shader)
{
GPU_shader_free(shader);
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt
index 18da5169620..0ce4011b2b4 100644
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -192,6 +192,7 @@ set(METAL_SRC
metal/mtl_context.mm
metal/mtl_debug.mm
metal/mtl_framebuffer.mm
+ metal/mtl_immediate.mm
metal/mtl_index_buffer.mm
metal/mtl_memory.mm
metal/mtl_query.mm
@@ -205,11 +206,14 @@ set(METAL_SRC
metal/mtl_vertex_buffer.mm
metal/mtl_backend.hh
+ metal/mtl_batch.hh
metal/mtl_capabilities.hh
metal/mtl_common.hh
metal/mtl_context.hh
metal/mtl_debug.hh
+ metal/mtl_drawlist.hh
metal/mtl_framebuffer.hh
+ metal/mtl_immediate.hh
metal/mtl_index_buffer.hh
metal/mtl_memory.hh
metal/mtl_primitive.hh
diff --git a/source/blender/gpu/GPU_context.h b/source/blender/gpu/GPU_context.h
index a242bb7cc94..b59ea9e55d2 100644
--- a/source/blender/gpu/GPU_context.h
+++ b/source/blender/gpu/GPU_context.h
@@ -26,7 +26,7 @@ eGPUBackendType GPU_backend_get_type(void);
/** Opaque type hiding blender::gpu::Context. */
typedef struct GPUContext GPUContext;
-GPUContext *GPU_context_create(void *ghost_window);
+GPUContext *GPU_context_create(void *ghost_window, void *ghost_context);
/**
* To be called after #GPU_context_active_set(ctx_to_destroy).
*/
diff --git a/source/blender/gpu/GPU_material.h b/source/blender/gpu/GPU_material.h
index 922988bf95a..11500f5af60 100644
--- a/source/blender/gpu/GPU_material.h
+++ b/source/blender/gpu/GPU_material.h
@@ -117,6 +117,15 @@ typedef enum eGPUMaterialStatus {
GPU_MAT_SUCCESS,
} eGPUMaterialStatus;
+/* Status of the optimized shader variant. `GPU_MAT_OPTIMIZATION_SKIP` marks
+ * cases where we do not plan to perform optimization on a given material. */
+typedef enum eGPUMaterialOptimizationStatus {
+ GPU_MAT_OPTIMIZATION_SKIP = 0,
+ GPU_MAT_OPTIMIZATION_READY,
+ GPU_MAT_OPTIMIZATION_QUEUED,
+ GPU_MAT_OPTIMIZATION_SUCCESS,
+} eGPUMaterialOptimizationStatus;
+
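As a rough guide, the intended flow between these states, driven from draw_manager_shader.c and using the accessors declared below, looks like this (sketch only, not part of the header):

/* SKIP -> READY: heuristic hit at pass creation (see GPU_pass_should_optimize). */
if (GPU_material_status(mat) == GPU_MAT_SUCCESS &&
    GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_READY &&
    GPU_material_optimization_ready(mat)) {
  /* READY -> QUEUED: the background job later runs GPU_material_optimize(),
   * moving the status to SUCCESS, or back to SKIP on failure. */
  GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_QUEUED);
}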
typedef enum eGPUDefaultValue {
GPU_DEFAULT_0 = 0,
GPU_DEFAULT_1,
@@ -246,6 +255,15 @@ struct Scene *GPU_material_scene(GPUMaterial *material);
struct GPUPass *GPU_material_get_pass(GPUMaterial *material);
struct GPUShader *GPU_material_get_shader(GPUMaterial *material);
const char *GPU_material_get_name(GPUMaterial *material);
+
+/**
+ * Material Optimization.
+ * \note Compiles an optimized version of the shader graph, populating `mat->optimized_pass`.
+ * This operation should always be deferred until existing compilations have completed.
+ * Default un-optimized materials will still exist for interactive material editing performance.
+ */
+void GPU_material_optimize(GPUMaterial *mat);
+
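A minimal sketch of the ordering contract, as enforced by the deferred queues in draw_manager_shader.c (assumes a worker-thread context):

GPU_material_compile(mat); /* Base pass always compiles first. */
if (GPU_material_status(mat) == GPU_MAT_SUCCESS) {
  GPU_material_optimize(mat); /* Optional second pass, deferred until later. */
}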
/**
* Return can be NULL if it's a world material.
*/
@@ -256,6 +274,13 @@ struct Material *GPU_material_get_material(GPUMaterial *material);
eGPUMaterialStatus GPU_material_status(GPUMaterial *mat);
void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status);
+/**
+ * Return status for async optimization jobs.
+ */
+eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat);
+void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status);
+bool GPU_material_optimization_ready(GPUMaterial *mat);
+
struct GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material);
/**
* Create dynamic UBO from parameters
diff --git a/source/blender/gpu/intern/gpu_backend.hh b/source/blender/gpu/intern/gpu_backend.hh
index d2890efee72..2a545c8114e 100644
--- a/source/blender/gpu/intern/gpu_backend.hh
+++ b/source/blender/gpu/intern/gpu_backend.hh
@@ -38,7 +38,7 @@ class GPUBackend {
virtual void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) = 0;
virtual void compute_dispatch_indirect(StorageBuf *indirect_buf) = 0;
- virtual Context *context_alloc(void *ghost_window) = 0;
+ virtual Context *context_alloc(void *ghost_window, void *ghost_context) = 0;
virtual Batch *batch_alloc() = 0;
virtual DrawList *drawlist_alloc(int list_length) = 0;
diff --git a/source/blender/gpu/intern/gpu_codegen.cc b/source/blender/gpu/intern/gpu_codegen.cc
index 2241bcf9f9b..85cfa9749fa 100644
--- a/source/blender/gpu/intern/gpu_codegen.cc
+++ b/source/blender/gpu/intern/gpu_codegen.cc
@@ -95,6 +95,9 @@ struct GPUPass {
uint32_t hash;
/** Whether we have already tried to compile the attached GPUShader. */
bool compiled;
+ /** Hint that an optimized variant of this pass should be created based on a complexity heuristic
+ * during pass code generation. */
+ bool should_optimize;
};
/* -------------------------------------------------------------------- */
@@ -242,6 +245,11 @@ class GPUCodegen {
ListBase ubo_inputs_ = {nullptr, nullptr};
GPUInput *cryptomatte_input_ = nullptr;
+ /** Cache parameters for the complexity heuristic. */
+ uint nodes_total_ = 0;
+ uint textures_total_ = 0;
+ uint uniforms_total_ = 0;
+
public:
GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_) : mat(*mat_), graph(*graph_)
{
@@ -282,6 +290,14 @@ class GPUCodegen {
return hash_;
}
+ /* Heuristic, determined during pass codegen, for whether a
+ * more optimized variant of this material should be compiled. */
+ bool should_optimize_heuristic() const
+ {
+ bool do_optimize = (nodes_total_ >= 100 || textures_total_ >= 4 || uniforms_total_ >= 64);
+ return do_optimize;
+ }
+
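The thresholds translate to behavior like the following standalone restatement (the sample counts are invented for illustration):

#include <stdbool.h>

static bool should_optimize(unsigned nodes, unsigned textures, unsigned uniforms)
{
  /* Mirrors the heuristic above: any one threshold triggers optimization. */
  return nodes >= 100 || textures >= 4 || uniforms >= 64;
}

int main(void)
{
  /* should_optimize(4, 1, 6)   -> false: simple material, optimization skipped. */
  /* should_optimize(40, 5, 10) -> true:  texture-heavy graph, worth a second pass. */
  return should_optimize(40, 5, 10) ? 0 : 1;
}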
private:
void set_unique_ids();
@@ -403,6 +419,9 @@ void GPUCodegen::generate_resources()
}
}
+ /* Track texture count for the complexity heuristic. */
+ textures_total_ = slot;
+
if (!BLI_listbase_is_empty(&ubo_inputs_)) {
/* NOTE: generate_uniform_buffer() should have sorted the inputs before this. */
ss << "struct NodeTree {\n";
@@ -440,11 +459,16 @@ void GPUCodegen::generate_library()
GPUCodegenCreateInfo &info = *create_info;
void *value;
- GSetIterState pop_state = {};
- while (BLI_gset_pop(graph.used_libraries, &pop_state, &value)) {
+ /* Iterate over libraries. We need to keep this set intact in case
+ * it is required for the optimization pass. */
+ GHashIterator *ihash = BLI_ghashIterator_new((GHash *)graph.used_libraries);
+ while (!BLI_ghashIterator_done(ihash)) {
+ value = BLI_ghashIterator_getKey(ihash);
auto deps = gpu_shader_dependency_get_resolved_source((const char *)value);
info.dependencies_generated.extend_non_duplicates(deps);
+ BLI_ghashIterator_step(ihash);
}
+ BLI_ghashIterator_free(ihash);
}
void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
@@ -512,6 +536,9 @@ void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
}
}
eval_ss << ");\n\n";
+
+ /* Track node count for the complexity heuristic. */
+ nodes_total_++;
}
char *GPUCodegen::graph_serialize(eGPUNodeTag tree_tag, GPUNodeLink *output_link)
@@ -575,6 +602,7 @@ void GPUCodegen::generate_uniform_buffer()
if (input->source == GPU_SOURCE_UNIFORM && !input->link) {
/* We handle the UBO uniforms separately. */
BLI_addtail(&ubo_inputs_, BLI_genericNodeN(input));
+ uniforms_total_++;
}
}
}
@@ -602,6 +630,7 @@ void GPUCodegen::generate_graphs()
{
set_unique_ids();
+ /* Serialize graph. */
output.surface = graph_serialize(GPU_NODE_TAG_SURFACE | GPU_NODE_TAG_AOV, graph.outlink_surface);
output.volume = graph_serialize(GPU_NODE_TAG_VOLUME, graph.outlink_volume);
output.displacement = graph_serialize(GPU_NODE_TAG_DISPLACEMENT, graph.outlink_displacement);
@@ -637,10 +666,17 @@ void GPUCodegen::generate_graphs()
GPUPass *GPU_generate_pass(GPUMaterial *material,
GPUNodeGraph *graph,
GPUCodegenCallbackFn finalize_source_cb,
- void *thunk)
+ void *thunk,
+ bool optimize_graph)
{
gpu_node_graph_prune_unused(graph);
+ /* If Optimize flag is passed in, we are generating an optimized
+ * variant of the GPUMaterial's GPUPass. */
+ if (optimize_graph) {
+ gpu_node_graph_optimize(graph);
+ }
+
/* Extract attributes before compiling so the generated VBOs are ready to accept the future
* shader. */
gpu_node_graph_finalize_uniform_attrs(graph);
@@ -648,23 +684,33 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
GPUCodegen codegen(material, graph);
codegen.generate_graphs();
codegen.generate_cryptomatte();
- codegen.generate_uniform_buffer();
- /* Cache lookup: Reuse shaders already compiled. */
- GPUPass *pass_hash = gpu_pass_cache_lookup(codegen.hash_get());
-
- /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
- * there is no way to have a collision currently. Some advocated to only use a bigger hash. */
- if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
- if (!gpu_pass_is_valid(pass_hash)) {
- /* Shader has already been created but failed to compile. */
- return nullptr;
+ GPUPass *pass_hash = nullptr;
+
+ if (!optimize_graph) {
+ /* The optimized version of the shader should not re-generate a UBO.
+ * The UBO will not be used for this variant. */
+ codegen.generate_uniform_buffer();
+
+ /** Cache lookup: Reuse shaders already compiled.
+ * NOTE: We only perform cache lookups for non-optimized shader
+ * graphs, as baked constant data, among other optimizations, will generate too many
+ * shader source permutations with minimal re-usability. */
+ pass_hash = gpu_pass_cache_lookup(codegen.hash_get());
+
+ /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
+ * there is no way to have a collision currently. Some advocated to only use a bigger hash. */
+ if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
+ if (!gpu_pass_is_valid(pass_hash)) {
+ /* Shader has already been created but failed to compile. */
+ return nullptr;
+ }
+ /* No collision, just return the pass. */
+ BLI_spin_lock(&pass_cache_spin);
+ pass_hash->refcount += 1;
+ BLI_spin_unlock(&pass_cache_spin);
+ return pass_hash;
}
- /* No collision, just return the pass. */
- BLI_spin_lock(&pass_cache_spin);
- pass_hash->refcount += 1;
- BLI_spin_unlock(&pass_cache_spin);
- return pass_hash;
}
/* Either the shader is not compiled or there is a hash collision...
@@ -702,14 +748,31 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
pass->create_info = codegen.create_info;
pass->hash = codegen.hash_get();
pass->compiled = false;
+ /* Only flag pass optimization hint if this is the first generated pass for a material.
+ * Optimized passes cannot be optimized further, even if the heuristic is still not
+ * favourable. */
+ pass->should_optimize = (!optimize_graph) && codegen.should_optimize_heuristic();
codegen.create_info = nullptr;
- gpu_pass_cache_insert_after(pass_hash, pass);
+ /* Only insert non-optimized graphs into cache.
+ * Optimized graphs will continuously be recompiled with new unique source during material
+ * editing, causing the cache to fill up quickly with materials offering minimal
+ * re-use. */
+ if (!optimize_graph) {
+ gpu_pass_cache_insert_after(pass_hash, pass);
+ }
}
return pass;
}
+bool GPU_pass_should_optimize(GPUPass *pass)
+{
+ /* Returns optimization heuristic prepared during
+ * initial codegen. */
+ return pass->should_optimize;
+}
+
/** \} */
/* -------------------------------------------------------------------- */
diff --git a/source/blender/gpu/intern/gpu_codegen.h b/source/blender/gpu/intern/gpu_codegen.h
index 95a672c0400..aabdf1ac003 100644
--- a/source/blender/gpu/intern/gpu_codegen.h
+++ b/source/blender/gpu/intern/gpu_codegen.h
@@ -25,10 +25,12 @@ typedef struct GPUPass GPUPass;
GPUPass *GPU_generate_pass(GPUMaterial *material,
struct GPUNodeGraph *graph,
GPUCodegenCallbackFn finalize_source_cb,
- void *thunk);
+ void *thunk,
+ bool optimize_graph);
GPUShader *GPU_pass_shader_get(GPUPass *pass);
bool GPU_pass_compile(GPUPass *pass, const char *shname);
void GPU_pass_release(GPUPass *pass);
+bool GPU_pass_should_optimize(GPUPass *pass);
/* Module */
diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc
index bcc418169b7..92cbbc5b4b0 100644
--- a/source/blender/gpu/intern/gpu_context.cc
+++ b/source/blender/gpu/intern/gpu_context.cc
@@ -94,7 +94,7 @@ Context *Context::get()
/* -------------------------------------------------------------------- */
-GPUContext *GPU_context_create(void *ghost_window)
+GPUContext *GPU_context_create(void *ghost_window, void *ghost_context)
{
{
std::scoped_lock lock(backend_users_mutex);
@@ -105,7 +105,7 @@ GPUContext *GPU_context_create(void *ghost_window)
num_backend_users++;
}
- Context *ctx = GPUBackend::get()->context_alloc(ghost_window);
+ Context *ctx = GPUBackend::get()->context_alloc(ghost_window, ghost_context);
GPU_context_active_set(wrap(ctx));
return wrap(ctx);
@@ -216,6 +216,9 @@ void GPU_render_step()
/** \name Backend selection
* \{ */
+/* NOTE: To enable Metal API, we need to temporarily change this to `GPU_BACKEND_METAL`.
+ * Until a global switch is added, Metal also needs to be enabled in GHOST_ContextCGL:
+ * `m_useMetalForRendering = true`. */
static const eGPUBackendType g_backend_type = GPU_BACKEND_OPENGL;
static GPUBackend *g_backend = nullptr;
diff --git a/source/blender/gpu/intern/gpu_material.c b/source/blender/gpu/intern/gpu_material.c
index 96809db1587..991cb229eda 100644
--- a/source/blender/gpu/intern/gpu_material.c
+++ b/source/blender/gpu/intern/gpu_material.c
@@ -34,6 +34,8 @@
#include "DRW_engine.h"
+#include "PIL_time.h"
+
#include "gpu_codegen.h"
#include "gpu_node_graph.h"
@@ -43,6 +45,17 @@
#define MAX_COLOR_BAND 128
#define MAX_GPU_SKIES 8
+/** Whether the optimized variant of the GPUPass should be created asynchronously.
+ * Usage depends on whether asynchronous creation poses any threading challenges.
+ * Currently, the overhead of GPU_generate_pass is relatively small in comparison to shader
+ * compilation, though this option exists in case any potential scenarios for material graph
+ * optimization cause a slowdown on the main thread.
+ *
+ * NOTE: The actual shader program for the optimized pass will always be compiled
+ * asynchronously; this flag only controls whether shader node graph source serialization
+ * happens on the compilation worker thread. */
+#define ASYNC_OPTIMIZED_PASS_CREATION 0
+
typedef struct GPUColorBandBuilder {
float pixels[MAX_COLOR_BAND][CM_TABLE + 1][4];
int current_layer;
@@ -57,6 +70,27 @@ struct GPUMaterial {
/* Contains GPUShader and source code for deferred compilation.
* Can be shared between similar material (i.e: sharing same nodetree topology). */
GPUPass *pass;
+ /* Optimized GPUPass, situationally compiled after the initial pass for optimal realtime
+ * performance. This shader variant bakes dynamic uniform data as constants directly into
+ * the shader source rather than using the UBO. */
+ GPUPass *optimized_pass;
+ /* Optimization status.
+ * We also use this status to determine whether this material should be considered for
+ * optimization. Only sufficiently complex shaders benefit from constant-folding optimizations.
+ * `GPU_MAT_OPTIMIZATION_READY` -> shader should be optimized and is ready for optimization.
+ * `GPU_MAT_OPTIMIZATION_SKIP` -> Shader should not be optimized as it would not benefit
+ * performance to do so, based on the heuristic.
+ */
+ eGPUMaterialOptimizationStatus optimization_status;
+ double creation_time;
+#if ASYNC_OPTIMIZED_PASS_CREATION == 1
+ struct DeferredOptimizePass {
+ GPUCodegenCallbackFn callback;
+ void *thunk;
+ };
+ struct DeferredOptimizePass optimize_pass_info;
+#endif
+
/** UBOs for this material parameters. */
GPUUniformBuf *ubo;
/** Compilation status. Do not use if shader is not GPU_MAT_SUCCESS. */
@@ -209,6 +243,9 @@ void GPU_material_free_single(GPUMaterial *material)
gpu_node_graph_free(&material->graph);
+ if (material->optimized_pass != NULL) {
+ GPU_pass_release(material->optimized_pass);
+ }
if (material->pass != NULL) {
GPU_pass_release(material->pass);
}
@@ -247,12 +284,15 @@ Scene *GPU_material_scene(GPUMaterial *material)
GPUPass *GPU_material_get_pass(GPUMaterial *material)
{
- return material->pass;
+ return (material->optimized_pass) ? material->optimized_pass : material->pass;
}
GPUShader *GPU_material_get_shader(GPUMaterial *material)
{
- return material->pass ? GPU_pass_shader_get(material->pass) : NULL;
+ /* First attempt to select optimized shader. If not available, fetch original. */
+ GPUShader *shader = (material->optimized_pass) ? GPU_pass_shader_get(material->optimized_pass) :
+ NULL;
+ return (shader) ? shader : ((material->pass) ? GPU_pass_shader_get(material->pass) : NULL);
}
const char *GPU_material_get_name(GPUMaterial *material)
@@ -665,6 +705,29 @@ void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status)
mat->status = status;
}
+eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat)
+{
+ return mat->optimization_status;
+}
+
+void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status)
+{
+ mat->optimization_status = status;
+ if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
+ /* Reset creation timer to delay optimization pass. */
+ mat->creation_time = PIL_check_seconds_timer();
+ }
+}
+
+bool GPU_material_optimization_ready(GPUMaterial *mat)
+{
+ /* Timer threshold before optimizations will be queued.
+ * When materials are frequently being modified, optimization
+ * can incur CPU overhead from excessive compilation. */
+ const double optimization_time_threshold_s = 5.0;
+ return ((PIL_check_seconds_timer() - mat->creation_time) >= optimization_time_threshold_s);
+}
+
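The effect is a debounce: every transition back to GPU_MAT_OPTIMIZATION_READY resets creation_time, restarting the 5-second window. An illustrative timeline (times invented):

/* t=0.0s  material rebuilt -> READY, creation_time reset.             */
/* t=2.0s  node tweaked     -> rebuilt, READY again, timer restarts.   */
/* t=4.0s  GPU_material_optimization_ready(mat) == false (2s elapsed). */
/* t=7.1s  GPU_material_optimization_ready(mat) == true -> job queued. */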
/* Code generation */
bool GPU_material_has_surface_output(GPUMaterial *mat)
@@ -730,6 +793,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
mat->uuid = shader_uuid;
mat->flag = GPU_MATFLAG_UPDATED;
mat->status = GPU_MAT_CREATED;
+ mat->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
mat->is_volume_shader = is_volume_shader;
mat->graph.used_libraries = BLI_gset_new(
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
@@ -748,7 +812,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
{
/* Create source code and search pass cache for an already compiled version. */
- mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk);
+ mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, false);
if (mat->pass == NULL) {
/* We had a cache hit and the shader has already failed to compile. */
@@ -756,11 +820,44 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
gpu_node_graph_free(&mat->graph);
}
else {
+ /* Determine whether we should generate an optimized variant of the graph.
+ * Heuristic is based on complexity of default material pass and shader node graph. */
+ if (GPU_pass_should_optimize(mat->pass)) {
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
+ }
+
GPUShader *sh = GPU_pass_shader_get(mat->pass);
if (sh != NULL) {
/* We had a cache hit and the shader is already compiled. */
mat->status = GPU_MAT_SUCCESS;
- gpu_node_graph_free_nodes(&mat->graph);
+
+ if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
+ gpu_node_graph_free_nodes(&mat->graph);
+ }
+ }
+
+ /* Generate optimized pass. */
+ if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
+#if ASYNC_OPTIMIZED_PASS_CREATION == 1
+ mat->optimized_pass = NULL;
+ mat->optimize_pass_info.callback = callback;
+ mat->optimize_pass_info.thunk = thunk;
+#else
+ mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, true);
+ if (mat->optimized_pass == NULL) {
+ /* Failed to create optimized pass. */
+ gpu_node_graph_free_nodes(&mat->graph);
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
+ }
+ else {
+ GPUShader *optimized_sh = GPU_pass_shader_get(mat->optimized_pass);
+ if (optimized_sh != NULL) {
+ /* Optimized shader already available. */
+ gpu_node_graph_free_nodes(&mat->graph);
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
+ }
+ }
+#endif
}
}
}
@@ -811,7 +908,11 @@ void GPU_material_compile(GPUMaterial *mat)
GPUShader *sh = GPU_pass_shader_get(mat->pass);
if (sh != NULL) {
mat->status = GPU_MAT_SUCCESS;
- gpu_node_graph_free_nodes(&mat->graph);
+
+ if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
+ /* Only free node graph nodes if not required by secondary optimization pass. */
+ gpu_node_graph_free_nodes(&mat->graph);
+ }
}
else {
mat->status = GPU_MAT_FAILED;
@@ -825,6 +926,71 @@ void GPU_material_compile(GPUMaterial *mat)
}
}
+void GPU_material_optimize(GPUMaterial *mat)
+{
+ /* If shader is flagged for skipping optimization or has already been successfully
+ * optimized, skip. */
+ if (ELEM(mat->optimization_status, GPU_MAT_OPTIMIZATION_SKIP, GPU_MAT_OPTIMIZATION_SUCCESS)) {
+ return;
+ }
+
+ /* If original shader has not been fully compiled, we are not
+ * ready to perform optimization. */
+ if (mat->status != GPU_MAT_SUCCESS) {
+ /* Reset optimization status. */
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
+ return;
+ }
+
+#if ASYNC_OPTIMIZED_PASS_CREATION == 1
+ /* If the optimized pass is not valid, first generate the optimized pass.
+ * NOTE(Threading): Need to verify whether GPU_generate_pass can cause side-effects,
+ * especially when used with "thunk". So far this appears to work, and deferring optimized
+ * pass creation is preferable, as these passes do not benefit from caching due to baked
+ * constants. However, this could be a concern in certain cases. */
+ if (!mat->optimized_pass) {
+ mat->optimized_pass = GPU_generate_pass(
+ mat, &mat->graph, mat->optimize_pass_info.callback, mat->optimize_pass_info.thunk, true);
+ BLI_assert(mat->optimized_pass);
+ }
+#else
+ if (!mat->optimized_pass) {
+ /* Optimized pass has not been created, skip future optimization attempts. */
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
+ return;
+ }
+#endif
+
+ bool success;
+ /* NOTE: The shader may have already been compiled here since we are
+ * sharing GPUShader across GPUMaterials. In this case it's a no-op. */
+#ifndef NDEBUG
+ success = GPU_pass_compile(mat->optimized_pass, mat->name);
+#else
+ success = GPU_pass_compile(mat->optimized_pass, __func__);
+#endif
+
+ if (success) {
+ GPUShader *sh = GPU_pass_shader_get(mat->optimized_pass);
+ if (sh != NULL) {
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
+ }
+ else {
+ /* Optimized pass failed to compile. Disable any future optimization attempts. */
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
+ }
+ }
+ else {
+ /* Optimized pass failed to compile. Disable future attempts to optimize. */
+ GPU_pass_release(mat->optimized_pass);
+ mat->optimized_pass = NULL;
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
+ }
+
+ /* Release node graph as no longer needed. */
+ gpu_node_graph_free_nodes(&mat->graph);
+}
+
void GPU_materials_free(Main *bmain)
{
LISTBASE_FOREACH (Material *, ma, &bmain->materials) {
@@ -848,6 +1014,8 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
material->graph.used_libraries = BLI_gset_new(
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
material->refcount = 1;
+ material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
+ material->optimized_pass = NULL;
/* Construct the material graph by adding and linking the necessary GPU material nodes. */
construct_function_cb(thunk, material);
@@ -856,7 +1024,9 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
gpu_material_ramp_texture_build(material);
/* Lookup an existing pass in the cache or generate a new one. */
- material->pass = GPU_generate_pass(material, &material->graph, generate_code_function_cb, thunk);
+ material->pass = GPU_generate_pass(
+ material, &material->graph, generate_code_function_cb, thunk, false);
+ material->optimized_pass = NULL;
/* The pass already exists in the pass cache but its shader already failed to compile. */
if (material->pass == NULL) {
@@ -865,11 +1035,42 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
return material;
}
+ /* Generate optimized pass. */
+ if (GPU_pass_should_optimize(material->pass)) {
+
+#if ASYNC_OPTIMIZED_PASS_CREATION == 1
+ material->optimized_pass = NULL;
+ material->optimize_pass_info.callback = generate_code_function_cb;
+ material->optimize_pass_info.thunk = thunk;
+ GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_READY);
+#else
+ material->optimized_pass = GPU_generate_pass(
+ material, &material->graph, generate_code_function_cb, thunk, true);
+
+ if (material->optimized_pass == NULL) {
+ /* Failed to create optimized pass. */
+ gpu_node_graph_free_nodes(&material->graph);
+ GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SKIP);
+ }
+ else {
+ GPUShader *optimized_sh = GPU_pass_shader_get(material->optimized_pass);
+ if (optimized_sh != NULL) {
+ /* Optimized shader already available. */
+ gpu_node_graph_free_nodes(&material->graph);
+ GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SUCCESS);
+ }
+ }
+#endif
+ }
+
/* The pass already exists in the pass cache and its shader is already compiled. */
GPUShader *shader = GPU_pass_shader_get(material->pass);
if (shader != NULL) {
material->status = GPU_MAT_SUCCESS;
- gpu_node_graph_free_nodes(&material->graph);
+ if (material->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
+ /* Only free node graph if not required by secondary optimization pass. */
+ gpu_node_graph_free_nodes(&material->graph);
+ }
return material;
}
diff --git a/source/blender/gpu/intern/gpu_node_graph.c b/source/blender/gpu/intern/gpu_node_graph.c
index a305413905b..3ca2399a547 100644
--- a/source/blender/gpu/intern/gpu_node_graph.c
+++ b/source/blender/gpu/intern/gpu_node_graph.c
@@ -914,3 +914,22 @@ void gpu_node_graph_prune_unused(GPUNodeGraph *graph)
}
}
}
+
+void gpu_node_graph_optimize(GPUNodeGraph *graph)
+{
+ /* Replace all uniform node links with constants. */
+ LISTBASE_FOREACH (GPUNode *, node, &graph->nodes) {
+ LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
+ if (input->link) {
+ if (input->link->link_type == GPU_NODE_LINK_UNIFORM) {
+ input->link->link_type = GPU_NODE_LINK_CONSTANT;
+ }
+ }
+ if (input->source == GPU_SOURCE_UNIFORM) {
+ input->source = (input->type == GPU_CLOSURE) ? GPU_SOURCE_STRUCT : GPU_SOURCE_CONSTANT;
+ }
+ }
+ }
+
+ /* TODO: Consider performing other node graph optimizations here. */
+}
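A toy standalone illustration of the same re-tagging (plain C, not Blender types): a uniform-backed input is re-labeled so that code generation emits a literal the shader compiler can constant-fold:

#include <stdio.h>

enum source { SRC_UNIFORM, SRC_CONSTANT };

struct input {
  enum source source;
  float value; /* Last-known uniform value, baked at optimization time. */
};

static void optimize_input(struct input *in)
{
  if (in->source == SRC_UNIFORM) {
    in->source = SRC_CONSTANT; /* Emit e.g. `0.25` instead of a UBO member. */
  }
}

int main(void)
{
  struct input roughness = {SRC_UNIFORM, 0.25f};
  optimize_input(&roughness);
  printf("%s\n", roughness.source == SRC_CONSTANT ? "baked" : "uniform");
  return 0;
}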
diff --git a/source/blender/gpu/intern/gpu_node_graph.h b/source/blender/gpu/intern/gpu_node_graph.h
index 085620b30e4..75ca05ffaea 100644
--- a/source/blender/gpu/intern/gpu_node_graph.h
+++ b/source/blender/gpu/intern/gpu_node_graph.h
@@ -179,6 +179,21 @@ typedef struct GPUNodeGraph {
void gpu_node_graph_prune_unused(GPUNodeGraph *graph);
void gpu_node_graph_finalize_uniform_attrs(GPUNodeGraph *graph);
+
+/**
+ * Optimize node graph for optimized material shader path.
+ * Once the base material has been generated, we can modify the shader
+ * node graph to create one which will produce an optimally performing shader.
+ * This currently involves baking uniform data into constant data to enable
+ * aggressive constant folding by the compiler in order to reduce complexity and
+ * shader core memory pressure.
+ *
+ * NOTE: Graph optimizations will produce a shader which needs to be re-compiled
+ * more frequently; however, the default material pass will always exist to fall
+ * back on.
+ */
+void gpu_node_graph_optimize(GPUNodeGraph *graph);
+
/**
* Free intermediate node graph.
*/
diff --git a/source/blender/gpu/intern/gpu_shader_builder.cc b/source/blender/gpu/intern/gpu_shader_builder.cc
index 9b699c60126..3aa2963ecd0 100644
--- a/source/blender/gpu/intern/gpu_shader_builder.cc
+++ b/source/blender/gpu/intern/gpu_shader_builder.cc
@@ -45,7 +45,7 @@ void ShaderBuilder::init()
ghost_context_ = GHOST_CreateOpenGLContext(ghost_system_, glSettings);
GHOST_ActivateOpenGLContext(ghost_context_);
- gpu_context_ = GPU_context_create(nullptr);
+ gpu_context_ = GPU_context_create(nullptr, ghost_context_);
GPU_init();
}
diff --git a/source/blender/gpu/intern/gpu_shader_interface.cc b/source/blender/gpu/intern/gpu_shader_interface.cc
index 6f43b379d31..d9e5e066fea 100644
--- a/source/blender/gpu/intern/gpu_shader_interface.cc
+++ b/source/blender/gpu/intern/gpu_shader_interface.cc
@@ -22,8 +22,8 @@ ShaderInterface::ShaderInterface() = default;
ShaderInterface::~ShaderInterface()
{
/* Free memory used by name_buffer. */
- MEM_freeN(name_buffer_);
- MEM_freeN(inputs_);
+ MEM_SAFE_FREE(name_buffer_);
+ MEM_SAFE_FREE(inputs_);
}
static void sort_input_list(MutableSpan<ShaderInput> dst)
diff --git a/source/blender/gpu/metal/mtl_backend.hh b/source/blender/gpu/metal/mtl_backend.hh
index 214a5d738a9..082fab24ba4 100644
--- a/source/blender/gpu/metal/mtl_backend.hh
+++ b/source/blender/gpu/metal/mtl_backend.hh
@@ -63,7 +63,7 @@ class MTLBackend : public GPUBackend {
/* MTL Allocators need to be implemented in separate .mm files, due to allocation of Objective-C
* objects. */
- Context *context_alloc(void *ghost_window) override;
+ Context *context_alloc(void *ghost_window, void *ghost_context) override;
Batch *batch_alloc() override;
DrawList *drawlist_alloc(int list_length) override;
FrameBuffer *framebuffer_alloc(const char *name) override;
diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm
index ec9e8ab4d15..2ca1fd3f3d0 100644
--- a/source/blender/gpu/metal/mtl_backend.mm
+++ b/source/blender/gpu/metal/mtl_backend.mm
@@ -8,8 +8,11 @@
#include "gpu_backend.hh"
#include "mtl_backend.hh"
+#include "mtl_batch.hh"
#include "mtl_context.hh"
+#include "mtl_drawlist.hh"
#include "mtl_framebuffer.hh"
+#include "mtl_immediate.hh"
#include "mtl_index_buffer.hh"
#include "mtl_query.hh"
#include "mtl_shader.hh"
@@ -37,21 +40,21 @@ void MTLBackend::samplers_update(){
/* Placeholder -- Handled in MTLContext. */
};
-Context *MTLBackend::context_alloc(void *ghost_window)
+Context *MTLBackend::context_alloc(void *ghost_window, void *ghost_context)
{
- return new MTLContext(ghost_window);
+ return new MTLContext(ghost_window, ghost_context);
};
Batch *MTLBackend::batch_alloc()
{
- /* TODO(Metal): Implement MTLBatch. */
- return nullptr;
+ /* TODO(Metal): Full MTLBatch implementation. */
+ return new MTLBatch();
};
DrawList *MTLBackend::drawlist_alloc(int list_length)
{
- /* TODO(Metal): Implement MTLDrawList. */
- return nullptr;
+ /* TODO(Metal): Full MTLDrawList implementation. */
+ return new MTLDrawList(list_length);
};
FrameBuffer *MTLBackend::framebuffer_alloc(const char *name)
diff --git a/source/blender/gpu/metal/mtl_batch.hh b/source/blender/gpu/metal/mtl_batch.hh
new file mode 100644
index 00000000000..66603dabd15
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_batch.hh
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * GPU geometry batch
+ * Contains VAOs + VBOs + Shader representing a drawable entity.
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+
+#include "gpu_batch_private.hh"
+
+namespace blender {
+namespace gpu {
+
+
+/* Pass-through MTLBatch. TODO(Metal): Implement. */
+class MTLBatch : public Batch {
+ public:
+  void draw(int v_first, int v_count, int i_first, int i_count) override
+  {
+    /* TODO(Metal): Implement. */
+  }
+
+  void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override
+  {
+    /* TODO(Metal): Implement. */
+  }
+
+  void multi_draw_indirect(GPUStorageBuf *indirect_buf,
+                           int count,
+                           intptr_t offset,
+                           intptr_t stride) override
+  {
+    /* TODO(Metal): Implement. */
+  }
+ MEM_CXX_CLASS_ALLOC_FUNCS("MTLBatch");
+};
+
+} // namespace gpu
+} // namespace blender
diff --git a/source/blender/gpu/metal/mtl_command_buffer.mm b/source/blender/gpu/metal/mtl_command_buffer.mm
index d2936e8e91f..a9cabbb111f 100644
--- a/source/blender/gpu/metal/mtl_command_buffer.mm
+++ b/source/blender/gpu/metal/mtl_command_buffer.mm
@@ -54,6 +54,7 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin()
MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init];
desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
desc.retainedReferences = YES;
+ BLI_assert(context_.queue != nil);
active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc];
}
else {
@@ -611,40 +612,187 @@ void MTLRenderPassState::bind_vertex_sampler(MTLSamplerBinding &sampler_binding,
bool use_argument_buffer_for_samplers,
uint slot)
{
- /* TODO(Metal): Implement RenderCommandEncoder vertex sampler binding utility. This will be
- * implemented alongside MTLShader. */
+ /* Range check. */
+ const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface();
+ BLI_assert(slot >= 0);
+ BLI_assert(slot <= shader_interface->get_max_texture_index());
+ BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS);
+ UNUSED_VARS_NDEBUG(shader_interface);
+
+ /* If sampler state has not changed for the given slot, we do not need to fetch. */
+ if (this->cached_vertex_sampler_state_bindings[slot].sampler_state == nil ||
+ !(this->cached_vertex_sampler_state_bindings[slot].binding_state == sampler_binding.state) ||
+ use_argument_buffer_for_samplers) {
+
+ id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ?
+ ctx.get_default_sampler_state() :
+ ctx.get_sampler_from_state(sampler_binding.state);
+ if (!use_argument_buffer_for_samplers) {
+ /* Update binding and cached state. */
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ BLI_assert(rec != nil);
+ [rec setVertexSamplerState:sampler_state atIndex:slot];
+ this->cached_vertex_sampler_state_bindings[slot].binding_state = sampler_binding.state;
+ this->cached_vertex_sampler_state_bindings[slot].sampler_state = sampler_state;
+ }
+
+ /* Flag last binding type. */
+ this->cached_vertex_sampler_state_bindings[slot].is_arg_buffer_binding =
+ use_argument_buffer_for_samplers;
+
+ /* Always assign to the argument buffer sampler binding array; this efficiently
+ * keeps the value in the samplers array up to date. */
+ ctx.samplers_.mtl_sampler[slot] = sampler_state;
+ ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state;
+ }
}
void MTLRenderPassState::bind_fragment_sampler(MTLSamplerBinding &sampler_binding,
bool use_argument_buffer_for_samplers,
uint slot)
{
- /* TODO(Metal): Implement RenderCommandEncoder fragment sampler binding utility. This will be
- * implemented alongside MTLShader. */
+ /* Range check. */
+ const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface();
+ BLI_assert(slot >= 0);
+ BLI_assert(slot <= shader_interface->get_max_texture_index());
+ BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS);
+ UNUSED_VARS_NDEBUG(shader_interface);
+
+ /* If sampler state has not changed for the given slot, we do not need to fetch. */
+ if (this->cached_fragment_sampler_state_bindings[slot].sampler_state == nil ||
+ !(this->cached_fragment_sampler_state_bindings[slot].binding_state ==
+ sampler_binding.state) ||
+ use_argument_buffer_for_samplers) {
+
+ id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ?
+ ctx.get_default_sampler_state() :
+ ctx.get_sampler_from_state(sampler_binding.state);
+ if (!use_argument_buffer_for_samplers) {
+ /* Update binding and cached state. */
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ BLI_assert(rec != nil);
+ [rec setFragmentSamplerState:sampler_state atIndex:slot];
+ this->cached_fragment_sampler_state_bindings[slot].binding_state = sampler_binding.state;
+ this->cached_fragment_sampler_state_bindings[slot].sampler_state = sampler_state;
+ }
+
+ /* Flag last binding type. */
+ this->cached_fragment_sampler_state_bindings[slot].is_arg_buffer_binding =
+ use_argument_buffer_for_samplers;
+
+ /* Always assign to the argument buffer sampler binding array; this efficiently
+ * keeps the value in the samplers array up to date. */
+ ctx.samplers_.mtl_sampler[slot] = sampler_state;
+ ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state;
+ }
}
void MTLRenderPassState::bind_vertex_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index)
{
- /* TODO(Metal): Implement RenderCommandEncoder vertex buffer binding utility. This will be
- * implemented alongside the full MTLMemoryManager. */
+ BLI_assert(index >= 0);
+ BLI_assert(buffer_offset >= 0);
+ BLI_assert(buffer != nil);
+
+ BufferBindingCached &current_vert_ubo_binding = this->cached_vertex_buffer_bindings[index];
+ if (current_vert_ubo_binding.offset != buffer_offset ||
+ current_vert_ubo_binding.metal_buffer != buffer || current_vert_ubo_binding.is_bytes) {
+
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ BLI_assert(rec != nil);
+
+ if (current_vert_ubo_binding.metal_buffer == buffer) {
+ /* If buffer is the same, but offset has changed. */
+ [rec setVertexBufferOffset:buffer_offset atIndex:index];
+ }
+ else {
+ /* Bind Vertex Buffer. */
+ [rec setVertexBuffer:buffer offset:buffer_offset atIndex:index];
+ }
+
+ /* Update Bind-state cache. */
+ this->cached_vertex_buffer_bindings[index].is_bytes = false;
+ this->cached_vertex_buffer_bindings[index].metal_buffer = buffer;
+ this->cached_vertex_buffer_bindings[index].offset = buffer_offset;
+ }
}
void MTLRenderPassState::bind_fragment_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index)
{
- /* TODO(Metal): Implement RenderCommandEncoder fragment buffer binding utility. This will be
- * implemented alongside the full MTLMemoryManager. */
+ BLI_assert(index >= 0);
+ BLI_assert(buffer_offset >= 0);
+ BLI_assert(buffer != nil);
+
+ BufferBindingCached &current_frag_ubo_binding = this->cached_fragment_buffer_bindings[index];
+ if (current_frag_ubo_binding.offset != buffer_offset ||
+ current_frag_ubo_binding.metal_buffer != buffer || current_frag_ubo_binding.is_bytes) {
+
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ BLI_assert(rec != nil);
+
+ if (current_frag_ubo_binding.metal_buffer == buffer) {
+ /* If buffer is the same, but offset has changed. */
+ [rec setFragmentBufferOffset:buffer_offset atIndex:index];
+ }
+ else {
+ /* Bind Fragment Buffer. */
+ [rec setFragmentBuffer:buffer offset:buffer_offset atIndex:index];
+ }
+
+ /* Update Bind-state cache. */
+ this->cached_fragment_buffer_bindings[index].is_bytes = false;
+ this->cached_fragment_buffer_bindings[index].metal_buffer = buffer;
+ this->cached_fragment_buffer_bindings[index].offset = buffer_offset;
+ }
}
void MTLRenderPassState::bind_vertex_bytes(void *bytes, uint length, uint index)
{
- /* TODO(Metal): Implement RenderCommandEncoder vertex bytes binding utility. This will be
- * implemented alongside the full MTLMemoryManager. */
+ /* Bytes always updated as source data may have changed. */
+ BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ BLI_assert(length > 0);
+ BLI_assert(bytes != nullptr);
+
+ if (length < MTL_MAX_SET_BYTES_SIZE) {
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ [rec setVertexBytes:bytes length:length atIndex:index];
+ }
+ else {
+ /* We have run over the setBytes limit, bind buffer instead. */
+ MTLTemporaryBuffer range =
+ ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256);
+ memcpy(range.data, bytes, length);
+ this->bind_vertex_buffer(range.metal_buffer, range.buffer_offset, index);
+ }
+
+ /* Update Bind-state cache. */
+ this->cached_vertex_buffer_bindings[index].is_bytes = true;
+ this->cached_vertex_buffer_bindings[index].metal_buffer = nil;
+ this->cached_vertex_buffer_bindings[index].offset = -1;
}
void MTLRenderPassState::bind_fragment_bytes(void *bytes, uint length, uint index)
{
- /* TODO(Metal): Implement RenderCommandEncoder fragment bytes binding utility. This will be
- * implemented alongside the full MTLMemoryManager. */
+ /* Bytes always updated as source data may have changed. */
+ BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ BLI_assert(length > 0);
+ BLI_assert(bytes != nullptr);
+
+ if (length < MTL_MAX_SET_BYTES_SIZE) {
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ [rec setFragmentBytes:bytes length:length atIndex:index];
+ }
+ else {
+ /* We have run over the setBytes limit, bind buffer instead. */
+ MTLTemporaryBuffer range =
+ ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256);
+ memcpy(range.data, bytes, length);
+ this->bind_fragment_buffer(range.metal_buffer, range.buffer_offset, index);
+ }
+
+ /* Update Bind-state cache. */
+ this->cached_fragment_buffer_bindings[index].is_bytes = true;
+ this->cached_fragment_buffer_bindings[index].metal_buffer = nil;
+ this->cached_fragment_buffer_bindings[index].offset = -1;
}
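Both bind_*_bytes functions share the same size-based dispatch; a standalone restatement in plain C (the constant mirrors MTL_MAX_SET_BYTES_SIZE from mtl_common.hh):

#include <stdio.h>

#define MAX_SET_BYTES_SIZE 4096

static const char *binding_path(unsigned length)
{
  return (length < MAX_SET_BYTES_SIZE) ? "inline via set*Bytes" :
                                         "scratch buffer, 256-byte aligned";
}

int main(void)
{
  printf("64 B push data -> %s\n", binding_path(64));   /* Small uniform data. */
  printf("6144 B block   -> %s\n", binding_path(6144)); /* Over the limit. */
  return 0;
}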
/** \} */
diff --git a/source/blender/gpu/metal/mtl_common.hh b/source/blender/gpu/metal/mtl_common.hh
index b6f9c0050a9..5c322efa3f9 100644
--- a/source/blender/gpu/metal/mtl_common.hh
+++ b/source/blender/gpu/metal/mtl_common.hh
@@ -3,7 +3,9 @@
#ifndef __MTL_COMMON
#define __MTL_COMMON
-// -- Renderer Options --
+/** -- Renderer Options -- */
+/* Number of frames over which rolling averages are taken. */
+#define MTL_FRAME_AVERAGE_COUNT 5
#define MTL_MAX_DRAWABLES 3
#define MTL_MAX_SET_BYTES_SIZE 4096
#define MTL_FORCE_WAIT_IDLE 0
diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh
index 577438667d6..5991fe2bc3e 100644
--- a/source/blender/gpu/metal/mtl_context.hh
+++ b/source/blender/gpu/metal/mtl_context.hh
@@ -12,6 +12,10 @@
#include "GPU_common_types.h"
#include "GPU_context.h"
+#include "intern/GHOST_Context.h"
+#include "intern/GHOST_ContextCGL.h"
+#include "intern/GHOST_Window.h"
+
#include "mtl_backend.hh"
#include "mtl_capabilities.hh"
#include "mtl_common.hh"
@@ -570,12 +574,44 @@ class MTLCommandBufferManager {
class MTLContext : public Context {
friend class MTLBackend;
+ friend class MTLRenderPassState;
+
+ public:
+ /* Swapchain and latency management. */
+ static std::atomic<int> max_drawables_in_flight;
+ static std::atomic<int64_t> avg_drawable_latency_us;
+ static int64_t frame_latency[MTL_FRAME_AVERAGE_COUNT];
+
+ public:
+ /* Shaders and Pipeline state. */
+ MTLContextGlobalShaderPipelineState pipeline_state;
+
+ /* Metal API Resource Handles. */
+ id<MTLCommandQueue> queue = nil;
+ id<MTLDevice> device = nil;
+
+#ifndef NDEBUG
+ /* Label for Context debug name assignment. */
+ NSString *label = nil;
+#endif
+
+ /* Memory Management. */
+ MTLScratchBufferManager memory_manager;
+ static MTLBufferPool global_memory_manager;
+
+ /* CommandBuffer managers. */
+ MTLCommandBufferManager main_command_buffer;
private:
- /* Null buffers for empty/uninitialized bindings.
- * Null attribute buffer follows default attribute format of OpenGL Back-end. */
- id<MTLBuffer> null_buffer_; /* All zero's. */
- id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */
+ /* Parent Context. */
+ GHOST_ContextCGL *ghost_context_;
+
+ /* Render Passes and Framebuffers. */
+ id<MTLTexture> default_fbo_mtltexture_ = nil;
+ gpu::MTLTexture *default_fbo_gputexture_ = nullptr;
+
+ /* Depth-stencil state cache. */
+ blender::Map<MTLContextDepthStencilState, id<MTLDepthStencilState>> depth_stencil_state_cache;
/* Compute and specialization caches. */
MTLContextTextureUtils texture_utils_;
@@ -601,23 +637,20 @@ class MTLContext : public Context {
gpu::MTLBuffer *visibility_buffer_ = nullptr;
bool visibility_is_dirty_ = false;
- public:
- /* Shaders and Pipeline state. */
- MTLContextGlobalShaderPipelineState pipeline_state;
-
- /* Metal API Resource Handles. */
- id<MTLCommandQueue> queue = nil;
- id<MTLDevice> device = nil;
-
- /* Memory Management */
- MTLScratchBufferManager memory_manager;
- static MTLBufferPool global_memory_manager;
+ /* Null buffers for empty/uninitialized bindings.
+ * Null attribute buffer follows default attribute format of OpenGL Backend. */
+ id<MTLBuffer> null_buffer_; /* All zero's. */
+ id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */
- /* CommandBuffer managers. */
- MTLCommandBufferManager main_command_buffer;
+ /** Dummy Resources */
+ /* Maximum of 32 texture types, though most combinations are invalid. */
+ gpu::MTLTexture *dummy_textures_[GPU_TEXTURE_BUFFER] = {nullptr};
+ GPUVertFormat dummy_vertformat_;
+ GPUVertBuf *dummy_verts_ = nullptr;
+ public:
/* GPUContext interface. */
- MTLContext(void *ghost_window);
+ MTLContext(void *ghost_window, void *ghost_context);
~MTLContext();
static void check_error(const char *info);
@@ -673,6 +706,35 @@ class MTLContext : public Context {
void pipeline_state_init();
MTLShader *get_active_shader();
+ /* These functions ensure that the current RenderCommandEncoder has
+ * the correct global state assigned. This should be called prior
+ * to every draw call, to ensure that all state is applied and up
+ * to date. We handle:
+ *
+ * - Buffer bindings (Vertex buffers, Uniforms, UBOs, transform feedback)
+ * - Texture bindings
+ * - Sampler bindings (+ argument buffer bindings)
+ * - Dynamic Render pipeline state (on encoder)
+ * - Baking Pipeline State Objects (PSOs) for current shader, based
+ * on final pipeline state.
+ *
+ * `ensure_render_pipeline_state` will return false if the state is
+ * invalid and cannot be applied. This should cancel a draw call. */
+ bool ensure_render_pipeline_state(MTLPrimitiveType prim_type);
+ bool ensure_uniform_buffer_bindings(
+ id<MTLRenderCommandEncoder> rec,
+ const MTLShaderInterface *shader_interface,
+ const MTLRenderPipelineStateInstance *pipeline_state_instance);
+ void ensure_texture_bindings(id<MTLRenderCommandEncoder> rec,
+ MTLShaderInterface *shader_interface,
+ const MTLRenderPipelineStateInstance *pipeline_state_instance);
+ void ensure_depth_stencil_state(MTLPrimitiveType prim_type);
+
+ id<MTLBuffer> get_null_buffer();
+ id<MTLBuffer> get_null_attribute_buffer();
+ gpu::MTLTexture *get_dummy_texture(eGPUTextureType type);
+ void free_dummy_resources();
+
/* State assignment. */
void set_viewport(int origin_x, int origin_y, int width, int height);
void set_scissor(int scissor_x, int scissor_y, int scissor_width, int scissor_height);
@@ -720,9 +782,37 @@ class MTLContext : public Context {
{
return MTLContext::global_memory_manager;
}
- /* Uniform Buffer Bindings to command encoders. */
- id<MTLBuffer> get_null_buffer();
- id<MTLBuffer> get_null_attribute_buffer();
+
+ /* Swapchain and latency management. */
+ static void latency_resolve_average(int64_t frame_latency_us)
+ {
+ int64_t avg = 0;
+ int64_t frame_c = 0;
+ for (int i = MTL_FRAME_AVERAGE_COUNT - 1; i > 0; i--) {
+ MTLContext::frame_latency[i] = MTLContext::frame_latency[i - 1];
+ avg += MTLContext::frame_latency[i];
+ frame_c += (MTLContext::frame_latency[i] > 0) ? 1 : 0;
+ }
+ MTLContext::frame_latency[0] = frame_latency_us;
+ avg += MTLContext::frame_latency[0];
+ if (frame_c > 0) {
+ avg /= frame_c;
+ }
+ else {
+ avg = 0;
+ }
+ MTLContext::avg_drawable_latency_us = avg;
+ }
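A plain-C trace of this scheme shows the warm-up behavior: the divisor counts only the shifted history entries, so the average reads 0 on the first frame and slightly overshoots while the window fills (sketch mirrors the logic above):

#include <stdint.h>
#include <stdio.h>

#define FRAME_AVERAGE_COUNT 5
static int64_t latency[FRAME_AVERAGE_COUNT];

static int64_t resolve_average(int64_t frame_latency_us)
{
  int64_t avg = 0, frame_c = 0;
  for (int i = FRAME_AVERAGE_COUNT - 1; i > 0; i--) {
    latency[i] = latency[i - 1];
    avg += latency[i];
    frame_c += (latency[i] > 0) ? 1 : 0;
  }
  latency[0] = frame_latency_us;
  avg += latency[0];
  return (frame_c > 0) ? (avg / frame_c) : 0;
}

int main(void)
{
  /* Feeding 4000, 5000, 6000 microseconds yields 0, 9000, 7500. */
  printf("%lld %lld %lld\n",
         (long long)resolve_average(4000),
         (long long)resolve_average(5000),
         (long long)resolve_average(6000));
  return 0;
}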
+
+ private:
+ void set_ghost_context(GHOST_ContextHandle ghostCtxHandle);
+ void set_ghost_window(GHOST_WindowHandle ghostWinHandle);
};
+/* GHOST Context callback and present. */
+void present(MTLRenderPassDescriptor *blit_descriptor,
+ id<MTLRenderPipelineState> blit_pso,
+ id<MTLTexture> swapchain_texture,
+ id<CAMetalDrawable> drawable);
+
} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm
index 1302cf0dabd..a89339d0d14 100644
--- a/source/blender/gpu/metal/mtl_context.mm
+++ b/source/blender/gpu/metal/mtl_context.mm
@@ -5,13 +5,29 @@
*/
#include "mtl_context.hh"
#include "mtl_debug.hh"
+#include "mtl_framebuffer.hh"
+#include "mtl_immediate.hh"
+#include "mtl_memory.hh"
+#include "mtl_primitive.hh"
#include "mtl_shader.hh"
#include "mtl_shader_interface.hh"
#include "mtl_state.hh"
+#include "mtl_uniform_buffer.hh"
#include "DNA_userdef_types.h"
#include "GPU_capabilities.h"
+#include "GPU_matrix.h"
+#include "GPU_shader.h"
+#include "GPU_texture.h"
+#include "GPU_uniform_buffer.h"
+#include "GPU_vertex_buffer.h"
+#include "intern/gpu_matrix_private.h"
+
+#include "PIL_time.h"
+
+#include <fstream>
+#include <string>
using namespace blender;
using namespace blender::gpu;
@@ -21,21 +37,118 @@ namespace blender::gpu {
/* Global memory manager. */
MTLBufferPool MTLContext::global_memory_manager;
+/* Swapchain and latency management. */
+std::atomic<int> MTLContext::max_drawables_in_flight = 0;
+std::atomic<int64_t> MTLContext::avg_drawable_latency_us = 0;
+int64_t MTLContext::frame_latency[MTL_FRAME_AVERAGE_COUNT] = {0};
+
+/* -------------------------------------------------------------------- */
+/** \name GHOST Context interaction.
+ * \{ */
+
+void MTLContext::set_ghost_context(GHOST_ContextHandle ghostCtxHandle)
+{
+ GHOST_Context *ghost_ctx = reinterpret_cast<GHOST_Context *>(ghostCtxHandle);
+ BLI_assert(ghost_ctx != nullptr);
+
+ /* Release old MTLTexture handle */
+ if (default_fbo_mtltexture_) {
+ [default_fbo_mtltexture_ release];
+ default_fbo_mtltexture_ = nil;
+ }
+
+ /* Release Framebuffer attachments */
+ MTLFrameBuffer *mtl_front_left = static_cast<MTLFrameBuffer *>(this->front_left);
+ MTLFrameBuffer *mtl_back_left = static_cast<MTLFrameBuffer *>(this->back_left);
+ mtl_front_left->remove_all_attachments();
+ mtl_back_left->remove_all_attachments();
+
+ GHOST_ContextCGL *ghost_cgl_ctx = dynamic_cast<GHOST_ContextCGL *>(ghost_ctx);
+ if (ghost_cgl_ctx != NULL) {
+ default_fbo_mtltexture_ = ghost_cgl_ctx->metalOverlayTexture();
+
+ MTL_LOG_INFO(
+ "Binding GHOST context CGL %p to GPU context %p. (Device: %p, queue: %p, texture: %p)\n",
+ ghost_cgl_ctx,
+ this,
+ this->device,
+ this->queue,
+ default_fbo_gputexture_);
+
+ /* Check if the GHOST Context provides a default framebuffer: */
+ if (default_fbo_mtltexture_) {
+
+ /* Release old GPUTexture handle */
+ if (default_fbo_gputexture_) {
+ GPU_texture_free(wrap(static_cast<Texture *>(default_fbo_gputexture_)));
+ default_fbo_gputexture_ = nullptr;
+ }
+
+ /* Retain handle */
+ [default_fbo_mtltexture_ retain];
+
+ /* Create front and back buffers. */
+ /* Create gpu::MTLTexture objects */
+ default_fbo_gputexture_ = new gpu::MTLTexture(
+ "MTL_BACKBUFFER", GPU_RGBA16F, GPU_TEXTURE_2D, default_fbo_mtltexture_);
+
+ /* Update framebuffers with new texture attachments */
+ mtl_front_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+ mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+#ifndef NDEBUG
+ this->label = default_fbo_mtltexture_.label;
+#endif
+ }
+ else {
+
+ /* Add default texture for cases where no other framebuffer is bound */
+ if (!default_fbo_gputexture_) {
+ default_fbo_gputexture_ = static_cast<gpu::MTLTexture *>(
+ unwrap(GPU_texture_create_2d(__func__, 16, 16, 1, GPU_RGBA16F, nullptr)));
+ }
+ mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+
+ MTL_LOG_INFO(
+ "-- Bound context %p for GPU context: %p is offscreen and does not have a default "
+ "framebuffer\n",
+ ghost_cgl_ctx,
+ this);
+#ifndef NDEBUG
+ this->label = @"Offscreen Metal Context";
+#endif
+ }
+ }
+ else {
+ MTL_LOG_INFO(
+ "[ERROR] Failed to bind GHOST context to MTLContext -- GHOST_ContextCGL is null "
+ "(GhostContext: %p, GhostContext_CGL: %p)\n",
+ ghost_ctx,
+ ghost_cgl_ctx);
+ BLI_assert(false);
+ }
+}
+
+void MTLContext::set_ghost_window(GHOST_WindowHandle ghostWinHandle)
+{
+ GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghostWinHandle);
+ this->set_ghost_context((GHOST_ContextHandle)(ghostWin ? ghostWin->getContext() : NULL));
+}
+
+/** \} */
+
/* -------------------------------------------------------------------- */
/** \name MTLContext
* \{ */
/* Placeholder functions */
-MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command_buffer(*this)
+MTLContext::MTLContext(void *ghost_window, void *ghost_context)
+ : memory_manager(*this), main_command_buffer(*this)
{
/* Init debug. */
debug::mtl_debug_init();
- /* Device creation.
- * TODO(Metal): This is a temporary initialization path to enable testing of features
- * and shader compilation tests. Future functionality should fetch the existing device
- * from GHOST_ContextCGL.mm. Plumbing to be updated in future. */
- this->device = MTLCreateSystemDefaultDevice();
+ /* Initialize render pass and framebuffer state. */
+ this->back_left = nullptr;
/* Initialize command buffer state. */
this->main_command_buffer.prepare();
@@ -47,10 +160,35 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command
is_inside_frame_ = false;
current_frame_index_ = 0;
- /* Prepare null data buffer */
+ /* Prepare null data buffer. */
null_buffer_ = nil;
null_attribute_buffer_ = nil;
+ /* Zero-initialise MTL Textures. */
+ default_fbo_mtltexture_ = nil;
+ default_fbo_gputexture_ = nullptr;
+
+  /* Fetch the GHOST context and its Metal device/queue. */
+ ghost_window_ = ghost_window;
+ if (ghost_window_ && ghost_context == NULL) {
+ /* NOTE(Metal): Fetch ghost_context from ghost_window if it is not provided.
+     * Regardless of whether we are windowed or not, we need access to the GhostContext
+     * for presentation and device/queue access. */
+ GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghost_window_);
+ ghost_context = (ghostWin ? ghostWin->getContext() : NULL);
+ }
+ BLI_assert(ghost_context);
+ this->ghost_context_ = static_cast<GHOST_ContextCGL *>(ghost_context);
+ this->queue = (id<MTLCommandQueue>)this->ghost_context_->metalCommandQueue();
+ this->device = (id<MTLDevice>)this->ghost_context_->metalDevice();
+ BLI_assert(this->queue);
+ BLI_assert(this->device);
+ [this->queue retain];
+ [this->device retain];
+
+ /* Register present callback. */
+ this->ghost_context_->metalRegisterPresentCallback(&present);
+
/* Create FrameBuffer handles. */
MTLFrameBuffer *mtl_front_left = new MTLFrameBuffer(this, "front_left");
MTLFrameBuffer *mtl_back_left = new MTLFrameBuffer(this, "back_left");
@@ -66,6 +204,7 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command
/* Initialize Metal modules. */
this->memory_manager.init();
this->state_manager = new MTLStateManager(this);
+ this->imm = new MTLImmediate(this);
/* Ensure global memory manager is initialized. */
MTLContext::global_memory_manager.init(this->device);
@@ -99,9 +238,29 @@ MTLContext::~MTLContext()
this->end_frame();
}
}
+
+ /* Release Memory Manager */
+ this->get_scratchbuffer_manager().free();
+
/* Release update/blit shaders. */
this->get_texture_utils().cleanup();
+ /* Detach resource references */
+ GPU_texture_unbind_all();
+
+ /* Unbind UBOs */
+ for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
+ if (this->pipeline_state.ubo_bindings[i].bound &&
+ this->pipeline_state.ubo_bindings[i].ubo != nullptr) {
+ GPUUniformBuf *ubo = wrap(
+ static_cast<UniformBuf *>(this->pipeline_state.ubo_bindings[i].ubo));
+ GPU_uniformbuf_unbind(ubo);
+ }
+ }
+
+ /* Release Dummy resources */
+ this->free_dummy_resources();
+
/* Release Sampler States. */
for (int i = 0; i < GPU_SAMPLER_MAX; i++) {
if (sampler_state_cache_[i] != nil) {
@@ -109,12 +268,28 @@ MTLContext::~MTLContext()
sampler_state_cache_[i] = nil;
}
}
+
+ /* Empty cached sampler argument buffers. */
+ for (auto entry : cached_sampler_buffers_.values()) {
+ entry->free();
+ }
+ cached_sampler_buffers_.clear();
+
+ /* Free null buffers. */
if (null_buffer_) {
[null_buffer_ release];
}
if (null_attribute_buffer_) {
[null_attribute_buffer_ release];
}
+
+ /* Free Metal objects. */
+ if (this->queue) {
+ [this->queue release];
+ }
+ if (this->device) {
+ [this->device release];
+ }
}
void MTLContext::begin_frame()
@@ -146,20 +321,49 @@ void MTLContext::check_error(const char *info)
void MTLContext::activate()
{
- /* TODO(Metal): Implement. */
+ /* Make sure no other context is already bound to this thread. */
+ BLI_assert(is_active_ == false);
+ is_active_ = true;
+ thread_ = pthread_self();
+
+ /* Re-apply ghost window/context for resizing */
+ if (ghost_window_) {
+ this->set_ghost_window((GHOST_WindowHandle)ghost_window_);
+ }
+ else if (ghost_context_) {
+ this->set_ghost_context((GHOST_ContextHandle)ghost_context_);
+ }
+
+ /* Reset UBO bind state. */
+ for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
+ if (this->pipeline_state.ubo_bindings[i].bound &&
+ this->pipeline_state.ubo_bindings[i].ubo != nullptr) {
+ this->pipeline_state.ubo_bindings[i].bound = false;
+ this->pipeline_state.ubo_bindings[i].ubo = nullptr;
+ }
+ }
+
+ /* Ensure imm active. */
+ immActivate();
}
+
void MTLContext::deactivate()
{
- /* TODO(Metal): Implement. */
+ BLI_assert(this->is_active_on_thread());
+ /* Flush context on deactivate. */
+ this->flush();
+ is_active_ = false;
+ immDeactivate();
}
void MTLContext::flush()
{
- /* TODO(Metal): Implement. */
+ this->main_command_buffer.submit(false);
}
+
void MTLContext::finish()
{
- /* TODO(Metal): Implement. */
+ this->main_command_buffer.submit(true);
}
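+
+/* Illustrative sketch: `flush` submits the current command buffer without blocking, while
+ * `finish` also waits for GPU completion before returning.
+ *
+ *   ctx->flush();   // Submit pending work; the CPU continues immediately.
+ *   ctx->finish();  // Submit pending work; the CPU blocks until the GPU is done.
+ */
+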
void MTLContext::memory_statistics_get(int *total_mem, int *free_mem)
@@ -200,10 +404,8 @@ id<MTLRenderCommandEncoder> MTLContext::ensure_begin_render_pass()
/* Ensure command buffer workload submissions are optimal --
* Though do not split a batch mid-IMM recording. */
- /* TODO(Metal): Add IMM Check once MTLImmediate has been implemented. */
- if (this->main_command_buffer.do_break_submission()
- // && !((MTLImmediate *)(this->imm))->imm_is_recording()
- ) {
+ if (this->main_command_buffer.do_break_submission() &&
+ !((MTLImmediate *)(this->imm))->imm_is_recording()) {
this->flush();
}
@@ -294,6 +496,72 @@ id<MTLBuffer> MTLContext::get_null_attribute_buffer()
return null_attribute_buffer_;
}
+gpu::MTLTexture *MTLContext::get_dummy_texture(eGPUTextureType type)
+{
+ /* Decrement 1 from texture type as they start from 1 and go to 32 (inclusive). Remap to 0..31 */
+ gpu::MTLTexture *dummy_tex = dummy_textures_[type - 1];
+ if (dummy_tex != nullptr) {
+ return dummy_tex;
+ }
+ else {
+ GPUTexture *tex = nullptr;
+ switch (type) {
+ case GPU_TEXTURE_1D:
+ tex = GPU_texture_create_1d("Dummy 1D", 128, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_1D_ARRAY:
+ tex = GPU_texture_create_1d_array("Dummy 1DArray", 128, 1, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_2D:
+ tex = GPU_texture_create_2d("Dummy 2D", 128, 128, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_2D_ARRAY:
+ tex = GPU_texture_create_2d_array("Dummy 2DArray", 128, 128, 1, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_3D:
+ tex = GPU_texture_create_3d(
+ "Dummy 3D", 128, 128, 1, 1, GPU_RGBA8, GPU_DATA_UBYTE, nullptr);
+ break;
+ case GPU_TEXTURE_CUBE:
+ tex = GPU_texture_create_cube("Dummy Cube", 128, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_CUBE_ARRAY:
+ tex = GPU_texture_create_cube_array("Dummy CubeArray", 128, 1, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_BUFFER:
+ if (!dummy_verts_) {
+ GPU_vertformat_clear(&dummy_vertformat_);
+ GPU_vertformat_attr_add(&dummy_vertformat_, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+ dummy_verts_ = GPU_vertbuf_create_with_format_ex(&dummy_vertformat_, GPU_USAGE_STATIC);
+ GPU_vertbuf_data_alloc(dummy_verts_, 64);
+ }
+ tex = GPU_texture_create_from_vertbuf("Dummy TextureBuffer", dummy_verts_);
+ break;
+ default:
+ BLI_assert_msg(false, "Unrecognised texture type");
+ return nullptr;
+ }
+ gpu::MTLTexture *metal_tex = static_cast<gpu::MTLTexture *>(reinterpret_cast<Texture *>(tex));
+ dummy_textures_[type - 1] = metal_tex;
+ return metal_tex;
+ }
+ return nullptr;
+}
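+
+/* Usage sketch (illustrative): fetching a dummy texture to satisfy an unbound shader slot.
+ * `ctx`, `rps` and `slot` stand in for the context, render pass state and slot index in scope
+ * at the call site.
+ *
+ *   gpu::MTLTexture *fallback = ctx->get_dummy_texture(GPU_TEXTURE_2D);
+ *   rps.bind_fragment_texture(fallback->get_metal_handle(), slot);
+ */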
+
+void MTLContext::free_dummy_resources()
+{
+ for (int tex = 0; tex < GPU_TEXTURE_BUFFER; tex++) {
+ if (dummy_textures_[tex]) {
+ GPU_texture_free(
+ reinterpret_cast<GPUTexture *>(static_cast<Texture *>(dummy_textures_[tex])));
+ dummy_textures_[tex] = nullptr;
+ }
+ }
+ if (dummy_verts_) {
+ GPU_vertbuf_discard(dummy_verts_);
+ }
+}
+
/** \} */
/* -------------------------------------------------------------------- */
@@ -440,6 +708,755 @@ void MTLContext::set_scissor_enabled(bool scissor_enabled)
/** \} */
/* -------------------------------------------------------------------- */
+/** \name Command Encoder and pipeline state
+ * These utilities ensure that all of the globally bound resources and state have been
+ * correctly encoded within the current RenderCommandEncoder. This involves managing
+ * buffer bindings, texture bindings, depth stencil state and dynamic pipeline state.
+ *
+ * We also trigger compilation of new PSOs where the input state has changed
+ * and a matching PSO does not yet exist.
+ * All of this setup is required to perform a valid draw call.
+ * \{ */
+
+bool MTLContext::ensure_render_pipeline_state(MTLPrimitiveType mtl_prim_type)
+{
+ BLI_assert(this->pipeline_state.initialised);
+
+ /* Check if an active shader is bound. */
+ if (!this->pipeline_state.active_shader) {
+ MTL_LOG_WARNING("No Metal shader for bound GL shader\n");
+ return false;
+ }
+
+ /* Also ensure active shader is valid. */
+ if (!this->pipeline_state.active_shader->is_valid()) {
+ MTL_LOG_WARNING(
+ "Bound active shader is not valid (Missing/invalid implementation for Metal).\n", );
+ return false;
+ }
+
+ /* Apply global state. */
+ this->state_manager->apply_state();
+
+ /* Main command buffer tracks the current state of the render pass, based on bound
+ * MTLFrameBuffer. */
+ MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+ /* Debug Check: Ensure Framebuffer instance is not dirty. */
+ BLI_assert(!this->main_command_buffer.get_active_framebuffer()->get_dirty());
+
+ /* Fetch shader interface. */
+ MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
+ if (shader_interface == nullptr) {
+ MTL_LOG_WARNING("Bound active shader does not have a valid shader interface!\n", );
+ return false;
+ }
+
+ /* Fetch shader and bake valid PipelineStateObject (PSO) based on current
+ * shader and state combination. This PSO represents the final GPU-executable
+ * permutation of the shader. */
+ MTLRenderPipelineStateInstance *pipeline_state_instance =
+ this->pipeline_state.active_shader->bake_current_pipeline_state(
+ this, mtl_prim_type_to_topology_class(mtl_prim_type));
+ if (!pipeline_state_instance) {
+ MTL_LOG_ERROR("Failed to bake Metal pipeline state for shader: %s\n",
+ shader_interface->get_name());
+ return false;
+ }
+
+ bool result = false;
+ if (pipeline_state_instance->pso) {
+
+ /* Fetch render command encoder. A render pass should already be active.
+ * This will be NULL if invalid. */
+ id<MTLRenderCommandEncoder> rec =
+ this->main_command_buffer.get_active_render_command_encoder();
+ BLI_assert(rec);
+ if (rec == nil) {
+ MTL_LOG_ERROR("ensure_render_pipeline_state called while render pass is not active.\n");
+ return false;
+ }
+
+ /* Bind Render Pipeline State. */
+ BLI_assert(pipeline_state_instance->pso);
+ if (rps.bound_pso != pipeline_state_instance->pso) {
+ [rec setRenderPipelineState:pipeline_state_instance->pso];
+ rps.bound_pso = pipeline_state_instance->pso;
+ }
+
+ /** Ensure resource bindings. */
+ /* Texture Bindings. */
+ /* We will iterate through all texture bindings on the context and determine if any of the
+ * active slots match those in our shader interface. If so, textures will be bound. */
+ if (shader_interface->get_total_textures() > 0) {
+ this->ensure_texture_bindings(rec, shader_interface, pipeline_state_instance);
+ }
+
+ /* Transform feedback buffer binding. */
+    /* TODO(Metal): Include this code once MTLVertBuf is merged. We bind the vertex buffer to
+     * which transform feedback data will be written. */
+ // GPUVertBuf *tf_vbo =
+ // this->pipeline_state.active_shader->get_transform_feedback_active_buffer();
+ // if (tf_vbo != nullptr && pipeline_state_instance->transform_feedback_buffer_index >= 0) {
+
+ // /* Ensure primitive type is either GPU_LINES, GPU_TRIANGLES or GPU_POINT */
+ // BLI_assert(mtl_prim_type == MTLPrimitiveTypeLine ||
+ // mtl_prim_type == MTLPrimitiveTypeTriangle ||
+ // mtl_prim_type == MTLPrimitiveTypePoint);
+
+ // /* Fetch active transform feedback buffer from vertbuf */
+ // MTLVertBuf *tf_vbo_mtl = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(tf_vbo));
+ // int tf_buffer_offset = 0;
+ // id<MTLBuffer> tf_buffer_mtl = tf_vbo_mtl->get_metal_buffer(&tf_buffer_offset);
+
+ // if (tf_buffer_mtl != nil && tf_buffer_offset >= 0) {
+ // [rec setVertexBuffer:tf_buffer_mtl
+ // offset:tf_buffer_offset
+ // atIndex:pipeline_state_instance->transform_feedback_buffer_index];
+ // printf("Successfully bound VBO: %p for transform feedback (MTL Buffer: %p)\n",
+ // tf_vbo_mtl,
+ // tf_buffer_mtl);
+ // }
+ // }
+
+ /* Matrix Bindings. */
+ /* This is now called upon shader bind. We may need to re-evaluate this though,
+     * as it was previously done here to ensure uniform changes between draws were tracked.
+ * NOTE(Metal): We may be able to remove this. */
+ GPU_matrix_bind(reinterpret_cast<struct GPUShader *>(
+ static_cast<Shader *>(this->pipeline_state.active_shader)));
+
+ /* Bind Uniforms */
+ this->ensure_uniform_buffer_bindings(rec, shader_interface, pipeline_state_instance);
+
+ /* Bind Null attribute buffer, if needed. */
+ if (pipeline_state_instance->null_attribute_buffer_index >= 0) {
+ if (G.debug & G_DEBUG_GPU) {
+ MTL_LOG_INFO("Binding null attribute buffer at index: %d\n",
+ pipeline_state_instance->null_attribute_buffer_index);
+ }
+ rps.bind_vertex_buffer(this->get_null_attribute_buffer(),
+ 0,
+ pipeline_state_instance->null_attribute_buffer_index);
+ }
+
+ /** Dynamic Per-draw Render State on RenderCommandEncoder. */
+ /* State: Viewport. */
+ if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_VIEWPORT_FLAG) {
+ MTLViewport viewport;
+ viewport.originX = (double)this->pipeline_state.viewport_offset_x;
+ viewport.originY = (double)this->pipeline_state.viewport_offset_y;
+ viewport.width = (double)this->pipeline_state.viewport_width;
+ viewport.height = (double)this->pipeline_state.viewport_height;
+ viewport.znear = this->pipeline_state.depth_stencil_state.depth_range_near;
+ viewport.zfar = this->pipeline_state.depth_stencil_state.depth_range_far;
+ [rec setViewport:viewport];
+
+ this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+ ~MTL_PIPELINE_STATE_VIEWPORT_FLAG);
+ }
+
+ /* State: Scissor. */
+ if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_SCISSOR_FLAG) {
+
+ /* Get FrameBuffer associated with active RenderCommandEncoder. */
+ MTLFrameBuffer *render_fb = this->main_command_buffer.get_active_framebuffer();
+
+ MTLScissorRect scissor;
+ if (this->pipeline_state.scissor_enabled) {
+ scissor.x = this->pipeline_state.scissor_x;
+ scissor.y = this->pipeline_state.scissor_y;
+ scissor.width = this->pipeline_state.scissor_width;
+ scissor.height = this->pipeline_state.scissor_height;
+
+          /* Some scissor assignments exceed the bounds of the viewport due to implicitly added
+ * padding to the width/height - Clamp width/height. */
+ BLI_assert(scissor.x >= 0 && scissor.x < render_fb->get_width());
+ BLI_assert(scissor.y >= 0 && scissor.y < render_fb->get_height());
+ scissor.width = min_ii(scissor.width, render_fb->get_width() - scissor.x);
+ scissor.height = min_ii(scissor.height, render_fb->get_height() - scissor.y);
+ BLI_assert(scissor.width > 0 && (scissor.x + scissor.width <= render_fb->get_width()));
+      BLI_assert(scissor.height > 0 && (scissor.y + scissor.height <= render_fb->get_height()));
+ }
+ else {
+ /* Scissor is disabled, reset to default size as scissor state may have been previously
+ * assigned on this encoder. */
+ scissor.x = 0;
+ scissor.y = 0;
+ scissor.width = render_fb->get_width();
+ scissor.height = render_fb->get_height();
+ }
+
+ /* Scissor state can still be flagged as changed if it is toggled on and off, without
+ * parameters changing between draws. */
+ if (memcmp(&scissor, &rps.last_scissor_rect, sizeof(MTLScissorRect))) {
+ [rec setScissorRect:scissor];
+ rps.last_scissor_rect = scissor;
+ }
+ this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+ ~MTL_PIPELINE_STATE_SCISSOR_FLAG);
+ }
+
+ /* State: Face winding. */
+ if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_FRONT_FACING_FLAG) {
+      /* We need to invert the face winding in Metal, to account for the inverted-Y coordinate
+ * system. */
+ MTLWinding winding = (this->pipeline_state.front_face == GPU_CLOCKWISE) ?
+ MTLWindingClockwise :
+ MTLWindingCounterClockwise;
+ [rec setFrontFacingWinding:winding];
+ this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+ ~MTL_PIPELINE_STATE_FRONT_FACING_FLAG);
+ }
+
+ /* State: cullmode. */
+ if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_CULLMODE_FLAG) {
+
+ MTLCullMode mode = MTLCullModeNone;
+ if (this->pipeline_state.culling_enabled) {
+ switch (this->pipeline_state.cull_mode) {
+ case GPU_CULL_NONE:
+ mode = MTLCullModeNone;
+ break;
+ case GPU_CULL_FRONT:
+ mode = MTLCullModeFront;
+ break;
+ case GPU_CULL_BACK:
+ mode = MTLCullModeBack;
+ break;
+ default:
+ BLI_assert_unreachable();
+ break;
+ }
+ }
+ [rec setCullMode:mode];
+ this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+ ~MTL_PIPELINE_STATE_CULLMODE_FLAG);
+ }
+
+ /* Pipeline state is now good. */
+ result = true;
+ }
+ return result;
+}
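+
+/* The expected per-draw sequence, as a sketch (illustrative; error handling elided):
+ *
+ *   id<MTLRenderCommandEncoder> rec = ctx->ensure_begin_render_pass();
+ *   if (ctx->ensure_render_pipeline_state(MTLPrimitiveTypeTriangle)) {
+ *     ctx->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle);
+ *     [rec drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:vert_count];
+ *   }
+ */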
+
+/* Bind uniform buffers to the active render command encoder using the rendering state of the
+ * current context (active shader, bound UBOs). */
+bool MTLContext::ensure_uniform_buffer_bindings(
+ id<MTLRenderCommandEncoder> rec,
+ const MTLShaderInterface *shader_interface,
+ const MTLRenderPipelineStateInstance *pipeline_state_instance)
+{
+ /* Fetch Render Pass state. */
+ MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  /* Shader-owned push constant block for uniforms. */
+ bool active_shader_changed = (rps.last_bound_shader_state.shader_ !=
+ this->pipeline_state.active_shader ||
+ rps.last_bound_shader_state.shader_ == nullptr ||
+ rps.last_bound_shader_state.pso_index_ !=
+ pipeline_state_instance->shader_pso_index);
+
+ const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
+ if (push_constant_block.size > 0) {
+
+    /* Fetch the uniform buffer base binding index from pipeline_state_instance -- the buffer
+     * index will be offset by the number of bound VBOs. */
+ uint32_t block_size = push_constant_block.size;
+ uint32_t buffer_index = pipeline_state_instance->base_uniform_buffer_index +
+ push_constant_block.buffer_index;
+
+    /* Only need to rebind block if push constants have been modified -- or if no data is bound
+     * for the current RenderCommandEncoder. NOTE: The trailing `|| true` below currently forces a
+     * rebind on every draw; the preceding checks document the intended fast path. */
+ if (this->pipeline_state.active_shader->get_push_constant_is_dirty() ||
+ active_shader_changed || !rps.cached_vertex_buffer_bindings[buffer_index].is_bytes ||
+ !rps.cached_fragment_buffer_bindings[buffer_index].is_bytes || true) {
+
+ /* Bind push constant data. */
+ BLI_assert(this->pipeline_state.active_shader->get_push_constant_data() != nullptr);
+ rps.bind_vertex_bytes(
+ this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index);
+ rps.bind_fragment_bytes(
+ this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index);
+
+ /* Only need to rebind block if it has been modified. */
+ this->pipeline_state.active_shader->push_constant_bindstate_mark_dirty(false);
+ }
+ }
+ rps.last_bound_shader_state.set(this->pipeline_state.active_shader,
+ pipeline_state_instance->shader_pso_index);
+
+  /* Bind Global GPUUniformBuffers. */
+  /* Iterate through expected UBOs in the shader interface and check whether the globally bound
+   * ones match. This supports the gpu_uniformbuffer module, where the uniform data is global and
+   * not owned by the shader instance. */
+ for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) {
+ const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index);
+
+ if (ubo.buffer_index >= 0) {
+
+ const uint32_t buffer_index = ubo.buffer_index;
+ int ubo_offset = 0;
+ id<MTLBuffer> ubo_buffer = nil;
+ int ubo_size = 0;
+
+ bool bind_dummy_buffer = false;
+ if (this->pipeline_state.ubo_bindings[buffer_index].bound) {
+
+ /* Fetch UBO global-binding properties from slot. */
+ ubo_offset = 0;
+ ubo_buffer = this->pipeline_state.ubo_bindings[buffer_index].ubo->get_metal_buffer(
+ &ubo_offset);
+ ubo_size = this->pipeline_state.ubo_bindings[buffer_index].ubo->get_size();
+
+ /* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid
+ * allocating zero buffers. */
+ if (ubo_buffer == nil) {
+ bind_dummy_buffer = true;
+ }
+ else {
+ BLI_assert(ubo_buffer != nil);
+ BLI_assert(ubo_size > 0);
+
+ if (pipeline_state_instance->reflection_data_available) {
+ /* NOTE: While the vertex and fragment stages have different UBOs, the indices in each
+ * case will be the same for the same UBO.
+ * We also determine expected size and then ensure buffer of the correct size
+ * exists in one of the vertex/fragment shader binding tables. This path is used
+ * to verify that the size of the bound UBO matches what is expected in the shader. */
+ uint32_t expected_size =
+ (buffer_index <
+ pipeline_state_instance->buffer_bindings_reflection_data_vert.size()) ?
+ pipeline_state_instance->buffer_bindings_reflection_data_vert[buffer_index]
+ .size :
+ 0;
+ if (expected_size == 0) {
+ expected_size =
+ (buffer_index <
+ pipeline_state_instance->buffer_bindings_reflection_data_frag.size()) ?
+ pipeline_state_instance->buffer_bindings_reflection_data_frag[buffer_index]
+ .size :
+ 0;
+ }
+ BLI_assert_msg(
+ expected_size > 0,
+ "Shader interface expects UBO, but shader reflection data reports that it "
+ "is not present");
+
+ /* If ubo size is smaller than the size expected by the shader, we need to bind the
+ * dummy buffer, which will be big enough, to avoid an OOB error. */
+ if (ubo_size < expected_size) {
+ MTL_LOG_INFO(
+ "[Error][UBO] UBO (UBO Name: %s) bound at index: %d with size %d (Expected size "
+ "%d) (Shader Name: %s) is too small -- binding NULL buffer. This is likely an "
+ "over-binding, which is not used, but we need this to avoid validation "
+ "issues\n",
+ shader_interface->get_name_at_offset(ubo.name_offset),
+ buffer_index,
+ ubo_size,
+ expected_size,
+ shader_interface->get_name());
+ bind_dummy_buffer = true;
+ }
+ }
+ }
+ }
+ else {
+ MTL_LOG_INFO(
+ "[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- but "
+ "nothing was bound -- binding dummy buffer\n",
+ shader_interface->get_name(),
+ shader_interface->get_name_at_offset(ubo.name_offset),
+ buffer_index);
+ bind_dummy_buffer = true;
+ }
+
+ if (bind_dummy_buffer) {
+ /* Perform Dummy binding. */
+ ubo_offset = 0;
+ ubo_buffer = this->get_null_buffer();
+ ubo_size = [ubo_buffer length];
+ }
+
+ if (ubo_buffer != nil) {
+
+ uint32_t buffer_bind_index = pipeline_state_instance->base_uniform_buffer_index +
+ buffer_index;
+
+ /* Bind Vertex UBO. */
+ if (bool(ubo.stage_mask & ShaderStage::VERTEX)) {
+ BLI_assert(buffer_bind_index >= 0 &&
+ buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ rps.bind_vertex_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
+ }
+
+ /* Bind Fragment UBOs. */
+ if (bool(ubo.stage_mask & ShaderStage::FRAGMENT)) {
+ BLI_assert(buffer_bind_index >= 0 &&
+ buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ rps.bind_fragment_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
+ }
+ }
+ else {
+ MTL_LOG_WARNING(
+ "[UBO] Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer "
+ "is NULL!\n",
+ shader_interface->get_name(),
+ shader_interface->get_name_at_offset(ubo.name_offset),
+ buffer_index);
+ }
+ }
+ }
+ return true;
+}
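+
+/* Worked example (illustrative): buffer slots below `base_uniform_buffer_index` are reserved
+ * for vertex/index buffer bindings, so with two bound VBOs a UBO at shader `buffer_index` 1
+ * lands in Metal buffer slot 3:
+ *
+ *   uint32_t buffer_bind_index = pipeline_state_instance->base_uniform_buffer_index  // 2
+ *                                + ubo.buffer_index;                                 // + 1 == 3
+ */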
+
+/* Ensure texture bindings are correct and up to date for the current draw call. */
+void MTLContext::ensure_texture_bindings(
+ id<MTLRenderCommandEncoder> rec,
+ MTLShaderInterface *shader_interface,
+ const MTLRenderPipelineStateInstance *pipeline_state_instance)
+{
+ BLI_assert(shader_interface != nil);
+ BLI_assert(rec != nil);
+
+ /* Fetch Render Pass state. */
+ MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+ @autoreleasepool {
+ int vertex_arg_buffer_bind_index = -1;
+ int fragment_arg_buffer_bind_index = -1;
+
+    /* Argument buffers are used for samplers when the limit of 16 is exceeded. */
+ bool use_argument_buffer_for_samplers = shader_interface->get_use_argument_buffer_for_samplers(
+ &vertex_arg_buffer_bind_index, &fragment_arg_buffer_bind_index);
+
+    /* Loop through expected textures in the shader interface and resolve bindings with currently
+     * bound textures. */
+ for (const uint t : IndexRange(shader_interface->get_max_texture_index() + 1)) {
+ /* Ensure the bound texture is compatible with the shader interface. If the
+ * shader does not expect a texture to be bound for the current slot, we skip
+ * binding.
+ * NOTE: Global texture bindings may be left over from prior draw calls. */
+ const MTLShaderTexture &shader_texture_info = shader_interface->get_texture(t);
+ if (!shader_texture_info.used) {
+ /* Skip unused binding points if explicit indices are specified. */
+ continue;
+ }
+
+ int slot = shader_texture_info.slot_index;
+ if (slot >= 0 && slot < GPU_max_textures()) {
+ bool bind_dummy_texture = true;
+ if (this->pipeline_state.texture_bindings[slot].used) {
+ gpu::MTLTexture *bound_texture =
+ this->pipeline_state.texture_bindings[slot].texture_resource;
+ MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot];
+ BLI_assert(bound_texture);
+ BLI_assert(bound_sampler.used);
+
+ if (shader_texture_info.type == bound_texture->type_) {
+ /* Bind texture and sampler if the bound texture matches the type expected by the
+ * shader. */
+ id<MTLTexture> tex = bound_texture->get_metal_handle();
+
+ if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) {
+ rps.bind_vertex_texture(tex, slot);
+ rps.bind_vertex_sampler(bound_sampler, use_argument_buffer_for_samplers, slot);
+ }
+
+ if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) {
+ rps.bind_fragment_texture(tex, slot);
+ rps.bind_fragment_sampler(bound_sampler, use_argument_buffer_for_samplers, slot);
+ }
+
+ /* Texture state resolved, no need to bind dummy texture */
+ bind_dummy_texture = false;
+ }
+ else {
+ /* Texture type for bound texture (e.g. Texture2DArray) does not match what was
+ * expected in the shader interface. This is a problem and we will need to bind
+ * a dummy texture to ensure correct API usage. */
+ MTL_LOG_WARNING(
+ "(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong "
+ "texture target type. (Expecting type %d, actual type %d) (binding "
+ "name:'%s')(texture name:'%s')\n",
+ shader_interface->get_name(),
+ bound_texture,
+ slot,
+ shader_texture_info.type,
+ bound_texture->type_,
+ shader_interface->get_name_at_offset(shader_texture_info.name_offset),
+ bound_texture->get_name());
+ }
+ }
+ else {
+ MTL_LOG_WARNING(
+ "Shader '%s' expected texture to be bound to slot %d -- No texture was "
+ "bound. (name:'%s')\n",
+ shader_interface->get_name(),
+ slot,
+ shader_interface->get_name_at_offset(shader_texture_info.name_offset));
+ }
+
+        /* Bind dummy texture -- this temporarily resolves validation issues where incorrect
+         * formats are provided, as certain configurations may not need any binding. These issues
+         * should be fixed at the high level if problems crop up. */
+ if (bind_dummy_texture) {
+ if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) {
+ rps.bind_vertex_texture(
+ get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot);
+
+ /* Bind default sampler state. */
+ MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
+ rps.bind_vertex_sampler(default_binding, use_argument_buffer_for_samplers, slot);
+ }
+ if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) {
+ rps.bind_fragment_texture(
+ get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot);
+
+ /* Bind default sampler state. */
+ MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
+ rps.bind_fragment_sampler(default_binding, use_argument_buffer_for_samplers, slot);
+ }
+ }
+ }
+ else {
+ MTL_LOG_WARNING(
+ "Shader %p expected texture to be bound to slot %d -- Slot exceeds the "
+ "hardware/API limit of '%d'. (name:'%s')\n",
+ this->pipeline_state.active_shader,
+ slot,
+ GPU_max_textures(),
+ shader_interface->get_name_at_offset(shader_texture_info.name_offset));
+ }
+ }
+
+ /* Construct and Bind argument buffer.
+ * NOTE(Metal): Samplers use an argument buffer when the limit of 16 samplers is exceeded. */
+ if (use_argument_buffer_for_samplers) {
+#ifndef NDEBUG
+ /* Debug check to validate each expected texture in the shader interface has a valid
+ * sampler object bound to the context. We will need all of these to be valid
+ * when constructing the sampler argument buffer. */
+ for (const uint i : IndexRange(shader_interface->get_max_texture_index() + 1)) {
+ const MTLShaderTexture &texture = shader_interface->get_texture(i);
+ if (texture.used) {
+ BLI_assert(this->samplers_.mtl_sampler[i] != nil);
+ }
+ }
+#endif
+
+ /* Check to ensure the buffer binding index for the argument buffer has been assigned.
+ * This PSO property will be set if we expect to use argument buffers, and the shader
+ * uses any amount of textures. */
+ BLI_assert(vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0);
+ if (vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0) {
+ /* Offset binding index to be relative to the start of static uniform buffer binding slots.
+ * The first N slots, prior to `pipeline_state_instance->base_uniform_buffer_index` are
+ * used by vertex and index buffer bindings, and the number of buffers present will vary
+ * between PSOs. */
+ int arg_buffer_idx = (pipeline_state_instance->base_uniform_buffer_index +
+ vertex_arg_buffer_bind_index);
+        BLI_assert(arg_buffer_idx < 32);
+ id<MTLArgumentEncoder> argument_encoder = shader_interface->find_argument_encoder(
+ arg_buffer_idx);
+ if (argument_encoder == nil) {
+ argument_encoder = [pipeline_state_instance->vert
+ newArgumentEncoderWithBufferIndex:arg_buffer_idx];
+ shader_interface->insert_argument_encoder(arg_buffer_idx, argument_encoder);
+ }
+
+        /* Generate or fetch the argument buffer sampler configuration.
+         * NOTE(Metal): We need to base sampler counts on the maximal texture
+         * index. This is not optimal, but in practice it is not a common
+         * use-case when argument buffers are required.
+         * This is because, with explicit texture indices, the binding indices
+         * should match across draws, allowing the high-level to optimise bind-points. */
+ gpu::MTLBuffer *encoder_buffer = nullptr;
+ this->samplers_.num_samplers = shader_interface->get_max_texture_index() + 1;
+
+ gpu::MTLBuffer **cached_smp_buffer_search = this->cached_sampler_buffers_.lookup_ptr(
+ this->samplers_);
+ if (cached_smp_buffer_search != nullptr) {
+ encoder_buffer = *cached_smp_buffer_search;
+ }
+ else {
+ /* Populate argument buffer with current global sampler bindings. */
+ int size = [argument_encoder encodedLength];
+ int alignment = max_uu([argument_encoder alignment], 256);
+ int size_align_delta = (size % alignment);
+ int aligned_alloc_size = ((alignment > 1) && (size_align_delta > 0)) ?
+ size + (alignment - (size % alignment)) :
+ size;
+
+ /* Allocate buffer to store encoded sampler arguments. */
+ encoder_buffer = MTLContext::get_global_memory_manager().allocate(aligned_alloc_size,
+ true);
+ BLI_assert(encoder_buffer);
+ BLI_assert(encoder_buffer->get_metal_buffer());
+ [argument_encoder setArgumentBuffer:encoder_buffer->get_metal_buffer() offset:0];
+ [argument_encoder
+ setSamplerStates:this->samplers_.mtl_sampler
+ withRange:NSMakeRange(0, shader_interface->get_max_texture_index() + 1)];
+ encoder_buffer->flush();
+
+ /* Insert into cache. */
+ this->cached_sampler_buffers_.add_new(this->samplers_, encoder_buffer);
+ }
+
+ BLI_assert(encoder_buffer != nullptr);
+ int vert_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
+ vertex_arg_buffer_bind_index);
+ rps.bind_vertex_buffer(encoder_buffer->get_metal_buffer(), 0, vert_buffer_index);
+
+      /* The fragment shader shares its argument buffer binding with the vertex shader, so there
+       * is no need to re-encode; we can reuse the same argument buffer. */
+ if (fragment_arg_buffer_bind_index >= 0) {
+ BLI_assert(fragment_arg_buffer_bind_index);
+ int frag_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
+ fragment_arg_buffer_bind_index);
+ rps.bind_fragment_buffer(encoder_buffer->get_metal_buffer(), 0, frag_buffer_index);
+ }
+ }
+ }
+ }
+}
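+
+/* Worked example (illustrative) of the sampler argument buffer size rounding above: with
+ * size = 300 and alignment = 256, size_align_delta = 44, so the aligned allocation is
+ * 300 + (256 - 44) = 512 bytes -- the next multiple of the encoder alignment.
+ */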
+
+/* Encode latest depth-stencil state. */
+void MTLContext::ensure_depth_stencil_state(MTLPrimitiveType prim_type)
+{
+ /* Check if we need to update state. */
+ if (!(this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG)) {
+ return;
+ }
+
+ /* Fetch render command encoder. */
+ id<MTLRenderCommandEncoder> rec = this->main_command_buffer.get_active_render_command_encoder();
+ BLI_assert(rec);
+
+ /* Fetch Render Pass state. */
+ MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+ /** Prepare Depth-stencil state based on current global pipeline state. */
+ MTLFrameBuffer *fb = this->get_current_framebuffer();
+ bool hasDepthTarget = fb->has_depth_attachment();
+ bool hasStencilTarget = fb->has_stencil_attachment();
+
+ if (hasDepthTarget || hasStencilTarget) {
+ /* Update FrameBuffer State. */
+ this->pipeline_state.depth_stencil_state.has_depth_target = hasDepthTarget;
+ this->pipeline_state.depth_stencil_state.has_stencil_target = hasStencilTarget;
+
+ /* Check if current MTLContextDepthStencilState maps to an existing state object in
+ * the Depth-stencil state cache. */
+ id<MTLDepthStencilState> ds_state = nil;
+ id<MTLDepthStencilState> *depth_stencil_state_lookup =
+ this->depth_stencil_state_cache.lookup_ptr(this->pipeline_state.depth_stencil_state);
+
+ /* If not, populate DepthStencil state descriptor. */
+ if (depth_stencil_state_lookup == nullptr) {
+
+ MTLDepthStencilDescriptor *ds_state_desc = [[[MTLDepthStencilDescriptor alloc] init]
+ autorelease];
+
+ if (hasDepthTarget) {
+ ds_state_desc.depthWriteEnabled =
+ this->pipeline_state.depth_stencil_state.depth_write_enable;
+ ds_state_desc.depthCompareFunction =
+ this->pipeline_state.depth_stencil_state.depth_test_enabled ?
+ this->pipeline_state.depth_stencil_state.depth_function :
+ MTLCompareFunctionAlways;
+ }
+
+ if (hasStencilTarget) {
+ ds_state_desc.backFaceStencil.readMask =
+ this->pipeline_state.depth_stencil_state.stencil_read_mask;
+ ds_state_desc.backFaceStencil.writeMask =
+ this->pipeline_state.depth_stencil_state.stencil_write_mask;
+ ds_state_desc.backFaceStencil.stencilFailureOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_back_stencil_fail;
+ ds_state_desc.backFaceStencil.depthFailureOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_back_depth_fail;
+ ds_state_desc.backFaceStencil.depthStencilPassOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_back_depthstencil_pass;
+ ds_state_desc.backFaceStencil.stencilCompareFunction =
+ (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
+ this->pipeline_state.depth_stencil_state.stencil_func :
+ MTLCompareFunctionAlways;
+
+ ds_state_desc.frontFaceStencil.readMask =
+ this->pipeline_state.depth_stencil_state.stencil_read_mask;
+ ds_state_desc.frontFaceStencil.writeMask =
+ this->pipeline_state.depth_stencil_state.stencil_write_mask;
+ ds_state_desc.frontFaceStencil.stencilFailureOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_front_stencil_fail;
+ ds_state_desc.frontFaceStencil.depthFailureOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_front_depth_fail;
+ ds_state_desc.frontFaceStencil.depthStencilPassOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_front_depthstencil_pass;
+ ds_state_desc.frontFaceStencil.stencilCompareFunction =
+ (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
+ this->pipeline_state.depth_stencil_state.stencil_func :
+ MTLCompareFunctionAlways;
+ }
+
+ /* Bake new DS state. */
+ ds_state = [this->device newDepthStencilStateWithDescriptor:ds_state_desc];
+
+ /* Store state in cache. */
+ BLI_assert(ds_state != nil);
+ this->depth_stencil_state_cache.add_new(this->pipeline_state.depth_stencil_state, ds_state);
+ }
+ else {
+ ds_state = *depth_stencil_state_lookup;
+ BLI_assert(ds_state != nil);
+ }
+
+ /* Bind Depth Stencil State to render command encoder. */
+ BLI_assert(ds_state != nil);
+ if (ds_state != nil) {
+ if (rps.bound_ds_state != ds_state) {
+ [rec setDepthStencilState:ds_state];
+ rps.bound_ds_state = ds_state;
+ }
+ }
+
+ /* Apply dynamic depth-stencil state on encoder. */
+ if (hasStencilTarget) {
+ uint32_t stencil_ref_value =
+ (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
+ this->pipeline_state.depth_stencil_state.stencil_ref :
+ 0;
+ if (stencil_ref_value != rps.last_used_stencil_ref_value) {
+ [rec setStencilReferenceValue:stencil_ref_value];
+ rps.last_used_stencil_ref_value = stencil_ref_value;
+ }
+ }
+
+ if (hasDepthTarget) {
+ bool doBias = false;
+ switch (prim_type) {
+ case MTLPrimitiveTypeTriangle:
+ case MTLPrimitiveTypeTriangleStrip:
+ doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_tris;
+ break;
+ case MTLPrimitiveTypeLine:
+ case MTLPrimitiveTypeLineStrip:
+ doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_lines;
+ break;
+ case MTLPrimitiveTypePoint:
+ doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_points;
+ break;
+ }
+ [rec setDepthBias:(doBias) ? this->pipeline_state.depth_stencil_state.depth_bias : 0
+ slopeScale:(doBias) ? this->pipeline_state.depth_stencil_state.depth_slope_scale : 0
+ clamp:0];
+ }
+ }
+}
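+
+/* Cache behaviour sketch (illustrative): only the first draw with a given depth/stencil
+ * configuration pays for descriptor creation and state baking; later draws reduce to a map
+ * lookup, and to no API call at all when the encoder already has that state bound.
+ *
+ *   id<MTLDepthStencilState> *hit = depth_stencil_state_cache.lookup_ptr(key);
+ *   id<MTLDepthStencilState> ds = hit ? *hit : bake_and_cache(key);  // `bake_and_cache` is
+ *                                                                    // hypothetical shorthand.
+ */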
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
/** \name Visibility buffer control for MTLQueryPool.
* \{ */
@@ -606,4 +1623,148 @@ id<MTLSamplerState> MTLContext::get_default_sampler_state()
/** \} */
+/* -------------------------------------------------------------------- */
+/** \name Swapchain management and Metal presentation.
+ * \{ */
+
+void present(MTLRenderPassDescriptor *blit_descriptor,
+ id<MTLRenderPipelineState> blit_pso,
+ id<MTLTexture> swapchain_texture,
+ id<CAMetalDrawable> drawable)
+{
+
+ MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
+ BLI_assert(ctx);
+
+  /* Flush any outstanding work. */
+ ctx->flush();
+
+  /* Always pace the CPU to a maximum of 3 drawables in flight.
+   * nextDrawable may have more in flight if backing swapchain
+   * textures are re-allocated, such as during resize events.
+ *
+ * Determine frames in flight based on current latency. If
+ * we are in a high-latency situation, limit frames in flight
+ * to increase app responsiveness and keep GPU execution under control.
+ * If latency improves, increase frames in flight to improve overall
+ * performance. */
+ int perf_max_drawables = MTL_MAX_DRAWABLES;
+ if (MTLContext::avg_drawable_latency_us > 185000) {
+ perf_max_drawables = 1;
+ }
+ else if (MTLContext::avg_drawable_latency_us > 85000) {
+ perf_max_drawables = 2;
+ }
+
+ while (MTLContext::max_drawables_in_flight > min_ii(perf_max_drawables, MTL_MAX_DRAWABLES)) {
+ PIL_sleep_ms(2);
+ }
+
+  /* Present is submitted in its own command buffer to ensure the drawable reference is released
+   * as early as possible. This command buffer is separate, as it does not utilise the global
+   * rendering state in the way the main context does. */
+ id<MTLCommandBuffer> cmdbuf = [ctx->queue commandBuffer];
+ MTLCommandBufferManager::num_active_cmd_bufs++;
+
+ if (MTLCommandBufferManager::sync_event != nil) {
+ /* Ensure command buffer ordering. */
+ [cmdbuf encodeWaitForEvent:MTLCommandBufferManager::sync_event
+ value:MTLCommandBufferManager::event_signal_val];
+ }
+
+ /* Do Present Call and final Blit to MTLDrawable. */
+ id<MTLRenderCommandEncoder> enc = [cmdbuf renderCommandEncoderWithDescriptor:blit_descriptor];
+ [enc setRenderPipelineState:blit_pso];
+ [enc setFragmentTexture:swapchain_texture atIndex:0];
+ [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3];
+ [enc endEncoding];
+
+ /* Present drawable. */
+ BLI_assert(drawable);
+ [cmdbuf presentDrawable:drawable];
+
+ /* Ensure freed buffers have usage tracked against active CommandBuffer submissions. */
+ MTLSafeFreeList *cmd_free_buffer_list =
+ MTLContext::get_global_memory_manager().get_current_safe_list();
+ BLI_assert(cmd_free_buffer_list);
+
+ id<MTLCommandBuffer> cmd_buffer_ref = cmdbuf;
+ [cmd_buffer_ref retain];
+
+ /* Increment drawables in flight limiter. */
+ MTLContext::max_drawables_in_flight++;
+ std::chrono::time_point submission_time = std::chrono::high_resolution_clock::now();
+
+ /* Increment free pool reference and decrement upon command buffer completion. */
+ cmd_free_buffer_list->increment_reference();
+ [cmdbuf addCompletedHandler:^(id<MTLCommandBuffer> cb) {
+ /* Flag freed buffers associated with this CMD buffer as ready to be freed. */
+ cmd_free_buffer_list->decrement_reference();
+ [cmd_buffer_ref release];
+
+ /* Decrement count */
+ MTLCommandBufferManager::num_active_cmd_bufs--;
+ MTL_LOG_INFO("[Metal] Active command buffers: %d\n",
+ MTLCommandBufferManager::num_active_cmd_bufs);
+
+ /* Drawable count and latency management. */
+ MTLContext::max_drawables_in_flight--;
+ std::chrono::time_point completion_time = std::chrono::high_resolution_clock::now();
+ int64_t microseconds_per_frame = std::chrono::duration_cast<std::chrono::microseconds>(
+ completion_time - submission_time)
+ .count();
+ MTLContext::latency_resolve_average(microseconds_per_frame);
+
+ MTL_LOG_INFO("Frame Latency: %f ms (Rolling avg: %f ms Drawables: %d)\n",
+ ((float)microseconds_per_frame) / 1000.0f,
+ ((float)MTLContext::avg_drawable_latency_us) / 1000.0f,
+ perf_max_drawables);
+ }];
+
+ if (MTLCommandBufferManager::sync_event == nil) {
+ MTLCommandBufferManager::sync_event = [ctx->device newEvent];
+ BLI_assert(MTLCommandBufferManager::sync_event);
+ [MTLCommandBufferManager::sync_event retain];
+ }
+ BLI_assert(MTLCommandBufferManager::sync_event != nil);
+
+ MTLCommandBufferManager::event_signal_val++;
+ [cmdbuf encodeSignalEvent:MTLCommandBufferManager::sync_event
+ value:MTLCommandBufferManager::event_signal_val];
+
+ [cmdbuf commit];
+
+ /* When debugging, fetch advanced command buffer errors. */
+ if (G.debug & G_DEBUG_GPU) {
+ [cmdbuf waitUntilCompleted];
+ NSError *error = [cmdbuf error];
+ if (error != nil) {
+ NSLog(@"%@", error);
+ BLI_assert(false);
+
+ @autoreleasepool {
+ const char *stringAsChar = [[NSString stringWithFormat:@"%@", error] UTF8String];
+
+ std::ofstream outfile;
+ outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
+ outfile << stringAsChar;
+ outfile.close();
+ }
+ }
+ else {
+ @autoreleasepool {
+ NSString *str = @"Command buffer completed successfully!\n";
+ const char *stringAsChar = [str UTF8String];
+
+ std::ofstream outfile;
+ outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
+ outfile << stringAsChar;
+ outfile.close();
+ }
+ }
+ }
+}
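+
+/* Pacing summary (values from `present` above; presentation illustrative): the rolling average
+ * drawable latency selects the in-flight budget before the next drawable is acquired.
+ *
+ *   avg latency > 185 ms  -> 1 drawable in flight
+ *   avg latency >  85 ms  -> 2 drawables in flight
+ *   otherwise             -> MTL_MAX_DRAWABLES
+ */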
+
+/** \} */
+
} // blender::gpu
diff --git a/source/blender/gpu/metal/mtl_drawlist.hh b/source/blender/gpu/metal/mtl_drawlist.hh
new file mode 100644
index 00000000000..9eb465b26a0
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_drawlist.hh
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Placeholder for Multi Draw Indirect on the Metal backend.
+ */
+
+#pragma once
+
+#include "gpu_drawlist_private.hh"
+
+namespace blender {
+namespace gpu {
+
+/**
+ * TODO(Metal): MTLDrawList implementation. Included as a temporary stub.
+ */
+class MTLDrawList : public DrawList {
+ public:
+ MTLDrawList(int length) {}
+ ~MTLDrawList() {}
+
+ void append(GPUBatch *batch, int i_first, int i_count) override {}
+ void submit() override {}
+
+ MEM_CXX_CLASS_ALLOC_FUNCS("MTLDrawList");
+};
+
+} // namespace gpu
+} // namespace blender
diff --git a/source/blender/gpu/metal/mtl_immediate.hh b/source/blender/gpu/metal/mtl_immediate.hh
new file mode 100644
index 00000000000..b743efb397d
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_immediate.hh
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Mimics old-style OpenGL immediate-mode drawing.
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+#include "gpu_immediate_private.hh"
+
+#include <Cocoa/Cocoa.h>
+#include <Metal/Metal.h>
+#include <QuartzCore/QuartzCore.h>
+
+namespace blender::gpu {
+
+class MTLImmediate : public Immediate {
+ private:
+ MTLContext *context_ = nullptr;
+ MTLTemporaryBuffer current_allocation_;
+ MTLPrimitiveTopologyClass metal_primitive_mode_;
+ MTLPrimitiveType metal_primitive_type_;
+ bool has_begun_ = false;
+
+ public:
+ MTLImmediate(MTLContext *ctx);
+ ~MTLImmediate();
+
+ uchar *begin() override;
+ void end() override;
+ bool imm_is_recording()
+ {
+ return has_begun_;
+ }
+};
+
+} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_immediate.mm b/source/blender/gpu/metal/mtl_immediate.mm
new file mode 100644
index 00000000000..41632e39092
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_immediate.mm
@@ -0,0 +1,397 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Mimics old-style OpenGL immediate-mode drawing.
+ */
+
+#include "BKE_global.h"
+
+#include "GPU_vertex_format.h"
+#include "gpu_context_private.hh"
+#include "gpu_shader_private.hh"
+#include "gpu_vertex_format_private.h"
+
+#include "mtl_context.hh"
+#include "mtl_debug.hh"
+#include "mtl_immediate.hh"
+#include "mtl_primitive.hh"
+#include "mtl_shader.hh"
+
+namespace blender::gpu {
+
+MTLImmediate::MTLImmediate(MTLContext *ctx)
+{
+ context_ = ctx;
+}
+
+MTLImmediate::~MTLImmediate()
+{
+}
+
+uchar *MTLImmediate::begin()
+{
+ BLI_assert(!has_begun_);
+
+ /* Determine primitive type. */
+ metal_primitive_type_ = gpu_prim_type_to_metal(this->prim_type);
+ metal_primitive_mode_ = mtl_prim_type_to_topology_class(metal_primitive_type_);
+ has_begun_ = true;
+
+ /* Allocate a range of data and return host-accessible pointer. */
+ const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_len);
+ current_allocation_ = context_->get_scratchbuffer_manager()
+ .scratch_buffer_allocate_range_aligned(bytes_needed, 256);
+ [current_allocation_.metal_buffer retain];
+ return reinterpret_cast<uchar *>(current_allocation_.data);
+}
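+
+/* Usage sketch from the high level (illustrative; uses the generic GPU immediate API, with
+ * `pos` standing in for a previously looked-up attribute):
+ *
+ *   immBegin(GPU_PRIM_TRIS, 3);          // -> MTLImmediate::begin() allocates scratch space.
+ *   immVertex3f(pos, 0.0f, 0.0f, 0.0f);
+ *   immVertex3f(pos, 1.0f, 0.0f, 0.0f);
+ *   immVertex3f(pos, 0.0f, 1.0f, 0.0f);
+ *   immEnd();                            // -> MTLImmediate::end() encodes the draw.
+ */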
+
+void MTLImmediate::end()
+{
+  /* Ensure we're between an imm::begin/imm::end pair. */
+ BLI_assert(has_begun_);
+ BLI_assert(prim_type != GPU_PRIM_NONE);
+
+ /* Verify context is valid, vertex data is written and a valid shader is bound. */
+ if (context_ && this->vertex_idx > 0 && this->shader) {
+
+ MTLShader *active_mtl_shader = static_cast<MTLShader *>(unwrap(shader));
+
+ /* Skip draw if Metal shader is not valid. */
+ if (active_mtl_shader == nullptr || !active_mtl_shader->is_valid() ||
+ active_mtl_shader->get_interface() == nullptr) {
+
+      /* Avoid passing NULL to the "%s" format below. */
+      const char *ptr = (active_mtl_shader) ? active_mtl_shader->name_get() : "<null>";
+ MTL_LOG_WARNING(
+ "MTLImmediate::end -- cannot perform draw as active shader is NULL or invalid (likely "
+ "unimplemented) (shader %p '%s')\n",
+ active_mtl_shader,
+ ptr);
+ return;
+ }
+
+ /* Ensure we are inside a render pass and fetch active RenderCommandEncoder. */
+ id<MTLRenderCommandEncoder> rec = context_->ensure_begin_render_pass();
+ BLI_assert(rec != nil);
+
+ /* Fetch active render pipeline state. */
+ MTLRenderPassState &rps = context_->main_command_buffer.get_render_pass_state();
+
+ /* Bind Shader. */
+ GPU_shader_bind(this->shader);
+
+ /* Debug markers for frame-capture and detailed error messages. */
+ if (G.debug & G_DEBUG_GPU) {
+ [rec pushDebugGroup:[NSString
+ stringWithFormat:@"immEnd(verts: %d, shader: %s)",
+ this->vertex_idx,
+ active_mtl_shader->get_interface()->get_name()]];
+ [rec insertDebugSignpost:[NSString stringWithFormat:@"immEnd(verts: %d, shader: %s)",
+ this->vertex_idx,
+ active_mtl_shader->get_interface()
+ ->get_name()]];
+ }
+
+ /* Populate pipeline state vertex descriptor. */
+ MTLStateManager *state_manager = static_cast<MTLStateManager *>(
+ MTLContext::get()->state_manager);
+ MTLRenderPipelineStateDescriptor &desc = state_manager->get_pipeline_descriptor();
+ const MTLShaderInterface *interface = active_mtl_shader->get_interface();
+
+ desc.vertex_descriptor.num_attributes = interface->get_total_attributes();
+ desc.vertex_descriptor.num_vert_buffers = 1;
+
+ for (int i = 0; i < desc.vertex_descriptor.num_attributes; i++) {
+ desc.vertex_descriptor.attributes[i].format = MTLVertexFormatInvalid;
+ }
+ desc.vertex_descriptor.uses_ssbo_vertex_fetch =
+ active_mtl_shader->get_uses_ssbo_vertex_fetch();
+ desc.vertex_descriptor.num_ssbo_attributes = 0;
+
+    /* SSBO Vertex Fetch -- Begin attribute binding. */
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+ active_mtl_shader->ssbo_vertex_fetch_bind_attributes_begin();
+
+ /* Disable Indexed rendering in SSBO vertex fetch. */
+ int uniform_ssbo_use_indexed = active_mtl_shader->uni_ssbo_uses_indexed_rendering;
+      BLI_assert_msg(uniform_ssbo_use_indexed != -1,
+                     "Expected valid uniform location for ssbo_uses_indexed_rendering.");
+ int uses_indexed_rendering = 0;
+ active_mtl_shader->uniform_int(uniform_ssbo_use_indexed, 1, 1, &uses_indexed_rendering);
+ }
+
+ /* Populate Vertex descriptor and verify attributes.
+ * TODO(Metal): Cache this vertex state based on Vertex format and shaders. */
+ for (int i = 0; i < interface->get_total_attributes(); i++) {
+
+      /* NOTE: An attribute in the vertex format does not necessarily share the same array index
+       * as attributes in the shader interface. */
+ GPUVertAttr *attr = nullptr;
+ const MTLShaderInputAttribute &mtl_shader_attribute = interface->get_attribute(i);
+
+ /* Scan through vertex_format attributes until one with a name matching the shader interface
+ * is found. */
+ for (uint32_t a_idx = 0; a_idx < this->vertex_format.attr_len && attr == nullptr; a_idx++) {
+ GPUVertAttr *check_attribute = &this->vertex_format.attrs[a_idx];
+
+ /* Attributes can have multiple name aliases associated with them. */
+ for (uint32_t n_idx = 0; n_idx < check_attribute->name_len; n_idx++) {
+ const char *name = GPU_vertformat_attr_name_get(
+ &this->vertex_format, check_attribute, n_idx);
+
+ if (strcmp(name, interface->get_name_at_offset(mtl_shader_attribute.name_offset)) == 0) {
+ attr = check_attribute;
+ break;
+ }
+ }
+ }
+
+ BLI_assert_msg(attr != nullptr,
+ "Could not find expected attribute in immediate mode vertex format.");
+ if (attr == nullptr) {
+ MTL_LOG_ERROR(
+ "MTLImmediate::end Could not find matching attribute '%s' from Shader Interface in "
+ "Vertex Format! - TODO: Bind Dummy attribute\n",
+ interface->get_name_at_offset(mtl_shader_attribute.name_offset));
+ return;
+ }
+
+ /* Determine whether implicit type conversion between input vertex format
+ * and shader interface vertex format is supported. */
+ MTLVertexFormat convertedFormat;
+ bool can_use_implicit_conversion = mtl_convert_vertex_format(
+ mtl_shader_attribute.format,
+ (GPUVertCompType)attr->comp_type,
+ attr->comp_len,
+ (GPUVertFetchMode)attr->fetch_mode,
+ &convertedFormat);
+
+ if (can_use_implicit_conversion) {
+ /* Metal API can implicitly convert some formats during vertex assembly:
+ * - Converting from a normalized short2 format to float2
+ * - Type truncation e.g. Float4 to Float2.
+ * - Type expansion from Float3 to Float4.
+ * - Note: extra components are filled with the corresponding components of (0,0,0,1).
+ * (See
+ * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format)
+ */
+ bool is_floating_point_format = (attr->comp_type == GPU_COMP_F32);
+ desc.vertex_descriptor.attributes[i].format = convertedFormat;
+ desc.vertex_descriptor.attributes[i].format_conversion_mode =
+ (is_floating_point_format) ? (GPUVertFetchMode)GPU_FETCH_FLOAT :
+ (GPUVertFetchMode)GPU_FETCH_INT;
+ BLI_assert(convertedFormat != MTLVertexFormatInvalid);
+ }
+ else {
+ /* Some conversions are NOT valid, e.g. Int4 to Float4
+ * - In this case, we need to implement a conversion routine inside the shader.
+ * - This is handled using the format_conversion_mode flag
+ * - This flag is passed into the PSO as a function specialisation,
+ * and will generate an appropriate conversion function when reading the vertex attribute
+ * value into local shader storage.
+       * (If no explicit conversion is needed, the function specializes to a pass-through.) */
+ MTLVertexFormat converted_format;
+ bool can_convert = mtl_vertex_format_resize(
+ mtl_shader_attribute.format, attr->comp_len, &converted_format);
+ desc.vertex_descriptor.attributes[i].format = (can_convert) ? converted_format :
+ mtl_shader_attribute.format;
+ desc.vertex_descriptor.attributes[i].format_conversion_mode = (GPUVertFetchMode)
+ attr->fetch_mode;
+ BLI_assert(desc.vertex_descriptor.attributes[i].format != MTLVertexFormatInvalid);
+ }
+      /* Use the attribute offset from the vertex format, as this will be correct. */
+ desc.vertex_descriptor.attributes[i].offset = attr->offset;
+ desc.vertex_descriptor.attributes[i].buffer_index = mtl_shader_attribute.buffer_index;
+
+ /* SSBO Vertex Fetch Attribute bind. */
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+ BLI_assert_msg(mtl_shader_attribute.buffer_index == 0,
+ "All attributes should be in buffer index zero");
+ MTLSSBOAttribute ssbo_attr(
+ mtl_shader_attribute.index,
+ mtl_shader_attribute.buffer_index,
+ attr->offset,
+ this->vertex_format.stride,
+ MTLShader::ssbo_vertex_type_to_attr_type(desc.vertex_descriptor.attributes[i].format),
+ false);
+ desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] =
+ ssbo_attr;
+ desc.vertex_descriptor.num_ssbo_attributes++;
+ active_mtl_shader->ssbo_vertex_fetch_bind_attribute(ssbo_attr);
+ }
+ }
+
+ /* Buffer bindings for singular vertex buffer. */
+ desc.vertex_descriptor.buffer_layouts[0].step_function = MTLVertexStepFunctionPerVertex;
+ desc.vertex_descriptor.buffer_layouts[0].step_rate = 1;
+ desc.vertex_descriptor.buffer_layouts[0].stride = this->vertex_format.stride;
+ BLI_assert(this->vertex_format.stride > 0);
+
+    /* SSBO Vertex Fetch -- Finalise attribute bindings and set status uniforms. */
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+ active_mtl_shader->ssbo_vertex_fetch_bind_attributes_end(rec);
+
+ /* Set Status uniforms. */
+ BLI_assert_msg(active_mtl_shader->uni_ssbo_input_prim_type_loc != -1,
+ "ssbo_input_prim_type uniform location invalid!");
+ BLI_assert_msg(active_mtl_shader->uni_ssbo_input_vert_count_loc != -1,
+ "ssbo_input_vert_count uniform location invalid!");
+ GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)),
+ active_mtl_shader->uni_ssbo_input_prim_type_loc,
+ 1,
+ 1,
+ (const int *)(&this->prim_type));
+ GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)),
+ active_mtl_shader->uni_ssbo_input_vert_count_loc,
+ 1,
+ 1,
+ (const int *)(&this->vertex_idx));
+ }
+
+ MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);
+ if (context_->ensure_render_pipeline_state(mtl_prim_type)) {
+
+ /* Issue draw call. */
+ BLI_assert(this->vertex_idx > 0);
+
+      /* The Metal API does not support triangle fans, so we emulate this
+       * input data by generating an index buffer that re-maps vertices into
+       * a triangle list.
+ *
+ * NOTE(Metal): Consider caching generated triangle fan index buffers.
+ * For immediate mode, generating these is currently very cheap, as we use
+ * fast scratch buffer allocations. Though we may benefit from caching of
+ * frequently used buffer sizes. */
+ if (mtl_needs_topology_emulation(this->prim_type)) {
+
+ /* Debug safety check for SSBO FETCH MODE. */
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+ BLI_assert(false && "Topology emulation not supported with SSBO Vertex Fetch mode");
+ }
+
+ /* Emulate Tri-fan. */
+ if (this->prim_type == GPU_PRIM_TRI_FAN) {
+ /* Prepare Triangle-Fan emulation index buffer on CPU based on number of input
+ * vertices. */
+ uint32_t base_vert_count = this->vertex_idx;
+ uint32_t num_triangles = max_ii(base_vert_count - 2, 0);
+ uint32_t fan_index_count = num_triangles * 3;
+ BLI_assert(num_triangles > 0);
+
+ uint32_t alloc_size = sizeof(uint32_t) * fan_index_count;
+ uint32_t *index_buffer = nullptr;
+
+ MTLTemporaryBuffer allocation =
+ context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(
+ alloc_size, 128);
+ index_buffer = (uint32_t *)allocation.data;
+
+ int a = 0;
+ for (int i = 0; i < num_triangles; i++) {
+ index_buffer[a++] = 0;
+ index_buffer[a++] = i + 1;
+ index_buffer[a++] = i + 2;
+ }
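+
+          /* Worked example (illustrative): 5 fan vertices yield num_triangles = 3 and indices
+           * (0,1,2), (0,2,3), (0,3,4) -- every emitted triangle shares vertex 0. */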
+
+ @autoreleasepool {
+
+ id<MTLBuffer> index_buffer_mtl = nil;
+ uint32_t index_buffer_offset = 0;
+
+ /* Region of scratch buffer used for topology emulation element data.
+ * NOTE(Metal): We do not need to manually flush as the entire scratch
+ * buffer for current command buffer is flushed upon submission. */
+ index_buffer_mtl = allocation.metal_buffer;
+ index_buffer_offset = allocation.buffer_offset;
+
+ /* Set depth stencil state (requires knowledge of primitive type). */
+ context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle);
+
+ /* Bind Vertex Buffer. */
+ rps.bind_vertex_buffer(
+ current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0);
+
+ /* Draw. */
+ [rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle
+ indexCount:fan_index_count
+ indexType:MTLIndexTypeUInt32
+ indexBuffer:index_buffer_mtl
+ indexBufferOffset:index_buffer_offset];
+ }
+ }
+ else {
+ /* TODO(Metal): Topology emulation for line loop.
+ * NOTE(Metal): Line loops are currently not used in immediate mode;
+ * callers instead handle this case at a higher level for efficiency. */
+ BLI_assert_msg(false, "LineLoop requires emulation support in immediate mode.");
+ }
+ }
+ else {
+ MTLPrimitiveType primitive_type = metal_primitive_type_;
+ int vertex_count = this->vertex_idx;
+
+ /* Bind Vertex Buffer. */
+ rps.bind_vertex_buffer(
+ current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0);
+
+ /* Set depth stencil state (requires knowledge of primitive type). */
+ context_->ensure_depth_stencil_state(primitive_type);
+
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+
+ /* Bind Null Buffers for empty/missing bind slots. */
+ id<MTLBuffer> null_buffer = context_->get_null_buffer();
+ BLI_assert(null_buffer != nil);
+ for (int i = 1; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
+
+ /* We only need to ensure a buffer is bound to the context; its contents
+ * do not matter, as it will never be read. */
+ if (rps.cached_vertex_buffer_bindings[i].metal_buffer == nil) {
+ rps.bind_vertex_buffer(null_buffer, 0, i);
+ }
+ }
+
+ /* SSBO vertex fetch - Nullify elements buffer. */
+ if (rps.cached_vertex_buffer_bindings[MTL_SSBO_VERTEX_FETCH_IBO_INDEX].metal_buffer ==
+ nil) {
+ rps.bind_vertex_buffer(null_buffer, 0, MTL_SSBO_VERTEX_FETCH_IBO_INDEX);
+ }
+
+ /* Submit the draw call with a modified vertex count, reflecting the
+ * vertices-per-primitive output defined by the USE_SSBO_VERTEX_FETCH
+ * pragma. E.g. 8 input vertices drawn as lines form 4 primitives; a
+ * shader emitting 3 output vertices per primitive then draws 12. */
+ int num_input_primitives = gpu_get_prim_count_from_type(vertex_count, this->prim_type);
+ int output_num_verts = num_input_primitives *
+ active_mtl_shader->get_ssbo_vertex_fetch_output_num_verts();
+#ifndef NDEBUG
+ BLI_assert(
+ mtl_vertex_count_fits_primitive_type(
+ output_num_verts, active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type()) &&
+ "Output Vertex count is not compatible with the requested output vertex primitive "
+ "type");
+#endif
+ [rec drawPrimitives:active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type()
+ vertexStart:0
+ vertexCount:output_num_verts];
+ context_->main_command_buffer.register_draw_counters(output_num_verts);
+ }
+ else {
+ /* Regular draw. */
+ [rec drawPrimitives:primitive_type vertexStart:0 vertexCount:vertex_count];
+ context_->main_command_buffer.register_draw_counters(vertex_count);
+ }
+ }
+ }
+ if (G.debug & G_DEBUG_GPU) {
+ [rec popDebugGroup];
+ }
+ }
+
+ /* Reset allocation after draw submission. */
+ has_begun_ = false;
+ if (current_allocation_.metal_buffer) {
+ [current_allocation_.metal_buffer release];
+ current_allocation_.metal_buffer = nil;
+ }
+}
+
+}  // namespace blender::gpu
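
The fan re-mapping above follows the standard pattern: triangle i is {0, i + 1, i + 2}, so N input vertices yield N - 2 triangles. A minimal standalone sketch of the same mapping (plain C++; the helper name is hypothetical, not part of the patch):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    /* Build a triangle-list index buffer that reproduces a triangle fan,
     * mirroring the emulation loop above: triangle i = {0, i + 1, i + 2}. */
    static std::vector<uint32_t> fan_to_tri_list(uint32_t vert_count)
    {
      std::vector<uint32_t> indices;
      if (vert_count < 3) {
        return indices; /* Degenerate fan: no triangles. */
      }
      const uint32_t num_triangles = vert_count - 2;
      indices.reserve(size_t(num_triangles) * 3);
      for (uint32_t i = 0; i < num_triangles; i++) {
        indices.push_back(0);
        indices.push_back(i + 1);
        indices.push_back(i + 2);
      }
      return indices;
    }

    int main()
    {
      /* A 5-vertex fan expands to 3 triangles: 0 1 2, 0 2 3, 0 3 4. */
      for (uint32_t idx : fan_to_tri_list(5)) {
        printf("%u ", (unsigned)idx);
      }
      printf("\n");
      return 0;
    }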
diff --git a/source/blender/gpu/metal/mtl_memory.hh b/source/blender/gpu/metal/mtl_memory.hh
index df80df6543f..bd354376b12 100644
--- a/source/blender/gpu/metal/mtl_memory.hh
+++ b/source/blender/gpu/metal/mtl_memory.hh
@@ -340,13 +340,13 @@ class MTLBufferPool {
private:
/* Memory statistics. */
- long long int total_allocation_bytes_ = 0;
+ int64_t total_allocation_bytes_ = 0;
#if MTL_DEBUG_MEMORY_STATISTICS == 1
/* Debug statistics. */
std::atomic<int> per_frame_allocation_count_;
- std::atomic<long long int> allocations_in_pool_;
- std::atomic<long long int> buffers_in_pool_;
+ std::atomic<int64_t> allocations_in_pool_;
+ std::atomic<int64_t> buffers_in_pool_;
#endif
/* Metal resources. */
diff --git a/source/blender/gpu/metal/mtl_shader.hh b/source/blender/gpu/metal/mtl_shader.hh
index 64d9d1cf849..5485b32dd31 100644
--- a/source/blender/gpu/metal/mtl_shader.hh
+++ b/source/blender/gpu/metal/mtl_shader.hh
@@ -261,8 +261,6 @@ class MTLShader : public Shader {
bool get_push_constant_is_dirty();
void push_constant_bindstate_mark_dirty(bool is_dirty);
- void vertformat_from_shader(GPUVertFormat *format) const override;
-
/* DEPRECATED: Kept only because of BGL API. (Returning -1 in METAL). */
int program_handle_get() const override
{
diff --git a/source/blender/gpu/metal/mtl_shader.mm b/source/blender/gpu/metal/mtl_shader.mm
index 23097f312f0..3b27b60bca0 100644
--- a/source/blender/gpu/metal/mtl_shader.mm
+++ b/source/blender/gpu/metal/mtl_shader.mm
@@ -129,6 +129,7 @@ MTLShader::~MTLShader()
if (shd_builder_ != nullptr) {
delete shd_builder_;
+ shd_builder_ = nullptr;
}
}
@@ -209,6 +210,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
/* Release temporary compilation resources. */
delete shd_builder_;
+ shd_builder_ = nullptr;
return false;
}
}
@@ -279,6 +281,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
/* Release temporary compilation resources. */
delete shd_builder_;
+ shd_builder_ = nullptr;
return false;
}
}
@@ -324,6 +327,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
/* Release temporary compilation resources. */
delete shd_builder_;
+ shd_builder_ = nullptr;
return true;
}
@@ -535,28 +539,6 @@ void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty)
{
push_constant_modified_ = is_dirty;
}
-
-void MTLShader::vertformat_from_shader(GPUVertFormat *format) const
-{
- GPU_vertformat_clear(format);
-
- const MTLShaderInterface *mtl_interface = static_cast<const MTLShaderInterface *>(interface);
- for (const uint attr_id : IndexRange(mtl_interface->get_total_attributes())) {
- const MTLShaderInputAttribute &attr = mtl_interface->get_attribute(attr_id);
-
- /* Extract type parameters from Metal type. */
- GPUVertCompType comp_type = comp_type_from_vert_format(attr.format);
- uint comp_len = comp_count_from_vert_format(attr.format);
- GPUVertFetchMode fetch_mode = fetchmode_from_vert_format(attr.format);
-
- GPU_vertformat_attr_add(format,
- mtl_interface->get_name_at_offset(attr.name_offset),
- comp_type,
- comp_len,
- fetch_mode);
- }
-}
-
/** \} */
/* -------------------------------------------------------------------- */
@@ -1167,6 +1149,7 @@ void MTLShader::ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_at
MTLShaderInterface *mtl_interface = this->get_interface();
BLI_assert(ssbo_attr.mtl_attribute_index >= 0 &&
ssbo_attr.mtl_attribute_index < mtl_interface->get_total_attributes());
+ UNUSED_VARS_NDEBUG(mtl_interface);
/* Update bind-mask to verify this attribute has been used. */
BLI_assert((ssbo_vertex_attribute_bind_mask_ & (1 << ssbo_attr.mtl_attribute_index)) ==
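
The repeated shd_builder_ = nullptr; additions across MTLShader::finalize() guard against a double free: the destructor deletes the builder as well, so every path that releases it must also null the pointer. A minimal sketch of the pattern (hypothetical, simplified types; not the actual Blender classes):

    struct Builder { /* Temporary compilation state. */ };

    struct Shader {
      Builder *builder_ = new Builder();

      bool finalize(bool ok)
      {
        /* Release temporary compilation resources on every exit path, and
         * null the pointer so ~Shader() cannot free it a second time. */
        delete builder_;
        builder_ = nullptr;
        return ok;
      }

      ~Shader()
      {
        if (builder_ != nullptr) {
          delete builder_;
          builder_ = nullptr;
        }
      }
    };

    int main()
    {
      Shader sh;
      sh.finalize(true); /* Safe: the destructor sees nullptr and skips the delete. */
      return 0;
    }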
diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm
index 977e97dbd82..4a2be0753bb 100644
--- a/source/blender/gpu/metal/mtl_shader_generator.mm
+++ b/source/blender/gpu/metal/mtl_shader_generator.mm
@@ -724,10 +724,6 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
}
if (msl_iface.uses_ssbo_vertex_fetch_mode) {
ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl;
- ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_MAX_VBOS " << MTL_SSBO_VERTEX_FETCH_MAX_VBOS
- << std::endl;
- ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_IBO_INDEX " << MTL_SSBO_VERTEX_FETCH_IBO_INDEX
- << std::endl;
for (const MSLVertexInputAttribute &attr : msl_iface.vertex_input_attributes) {
ss_vertex << "#define SSBO_ATTR_TYPE_" << attr.name << " " << attr.type << std::endl;
}
diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm
index 4af46c13751..b4e913e5be6 100644
--- a/source/blender/gpu/metal/mtl_texture.mm
+++ b/source/blender/gpu/metal/mtl_texture.mm
@@ -12,6 +12,7 @@
#include "GPU_batch_presets.h"
#include "GPU_capabilities.h"
#include "GPU_framebuffer.h"
+#include "GPU_immediate.h"
#include "GPU_platform.h"
#include "GPU_state.h"
@@ -303,7 +304,6 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst,
/* Execute graphics draw call to perform the blit. */
GPUBatch *quad = GPU_batch_preset_quad();
-
GPU_batch_set_shader(quad, shader);
float w = dst->width_get();
@@ -337,6 +337,20 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst,
GPU_batch_draw(quad);
+ /* Temporary immediate-mode draw. TODO(Metal): Remove this once GPUBatch is supported. */
+ GPUVertFormat *imm_format = immVertexFormat();
+ uint pos = GPU_vertformat_attr_add(imm_format, "pos", GPU_COMP_F32, 2, GPU_FETCH_FLOAT);
+
+ immBindShader(shader);
+ immBegin(GPU_PRIM_TRI_STRIP, 4);
+ immVertex2f(pos, 1, 0);
+ immVertex2f(pos, 0, 0);
+ immVertex2f(pos, 1, 1);
+ immVertex2f(pos, 0, 1);
+ immEnd();
+ immUnbindProgram();
+ /**********************/
+
/* Restore the previous pipeline state. */
GPU_depth_mask(depth_write_prev);
GPU_stencil_write_mask_set(stencil_mask_prev);
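
The temporary blit above drives the new MTLImmediate path through Blender's immediate-mode API. A sketch of that calling pattern wrapped as a reusable helper (the helper name is hypothetical; the API calls are the same GPU_immediate.h functions used in the hunk):

    #include "GPU_immediate.h"

    /* Hypothetical helper wrapping the temporary immediate-mode blit above.
     * The vertex count passed to immBegin() must match the number of
     * immVertex2f() calls; on Metal the vertex data lands in scratch-buffer
     * memory that is flushed when the command buffer is submitted. */
    static void imm_draw_unit_quad(GPUShader *shader)
    {
      GPUVertFormat *format = immVertexFormat();
      uint pos = GPU_vertformat_attr_add(format, "pos", GPU_COMP_F32, 2, GPU_FETCH_FLOAT);

      immBindShader(shader);
      immBegin(GPU_PRIM_TRI_STRIP, 4);
      immVertex2f(pos, 1.0f, 0.0f);
      immVertex2f(pos, 0.0f, 0.0f);
      immVertex2f(pos, 1.0f, 1.0f);
      immVertex2f(pos, 0.0f, 1.0f);
      immEnd();
      immUnbindProgram();
    }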
@@ -1463,79 +1477,6 @@ bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo)
BLI_assert_msg(this->format_ != GPU_DEPTH24_STENCIL8,
"Apple silicon does not support GPU_DEPTH24_S8");
- MTLPixelFormat mtl_format = gpu_texture_format_to_metal(this->format_);
- mtl_max_mips_ = 1;
- mipmaps_ = 0;
- this->mip_range_set(0, 0);
-
- /* Create texture from GPUVertBuf's buffer. */
- MTLVertBuf *mtl_vbo = static_cast<MTLVertBuf *>(unwrap(vbo));
- mtl_vbo->bind();
- mtl_vbo->flag_used();
-
- /* Get Metal Buffer. */
- id<MTLBuffer> source_buffer = mtl_vbo->get_metal_buffer();
- BLI_assert(source_buffer);
-
- /* Verify size. */
- if (w_ <= 0) {
- MTL_LOG_WARNING("Allocating texture buffer of width 0!\n");
- w_ = 1;
- }
-
- /* Verify Texture and vertex buffer alignment. */
- int bytes_per_pixel = get_mtl_format_bytesize(mtl_format);
- int bytes_per_row = bytes_per_pixel * w_;
-
- MTLContext *mtl_ctx = MTLContext::get();
- uint align_requirement = static_cast<uint>(
- [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]);
-
- /* Verify per-vertex size aligns with texture size. */
- const GPUVertFormat *format = GPU_vertbuf_get_format(vbo);
- BLI_assert(bytes_per_pixel == format->stride &&
- "Pixel format stride MUST match the texture format stride -- These being different "
- "is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex");
- UNUSED_VARS_NDEBUG(format);
-
- /* Create texture descriptor. */
- BLI_assert(type_ == GPU_TEXTURE_BUFFER);
- texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
- texture_descriptor_.pixelFormat = mtl_format;
- texture_descriptor_.textureType = MTLTextureTypeTextureBuffer;
- texture_descriptor_.width = w_;
- texture_descriptor_.height = 1;
- texture_descriptor_.depth = 1;
- texture_descriptor_.arrayLength = 1;
- texture_descriptor_.mipmapLevelCount = mtl_max_mips_;
- texture_descriptor_.usage =
- MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite |
- MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. */
- texture_descriptor_.storageMode = [source_buffer storageMode];
- texture_descriptor_.sampleCount = 1;
- texture_descriptor_.cpuCacheMode = [source_buffer cpuCacheMode];
- texture_descriptor_.hazardTrackingMode = [source_buffer hazardTrackingMode];
-
- texture_ = [source_buffer
- newTextureWithDescriptor:texture_descriptor_
- offset:0
- bytesPerRow:ceil_to_multiple_u(bytes_per_row, align_requirement)];
- aligned_w_ = bytes_per_row / bytes_per_pixel;
-
- BLI_assert(texture_);
- texture_.label = [NSString stringWithUTF8String:this->get_name()];
- is_baked_ = true;
- is_dirty_ = false;
- resource_mode_ = MTL_TEXTURE_MODE_VBO;
-
- /* Track Status. */
- vert_buffer_ = mtl_vbo;
- vert_buffer_mtl_ = source_buffer;
-
- /* Cleanup. */
- [texture_descriptor_ release];
- texture_descriptor_ = nullptr;
-
return true;
}
diff --git a/source/blender/gpu/metal/mtl_texture_util.mm b/source/blender/gpu/metal/mtl_texture_util.mm
index 928393fb39e..5ed7659f260 100644
--- a/source/blender/gpu/metal/mtl_texture_util.mm
+++ b/source/blender/gpu/metal/mtl_texture_util.mm
@@ -22,13 +22,7 @@
/* Utility file for secondary functionality which supports mtl_texture.mm. */
extern char datatoc_compute_texture_update_msl[];
-extern char datatoc_depth_2d_update_vert_glsl[];
-extern char datatoc_depth_2d_update_float_frag_glsl[];
-extern char datatoc_depth_2d_update_int24_frag_glsl[];
-extern char datatoc_depth_2d_update_int32_frag_glsl[];
extern char datatoc_compute_texture_read_msl[];
-extern char datatoc_gpu_shader_fullscreen_blit_vert_glsl[];
-extern char datatoc_gpu_shader_fullscreen_blit_frag_glsl[];
namespace blender::gpu {
@@ -447,42 +441,34 @@ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get(
return *result;
}
- const char *fragment_source = nullptr;
+ const char *depth_2d_info_variant = nullptr;
switch (specialization.data_mode) {
case MTL_DEPTH_UPDATE_MODE_FLOAT:
- fragment_source = datatoc_depth_2d_update_float_frag_glsl;
+ depth_2d_info_variant = "depth_2d_update_float";
break;
case MTL_DEPTH_UPDATE_MODE_INT24:
- fragment_source = datatoc_depth_2d_update_int24_frag_glsl;
+ depth_2d_info_variant = "depth_2d_update_int24";
break;
case MTL_DEPTH_UPDATE_MODE_INT32:
- fragment_source = datatoc_depth_2d_update_int32_frag_glsl;
+ depth_2d_info_variant = "depth_2d_update_int32";
break;
default:
BLI_assert(false && "Invalid format mode\n");
return nullptr;
}
- GPUShader *shader = GPU_shader_create(datatoc_depth_2d_update_vert_glsl,
- fragment_source,
- nullptr,
- nullptr,
- nullptr,
- "depth_2d_update_sh_get");
+ GPUShader *shader = GPU_shader_create_from_info_name(depth_2d_info_variant);
mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialization, shader);
return shader;
}
GPUShader *gpu::MTLTexture::fullscreen_blit_sh_get()
{
-
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
if (mtl_context->get_texture_utils().fullscreen_blit_shader == nullptr) {
- const char *vertex_source = datatoc_gpu_shader_fullscreen_blit_vert_glsl;
- const char *fragment_source = datatoc_gpu_shader_fullscreen_blit_frag_glsl;
- GPUShader *shader = GPU_shader_create(
- vertex_source, fragment_source, nullptr, nullptr, nullptr, "fullscreen_blit");
+ GPUShader *shader = GPU_shader_create_from_info_name("fullscreen_blit");
+
mtl_context->get_texture_utils().fullscreen_blit_shader = shader;
}
return mtl_context->get_texture_utils().fullscreen_blit_shader;
@@ -614,7 +600,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
stringWithUTF8String:datatoc_compute_texture_read_msl];
/* Defensive Debug Checks. */
- long long int depth_scale_factor = 1;
+ int64_t depth_scale_factor = 1;
if (specialization_params.depth_format_mode > 0) {
BLI_assert(specialization_params.component_count_input == 1);
BLI_assert(specialization_params.component_count_output == 1);
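
With GPU_shader_create_from_info_name(), shader sources are no longer passed inline; the name refers to a ShaderCreateInfo registered elsewhere in the GPU module. A rough sketch of what such a declaration looks like (attribute names, sampler slot, and source file names here are illustrative assumptions, not the real fullscreen_blit info):

    #include "gpu_shader_create_info.hh"

    /* Illustrative only: the actual inputs/outputs of the registered info
     * live alongside the other create-info declarations in the GPU module. */
    GPU_SHADER_CREATE_INFO(fullscreen_blit)
        .vertex_in(0, Type::VEC2, "pos")
        .sampler(0, ImageType::FLOAT_2D, "imageTexture")
        .fragment_out(0, Type::VEC4, "fragColor")
        .vertex_source("gpu_shader_fullscreen_blit_vert.glsl")
        .fragment_source("gpu_shader_fullscreen_blit_frag.glsl")
        .do_static_compilation(true);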
diff --git a/source/blender/gpu/opengl/gl_backend.hh b/source/blender/gpu/opengl/gl_backend.hh
index 8646d94e2fd..14fca9f061d 100644
--- a/source/blender/gpu/opengl/gl_backend.hh
+++ b/source/blender/gpu/opengl/gl_backend.hh
@@ -61,7 +61,7 @@ class GLBackend : public GPUBackend {
GLTexture::samplers_update();
};
- Context *context_alloc(void *ghost_window) override
+ Context *context_alloc(void *ghost_window, void *ghost_context) override
{
return new GLContext(ghost_window, shared_orphan_list_);
};
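
The widened context_alloc() signature threads the native GHOST context through the backend factory: GL ignores it, while Metal needs it to reuse the pre-initialized GPU device mentioned in the commit message. A self-contained sketch of the shape of this pattern (types simplified; not the actual Blender classes):

    #include <memory>

    struct Context { virtual ~Context() = default; };
    struct GLContext final : Context { explicit GLContext(void * /*ghost_window*/) {} };
    struct MTLContext final : Context {
      MTLContext(void * /*ghost_window*/, void * /*ghost_context*/) {}
    };

    struct GPUBackend {
      virtual ~GPUBackend() = default;
      virtual Context *context_alloc(void *ghost_window, void *ghost_context) = 0;
    };

    /* GL does not need the native handle and simply ignores it. */
    struct GLBackend final : GPUBackend {
      Context *context_alloc(void *ghost_window, void * /*ghost_context*/) override
      {
        return new GLContext(ghost_window);
      }
    };

    /* Metal forwards it so the viewport reuses the GHOST-created device. */
    struct MTLBackend final : GPUBackend {
      Context *context_alloc(void *ghost_window, void *ghost_context) override
      {
        return new MTLContext(ghost_window, ghost_context);
      }
    };

    int main()
    {
      MTLBackend backend;
      std::unique_ptr<Context> ctx(backend.context_alloc(nullptr, nullptr));
      return 0;
    }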
diff --git a/source/blender/gpu/tests/gpu_testing.cc b/source/blender/gpu/tests/gpu_testing.cc
index 224a9afcf59..67e296b11d5 100644
--- a/source/blender/gpu/tests/gpu_testing.cc
+++ b/source/blender/gpu/tests/gpu_testing.cc
@@ -19,7 +19,7 @@ void GPUTest::SetUp()
ghost_system = GHOST_CreateSystem();
ghost_context = GHOST_CreateOpenGLContext(ghost_system, glSettings);
GHOST_ActivateOpenGLContext(ghost_context);
- context = GPU_context_create(nullptr);
+ context = GPU_context_create(nullptr, ghost_context);
GPU_init();
}
diff --git a/source/blender/render/intern/pipeline.cc b/source/blender/render/intern/pipeline.cc
index 4b52fb62bee..3cac9063b8e 100644
--- a/source/blender/render/intern/pipeline.cc
+++ b/source/blender/render/intern/pipeline.cc
@@ -926,7 +926,7 @@ void *RE_gl_context_get(Render *re)
void *RE_gpu_context_get(Render *re)
{
if (re->gpu_context == nullptr) {
- re->gpu_context = GPU_context_create(nullptr);
+ re->gpu_context = GPU_context_create(nullptr, re->gl_context);
}
return re->gpu_context;
}
diff --git a/source/blender/windowmanager/intern/wm_playanim.c b/source/blender/windowmanager/intern/wm_playanim.c
index 7c6650922a4..e768d18960b 100644
--- a/source/blender/windowmanager/intern/wm_playanim.c
+++ b/source/blender/windowmanager/intern/wm_playanim.c
@@ -1549,7 +1549,7 @@ static char *wm_main_playanim_intern(int argc, const char **argv)
// GHOST_ActivateWindowDrawingContext(g_WS.ghost_window);
/* initialize OpenGL immediate mode */
- g_WS.gpu_context = GPU_context_create(g_WS.ghost_window);
+ g_WS.gpu_context = GPU_context_create(g_WS.ghost_window, NULL);
GPU_init();
/* initialize the font */
diff --git a/source/blender/windowmanager/intern/wm_window.c b/source/blender/windowmanager/intern/wm_window.c
index b61ebdd11be..dea875becb1 100644
--- a/source/blender/windowmanager/intern/wm_window.c
+++ b/source/blender/windowmanager/intern/wm_window.c
@@ -579,7 +579,7 @@ static void wm_window_ghostwindow_add(wmWindowManager *wm,
glSettings);
if (ghostwin) {
- win->gpuctx = GPU_context_create(ghostwin);
+ win->gpuctx = GPU_context_create(ghostwin, NULL);
/* needed so we can detect the graphics card below */
GPU_init();