45 files changed, 3215 insertions, 561 deletions
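One API change threads through many of the files below: GPU_context_create() now takes the GHOST context handle in addition to the GHOST window. A minimal sketch of the two call patterns that appear in this patch (the variable names are illustrative only, not from the patch):

    /* Window-backed context: pass the GHOST window, no explicit GL context. */
    GPUContext *ctx_win = GPU_context_create(ghost_window, NULL);

    /* Off-screen context: no window, wrap an already-created GL context. */
    GPUContext *ctx_off = GPU_context_create(NULL, ghost_gl_context);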
diff --git a/intern/ghost/intern/GHOST_Context.h b/intern/ghost/intern/GHOST_Context.h index 3546fb6bbc7..04d445e7f85 100644 --- a/intern/ghost/intern/GHOST_Context.h +++ b/intern/ghost/intern/GHOST_Context.h @@ -36,19 +36,19 @@ class GHOST_Context : public GHOST_IContext { * Swaps front and back buffers of a window. * \return A boolean success indicator. */ - virtual GHOST_TSuccess swapBuffers() = 0; + virtual GHOST_TSuccess swapBuffers() override = 0; /** * Activates the drawing context of this window. * \return A boolean success indicator. */ - virtual GHOST_TSuccess activateDrawingContext() = 0; + virtual GHOST_TSuccess activateDrawingContext() override = 0; /** * Release the drawing context of the calling thread. * \return A boolean success indicator. */ - virtual GHOST_TSuccess releaseDrawingContext() = 0; + virtual GHOST_TSuccess releaseDrawingContext() override = 0; /** * Call immediately after new to initialize. If this fails then immediately delete the object. @@ -130,7 +130,7 @@ class GHOST_Context : public GHOST_IContext { * Gets the OpenGL frame-buffer associated with the OpenGL context * \return The ID of an OpenGL frame-buffer object. */ - virtual unsigned int getDefaultFramebuffer() + virtual unsigned int getDefaultFramebuffer() override { return 0; } diff --git a/intern/ghost/intern/GHOST_ContextCGL.h b/intern/ghost/intern/GHOST_ContextCGL.h index fa6d6fc6fa0..5caabb8ce00 100644 --- a/intern/ghost/intern/GHOST_ContextCGL.h +++ b/intern/ghost/intern/GHOST_ContextCGL.h @@ -9,8 +9,13 @@ #include "GHOST_Context.h" +#include <Cocoa/Cocoa.h> +#include <Metal/Metal.h> +#include <QuartzCore/QuartzCore.h> + @class CAMetalLayer; @class MTLCommandQueue; +@class MTLDevice; @class MTLRenderPipelineState; @class MTLTexture; @class NSOpenGLContext; @@ -36,62 +41,89 @@ class GHOST_ContextCGL : public GHOST_Context { * Swaps front and back buffers of a window. * \return A boolean success indicator. */ - GHOST_TSuccess swapBuffers(); + GHOST_TSuccess swapBuffers() override; /** * Activates the drawing context of this window. * \return A boolean success indicator. */ - GHOST_TSuccess activateDrawingContext(); + GHOST_TSuccess activateDrawingContext() override; /** * Release the drawing context of the calling thread. * \return A boolean success indicator. */ - GHOST_TSuccess releaseDrawingContext(); + GHOST_TSuccess releaseDrawingContext() override; - unsigned int getDefaultFramebuffer(); + unsigned int getDefaultFramebuffer() override; /** * Call immediately after new to initialize. If this fails then immediately delete the object. * \return Indication as to whether initialization has succeeded. */ - GHOST_TSuccess initializeDrawingContext(); + GHOST_TSuccess initializeDrawingContext() override; /** * Removes references to native handles from this context and then returns * \return GHOST_kSuccess if it is OK for the parent to release the handles and * GHOST_kFailure if releasing the handles will interfere with sharing */ - GHOST_TSuccess releaseNativeHandles(); + GHOST_TSuccess releaseNativeHandles() override; /** * Sets the swap interval for #swapBuffers. * \param interval: The swap interval to use. * \return A boolean success indicator. */ - GHOST_TSuccess setSwapInterval(int interval); + GHOST_TSuccess setSwapInterval(int interval) override; /** * Gets the current swap interval for #swapBuffers. * \param intervalOut: Variable to store the swap interval if it can be read. * \return Whether the swap interval can be read. 
*/ - GHOST_TSuccess getSwapInterval(int &); + GHOST_TSuccess getSwapInterval(int &) override; /** * Updates the drawing context of this window. * Needed whenever the window is changed. * \return Indication of success. */ - GHOST_TSuccess updateDrawingContext(); + GHOST_TSuccess updateDrawingContext() override; + + /** + * Returns a texture that Metal code can use as a render target. The current + * contents of this texture will be composited on top of the framebuffer + * each time `swapBuffers` is called. + */ + id<MTLTexture> metalOverlayTexture(); + + /** + * Return a pointer to the Metal command queue used by this context. + */ + MTLCommandQueue *metalCommandQueue(); + + /** + * Return a pointer to the Metal device associated with this context. + */ + MTLDevice *metalDevice(); + + /** + * Register present callback + */ + void metalRegisterPresentCallback(void (*callback)( + MTLRenderPassDescriptor *, id<MTLRenderPipelineState>, id<MTLTexture>, id<CAMetalDrawable>)); private: /** Metal state */ + /* Set this flag to `true` when rendering with Metal API for Viewport. + * TODO(Metal): This should be assigned to externally. */ + bool m_useMetalForRendering = false; NSView *m_metalView; CAMetalLayer *m_metalLayer; MTLCommandQueue *m_metalCmdQueue; MTLRenderPipelineState *m_metalRenderPipeline; + bool m_ownsMetalDevice; /** OpenGL state, for GPUs that don't support Metal */ NSOpenGLView *m_openGLView; @@ -102,9 +134,31 @@ class GHOST_ContextCGL : public GHOST_Context { /** The virtualized default frame-buffer. */ unsigned int m_defaultFramebuffer; - /** The virtualized default frame-buffer's texture. */ - MTLTexture *m_defaultFramebufferMetalTexture; - + /** The virtualized default framebuffer's texture */ + /** + * Texture that you can render into with Metal. The texture will be + * composited on top of `m_defaultFramebufferMetalTexture` whenever + * `swapBuffers` is called. + */ + static const int METAL_SWAPCHAIN_SIZE = 3; + struct MTLSwapchainTexture { + id<MTLTexture> texture; + unsigned int index; + }; + MTLSwapchainTexture m_defaultFramebufferMetalTexture[METAL_SWAPCHAIN_SIZE]; + unsigned int current_swapchain_index = 0; + + /* Present callback. + * We use this such that presentation can be controlled from within the Metal + * Context. This is required for optimal performance and clean control flow. + * Also helps ensure flickering does not occur by present being dependent + * on existing submissions. */ + void (*contextPresentCallback)(MTLRenderPassDescriptor *, + id<MTLRenderPipelineState>, + id<MTLTexture>, + id<CAMetalDrawable>); + + int mtl_SwapInterval; const bool m_debug; /** The first created OpenGL context (for sharing display lists) */ @@ -117,4 +171,5 @@ class GHOST_ContextCGL : public GHOST_Context { void metalInitFramebuffer(); void metalUpdateFramebuffer(); void metalSwapBuffers(); + void initClear(); }; diff --git a/intern/ghost/intern/GHOST_ContextCGL.mm b/intern/ghost/intern/GHOST_ContextCGL.mm index 488aa58aa59..6a0fed79fb0 100644 --- a/intern/ghost/intern/GHOST_ContextCGL.mm +++ b/intern/ghost/intern/GHOST_ContextCGL.mm @@ -55,139 +55,277 @@ GHOST_ContextCGL::GHOST_ContextCGL(bool stereoVisual, m_openGLView(openGLView), m_openGLContext(nil), m_defaultFramebuffer(0), - m_defaultFramebufferMetalTexture(nil), m_debug(false) { + /* Init Metal Swapchain. 
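 * A ring of METAL_SWAPCHAIN_SIZE (3) overlay textures is initialized here so
 * that a fresh texture can be handed out each frame while earlier ones may
 * still be in flight on the GPU (presumably conventional triple buffering).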
*/ + current_swapchain_index = 0; + for (int i = 0; i < METAL_SWAPCHAIN_SIZE; i++) { + m_defaultFramebufferMetalTexture[i].texture = nil; + m_defaultFramebufferMetalTexture[i].index = i; + } if (m_metalView) { + m_ownsMetalDevice = false; metalInit(); } + else { + /* Prepare offscreen GHOST Context Metal device. */ + id<MTLDevice> metalDevice = MTLCreateSystemDefaultDevice(); + + if (m_debug) { + printf("Selected Metal Device: %s\n", [metalDevice.name UTF8String]); + } + + m_ownsMetalDevice = true; + if (metalDevice) { + m_metalLayer = [[CAMetalLayer alloc] init]; + [m_metalLayer setEdgeAntialiasingMask:0]; + [m_metalLayer setMasksToBounds:NO]; + [m_metalLayer setOpaque:YES]; + [m_metalLayer setFramebufferOnly:YES]; + [m_metalLayer setPresentsWithTransaction:NO]; + [m_metalLayer removeAllAnimations]; + [m_metalLayer setDevice:metalDevice]; + m_metalLayer.allowsNextDrawableTimeout = NO; + metalInit(); + } + else { + ghost_fatal_error_dialog( + "[ERROR] Failed to create Metal device for offscreen GHOST Context.\n"); + } + } + + /* Initialise swapinterval. */ + mtl_SwapInterval = 60; } GHOST_ContextCGL::~GHOST_ContextCGL() { metalFree(); - if (m_openGLContext != nil) { - if (m_openGLContext == [NSOpenGLContext currentContext]) { - [NSOpenGLContext clearCurrentContext]; + if (!m_useMetalForRendering) { +#if WITH_OPENGL + if (m_openGLContext != nil) { + if (m_openGLContext == [NSOpenGLContext currentContext]) { + [NSOpenGLContext clearCurrentContext]; - if (m_openGLView) { - [m_openGLView clearGLContext]; + if (m_openGLView) { + [m_openGLView clearGLContext]; + } } - } - if (m_openGLContext != s_sharedOpenGLContext || s_sharedCount == 1) { - assert(s_sharedCount > 0); + if (m_openGLContext != s_sharedOpenGLContext || s_sharedCount == 1) { + assert(s_sharedCount > 0); - s_sharedCount--; + s_sharedCount--; - if (s_sharedCount == 0) - s_sharedOpenGLContext = nil; + if (s_sharedCount == 0) + s_sharedOpenGLContext = nil; - [m_openGLContext release]; + [m_openGLContext release]; + } + } +#endif + } + + if (m_ownsMetalDevice) { + if (m_metalLayer) { + [m_metalLayer release]; + m_metalLayer = nil; } } } GHOST_TSuccess GHOST_ContextCGL::swapBuffers() { - if (m_openGLContext != nil) { - if (m_metalView) { - metalSwapBuffers(); + GHOST_TSuccess return_value = GHOST_kFailure; + + if (!m_useMetalForRendering) { +#if WITH_OPENGL + if (m_openGLContext != nil) { + if (m_metalView) { + metalSwapBuffers(); + } + else if (m_openGLView) { + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + [m_openGLContext flushBuffer]; + [pool drain]; + } + return_value = GHOST_kSuccess; } - else if (m_openGLView) { - NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; - [m_openGLContext flushBuffer]; - [pool drain]; + else { + return_value = GHOST_kFailure; } - return GHOST_kSuccess; +#endif } else { - return GHOST_kFailure; + if (m_metalView) { + metalSwapBuffers(); + } + return_value = GHOST_kSuccess; } + return return_value; } GHOST_TSuccess GHOST_ContextCGL::setSwapInterval(int interval) { - if (m_openGLContext != nil) { - NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; - [m_openGLContext setValues:&interval forParameter:NSOpenGLCPSwapInterval]; - [pool drain]; - return GHOST_kSuccess; + + if (!m_useMetalForRendering) { +#if WITH_OPENGL + if (m_openGLContext != nil) { + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + [m_openGLContext setValues:&interval forParameter:NSOpenGLCPSwapInterval]; + [pool drain]; + return GHOST_kSuccess; + } + else { + return GHOST_kFailure; + } +#endif 
} else { - return GHOST_kFailure; + mtl_SwapInterval = interval; + return GHOST_kSuccess; } } GHOST_TSuccess GHOST_ContextCGL::getSwapInterval(int &intervalOut) { - if (m_openGLContext != nil) { - GLint interval; - NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + if (!m_useMetalForRendering) { +#if WITH_OPENGL + if (m_openGLContext != nil) { + GLint interval; - [m_openGLContext getValues:&interval forParameter:NSOpenGLCPSwapInterval]; + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; - [pool drain]; + [m_openGLContext getValues:&interval forParameter:NSOpenGLCPSwapInterval]; - intervalOut = static_cast<int>(interval); + [pool drain]; - return GHOST_kSuccess; + intervalOut = static_cast<int>(interval); + + return GHOST_kSuccess; + } + else { + return GHOST_kFailure; + } +#endif } else { - return GHOST_kFailure; + intervalOut = mtl_SwapInterval; + return GHOST_kSuccess; } } GHOST_TSuccess GHOST_ContextCGL::activateDrawingContext() { - if (m_openGLContext != nil) { - NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; - [m_openGLContext makeCurrentContext]; - [pool drain]; - return GHOST_kSuccess; + + if (!m_useMetalForRendering) { +#if WITH_OPENGL + if (m_openGLContext != nil) { + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + [m_openGLContext makeCurrentContext]; + [pool drain]; + return GHOST_kSuccess; + } + else { + return GHOST_kFailure; + } +#endif } else { - return GHOST_kFailure; + return GHOST_kSuccess; } } GHOST_TSuccess GHOST_ContextCGL::releaseDrawingContext() { - if (m_openGLContext != nil) { - NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; - [NSOpenGLContext clearCurrentContext]; - [pool drain]; - return GHOST_kSuccess; + + if (!m_useMetalForRendering) { +#if WITH_OPENGL + if (m_openGLContext != nil) { + NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; + [NSOpenGLContext clearCurrentContext]; + [pool drain]; + return GHOST_kSuccess; + } + else { + return GHOST_kFailure; + } +#endif } else { - return GHOST_kFailure; + return GHOST_kSuccess; } } unsigned int GHOST_ContextCGL::getDefaultFramebuffer() { - return m_defaultFramebuffer; + + if (!m_useMetalForRendering) { + return m_defaultFramebuffer; + } + /* NOTE(Metal): This is not valid. */ + return 0; } GHOST_TSuccess GHOST_ContextCGL::updateDrawingContext() { - if (m_openGLContext != nil) { - if (m_metalView) { - metalUpdateFramebuffer(); + + if (!m_useMetalForRendering) { +#if WITH_OPENGL + if (m_openGLContext != nil) { + if (m_metalView) { + metalUpdateFramebuffer(); + } + else if (m_openGLView) { + @autoreleasepool { + [m_openGLContext update]; + } + } + + return GHOST_kSuccess; } - else if (m_openGLView) { - NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; - [m_openGLContext update]; - [pool drain]; + else { + return GHOST_kFailure; } - - return GHOST_kSuccess; +#endif } else { - return GHOST_kFailure; + if (m_metalView) { + metalUpdateFramebuffer(); + return GHOST_kSuccess; + } } + return GHOST_kFailure; +} + +id<MTLTexture> GHOST_ContextCGL::metalOverlayTexture() +{ + /* Increment Swapchain - Only needed if context is requesting a new texture */ + current_swapchain_index = (current_swapchain_index + 1) % METAL_SWAPCHAIN_SIZE; + + /* Ensure backing texture is ready for current swapchain index */ + updateDrawingContext(); + + /* Return texture. 
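 * Each call advances the swapchain ring before returning, so callers
 * presumably fetch the overlay texture once per frame; the texture returned
 * here is the one the next metalSwapBuffers() will composite.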
*/ + return m_defaultFramebufferMetalTexture[current_swapchain_index].texture; +} + +MTLCommandQueue *GHOST_ContextCGL::metalCommandQueue() +{ + return m_metalCmdQueue; +} +MTLDevice *GHOST_ContextCGL::metalDevice() +{ + id<MTLDevice> device = m_metalLayer.device; + return (MTLDevice *)device; +} + +void GHOST_ContextCGL::metalRegisterPresentCallback(void (*callback)( + MTLRenderPassDescriptor *, id<MTLRenderPipelineState>, id<MTLTexture>, id<CAMetalDrawable>)) +{ + this->contextPresentCallback = callback; } static void makeAttribList(std::vector<NSOpenGLPixelFormatAttribute> &attribs, @@ -241,120 +379,134 @@ GHOST_TSuccess GHOST_ContextCGL::initializeDrawingContext() #endif /* Command-line argument would be better. */ - static bool softwareGL = getenv("BLENDER_SOFTWAREGL"); - - NSOpenGLPixelFormat *pixelFormat = nil; - std::vector<NSOpenGLPixelFormatAttribute> attribs; - bool increasedSamplerLimit = false; - - /* Attempt to initialize device with increased sampler limit. - * If this is unsupported and initialization fails, initialize GL Context as normal. - * - * NOTE: This is not available when using the SoftwareGL path, or for Intel-based - * platforms. */ - if (!softwareGL) { - if (@available(macos 11.0, *)) { - increasedSamplerLimit = true; + if (!m_useMetalForRendering) { +#if WITH_OPENGL + /* Command-line argument would be better. */ + static bool softwareGL = getenv("BLENDER_SOFTWAREGL"); + + NSOpenGLPixelFormat *pixelFormat = nil; + std::vector<NSOpenGLPixelFormatAttribute> attribs; + bool increasedSamplerLimit = false; + + /* Attempt to initialize device with increased sampler limit. + * If this is unsupported and initialization fails, initialize GL Context as normal. + * + * NOTE: This is not available when using the SoftwareGL path, or for Intel-based + * platforms. */ + if (!softwareGL) { + if (@available(macos 11.0, *)) { + increasedSamplerLimit = true; + } } - } - const int max_ctx_attempts = increasedSamplerLimit ? 2 : 1; - for (int ctx_create_attempt = 0; ctx_create_attempt < max_ctx_attempts; ctx_create_attempt++) { - - attribs.clear(); - attribs.reserve(40); - makeAttribList(attribs, m_stereoVisual, needAlpha, softwareGL, increasedSamplerLimit); + const int max_ctx_attempts = increasedSamplerLimit ? 2 : 1; + for (int ctx_create_attempt = 0; ctx_create_attempt < max_ctx_attempts; + ctx_create_attempt++) { + + attribs.clear(); + attribs.reserve(40); + makeAttribList(attribs, m_stereoVisual, needAlpha, softwareGL, increasedSamplerLimit); + + pixelFormat = [[NSOpenGLPixelFormat alloc] initWithAttributes:&attribs[0]]; + if (pixelFormat == nil) { + /* If pixel format creation fails when testing increased sampler limit, + * attempt initialization again with feature disabled, otherwise, fail. */ + if (increasedSamplerLimit) { + increasedSamplerLimit = false; + continue; + } + return GHOST_kFailure; + } - pixelFormat = [[NSOpenGLPixelFormat alloc] initWithAttributes:&attribs[0]]; - if (pixelFormat == nil) { - /* If pixel format creation fails when testing increased sampler limit, - * attempt initialization again with feature disabled, otherwise, fail. */ - if (increasedSamplerLimit) { - increasedSamplerLimit = false; - continue; + /* Attempt to create context. */ + m_openGLContext = [[NSOpenGLContext alloc] initWithFormat:pixelFormat + shareContext:s_sharedOpenGLContext]; + [pixelFormat release]; + + if (m_openGLContext == nil) { + /* If context creation fails when testing increased sampler limit, + * attempt re-creation with feature disabled. Otherwise, error. 
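 * (The enclosing loop makes at most two attempts: one with the raised
 * sampler limit and, if that fails, one more with the default limits.)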
*/ + if (increasedSamplerLimit) { + increasedSamplerLimit = false; + continue; + } + + /* Default context creation attempt failed. */ + return GHOST_kFailure; } - return GHOST_kFailure; - } - /* Attempt to create context. */ - m_openGLContext = [[NSOpenGLContext alloc] initWithFormat:pixelFormat - shareContext:s_sharedOpenGLContext]; - [pixelFormat release]; + /* Created GL context successfully, activate. */ + [m_openGLContext makeCurrentContext]; - if (m_openGLContext == nil) { - /* If context creation fails when testing increased sampler limit, - * attempt re-creation with feature disabled. Otherwise, error. */ + /* When increasing sampler limit, verify created context is a supported configuration. */ if (increasedSamplerLimit) { - increasedSamplerLimit = false; - continue; + const char *vendor = (const char *)glGetString(GL_VENDOR); + const char *renderer = (const char *)glGetString(GL_RENDERER); + + /* If generated context type is unsupported, release existing context and + * fallback to creating a normal context below. */ + if (strstr(vendor, "Intel") || strstr(renderer, "Software")) { + [m_openGLContext release]; + m_openGLContext = nil; + increasedSamplerLimit = false; + continue; + } } - - /* Default context creation attempt failed. */ - return GHOST_kFailure; } - /* Created GL context successfully, activate. */ - [m_openGLContext makeCurrentContext]; + if (m_debug) { + GLint major = 0, minor = 0; + glGetIntegerv(GL_MAJOR_VERSION, &major); + glGetIntegerv(GL_MINOR_VERSION, &minor); + fprintf(stderr, "OpenGL version %d.%d%s\n", major, minor, softwareGL ? " (software)" : ""); + fprintf(stderr, "Renderer: %s\n", glGetString(GL_RENDERER)); + } - /* When increasing sampler limit, verify created context is a supported configuration. */ - if (increasedSamplerLimit) { - const char *vendor = (const char *)glGetString(GL_VENDOR); - const char *renderer = (const char *)glGetString(GL_RENDERER); - - /* If generated context type is unsupported, release existing context and - * fallback to creating a normal context below. */ - if (strstr(vendor, "Intel") || strstr(renderer, "Software")) { - [m_openGLContext release]; - m_openGLContext = nil; - increasedSamplerLimit = false; - continue; +# ifdef GHOST_WAIT_FOR_VSYNC + { + GLint swapInt = 1; + /* Wait for vertical-sync, to avoid tearing artifacts. */ + [m_openGLContext setValues:&swapInt forParameter:NSOpenGLCPSwapInterval]; + } +# endif + + if (m_metalView) { + if (m_defaultFramebuffer == 0) { + /* Create a virtual frame-buffer. */ + [m_openGLContext makeCurrentContext]; + metalInitFramebuffer(); + initClearGL(); } } - } + else if (m_openGLView) { + [m_openGLView setOpenGLContext:m_openGLContext]; + [m_openGLContext setView:m_openGLView]; + initClearGL(); + } - if (m_debug) { - GLint major = 0, minor = 0; - glGetIntegerv(GL_MAJOR_VERSION, &major); - glGetIntegerv(GL_MINOR_VERSION, &minor); - fprintf(stderr, "OpenGL version %d.%d%s\n", major, minor, softwareGL ? " (software)" : ""); - fprintf(stderr, "Renderer: %s\n", glGetString(GL_RENDERER)); - } + [m_openGLContext flushBuffer]; -#ifdef GHOST_WAIT_FOR_VSYNC - { - GLint swapInt = 1; - /* Wait for vertical-sync, to avoid tearing artifacts. */ - [m_openGLContext setValues:&swapInt forParameter:NSOpenGLCPSwapInterval]; - } -#endif + if (s_sharedCount == 0) + s_sharedOpenGLContext = m_openGLContext; - if (m_metalView) { - if (m_defaultFramebuffer == 0) { - /* Create a virtual frame-buffer. 
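 * The "virtual" default frame-buffer is a GL FBO whose color attachment is
 * the CoreVideo-shared texture created in metalUpdateFramebuffer(), so GL
 * draws into memory that the Metal blit pass can sample directly.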
*/ - [m_openGLContext makeCurrentContext]; + s_sharedCount++; +#endif + } + else { + /* NOTE(Metal): Metal-only path. */ + if (m_metalView) { metalInitFramebuffer(); - initClearGL(); } } - else if (m_openGLView) { - [m_openGLView setOpenGLContext:m_openGLContext]; - [m_openGLContext setView:m_openGLView]; - initClearGL(); - } - - [m_openGLContext flushBuffer]; - - if (s_sharedCount == 0) - s_sharedOpenGLContext = m_openGLContext; - - s_sharedCount++; } return GHOST_kSuccess; } GHOST_TSuccess GHOST_ContextCGL::releaseNativeHandles() { +#if WITH_OPENGL m_openGLContext = nil; m_openGLView = nil; +#endif m_metalView = nil; return GHOST_kSuccess; @@ -404,10 +556,14 @@ void GHOST_ContextCGL::metalInit() fragment float4 fragment_shader(Vertex v [[stage_in]], texture2d<float> t [[texture(0)]]) { - return t.sample(s, v.texCoord); - } - )msl"; + /* Final blit should ensure alpha is 1.0. This resolves + * rendering artifacts for blitting of final backbuffer. */ + float4 out_tex = t.sample(s, v.texCoord); + out_tex.a = 1.0; + return out_tex; + } + )msl"; MTLCompileOptions *options = [[[MTLCompileOptions alloc] init] autorelease]; options.languageVersion = MTLLanguageVersion1_1; @@ -424,6 +580,8 @@ void GHOST_ContextCGL::metalInit() desc.fragmentFunction = [library newFunctionWithName:@"fragment_shader"]; desc.vertexFunction = [library newFunctionWithName:@"vertex_shader"]; + /* Ensure library is released. */ + [library autorelease]; [desc.colorAttachments objectAtIndexedSubscript:0].pixelFormat = METAL_FRAMEBUFFERPIXEL_FORMAT; @@ -434,6 +592,20 @@ void GHOST_ContextCGL::metalInit() ghost_fatal_error_dialog( "GHOST_ContextCGL::metalInit: newRenderPipelineStateWithDescriptor:error: failed!"); } + + /* Create a render pipeline to composite things rendered with Metal on top + * of the framebuffer contents. Uses the same vertex and fragment shader + * as the blit above, but with alpha blending enabled. 
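 * With MTLBlendFactorSourceAlpha / MTLBlendFactorOneMinusSourceAlpha this is
 * the standard "over" operator:
 *   out.rgb = src.rgb * src.a + dst.rgb * (1.0 - src.a)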
*/ + desc.label = @"Metal Overlay"; + desc.colorAttachments[0].blendingEnabled = YES; + desc.colorAttachments[0].sourceRGBBlendFactor = MTLBlendFactorSourceAlpha; + desc.colorAttachments[0].destinationRGBBlendFactor = MTLBlendFactorOneMinusSourceAlpha; + + if (error) { + ghost_fatal_error_dialog( + "GHOST_ContextCGL::metalInit: newRenderPipelineStateWithDescriptor:error: failed (when " + "creating the Metal overlay pipeline)!"); + } } } @@ -445,123 +617,206 @@ void GHOST_ContextCGL::metalFree() if (m_metalRenderPipeline) { [m_metalRenderPipeline release]; } - if (m_defaultFramebufferMetalTexture) { - [m_defaultFramebufferMetalTexture release]; + + for (int i = 0; i < METAL_SWAPCHAIN_SIZE; i++) { + if (m_defaultFramebufferMetalTexture[i].texture) { + [m_defaultFramebufferMetalTexture[i].texture release]; + } } } void GHOST_ContextCGL::metalInitFramebuffer() { - glGenFramebuffers(1, &m_defaultFramebuffer); + if (!m_useMetalForRendering) { +#if WITH_OPENGL + glGenFramebuffers(1, &m_defaultFramebuffer); +#endif + } updateDrawingContext(); - glBindFramebuffer(GL_FRAMEBUFFER, m_defaultFramebuffer); + + if (!m_useMetalForRendering) { +#if WITH_OPENGL + glBindFramebuffer(GL_FRAMEBUFFER, m_defaultFramebuffer); +#endif + } } void GHOST_ContextCGL::metalUpdateFramebuffer() { - assert(m_defaultFramebuffer != 0); + if (!m_useMetalForRendering) { +#if WITH_OPENGL + assert(m_defaultFramebuffer != 0); +#endif + } NSRect bounds = [m_metalView bounds]; NSSize backingSize = [m_metalView convertSizeToBacking:bounds.size]; size_t width = (size_t)backingSize.width; size_t height = (size_t)backingSize.height; - { - /* Test if there is anything to update */ - id<MTLTexture> tex = (id<MTLTexture>)m_defaultFramebufferMetalTexture; - if (tex && tex.width == width && tex.height == height) { - return; +#if WITH_OPENGL + unsigned int glTex; + CVPixelBufferRef cvPixelBuffer = nil; + CVOpenGLTextureCacheRef cvGLTexCache = nil; + CVOpenGLTextureRef cvGLTex = nil; + CVMetalTextureCacheRef cvMetalTexCache = nil; + CVMetalTextureRef cvMetalTex = nil; +#endif + + if (!m_useMetalForRendering) { +#if WITH_OPENGL + /* OPENGL path */ + { + /* Test if there is anything to update */ + id<MTLTexture> tex = m_defaultFramebufferMetalTexture[current_swapchain_index].texture; + if (tex && tex.width == width && tex.height == height) { + return; + } } - } - activateDrawingContext(); + activateDrawingContext(); + + NSDictionary *cvPixelBufferProps = @{ + (__bridge NSString *)kCVPixelBufferOpenGLCompatibilityKey : @YES, + (__bridge NSString *)kCVPixelBufferMetalCompatibilityKey : @YES, + }; + CVReturn cvret = CVPixelBufferCreate(kCFAllocatorDefault, + width, + height, + METAL_CORE_VIDEO_PIXEL_FORMAT, + (__bridge CFDictionaryRef)cvPixelBufferProps, + &cvPixelBuffer); + if (cvret != kCVReturnSuccess) { + ghost_fatal_error_dialog( + "GHOST_ContextCGL::metalUpdateFramebuffer: CVPixelBufferCreate failed!"); + } - NSDictionary *cvPixelBufferProps = @{ - (__bridge NSString *)kCVPixelBufferOpenGLCompatibilityKey : @YES, - (__bridge NSString *)kCVPixelBufferMetalCompatibilityKey : @YES, - }; - CVPixelBufferRef cvPixelBuffer = nil; - CVReturn cvret = CVPixelBufferCreate(kCFAllocatorDefault, - width, - height, - METAL_CORE_VIDEO_PIXEL_FORMAT, - (__bridge CFDictionaryRef)cvPixelBufferProps, - &cvPixelBuffer); - if (cvret != kCVReturnSuccess) { - ghost_fatal_error_dialog( - "GHOST_ContextCGL::metalUpdateFramebuffer: CVPixelBufferCreate failed!"); - } - - /* Create an OpenGL texture. 
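 * Both the GL texture here and the Metal texture below are created from the
 * same CVPixelBuffer (flagged above as both OpenGL- and Metal-compatible), so
 * the two APIs share one allocation instead of copying between them.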
*/ - CVOpenGLTextureCacheRef cvGLTexCache = nil; - cvret = CVOpenGLTextureCacheCreate(kCFAllocatorDefault, - nil, - m_openGLContext.CGLContextObj, - m_openGLContext.pixelFormat.CGLPixelFormatObj, - nil, - &cvGLTexCache); - if (cvret != kCVReturnSuccess) { - ghost_fatal_error_dialog( - "GHOST_ContextCGL::metalUpdateFramebuffer: CVOpenGLTextureCacheCreate failed!"); - } + /* Create an OpenGL texture. */ + cvret = CVOpenGLTextureCacheCreate(kCFAllocatorDefault, + nil, + m_openGLContext.CGLContextObj, + m_openGLContext.pixelFormat.CGLPixelFormatObj, + nil, + &cvGLTexCache); + if (cvret != kCVReturnSuccess) { + ghost_fatal_error_dialog( + "GHOST_ContextCGL::metalUpdateFramebuffer: CVOpenGLTextureCacheCreate failed!"); + } - CVOpenGLTextureRef cvGLTex = nil; - cvret = CVOpenGLTextureCacheCreateTextureFromImage( - kCFAllocatorDefault, cvGLTexCache, cvPixelBuffer, nil, &cvGLTex); - if (cvret != kCVReturnSuccess) { - ghost_fatal_error_dialog( - "GHOST_ContextCGL::metalUpdateFramebuffer: " - "CVOpenGLTextureCacheCreateTextureFromImage failed!"); - } + cvret = CVOpenGLTextureCacheCreateTextureFromImage( + kCFAllocatorDefault, cvGLTexCache, cvPixelBuffer, nil, &cvGLTex); + if (cvret != kCVReturnSuccess) { + ghost_fatal_error_dialog( + "GHOST_ContextCGL::metalUpdateFramebuffer: " + "CVOpenGLTextureCacheCreateTextureFromImage failed!"); + } - unsigned int glTex; - glTex = CVOpenGLTextureGetName(cvGLTex); + glTex = CVOpenGLTextureGetName(cvGLTex); - /* Create a Metal texture. */ - CVMetalTextureCacheRef cvMetalTexCache = nil; - cvret = CVMetalTextureCacheCreate( - kCFAllocatorDefault, nil, m_metalLayer.device, nil, &cvMetalTexCache); - if (cvret != kCVReturnSuccess) { - ghost_fatal_error_dialog( - "GHOST_ContextCGL::metalUpdateFramebuffer: CVMetalTextureCacheCreate failed!"); - } + /* Create a Metal texture. */ + cvret = CVMetalTextureCacheCreate( + kCFAllocatorDefault, nil, m_metalLayer.device, nil, &cvMetalTexCache); + if (cvret != kCVReturnSuccess) { + ghost_fatal_error_dialog( + "GHOST_ContextCGL::metalUpdateFramebuffer: CVMetalTextureCacheCreate failed!"); + } - CVMetalTextureRef cvMetalTex = nil; - cvret = CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault, - cvMetalTexCache, - cvPixelBuffer, - nil, - METAL_FRAMEBUFFERPIXEL_FORMAT, - width, - height, - 0, - &cvMetalTex); - if (cvret != kCVReturnSuccess) { - ghost_fatal_error_dialog( - "GHOST_ContextCGL::metalUpdateFramebuffer: " - "CVMetalTextureCacheCreateTextureFromImage failed!"); - } + cvret = CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault, + cvMetalTexCache, + cvPixelBuffer, + nil, + METAL_FRAMEBUFFERPIXEL_FORMAT, + width, + height, + 0, + &cvMetalTex); + if (cvret != kCVReturnSuccess) { + ghost_fatal_error_dialog( + "GHOST_ContextCGL::metalUpdateFramebuffer: " + "CVMetalTextureCacheCreateTextureFromImage failed!"); + } - MTLTexture *tex = (MTLTexture *)CVMetalTextureGetTexture(cvMetalTex); + id<MTLTexture> tex = CVMetalTextureGetTexture(cvMetalTex); - if (!tex) { - ghost_fatal_error_dialog( - "GHOST_ContextCGL::metalUpdateFramebuffer: CVMetalTextureGetTexture failed!"); + if (!tex) { + ghost_fatal_error_dialog( + "GHOST_ContextCGL::metalUpdateFramebuffer: CVMetalTextureGetTexture failed!"); + } + + [m_defaultFramebufferMetalTexture[current_swapchain_index].texture release]; + m_defaultFramebufferMetalTexture[current_swapchain_index].texture = [tex retain]; +#endif } + else { + /* NOTE(Metal): Metal API Path. 
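 * Unlike the interop path above, this path allocates the overlay texture
 * directly through Metal (no CoreVideo round-trip) and clears it once on
 * creation.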
*/ + if (m_defaultFramebufferMetalTexture[current_swapchain_index].texture && + m_defaultFramebufferMetalTexture[current_swapchain_index].texture.width == width && + m_defaultFramebufferMetalTexture[current_swapchain_index].texture.height == height) { + return; + } - [m_defaultFramebufferMetalTexture release]; - m_defaultFramebufferMetalTexture = [tex retain]; + /* Free old texture */ + [m_defaultFramebufferMetalTexture[current_swapchain_index].texture release]; - glBindFramebuffer(GL_FRAMEBUFFER, m_defaultFramebuffer); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_RECTANGLE, glTex, 0); + id<MTLDevice> device = m_metalLayer.device; + MTLTextureDescriptor *overlayDesc = [MTLTextureDescriptor + texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA16Float + width:width + height:height + mipmapped:NO]; + overlayDesc.storageMode = MTLStorageModePrivate; + overlayDesc.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead; + + id<MTLTexture> overlayTex = [device newTextureWithDescriptor:overlayDesc]; + if (!overlayTex) { + ghost_fatal_error_dialog( + "GHOST_ContextCGL::metalUpdateFramebuffer: failed to create Metal overlay texture!"); + } + else { + overlayTex.label = [NSString + stringWithFormat:@"Metal Overlay for GHOST Context %p", this]; //@""; - [m_metalLayer setDrawableSize:CGSizeMake((CGFloat)width, (CGFloat)height)]; + // NSLog(@"Created new Metal Overlay (backbuffer) for context %p\n", this); + } + + m_defaultFramebufferMetalTexture[current_swapchain_index].texture = + overlayTex; //[(MTLTexture *)overlayTex retain]; + + /* Clear texture on create */ + id<MTLCommandBuffer> cmdBuffer = [m_metalCmdQueue commandBuffer]; + MTLRenderPassDescriptor *passDescriptor = [MTLRenderPassDescriptor renderPassDescriptor]; + { + auto attachment = [passDescriptor.colorAttachments objectAtIndexedSubscript:0]; + attachment.texture = m_defaultFramebufferMetalTexture[current_swapchain_index].texture; + attachment.loadAction = MTLLoadActionClear; + attachment.clearColor = MTLClearColorMake(0.294, 0.294, 0.294, 1.000); + attachment.storeAction = MTLStoreActionStore; + } + { + id<MTLRenderCommandEncoder> enc = [cmdBuffer + renderCommandEncoderWithDescriptor:passDescriptor]; + [enc endEncoding]; + } + [cmdBuffer commit]; + } - CVPixelBufferRelease(cvPixelBuffer); - CVOpenGLTextureCacheRelease(cvGLTexCache); - CVOpenGLTextureRelease(cvGLTex); - CFRelease(cvMetalTexCache); - CFRelease(cvMetalTex); + if (!m_useMetalForRendering) { +#if WITH_OPENGL + glBindFramebuffer(GL_FRAMEBUFFER, m_defaultFramebuffer); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_RECTANGLE, glTex, 0); +#endif + } + + [m_metalLayer setDrawableSize:CGSizeMake((CGFloat)width, (CGFloat)height)]; + if (!m_useMetalForRendering) { +#if WITH_OPENGL + CVPixelBufferRelease(cvPixelBuffer); + CVOpenGLTextureCacheRelease(cvGLTexCache); + CVOpenGLTextureRelease(cvGLTex); + CFRelease(cvMetalTexCache); + CFRelease(cvMetalTex); +#endif + } } void GHOST_ContextCGL::metalSwapBuffers() @@ -570,40 +825,88 @@ void GHOST_ContextCGL::metalSwapBuffers() @autoreleasepool { /* clang-format on */ updateDrawingContext(); - glFlush(); - assert(m_defaultFramebufferMetalTexture != 0); + if (!m_useMetalForRendering) { +#if WITH_OPENGL + glFlush(); + assert(m_defaultFramebufferMetalTexture[current_swapchain_index].texture != nil); +#endif + } id<CAMetalDrawable> drawable = [m_metalLayer nextDrawable]; if (!drawable) { return; } - id<MTLCommandBuffer> cmdBuffer = [m_metalCmdQueue commandBuffer]; - 
MTLRenderPassDescriptor *passDescriptor = [MTLRenderPassDescriptor renderPassDescriptor]; { auto attachment = [passDescriptor.colorAttachments objectAtIndexedSubscript:0]; attachment.texture = drawable.texture; - attachment.loadAction = MTLLoadActionDontCare; + attachment.loadAction = MTLLoadActionClear; + attachment.clearColor = MTLClearColorMake(1.0, 0.294, 0.294, 1.000); attachment.storeAction = MTLStoreActionStore; } - id<MTLTexture> srcTexture = (id<MTLTexture>)m_defaultFramebufferMetalTexture; + if (!m_useMetalForRendering) { + id<MTLCommandBuffer> cmdBuffer = [m_metalCmdQueue commandBuffer]; + { + assert(m_defaultFramebufferMetalTexture[current_swapchain_index].texture != nil); + id<MTLRenderCommandEncoder> enc = [cmdBuffer + renderCommandEncoderWithDescriptor:passDescriptor]; + [enc setRenderPipelineState:(id<MTLRenderPipelineState>)m_metalRenderPipeline]; + [enc setFragmentTexture:m_defaultFramebufferMetalTexture[current_swapchain_index].texture + atIndex:0]; + [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3]; + [enc endEncoding]; + } + + [cmdBuffer presentDrawable:drawable]; + /* Submit command buffer */ + [cmdBuffer commit]; + } + else { + assert(contextPresentCallback); + assert(m_defaultFramebufferMetalTexture[current_swapchain_index].texture != nil); + (*contextPresentCallback)(passDescriptor, + (id<MTLRenderPipelineState>)m_metalRenderPipeline, + m_defaultFramebufferMetalTexture[current_swapchain_index].texture, + drawable); + } + } +} + +void GHOST_ContextCGL::initClear() +{ + + if (!m_useMetalForRendering) { +#if WITH_OPENGL + glClearColor(0.294, 0.294, 0.294, 0.000); + glClear(GL_COLOR_BUFFER_BIT); + glClearColor(0.000, 0.000, 0.000, 0.000); +#endif + } + else { +#if WITH_METAL + // TODO (mg_gpusw_apple) this path is never taken, this is legacy left from initial integration
 + // of metal and gl, the whole file should be cleaned up and stripped of the legacy path + id<MTLCommandBuffer> cmdBuffer = [m_metalCmdQueue commandBuffer]; + MTLRenderPassDescriptor *passDescriptor = [MTLRenderPassDescriptor renderPassDescriptor]; + { + auto attachment = [passDescriptor.colorAttachments objectAtIndexedSubscript:0]; + attachment.texture = m_defaultFramebufferMetalTexture[current_swapchain_index].texture; + attachment.loadAction = MTLLoadActionClear; + attachment.clearColor = MTLClearColorMake(0.294, 0.294, 0.294, 1.000); + attachment.storeAction = MTLStoreActionStore; + } + + // encoding { id<MTLRenderCommandEncoder> enc = [cmdBuffer renderCommandEncoderWithDescriptor:passDescriptor]; } - - [enc setRenderPipelineState:(id<MTLRenderPipelineState>)m_metalRenderPipeline]; - [enc setFragmentTexture:srcTexture atIndex:0]; - [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3]; - [enc endEncoding]; - } - - [cmdBuffer presentDrawable:drawable]; - [cmdBuffer commit]; +#endif } } diff --git a/intern/ghost/intern/GHOST_Window.cpp b/intern/ghost/intern/GHOST_Window.cpp index db4d6c3bb71..da292a90869 100644 --- a/intern/ghost/intern/GHOST_Window.cpp +++ b/intern/ghost/intern/GHOST_Window.cpp @@ -92,6 +92,11 @@ GHOST_TSuccess GHOST_Window::getSwapInterval(int &intervalOut) return m_context->getSwapInterval(intervalOut); } +GHOST_Context *GHOST_Window::getContext() +{ + return m_context; +} + unsigned int GHOST_Window::getDefaultFramebuffer() { return (m_context) ? 
m_context->getDefaultFramebuffer() : 0; diff --git a/intern/ghost/intern/GHOST_Window.h b/intern/ghost/intern/GHOST_Window.h index 2c2b75a6bd5..8e1f73d3430 100644 --- a/intern/ghost/intern/GHOST_Window.h +++ b/intern/ghost/intern/GHOST_Window.h @@ -72,7 +72,7 @@ class GHOST_Window : public GHOST_IWindow { * Returns indication as to whether the window is valid. * \return The validity of the window. */ - virtual bool getValid() const + virtual bool getValid() const override { return m_context != NULL; } @@ -81,15 +81,15 @@ class GHOST_Window : public GHOST_IWindow { * Returns the associated OS object/handle * \return The associated OS object/handle */ - virtual void *getOSWindow() const; + virtual void *getOSWindow() const override; /** * Returns the current cursor shape. * \return The current cursor shape. */ - inline GHOST_TStandardCursor getCursorShape() const; + inline GHOST_TStandardCursor getCursorShape() const override; - inline bool isDialog() const + inline bool isDialog() const override { return false; } @@ -99,7 +99,7 @@ class GHOST_Window : public GHOST_IWindow { * \param cursorShape: The new cursor shape type id. * \return Indication of success. */ - GHOST_TSuccess setCursorShape(GHOST_TStandardCursor cursorShape); + GHOST_TSuccess setCursorShape(GHOST_TStandardCursor cursorShape) override; /** * Set the shape of the cursor to a custom cursor. @@ -115,15 +115,15 @@ class GHOST_Window : public GHOST_IWindow { int sizey, int hotX, int hotY, - bool canInvertColor); + bool canInvertColor) override; - GHOST_TSuccess getCursorBitmap(GHOST_CursorBitmapRef *bitmap); + GHOST_TSuccess getCursorBitmap(GHOST_CursorBitmapRef *bitmap) override; /** * Returns the visibility state of the cursor. * \return The visibility state of the cursor. */ - inline bool getCursorVisibility() const; + inline bool getCursorVisibility() const override; inline GHOST_TGrabCursorMode getCursorGrabMode() const; inline bool getCursorGrabModeIsWarp() const; inline GHOST_TAxisFlag getCursorGrabAxis() const; @@ -136,7 +136,7 @@ class GHOST_Window : public GHOST_IWindow { * \param visible: The new visibility state of the cursor. * \return Indication of success. */ - GHOST_TSuccess setCursorVisibility(bool visible); + GHOST_TSuccess setCursorVisibility(bool visible) override; /** * Sets the cursor grab. @@ -146,28 +146,28 @@ class GHOST_Window : public GHOST_IWindow { GHOST_TSuccess setCursorGrab(GHOST_TGrabCursorMode mode, GHOST_TAxisFlag wrap_axis, GHOST_Rect *bounds, - int32_t mouse_ungrab_xy[2]); + int32_t mouse_ungrab_xy[2]) override; /** * Gets the cursor grab region, if unset the window is used. * reset when grab is disabled. */ - GHOST_TSuccess getCursorGrabBounds(GHOST_Rect &bounds); + GHOST_TSuccess getCursorGrabBounds(GHOST_Rect &bounds) override; void getCursorGrabState(GHOST_TGrabCursorMode &mode, GHOST_TAxisFlag &axis_flag, GHOST_Rect &bounds, - bool &use_software_cursor); + bool &use_software_cursor) override; /** * Return true when a software cursor should be used. */ - bool getCursorGrabUseSoftwareDisplay(); + bool getCursorGrabUseSoftwareDisplay() override; /** * Sets the progress bar value displayed in the window/application icon * \param progress: The progress percentage (0.0 to 1.0). 
*/ - virtual GHOST_TSuccess setProgressBar(float /*progress*/) + virtual GHOST_TSuccess setProgressBar(float /*progress*/) override { return GHOST_kFailure; } @@ -175,7 +175,7 @@ class GHOST_Window : public GHOST_IWindow { /** * Hides the progress bar in the icon */ - virtual GHOST_TSuccess endProgressBar() + virtual GHOST_TSuccess endProgressBar() override { return GHOST_kFailure; } @@ -185,43 +185,43 @@ class GHOST_Window : public GHOST_IWindow { * \param interval: The swap interval to use. * \return A boolean success indicator. */ - GHOST_TSuccess setSwapInterval(int interval); + GHOST_TSuccess setSwapInterval(int interval) override; /** * Gets the current swap interval for #swapBuffers. * \return An integer. */ - GHOST_TSuccess getSwapInterval(int &intervalOut); + GHOST_TSuccess getSwapInterval(int &intervalOut) override; /** * Tells if the ongoing drag'n'drop object can be accepted upon mouse drop */ - void setAcceptDragOperation(bool canAccept); + void setAcceptDragOperation(bool canAccept) override; /** * Returns acceptance of the dropped object * Usually called by the "object dropped" event handling function */ - bool canAcceptDragOperation() const; + bool canAcceptDragOperation() const override; /** * Sets the window "modified" status, indicating unsaved changes * \param isUnsavedChanges: Unsaved changes or not. * \return Indication of success. */ - virtual GHOST_TSuccess setModifiedState(bool isUnsavedChanges); + virtual GHOST_TSuccess setModifiedState(bool isUnsavedChanges) override; /** * Gets the window "modified" status, indicating unsaved changes * \return True if there are unsaved changes */ - virtual bool getModifiedState(); + virtual bool getModifiedState() override; /** * Returns the type of drawing context used in this window. * \return The current type of drawing context. */ - inline GHOST_TDrawingContextType getDrawingContextType(); + inline GHOST_TDrawingContextType getDrawingContextType() override; /** * Tries to install a rendering context in this window. @@ -230,19 +230,19 @@ class GHOST_Window : public GHOST_IWindow { * \param type: The type of rendering context installed. * \return Indication as to whether installation has succeeded. */ - GHOST_TSuccess setDrawingContextType(GHOST_TDrawingContextType type); + GHOST_TSuccess setDrawingContextType(GHOST_TDrawingContextType type) override; /** * Swaps front and back buffers of a window. * \return A boolean success indicator. */ - virtual GHOST_TSuccess swapBuffers(); + virtual GHOST_TSuccess swapBuffers() override; /** * Activates the drawing context of this window. * \return A boolean success indicator. */ - virtual GHOST_TSuccess activateDrawingContext(); + virtual GHOST_TSuccess activateDrawingContext() override; /** * Updates the drawing context of this window. Needed @@ -252,16 +252,22 @@ class GHOST_Window : public GHOST_IWindow { GHOST_TSuccess updateDrawingContext(); /** - * Gets the OpenGL frame-buffer associated with the window's contents. - * \return The ID of an OpenGL frame-buffer object. + * Get the drawing context associated with this window. + *\return Pointer to the context object. */ - virtual unsigned int getDefaultFramebuffer(); + GHOST_Context *getContext(); + + /** + * Gets the OpenGL framebuffer associated with the window's contents. + * \return The ID of an OpenGL framebuffer object. + */ + virtual unsigned int getDefaultFramebuffer() override; /** * Returns the window user data. * \return The window user data. 
*/ - inline GHOST_TUserDataPtr getUserData() const + inline GHOST_TUserDataPtr getUserData() const override { return m_userData; } @@ -270,12 +276,12 @@ class GHOST_Window : public GHOST_IWindow { * Changes the window user data. * \param userData: The window user data. */ - void setUserData(const GHOST_TUserDataPtr userData) + void setUserData(const GHOST_TUserDataPtr userData) override { m_userData = userData; } - float getNativePixelSize(void) + float getNativePixelSize(void) override { if (m_nativePixelSize > 0.0f) return m_nativePixelSize; @@ -286,18 +292,18 @@ class GHOST_Window : public GHOST_IWindow { * Returns the recommended DPI for this window. * \return The recommended DPI for this window. */ - virtual inline uint16_t getDPIHint() + virtual inline uint16_t getDPIHint() override { return 96; } #ifdef WITH_INPUT_IME - virtual void beginIME(int32_t x, int32_t y, int32_t w, int32_t h, bool completed) + virtual void beginIME(int32_t x, int32_t y, int32_t w, int32_t h, bool completed) override { /* do nothing temporarily if not in windows */ } - virtual void endIME() + virtual void endIME() override { /* do nothing temporarily if not in windows */ } diff --git a/intern/ghost/test/multitest/MultiTest.c b/intern/ghost/test/multitest/MultiTest.c index 99b88dfb525..6a6a042f4ac 100644 --- a/intern/ghost/test/multitest/MultiTest.c +++ b/intern/ghost/test/multitest/MultiTest.c @@ -323,7 +323,7 @@ MainWindow *mainwindow_new(MultiTestApp *app) if (win) { MainWindow *mw = MEM_callocN(sizeof(*mw), "mainwindow_new"); - mw->gpu_context = GPU_context_create(win); + mw->gpu_context = GPU_context_create(win, NULL); GPU_init(); mw->app = app; @@ -578,7 +578,7 @@ LoggerWindow *loggerwindow_new(MultiTestApp *app) if (win) { LoggerWindow *lw = MEM_callocN(sizeof(*lw), "loggerwindow_new"); - lw->gpu_context = GPU_context_create(win); + lw->gpu_context = GPU_context_create(win, NULL); GPU_init(); int bbox[2][2]; @@ -780,7 +780,7 @@ ExtraWindow *extrawindow_new(MultiTestApp *app) if (win) { ExtraWindow *ew = MEM_callocN(sizeof(*ew), "mainwindow_new"); - ew->gpu_context = GPU_context_create(win); + ew->gpu_context = GPU_context_create(win, NULL); GPU_init(); ew->app = app; diff --git a/source/blender/draw/DRW_engine.h b/source/blender/draw/DRW_engine.h index dec7a22aadb..04e3bddfb6c 100644 --- a/source/blender/draw/DRW_engine.h +++ b/source/blender/draw/DRW_engine.h @@ -201,6 +201,7 @@ void DRW_gpu_render_context_enable(void *re_gpu_context); void DRW_gpu_render_context_disable(void *re_gpu_context); void DRW_deferred_shader_remove(struct GPUMaterial *mat); +void DRW_deferred_shader_optimize_remove(struct GPUMaterial *mat); /** * Get DrawData from the given ID-block. 
In order for this to work, we assume that diff --git a/source/blender/draw/engines/eevee/eevee_lightcache.c b/source/blender/draw/engines/eevee/eevee_lightcache.c index 614ea0b0892..0fd87ef43f0 100644 --- a/source/blender/draw/engines/eevee/eevee_lightcache.c +++ b/source/blender/draw/engines/eevee/eevee_lightcache.c @@ -597,7 +597,7 @@ static void eevee_lightbake_context_enable(EEVEE_LightBake *lbake) if (lbake->gl_context) { DRW_opengl_render_context_enable(lbake->gl_context); if (lbake->gpu_context == NULL) { - lbake->gpu_context = GPU_context_create(NULL); + lbake->gpu_context = GPU_context_create(NULL, lbake->gl_context); } DRW_gpu_render_context_enable(lbake->gpu_context); } diff --git a/source/blender/draw/engines/eevee_next/eevee_shader.cc b/source/blender/draw/engines/eevee_next/eevee_shader.cc index 64b1d4891a9..05ff06e7435 100644 --- a/source/blender/draw/engines/eevee_next/eevee_shader.cc +++ b/source/blender/draw/engines/eevee_next/eevee_shader.cc @@ -471,6 +471,8 @@ GPUMaterial *ShaderModule::material_shader_get(const char *name, this); GPU_material_status_set(gpumat, GPU_MAT_QUEUED); GPU_material_compile(gpumat); + /* Queue deferred material optimization. */ + DRW_shader_queue_optimize_material(gpumat); return gpumat; } diff --git a/source/blender/draw/intern/DRW_render.h b/source/blender/draw/intern/DRW_render.h index 7b80ffd2b88..4bdef577e44 100644 --- a/source/blender/draw/intern/DRW_render.h +++ b/source/blender/draw/intern/DRW_render.h @@ -251,6 +251,7 @@ struct GPUMaterial *DRW_shader_from_material(struct Material *ma, bool deferred, GPUCodegenCallbackFn callback, void *thunk); +void DRW_shader_queue_optimize_material(struct GPUMaterial *mat); void DRW_shader_free(struct GPUShader *shader); #define DRW_SHADER_FREE_SAFE(shader) \ do { \ diff --git a/source/blender/draw/intern/draw_manager.c b/source/blender/draw/intern/draw_manager.c index e1bee89db60..eab79652762 100644 --- a/source/blender/draw/intern/draw_manager.c +++ b/source/blender/draw/intern/draw_manager.c @@ -3139,7 +3139,7 @@ void DRW_opengl_context_create(void) DST.gl_context = WM_opengl_context_create(); WM_opengl_context_activate(DST.gl_context); /* Be sure to create gpu_context too. */ - DST.gpu_context = GPU_context_create(NULL); + DST.gpu_context = GPU_context_create(0, DST.gl_context); /* So we activate the window's one afterwards. */ wm_window_reset_drawable(); } diff --git a/source/blender/draw/intern/draw_manager_shader.c b/source/blender/draw/intern/draw_manager_shader.c index 4bc3898c5e7..6f8df54ead3 100644 --- a/source/blender/draw/intern/draw_manager_shader.c +++ b/source/blender/draw/intern/draw_manager_shader.c @@ -51,9 +51,13 @@ extern char datatoc_common_fullscreen_vert_glsl[]; * \{ */ typedef struct DRWShaderCompiler { + /** Default compilation queue. */ ListBase queue; /* GPUMaterial */ SpinLock list_lock; + /** Optimization queue. */ + ListBase optimize_queue; /* GPUMaterial */ + void *gl_context; GPUContext *gpu_context; bool own_context; @@ -109,7 +113,29 @@ static void drw_deferred_shader_compilation_exec( MEM_freeN(link); } else { - break; + /* Check for Material Optimization job once there are no more + * shaders to compile. */ + BLI_spin_lock(&comp->list_lock); + /* Pop tail because it will be less likely to lock the main thread + * if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */ + LinkData *link = (LinkData *)BLI_poptail(&comp->optimize_queue); + GPUMaterial *optimize_mat = link ? 
(GPUMaterial *)link->data : NULL; + if (optimize_mat) { + /* Avoid another thread freeing the material during optimization. */ + GPU_material_acquire(optimize_mat); + } + BLI_spin_unlock(&comp->list_lock); + + if (optimize_mat) { + /* Compile optimized material shader. */ + GPU_material_optimize(optimize_mat); + GPU_material_release(optimize_mat); + MEM_freeN(link); + } + else { + /* No more materials to optimize, or shaders to compile. */ + break; + } } if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) { @@ -131,6 +157,7 @@ static void drw_deferred_shader_compilation_free(void *custom_data) BLI_spin_lock(&comp->list_lock); BLI_freelistN(&comp->queue); + BLI_freelistN(&comp->optimize_queue); BLI_spin_unlock(&comp->list_lock); if (comp->own_context) { @@ -146,34 +173,13 @@ static void drw_deferred_shader_compilation_free(void *custom_data) MEM_freeN(comp); } -static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred) +/** + * Append either shader compilation or optimization job to deferred queue and + * ensure shader compilation worker is active. + * We keep two separate queues to ensure core compilations always complete before optimization. + */ +static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job) { - if (ELEM(GPU_material_status(mat), GPU_MAT_SUCCESS, GPU_MAT_FAILED)) { - return; - } - /* Do not defer the compilation if we are rendering for image. - * deferred rendering is only possible when `evil_C` is available */ - if (DST.draw_ctx.evil_C == NULL || DRW_state_is_image_render() || !USE_DEFERRED_COMPILATION) { - deferred = false; - } - - if (!deferred) { - DRW_deferred_shader_remove(mat); - /* Shaders could already be compiling. Have to wait for compilation to finish. */ - while (GPU_material_status(mat) == GPU_MAT_QUEUED) { - PIL_sleep_ms(20); - } - if (GPU_material_status(mat) == GPU_MAT_CREATED) { - GPU_material_compile(mat); - } - return; - } - - /* Don't add material to the queue twice. */ - if (GPU_material_status(mat) == GPU_MAT_QUEUED) { - return; - } - const bool use_main_context = GPU_use_main_context_workaround(); const bool job_own_context = !use_main_context; @@ -194,6 +200,7 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred) if (old_comp) { BLI_spin_lock(&old_comp->list_lock); BLI_movelisttolist(&comp->queue, &old_comp->queue); + BLI_movelisttolist(&comp->optimize_queue, &old_comp->optimize_queue); BLI_spin_unlock(&old_comp->list_lock); /* Do not recreate context, just pass ownership. */ if (old_comp->gl_context) { @@ -204,9 +211,18 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred) } } - GPU_material_status_set(mat, GPU_MAT_QUEUED); - LinkData *node = BLI_genericNodeN(mat); - BLI_addtail(&comp->queue, node); + /* Add to either compilation or optimization queue. */ + if (is_optimization_job) { + BLI_assert(GPU_material_optimization_status(mat) != GPU_MAT_OPTIMIZATION_QUEUED); + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_QUEUED); + LinkData *node = BLI_genericNodeN(mat); + BLI_addtail(&comp->optimize_queue, node); + } + else { + GPU_material_status_set(mat, GPU_MAT_QUEUED); + LinkData *node = BLI_genericNodeN(mat); + BLI_addtail(&comp->queue, node); + } /* Create only one context. 
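 * (One GL + GPU context pair serves both queues. Because the exec loop above
 * only pops optimize_queue once the compile queue is empty, base shaders
 * always finish compiling before optimization work starts.)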
*/ if (comp->gl_context == NULL) { @@ -216,7 +232,7 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred) } else { comp->gl_context = WM_opengl_context_create(); - comp->gpu_context = GPU_context_create(NULL); + comp->gpu_context = GPU_context_create(NULL, comp->gl_context); GPU_context_active_set(NULL); WM_opengl_context_activate(DST.gl_context); @@ -235,6 +251,39 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred) WM_jobs_start(wm, wm_job); } +static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred) +{ + if (ELEM(GPU_material_status(mat), GPU_MAT_SUCCESS, GPU_MAT_FAILED)) { + return; + } + + /* Do not defer the compilation if we are rendering for image. + * deferred rendering is only possible when `evil_C` is available */ + if (DST.draw_ctx.evil_C == NULL || DRW_state_is_image_render() || !USE_DEFERRED_COMPILATION) { + deferred = false; + } + + if (!deferred) { + DRW_deferred_shader_remove(mat); + /* Shaders could already be compiling. Have to wait for compilation to finish. */ + while (GPU_material_status(mat) == GPU_MAT_QUEUED) { + PIL_sleep_ms(20); + } + if (GPU_material_status(mat) == GPU_MAT_CREATED) { + GPU_material_compile(mat); + } + return; + } + + /* Don't add material to the queue twice. */ + if (GPU_material_status(mat) == GPU_MAT_QUEUED) { + return; + } + + /* Add deferred shader compilation to queue. */ + drw_deferred_queue_append(mat, false); +} + void DRW_deferred_shader_remove(GPUMaterial *mat) { LISTBASE_FOREACH (wmWindowManager *, wm, &G_MAIN->wm) { @@ -243,14 +292,49 @@ void DRW_deferred_shader_remove(GPUMaterial *mat) wm, wm, WM_JOB_TYPE_SHADER_COMPILATION); if (comp != NULL) { BLI_spin_lock(&comp->list_lock); + + /* Search for compilation job in queue. */ LinkData *link = (LinkData *)BLI_findptr(&comp->queue, mat, offsetof(LinkData, data)); if (link) { BLI_remlink(&comp->queue, link); GPU_material_status_set(link->data, GPU_MAT_CREATED); } - BLI_spin_unlock(&comp->list_lock); MEM_SAFE_FREE(link); + + /* Search for optimization job in queue. */ + LinkData *opti_link = (LinkData *)BLI_findptr( + &comp->optimize_queue, mat, offsetof(LinkData, data)); + if (opti_link) { + BLI_remlink(&comp->optimize_queue, opti_link); + GPU_material_optimization_status_set(opti_link->data, GPU_MAT_OPTIMIZATION_READY); + } + BLI_spin_unlock(&comp->list_lock); + + MEM_SAFE_FREE(opti_link); + } + } + } +} + +void DRW_deferred_shader_optimize_remove(GPUMaterial *mat) +{ + LISTBASE_FOREACH (wmWindowManager *, wm, &G_MAIN->wm) { + LISTBASE_FOREACH (wmWindow *, win, &wm->windows) { + DRWShaderCompiler *comp = (DRWShaderCompiler *)WM_jobs_customdata_from_type( + wm, wm, WM_JOB_TYPE_SHADER_COMPILATION); + if (comp != NULL) { + BLI_spin_lock(&comp->list_lock); + /* Search for optimization job in queue. 
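 * (Mirrors the optimize-queue removal in DRW_deferred_shader_remove() above;
 * a de-queued material is reset to GPU_MAT_OPTIMIZATION_READY so it can be
 * queued again later.)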
*/ + LinkData *opti_link = (LinkData *)BLI_findptr( + &comp->optimize_queue, mat, offsetof(LinkData, data)); + if (opti_link) { + BLI_remlink(&comp->optimize_queue, opti_link); + GPU_material_optimization_status_set(opti_link->data, GPU_MAT_OPTIMIZATION_READY); + } + BLI_spin_unlock(&comp->list_lock); + + MEM_SAFE_FREE(opti_link); } } } @@ -384,6 +468,7 @@ GPUMaterial *DRW_shader_from_world(World *wo, } drw_deferred_shader_add(mat, deferred); + DRW_shader_queue_optimize_material(mat); return mat; } @@ -413,9 +498,52 @@ GPUMaterial *DRW_shader_from_material(Material *ma, } drw_deferred_shader_add(mat, deferred); + DRW_shader_queue_optimize_material(mat); return mat; } +void DRW_shader_queue_optimize_material(GPUMaterial *mat) +{ + /* Do not perform deferred optimization if performing render. + * De-queue any queued optimization jobs. */ + if (DRW_state_is_image_render()) { + if (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) { + /* Remove from pending optimization job queue. */ + DRW_deferred_shader_optimize_remove(mat); + /* If optimization job had already started, wait for it to complete. */ + while (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) { + PIL_sleep_ms(20); + } + } + return; + } + + /* We do not need to perform optimization on the material if it is already compiled or in the + * optimization queue. If optimization is not required, the status will be flagged as + * `GPU_MAT_OPTIMIZATION_SKIP`. + * We can also skip cases which have already been queued up. */ + if (ELEM(GPU_material_optimization_status(mat), + GPU_MAT_OPTIMIZATION_SKIP, + GPU_MAT_OPTIMIZATION_SUCCESS, + GPU_MAT_OPTIMIZATION_QUEUED)) { + return; + } + + /* Only queue optimization once the original shader has been successfully compiled. */ + if (GPU_material_status(mat) != GPU_MAT_SUCCESS) { + return; + } + + /* Defer optimization until sufficient time has passed beyond creation. This avoids excessive + * recompilation for shaders which are being actively modified. */ + if (!GPU_material_optimization_ready(mat)) { + return; + } + + /* Add deferred shader compilation to queue. */ + drw_deferred_queue_append(mat, true); +} + void DRW_shader_free(GPUShader *shader) { GPU_shader_free(shader); diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index 18da5169620..0ce4011b2b4 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -192,6 +192,7 @@ set(METAL_SRC metal/mtl_context.mm metal/mtl_debug.mm metal/mtl_framebuffer.mm + metal/mtl_immediate.mm metal/mtl_index_buffer.mm metal/mtl_memory.mm metal/mtl_query.mm @@ -205,11 +206,14 @@ set(METAL_SRC metal/mtl_vertex_buffer.mm metal/mtl_backend.hh + metal/mtl_batch.hh metal/mtl_capabilities.hh metal/mtl_common.hh metal/mtl_context.hh metal/mtl_debug.hh + metal/mtl_drawlist.hh metal/mtl_framebuffer.hh + metal/mtl_immediate.hh metal/mtl_index_buffer.hh metal/mtl_memory.hh metal/mtl_primitive.hh diff --git a/source/blender/gpu/GPU_context.h b/source/blender/gpu/GPU_context.h index a242bb7cc94..b59ea9e55d2 100644 --- a/source/blender/gpu/GPU_context.h +++ b/source/blender/gpu/GPU_context.h @@ -26,7 +26,7 @@ eGPUBackendType GPU_backend_get_type(void); /** Opaque type hiding blender::gpu::Context. */ typedef struct GPUContext GPUContext; -GPUContext *GPU_context_create(void *ghost_window); +GPUContext *GPU_context_create(void *ghost_window, void *ghost_context); /** * To be called after #GPU_context_active_set(ctx_to_destroy). 
*/
diff --git a/source/blender/gpu/GPU_material.h b/source/blender/gpu/GPU_material.h
index 922988bf95a..11500f5af60 100644
--- a/source/blender/gpu/GPU_material.h
+++ b/source/blender/gpu/GPU_material.h
@@ -117,6 +117,15 @@ typedef enum eGPUMaterialStatus {
   GPU_MAT_SUCCESS,
 } eGPUMaterialStatus;

+/* GPU_MAT_OPTIMIZATION_SKIP for cases where we do not
+ * plan to perform optimization on a given material. */
+typedef enum eGPUMaterialOptimizationStatus {
+  GPU_MAT_OPTIMIZATION_SKIP = 0,
+  GPU_MAT_OPTIMIZATION_READY,
+  GPU_MAT_OPTIMIZATION_QUEUED,
+  GPU_MAT_OPTIMIZATION_SUCCESS,
+} eGPUMaterialOptimizationStatus;
+
 typedef enum eGPUDefaultValue {
   GPU_DEFAULT_0 = 0,
   GPU_DEFAULT_1,
@@ -246,6 +255,15 @@ struct Scene *GPU_material_scene(GPUMaterial *material);
 struct GPUPass *GPU_material_get_pass(GPUMaterial *material);
 struct GPUShader *GPU_material_get_shader(GPUMaterial *material);
 const char *GPU_material_get_name(GPUMaterial *material);
+
+/**
+ * Material Optimization.
+ * \note Compiles optimal version of shader graph, populating mat->optimized_pass.
+ * This operation should always be deferred until existing compilations have completed.
+ * Default un-optimized materials will still exist for interactive material editing performance.
+ */
+void GPU_material_optimize(GPUMaterial *mat);
+
 /**
  * Return can be NULL if it's a world material.
  */
@@ -256,6 +274,13 @@ struct Material *GPU_material_get_material(GPUMaterial *material);
 eGPUMaterialStatus GPU_material_status(GPUMaterial *mat);
 void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status);

+/**
+ * Return status for async optimization jobs.
+ */
+eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat);
+void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status);
+bool GPU_material_optimization_ready(GPUMaterial *mat);
+
 struct GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material);
 /**
  * Create dynamic UBO from parameters
diff --git a/source/blender/gpu/intern/gpu_backend.hh b/source/blender/gpu/intern/gpu_backend.hh
index d2890efee72..2a545c8114e 100644
--- a/source/blender/gpu/intern/gpu_backend.hh
+++ b/source/blender/gpu/intern/gpu_backend.hh
@@ -38,7 +38,7 @@ class GPUBackend {
   virtual void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) = 0;
   virtual void compute_dispatch_indirect(StorageBuf *indirect_buf) = 0;

-  virtual Context *context_alloc(void *ghost_window) = 0;
+  virtual Context *context_alloc(void *ghost_window, void *ghost_context) = 0;

   virtual Batch *batch_alloc() = 0;
   virtual DrawList *drawlist_alloc(int list_length) = 0;
diff --git a/source/blender/gpu/intern/gpu_codegen.cc b/source/blender/gpu/intern/gpu_codegen.cc
index 2241bcf9f9b..85cfa9749fa 100644
--- a/source/blender/gpu/intern/gpu_codegen.cc
+++ b/source/blender/gpu/intern/gpu_codegen.cc
@@ -95,6 +95,9 @@ struct GPUPass {
   uint32_t hash;
   /** Did we already tried to compile the attached GPUShader. */
   bool compiled;
+  /** Hint that an optimized variant of this pass should be created based on a complexity heuristic
+   * during pass code generation. */
+  bool should_optimize;
 };

 /* -------------------------------------------------------------------- */
@@ -242,6 +245,11 @@ class GPUCodegen {
   ListBase ubo_inputs_ = {nullptr, nullptr};
   GPUInput *cryptomatte_input_ = nullptr;

+  /** Cache parameters for complexity heuristic. 
*/
+  uint nodes_total_ = 0;
+  uint textures_total_ = 0;
+  uint uniforms_total_ = 0;
+
  public:
   GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_) : mat(*mat_), graph(*graph_)
   {
@@ -282,6 +290,14 @@ class GPUCodegen {
     return hash_;
   }

+  /* Heuristic determined during pass codegen for whether a
+   * more optimal variant of this material should be compiled. */
+  bool should_optimize_heuristic() const
+  {
+    bool do_optimize = (nodes_total_ >= 100 || textures_total_ >= 4 || uniforms_total_ >= 64);
+    return do_optimize;
+  }
+
  private:
   void set_unique_ids();
@@ -403,6 +419,9 @@ void GPUCodegen::generate_resources()
     }
   }

+  /* Increment heuristic. */
+  textures_total_ = slot;
+
   if (!BLI_listbase_is_empty(&ubo_inputs_)) {
     /* NOTE: generate_uniform_buffer() should have sorted the inputs before this. */
     ss << "struct NodeTree {\n";
@@ -440,11 +459,16 @@ void GPUCodegen::generate_library()
   GPUCodegenCreateInfo &info = *create_info;

   void *value;
-  GSetIterState pop_state = {};
-  while (BLI_gset_pop(graph.used_libraries, &pop_state, &value)) {
+  /* Iterate over libraries. We need to keep this struct intact in case
+   * it is required for the optimization pass. */
+  GHashIterator *ihash = BLI_ghashIterator_new((GHash *)graph.used_libraries);
+  while (!BLI_ghashIterator_done(ihash)) {
+    value = BLI_ghashIterator_getKey(ihash);
     auto deps = gpu_shader_dependency_get_resolved_source((const char *)value);
     info.dependencies_generated.extend_non_duplicates(deps);
+    BLI_ghashIterator_step(ihash);
   }
+  BLI_ghashIterator_free(ihash);
 }

 void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
@@ -512,6 +536,9 @@ void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
     }
   }
   eval_ss << ");\n\n";
+
+  /* Increment heuristic. */
+  nodes_total_++;
 }

 char *GPUCodegen::graph_serialize(eGPUNodeTag tree_tag, GPUNodeLink *output_link)
@@ -575,6 +602,7 @@ void GPUCodegen::generate_uniform_buffer()
       if (input->source == GPU_SOURCE_UNIFORM && !input->link) {
         /* We handle the UBO uniforms separately. */
         BLI_addtail(&ubo_inputs_, BLI_genericNodeN(input));
+        uniforms_total_++;
       }
     }
   }
@@ -602,6 +630,7 @@ void GPUCodegen::generate_graphs()
 {
   set_unique_ids();

+  /* Serialize graph. */
   output.surface = graph_serialize(GPU_NODE_TAG_SURFACE | GPU_NODE_TAG_AOV, graph.outlink_surface);
   output.volume = graph_serialize(GPU_NODE_TAG_VOLUME, graph.outlink_volume);
   output.displacement = graph_serialize(GPU_NODE_TAG_DISPLACEMENT, graph.outlink_displacement);
@@ -637,10 +666,17 @@ void GPUCodegen::generate_graphs()
 GPUPass *GPU_generate_pass(GPUMaterial *material,
                            GPUNodeGraph *graph,
                            GPUCodegenCallbackFn finalize_source_cb,
-                           void *thunk)
+                           void *thunk,
+                           bool optimize_graph)
 {
   gpu_node_graph_prune_unused(graph);

+  /* If the optimize flag is passed in, we are generating an optimized
+   * variant of the GPUMaterial's GPUPass. */
+  if (optimize_graph) {
+    gpu_node_graph_optimize(graph);
+  }
+
   /* Extract attributes before compiling so the generated VBOs are ready to accept the future
    * shader. */
   gpu_node_graph_finalize_uniform_attrs(graph);
@@ -648,23 +684,33 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
   GPUCodegen codegen(material, graph);
   codegen.generate_graphs();
   codegen.generate_cryptomatte();
-  codegen.generate_uniform_buffer();

-  /* Cache lookup: Reuse shaders already compiled. */
-  GPUPass *pass_hash = gpu_pass_cache_lookup(codegen.hash_get());
-
-  /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
-   * there is no way to have a collision currently. 
Some advocated to only use a bigger hash. */ - if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) { - if (!gpu_pass_is_valid(pass_hash)) { - /* Shader has already been created but failed to compile. */ - return nullptr; + GPUPass *pass_hash = nullptr; + + if (!optimize_graph) { + /* The optimized version of the shader should not re-generate a UBO. + * The UBO will not be used for this variant. */ + codegen.generate_uniform_buffer(); + + /** Cache lookup: Reuse shaders already compiled. + * NOTE: We only perform cache look-up for non-optimized shader + * graphs, as baked constant data amongst other optimizations will generate too many + * shader source permutations, with minimal re-usability. */ + pass_hash = gpu_pass_cache_lookup(codegen.hash_get()); + + /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source + * there is no way to have a collision currently. Some advocated to only use a bigger hash. */ + if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) { + if (!gpu_pass_is_valid(pass_hash)) { + /* Shader has already been created but failed to compile. */ + return nullptr; + } + /* No collision, just return the pass. */ + BLI_spin_lock(&pass_cache_spin); + pass_hash->refcount += 1; + BLI_spin_unlock(&pass_cache_spin); + return pass_hash; } - /* No collision, just return the pass. */ - BLI_spin_lock(&pass_cache_spin); - pass_hash->refcount += 1; - BLI_spin_unlock(&pass_cache_spin); - return pass_hash; } /* Either the shader is not compiled or there is a hash collision... @@ -702,14 +748,31 @@ GPUPass *GPU_generate_pass(GPUMaterial *material, pass->create_info = codegen.create_info; pass->hash = codegen.hash_get(); pass->compiled = false; + /* Only flag pass optimization hint if this is the first generated pass for a material. + * Optimized passes cannot be optimized further, even if the heuristic is still not + * favourable. */ + pass->should_optimize = (!optimize_graph) && codegen.should_optimize_heuristic(); codegen.create_info = nullptr; - gpu_pass_cache_insert_after(pass_hash, pass); + /* Only insert non-optimized graphs into cache. + * Optimized graphs will continuously be recompiled with new unique source during material + * editing, and thus causing the cache to fill up quickly with materials offering minimal + * re-use. */ + if (!optimize_graph) { + gpu_pass_cache_insert_after(pass_hash, pass); + } } return pass; } +bool GPU_pass_should_optimize(GPUPass *pass) +{ + /* Returns optimization heuristic prepared during + * initial codegen. 
*/
+  return pass->should_optimize;
+}
+
 /** \} */

 /* -------------------------------------------------------------------- */
diff --git a/source/blender/gpu/intern/gpu_codegen.h b/source/blender/gpu/intern/gpu_codegen.h
index 95a672c0400..aabdf1ac003 100644
--- a/source/blender/gpu/intern/gpu_codegen.h
+++ b/source/blender/gpu/intern/gpu_codegen.h
@@ -25,10 +25,12 @@ typedef struct GPUPass GPUPass;
 GPUPass *GPU_generate_pass(GPUMaterial *material,
                            struct GPUNodeGraph *graph,
                            GPUCodegenCallbackFn finalize_source_cb,
-                           void *thunk);
+                           void *thunk,
+                           bool optimize_graph);
 GPUShader *GPU_pass_shader_get(GPUPass *pass);

 bool GPU_pass_compile(GPUPass *pass, const char *shname);
 void GPU_pass_release(GPUPass *pass);
+bool GPU_pass_should_optimize(GPUPass *pass);

 /* Module */
diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc
index bcc418169b7..92cbbc5b4b0 100644
--- a/source/blender/gpu/intern/gpu_context.cc
+++ b/source/blender/gpu/intern/gpu_context.cc
@@ -94,7 +94,7 @@ Context *Context::get()

 /* -------------------------------------------------------------------- */

-GPUContext *GPU_context_create(void *ghost_window)
+GPUContext *GPU_context_create(void *ghost_window, void *ghost_context)
 {
   {
     std::scoped_lock lock(backend_users_mutex);
@@ -105,7 +105,7 @@ GPUContext *GPU_context_create(void *ghost_window)
     num_backend_users++;
   }

-  Context *ctx = GPUBackend::get()->context_alloc(ghost_window);
+  Context *ctx = GPUBackend::get()->context_alloc(ghost_window, ghost_context);

   GPU_context_active_set(wrap(ctx));
   return wrap(ctx);
@@ -216,6 +216,9 @@ void GPU_render_step()
 /** \name Backend selection
  * \{ */

+/* NOTE: To enable Metal API, we need to temporarily change this to `GPU_BACKEND_METAL`.
+ * Until a global switch is added, Metal also needs to be enabled in GHOST_ContextCGL:
+ * `m_useMetalForRendering = true`. */
 static const eGPUBackendType g_backend_type = GPU_BACKEND_OPENGL;
 static GPUBackend *g_backend = nullptr;

diff --git a/source/blender/gpu/intern/gpu_material.c b/source/blender/gpu/intern/gpu_material.c
index 96809db1587..991cb229eda 100644
--- a/source/blender/gpu/intern/gpu_material.c
+++ b/source/blender/gpu/intern/gpu_material.c
@@ -34,6 +34,8 @@

 #include "DRW_engine.h"

+#include "PIL_time.h"
+
 #include "gpu_codegen.h"
 #include "gpu_node_graph.h"

@@ -43,6 +45,17 @@
 #define MAX_COLOR_BAND 128
 #define MAX_GPU_SKIES 8

+/** Whether the optimized variant of the GPUPass should be created asynchronously.
+ * Usage of this depends on whether there are possible threading challenges of doing so.
+ * Currently, the overhead of GPU_generate_pass is relatively small in comparison to shader
+ * compilation, though this option exists in case any potential scenarios for material graph
+ * optimization cause a slow down on the main thread.
+ *
+ * NOTE: The actual shader program for the optimized pass will always be compiled asynchronously;
+ * this flag controls whether shader node graph source serialization happens on the compilation
+ * worker thread. */
+#define ASYNC_OPTIMIZED_PASS_CREATION 0
+
 typedef struct GPUColorBandBuilder {
   float pixels[MAX_COLOR_BAND][CM_TABLE + 1][4];
   int current_layer;
@@ -57,6 +70,27 @@ struct GPUMaterial {

   /* Contains GPUShader and source code for deferred compilation.
    * Can be shared between similar material (i.e: sharing same nodetree topology). */
   GPUPass *pass;
+  /* Optimized GPUPass, situationally compiled after initial pass for optimal realtime performance. 
+ * This shader variant bakes dynamic uniform data as constant. This variant will not use + * the ubo, and instead bake constants directly into the shader source. */ + GPUPass *optimized_pass; + /* Optimization status. + * We also use this status to determine whether this material should be considered for + * optimization. Only sufficiently complex shaders benefit from constant-folding optimizations. + * `GPU_MAT_OPTIMIZATION_READY` -> shader should be optimized and is ready for optimization. + * `GPU_MAT_OPTIMIZATION_SKIP` -> Shader should not be optimized as it would not benefit + * performance to do so, based on the heuristic. + */ + eGPUMaterialOptimizationStatus optimization_status; + double creation_time; +#if ASYNC_OPTIMIZED_PASS_CREATION == 1 + struct DeferredOptimizePass { + GPUCodegenCallbackFn callback; + void *thunk; + } DeferredOptimizePass; + struct DeferredOptimizePass optimize_pass_info; +#endif + /** UBOs for this material parameters. */ GPUUniformBuf *ubo; /** Compilation status. Do not use if shader is not GPU_MAT_SUCCESS. */ @@ -209,6 +243,9 @@ void GPU_material_free_single(GPUMaterial *material) gpu_node_graph_free(&material->graph); + if (material->optimized_pass != NULL) { + GPU_pass_release(material->optimized_pass); + } if (material->pass != NULL) { GPU_pass_release(material->pass); } @@ -247,12 +284,15 @@ Scene *GPU_material_scene(GPUMaterial *material) GPUPass *GPU_material_get_pass(GPUMaterial *material) { - return material->pass; + return (material->optimized_pass) ? material->optimized_pass : material->pass; } GPUShader *GPU_material_get_shader(GPUMaterial *material) { - return material->pass ? GPU_pass_shader_get(material->pass) : NULL; + /* First attempt to select optimized shader. If not available, fetch original. */ + GPUShader *shader = (material->optimized_pass) ? GPU_pass_shader_get(material->optimized_pass) : + NULL; + return (shader) ? shader : ((material->pass) ? GPU_pass_shader_get(material->pass) : NULL); } const char *GPU_material_get_name(GPUMaterial *material) @@ -665,6 +705,29 @@ void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status) mat->status = status; } +eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat) +{ + return mat->optimization_status; +} + +void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status) +{ + mat->optimization_status = status; + if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) { + /* Reset creation timer to delay optimization pass. */ + mat->creation_time = PIL_check_seconds_timer(); + } +} + +bool GPU_material_optimization_ready(GPUMaterial *mat) +{ + /* Timer threshold before optimizations will be queued. + * When materials are frequently being modified, optimization + * can incur CPU overhead from excessive compilation. 
*/ + const double optimization_time_threshold_s = 5.0; + return ((PIL_check_seconds_timer() - mat->creation_time) >= optimization_time_threshold_s); +} + /* Code generation */ bool GPU_material_has_surface_output(GPUMaterial *mat) @@ -730,6 +793,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene, mat->uuid = shader_uuid; mat->flag = GPU_MATFLAG_UPDATED; mat->status = GPU_MAT_CREATED; + mat->optimization_status = GPU_MAT_OPTIMIZATION_SKIP; mat->is_volume_shader = is_volume_shader; mat->graph.used_libraries = BLI_gset_new( BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries"); @@ -748,7 +812,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene, { /* Create source code and search pass cache for an already compiled version. */ - mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk); + mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, false); if (mat->pass == NULL) { /* We had a cache hit and the shader has already failed to compile. */ @@ -756,11 +820,44 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene, gpu_node_graph_free(&mat->graph); } else { + /* Determine whether we should generate an optimized variant of the graph. + * Heuristic is based on complexity of default material pass and shader node graph. */ + if (GPU_pass_should_optimize(mat->pass)) { + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY); + } + GPUShader *sh = GPU_pass_shader_get(mat->pass); if (sh != NULL) { /* We had a cache hit and the shader is already compiled. */ mat->status = GPU_MAT_SUCCESS; - gpu_node_graph_free_nodes(&mat->graph); + + if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) { + gpu_node_graph_free_nodes(&mat->graph); + } + } + + /* Generate optimized pass. */ + if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) { +#if ASYNC_OPTIMIZED_PASS_CREATION == 1 + mat->optimized_pass = NULL; + mat->optimize_pass_info.callback = callback; + mat->optimize_pass_info.thunk = thunk; +#else + mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, true); + if (mat->optimized_pass == NULL) { + /* Failed to create optimized pass. */ + gpu_node_graph_free_nodes(&mat->graph); + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); + } + else { + GPUShader *optimized_sh = GPU_pass_shader_get(mat->optimized_pass); + if (optimized_sh != NULL) { + /* Optimized shader already available. */ + gpu_node_graph_free_nodes(&mat->graph); + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS); + } + } +#endif } } } @@ -811,7 +908,11 @@ void GPU_material_compile(GPUMaterial *mat) GPUShader *sh = GPU_pass_shader_get(mat->pass); if (sh != NULL) { mat->status = GPU_MAT_SUCCESS; - gpu_node_graph_free_nodes(&mat->graph); + + if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) { + /* Only free node graph nodes if not required by secondary optimization pass. */ + gpu_node_graph_free_nodes(&mat->graph); + } } else { mat->status = GPU_MAT_FAILED; @@ -825,6 +926,71 @@ void GPU_material_compile(GPUMaterial *mat) } } +void GPU_material_optimize(GPUMaterial *mat) +{ + /* If shader is flagged for skipping optimization or has already been successfully + * optimized, skip. */ + if (ELEM(mat->optimization_status, GPU_MAT_OPTIMIZATION_SKIP, GPU_MAT_OPTIMIZATION_SUCCESS)) { + return; + } + + /* If original shader has not been fully compiled, we are not + * ready to perform optimization. */ + if (mat->status != GPU_MAT_SUCCESS) { + /* Reset optimization status. 
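
The 5-second threshold above acts as a debounce: re-flagging a material `GPU_MAT_OPTIMIZATION_READY` on every edit re-arms `creation_time`, so optimization only starts once the user has stopped touching the material. A standalone sketch of the same pattern (the `Debounce` type and helper names are hypothetical; only `PIL_check_seconds_timer()` is real):

  typedef struct Debounce {
    double armed_at;
  } Debounce;

  static void debounce_arm(Debounce *d)
  {
    // Called whenever the guarded work is requested again.
    d->armed_at = PIL_check_seconds_timer();
  }

  static bool debounce_elapsed(const Debounce *d, double threshold_s)
  {
    // True only once no re-arm has happened for `threshold_s` seconds.
    return (PIL_check_seconds_timer() - d->armed_at) >= threshold_s;
  }
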
*/ + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY); + return; + } + +#if ASYNC_OPTIMIZED_PASS_CREATION == 1 + /* If the optimized pass is not valid, first generate optimized pass. + * NOTE(Threading): Need to verify if GPU_generate_pass can cause side-effects, especially when + * used with "thunk". So far, this appears to work, and deferring optimized pass creation is more + * optimal, as these do not benefit from caching, due to baked constants. However, this could + * possibly be cause for concern for certain cases. */ + if (!mat->optimized_pass) { + mat->optimized_pass = GPU_generate_pass( + mat, &mat->graph, mat->optimize_pass_info.callback, mat->optimize_pass_info.thunk, true); + BLI_assert(mat->optimized_pass); + } +#else + if (!mat->optimized_pass) { + /* Optimized pass has not been created, skip future optimization attempts. */ + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); + return; + } +#endif + + bool success; + /* NOTE: The shader may have already been compiled here since we are + * sharing GPUShader across GPUMaterials. In this case it's a no-op. */ +#ifndef NDEBUG + success = GPU_pass_compile(mat->optimized_pass, mat->name); +#else + success = GPU_pass_compile(mat->optimized_pass, __func__); +#endif + + if (success) { + GPUShader *sh = GPU_pass_shader_get(mat->optimized_pass); + if (sh != NULL) { + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS); + } + else { + /* Optimized pass failed to compile. Disable any future optimization attempts. */ + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); + } + } + else { + /* Optimization pass generation failed. Disable future attempts to optimize. */ + GPU_pass_release(mat->optimized_pass); + mat->optimized_pass = NULL; + GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); + } + + /* Release node graph as no longer needed. */ + gpu_node_graph_free_nodes(&mat->graph); +} + void GPU_materials_free(Main *bmain) { LISTBASE_FOREACH (Material *, ma, &bmain->materials) { @@ -848,6 +1014,8 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi material->graph.used_libraries = BLI_gset_new( BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries"); material->refcount = 1; + material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP; + material->optimized_pass = NULL; /* Construct the material graph by adding and linking the necessary GPU material nodes. */ construct_function_cb(thunk, material); @@ -856,7 +1024,9 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi gpu_material_ramp_texture_build(material); /* Lookup an existing pass in the cache or generate a new one. */ - material->pass = GPU_generate_pass(material, &material->graph, generate_code_function_cb, thunk); + material->pass = GPU_generate_pass( + material, &material->graph, generate_code_function_cb, thunk, false); + material->optimized_pass = NULL; /* The pass already exists in the pass cache but its shader already failed to compile. */ if (material->pass == NULL) { @@ -865,11 +1035,42 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi return material; } + /* Generate optimized pass. 
*/
+  if (GPU_pass_should_optimize(material->pass)) {
+
+#if ASYNC_OPTIMIZED_PASS_CREATION == 1
+    material->optimized_pass = NULL;
+    material->optimize_pass_info.callback = generate_code_function_cb;
+    material->optimize_pass_info.thunk = thunk;
+    GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_READY);
+#else
+    material->optimized_pass = GPU_generate_pass(
+        material, &material->graph, generate_code_function_cb, thunk, true);
+
+    if (material->optimized_pass == NULL) {
+      /* Failed to create optimized pass. */
+      gpu_node_graph_free_nodes(&material->graph);
+      GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SKIP);
+    }
+    else {
+      GPUShader *optimized_sh = GPU_pass_shader_get(material->optimized_pass);
+      if (optimized_sh != NULL) {
+        /* Optimized shader already available. */
+        gpu_node_graph_free_nodes(&material->graph);
+        GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SUCCESS);
+      }
+    }
+#endif
+  }
+
   /* The pass already exists in the pass cache and its shader is already compiled. */
   GPUShader *shader = GPU_pass_shader_get(material->pass);
   if (shader != NULL) {
     material->status = GPU_MAT_SUCCESS;
-    gpu_node_graph_free_nodes(&material->graph);
+    if (material->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
+      /* Only free node graph if not required by secondary optimization pass. */
+      gpu_node_graph_free_nodes(&material->graph);
+    }
     return material;
   }

diff --git a/source/blender/gpu/intern/gpu_node_graph.c b/source/blender/gpu/intern/gpu_node_graph.c
index a305413905b..3ca2399a547 100644
--- a/source/blender/gpu/intern/gpu_node_graph.c
+++ b/source/blender/gpu/intern/gpu_node_graph.c
@@ -914,3 +914,22 @@ void gpu_node_graph_prune_unused(GPUNodeGraph *graph)
     }
   }
 }
+
+void gpu_node_graph_optimize(GPUNodeGraph *graph)
+{
+  /* Replace all uniform node links with constant. */
+  LISTBASE_FOREACH (GPUNode *, node, &graph->nodes) {
+    LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
+      if (input->link) {
+        if (input->link->link_type == GPU_NODE_LINK_UNIFORM) {
+          input->link->link_type = GPU_NODE_LINK_CONSTANT;
+        }
+      }
+      if (input->source == GPU_SOURCE_UNIFORM) {
+        input->source = (input->type == GPU_CLOSURE) ? GPU_SOURCE_STRUCT : GPU_SOURCE_CONSTANT;
+      }
+    }
+  }
+
+  /* TODO: Consider performing other node graph optimizations here. */
+}
diff --git a/source/blender/gpu/intern/gpu_node_graph.h b/source/blender/gpu/intern/gpu_node_graph.h
index 085620b30e4..75ca05ffaea 100644
--- a/source/blender/gpu/intern/gpu_node_graph.h
+++ b/source/blender/gpu/intern/gpu_node_graph.h
@@ -179,6 +179,21 @@ typedef struct GPUNodeGraph {

 void gpu_node_graph_prune_unused(GPUNodeGraph *graph);
 void gpu_node_graph_finalize_uniform_attrs(GPUNodeGraph *graph);
+
+/**
+ * Optimize node graph for optimized material shader path.
+ * Once the base material has been generated, we can modify the shader
+ * node graph to create one which will produce an optimally performing shader.
+ * This currently involves baking uniform data into constant data to enable
+ * aggressive constant folding by the compiler in order to reduce complexity and
+ * shader core memory pressure.
+ *
+ * NOTE: Graph optimizations will produce a shader which needs to be re-compiled
+ * more frequently; however, the default material pass will always exist to fall
+ * back on.
+ */
+void gpu_node_graph_optimize(GPUNodeGraph *graph);
+
 /**
  * Free intermediate node graph. 
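
To make the effect of `gpu_node_graph_optimize()` concrete, an illustrative before/after at the shader-source level (not taken from this patch; the uniform name and baked value are invented, and the exact literal depends on the material's uniform state at generation time):

  // Default pass: the value stays editable without a recompile.
  uniform vec4 u_roughness;
  // Optimized pass: the uniform is folded to a literal constant, so the
  // compiler can constant-fold every subexpression that depends on it.
  const vec4 u_roughness = vec4(0.25, 0.25, 0.25, 1.0);

This is the complexity and register/memory-pressure win the note above describes, traded against recompiling whenever the baked values change.
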
*/ diff --git a/source/blender/gpu/intern/gpu_shader_builder.cc b/source/blender/gpu/intern/gpu_shader_builder.cc index 9b699c60126..3aa2963ecd0 100644 --- a/source/blender/gpu/intern/gpu_shader_builder.cc +++ b/source/blender/gpu/intern/gpu_shader_builder.cc @@ -45,7 +45,7 @@ void ShaderBuilder::init() ghost_context_ = GHOST_CreateOpenGLContext(ghost_system_, glSettings); GHOST_ActivateOpenGLContext(ghost_context_); - gpu_context_ = GPU_context_create(nullptr); + gpu_context_ = GPU_context_create(nullptr, ghost_context_); GPU_init(); } diff --git a/source/blender/gpu/intern/gpu_shader_interface.cc b/source/blender/gpu/intern/gpu_shader_interface.cc index 6f43b379d31..d9e5e066fea 100644 --- a/source/blender/gpu/intern/gpu_shader_interface.cc +++ b/source/blender/gpu/intern/gpu_shader_interface.cc @@ -22,8 +22,8 @@ ShaderInterface::ShaderInterface() = default; ShaderInterface::~ShaderInterface() { /* Free memory used by name_buffer. */ - MEM_freeN(name_buffer_); - MEM_freeN(inputs_); + MEM_SAFE_FREE(name_buffer_); + MEM_SAFE_FREE(inputs_); } static void sort_input_list(MutableSpan<ShaderInput> dst) diff --git a/source/blender/gpu/metal/mtl_backend.hh b/source/blender/gpu/metal/mtl_backend.hh index 214a5d738a9..082fab24ba4 100644 --- a/source/blender/gpu/metal/mtl_backend.hh +++ b/source/blender/gpu/metal/mtl_backend.hh @@ -63,7 +63,7 @@ class MTLBackend : public GPUBackend { /* MTL Allocators need to be implemented in separate .mm files, due to allocation of Objective-C * objects. */ - Context *context_alloc(void *ghost_window) override; + Context *context_alloc(void *ghost_window, void *ghost_context) override; Batch *batch_alloc() override; DrawList *drawlist_alloc(int list_length) override; FrameBuffer *framebuffer_alloc(const char *name) override; diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm index ec9e8ab4d15..2ca1fd3f3d0 100644 --- a/source/blender/gpu/metal/mtl_backend.mm +++ b/source/blender/gpu/metal/mtl_backend.mm @@ -8,8 +8,11 @@ #include "gpu_backend.hh" #include "mtl_backend.hh" +#include "mtl_batch.hh" #include "mtl_context.hh" +#include "mtl_drawlist.hh" #include "mtl_framebuffer.hh" +#include "mtl_immediate.hh" #include "mtl_index_buffer.hh" #include "mtl_query.hh" #include "mtl_shader.hh" @@ -37,21 +40,21 @@ void MTLBackend::samplers_update(){ /* Placeholder -- Handled in MTLContext. */ }; -Context *MTLBackend::context_alloc(void *ghost_window) +Context *MTLBackend::context_alloc(void *ghost_window, void *ghost_context) { - return new MTLContext(ghost_window); + return new MTLContext(ghost_window, ghost_context); }; Batch *MTLBackend::batch_alloc() { - /* TODO(Metal): Implement MTLBatch. */ - return nullptr; + /* TODO(Metal): Full MTLBatch implementation. */ + return new MTLBatch(); }; DrawList *MTLBackend::drawlist_alloc(int list_length) { - /* TODO(Metal): Implement MTLDrawList. */ - return nullptr; + /* TODO(Metal): Full MTLDrawList implementation. */ + return new MTLDrawList(list_length); }; FrameBuffer *MTLBackend::framebuffer_alloc(const char *name) diff --git a/source/blender/gpu/metal/mtl_batch.hh b/source/blender/gpu/metal/mtl_batch.hh new file mode 100644 index 00000000000..66603dabd15 --- /dev/null +++ b/source/blender/gpu/metal/mtl_batch.hh @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * GPU geometry batch + * Contains VAOs + VBOs + Shader representing a drawable entity. 
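
On the `ShaderInterface` destructor change above: `MEM_SAFE_FREE` guards against both NULL pointers and accidental reuse, which matters if an interface is destroyed before `name_buffer_`/`inputs_` were ever allocated. Roughly what the macro does (a simplified sketch; see `MEM_guardedalloc.h` for the actual definition):

  #define MEM_SAFE_FREE_SKETCH(v) \
    do { \
      if (v) { \
        MEM_freeN(v); \
        (v) = NULL; \
      } \
    } while (0)
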
+ */ + +#pragma once + +#include "MEM_guardedalloc.h" + +#include "gpu_batch_private.hh" + +namespace blender { +namespace gpu { + + +/* Pass-through MTLBatch. TODO(Metal): Implement. */ +class MTLBatch : public Batch { + public: + void draw(int v_first, int v_count, int i_first, int i_count) override { + + } + + void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override { + + } + + void multi_draw_indirect(GPUStorageBuf *indirect_buf, + int count, + intptr_t offset, + intptr_t stride) override { + + } + MEM_CXX_CLASS_ALLOC_FUNCS("MTLBatch"); +}; + +} // namespace gpu +} // namespace blender diff --git a/source/blender/gpu/metal/mtl_command_buffer.mm b/source/blender/gpu/metal/mtl_command_buffer.mm index d2936e8e91f..a9cabbb111f 100644 --- a/source/blender/gpu/metal/mtl_command_buffer.mm +++ b/source/blender/gpu/metal/mtl_command_buffer.mm @@ -54,6 +54,7 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin() MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init]; desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus; desc.retainedReferences = YES; + BLI_assert(context_.queue != nil); active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc]; } else { @@ -611,40 +612,187 @@ void MTLRenderPassState::bind_vertex_sampler(MTLSamplerBinding &sampler_binding, bool use_argument_buffer_for_samplers, uint slot) { - /* TODO(Metal): Implement RenderCommandEncoder vertex sampler binding utility. This will be - * implemented alongside MTLShader. */ + /* Range check. */ + const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface(); + BLI_assert(slot >= 0); + BLI_assert(slot <= shader_interface->get_max_texture_index()); + BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS); + UNUSED_VARS_NDEBUG(shader_interface); + + /* If sampler state has not changed for the given slot, we do not need to fetch. */ + if (this->cached_vertex_sampler_state_bindings[slot].sampler_state == nil || + !(this->cached_vertex_sampler_state_bindings[slot].binding_state == sampler_binding.state) || + use_argument_buffer_for_samplers) { + + id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ? + ctx.get_default_sampler_state() : + ctx.get_sampler_from_state(sampler_binding.state); + if (!use_argument_buffer_for_samplers) { + /* Update binding and cached state. */ + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + BLI_assert(rec != nil); + [rec setVertexSamplerState:sampler_state atIndex:slot]; + this->cached_vertex_sampler_state_bindings[slot].binding_state = sampler_binding.state; + this->cached_vertex_sampler_state_bindings[slot].sampler_state = sampler_state; + } + + /* Flag last binding type. */ + this->cached_vertex_sampler_state_bindings[slot].is_arg_buffer_binding = + use_argument_buffer_for_samplers; + + /* Always assign to argument buffer samplers binding array - Efficiently ensures the value in + * the samplers array is always up to date. */ + ctx.samplers_.mtl_sampler[slot] = sampler_state; + ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state; + } } void MTLRenderPassState::bind_fragment_sampler(MTLSamplerBinding &sampler_binding, bool use_argument_buffer_for_samplers, uint slot) { - /* TODO(Metal): Implement RenderCommandEncoder fragment sampler binding utility. This will be - * implemented alongside MTLShader. */ + /* Range check. 
*/
+  const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface();
+  BLI_assert(slot >= 0);
+  BLI_assert(slot <= shader_interface->get_max_texture_index());
+  BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS);
+  UNUSED_VARS_NDEBUG(shader_interface);
+
+  /* If sampler state has not changed for the given slot, we do not need to fetch. */
+  if (this->cached_fragment_sampler_state_bindings[slot].sampler_state == nil ||
+      !(this->cached_fragment_sampler_state_bindings[slot].binding_state ==
+        sampler_binding.state) ||
+      use_argument_buffer_for_samplers) {
+
+    id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ?
+                                            ctx.get_default_sampler_state() :
+                                            ctx.get_sampler_from_state(sampler_binding.state);
+    if (!use_argument_buffer_for_samplers) {
+      /* Update binding and cached state. */
+      id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+      BLI_assert(rec != nil);
+      [rec setFragmentSamplerState:sampler_state atIndex:slot];
+      this->cached_fragment_sampler_state_bindings[slot].binding_state = sampler_binding.state;
+      this->cached_fragment_sampler_state_bindings[slot].sampler_state = sampler_state;
+    }
+
+    /* Flag last binding type. */
+    this->cached_fragment_sampler_state_bindings[slot].is_arg_buffer_binding =
+        use_argument_buffer_for_samplers;
+
+    /* Always assign to argument buffer samplers binding array - Efficiently ensures the value in
+     * the samplers array is always up to date. */
+    ctx.samplers_.mtl_sampler[slot] = sampler_state;
+    ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state;
+  }
 }

 void MTLRenderPassState::bind_vertex_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index)
 {
-  /* TODO(Metal): Implement RenderCommandEncoder vertex buffer binding utility. This will be
-   * implemented alongside the full MTLMemoryManager. */
+  BLI_assert(index >= 0);
+  BLI_assert(buffer_offset >= 0);
+  BLI_assert(buffer != nil);
+
+  BufferBindingCached &current_vert_ubo_binding = this->cached_vertex_buffer_bindings[index];
+  if (current_vert_ubo_binding.offset != buffer_offset ||
+      current_vert_ubo_binding.metal_buffer != buffer || current_vert_ubo_binding.is_bytes) {
+
+    id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+    BLI_assert(rec != nil);
+
+    if (current_vert_ubo_binding.metal_buffer == buffer) {
+      /* If buffer is the same, but offset has changed. */
+      [rec setVertexBufferOffset:buffer_offset atIndex:index];
+    }
+    else {
+      /* Bind Vertex Buffer. */
+      [rec setVertexBuffer:buffer offset:buffer_offset atIndex:index];
+    }
+
+    /* Update Bind-state cache. */
+    this->cached_vertex_buffer_bindings[index].is_bytes = false;
+    this->cached_vertex_buffer_bindings[index].metal_buffer = buffer;
+    this->cached_vertex_buffer_bindings[index].offset = buffer_offset;
+  }
 }

 void MTLRenderPassState::bind_fragment_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index)
 {
-  /* TODO(Metal): Implement RenderCommandEncoder fragment buffer binding utility. This will be
-   * implemented alongside the full MTLMemoryManager. 
*/ + BLI_assert(index >= 0); + BLI_assert(buffer_offset >= 0); + BLI_assert(buffer != nil); + + BufferBindingCached ¤t_frag_ubo_binding = this->cached_fragment_buffer_bindings[index]; + if (current_frag_ubo_binding.offset != buffer_offset || + current_frag_ubo_binding.metal_buffer != buffer || current_frag_ubo_binding.is_bytes) { + + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + BLI_assert(rec != nil); + + if (current_frag_ubo_binding.metal_buffer == buffer) { + /* If buffer is the same, but offset has changed. */ + [rec setFragmentBufferOffset:buffer_offset atIndex:index]; + } + else { + /* Bind Fragment Buffer */ + [rec setFragmentBuffer:buffer offset:buffer_offset atIndex:index]; + } + + /* Update Bind-state cache */ + this->cached_fragment_buffer_bindings[index].is_bytes = false; + this->cached_fragment_buffer_bindings[index].metal_buffer = buffer; + this->cached_fragment_buffer_bindings[index].offset = buffer_offset; + } } void MTLRenderPassState::bind_vertex_bytes(void *bytes, uint length, uint index) { - /* TODO(Metal): Implement RenderCommandEncoder vertex bytes binding utility. This will be - * implemented alongside the full MTLMemoryManager. */ + /* Bytes always updated as source data may have changed. */ + BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + BLI_assert(length > 0); + BLI_assert(bytes != nullptr); + + if (length < MTL_MAX_SET_BYTES_SIZE) { + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + [rec setVertexBytes:bytes length:length atIndex:index]; + } + else { + /* We have run over the setBytes limit, bind buffer instead. */ + MTLTemporaryBuffer range = + ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256); + memcpy(range.data, bytes, length); + this->bind_vertex_buffer(range.metal_buffer, range.buffer_offset, index); + } + + /* Update Bind-state cache */ + this->cached_vertex_buffer_bindings[index].is_bytes = true; + this->cached_vertex_buffer_bindings[index].metal_buffer = nil; + this->cached_vertex_buffer_bindings[index].offset = -1; } void MTLRenderPassState::bind_fragment_bytes(void *bytes, uint length, uint index) { - /* TODO(Metal): Implement RenderCommandEncoder fragment bytes binding utility. This will be - * implemented alongside the full MTLMemoryManager. */ + /* Bytes always updated as source data may have changed. */ + BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + BLI_assert(length > 0); + BLI_assert(bytes != nullptr); + + if (length < MTL_MAX_SET_BYTES_SIZE) { + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + [rec setFragmentBytes:bytes length:length atIndex:index]; + } + else { + /* We have run over the setBytes limit, bind buffer instead. */ + MTLTemporaryBuffer range = + ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256); + memcpy(range.data, bytes, length); + this->bind_fragment_buffer(range.metal_buffer, range.buffer_offset, index); + } + + /* Update Bind-state cache. 
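
A worked pass through the size split in `bind_vertex_bytes`/`bind_fragment_bytes` above, using the constants from `mtl_common.hh` (the two byte counts are invented for illustration):

  // 64 bytes of push-constant-style data: 64 < MTL_MAX_SET_BYTES_SIZE (4096),
  // so it travels inline via [rec setVertexBytes:...].
  // 16384 bytes of palette data: over the limit, so it is memcpy'd into a
  // 256-byte-aligned scratch-buffer range and bound with setVertexBuffer instead.
  static bool use_inline_bytes(uint length)
  {
    return length < MTL_MAX_SET_BYTES_SIZE;
  }

Either way the cache entry is marked `is_bytes = true` with a nil buffer, so the redundant-bind check in `bind_vertex_buffer`/`bind_fragment_buffer` can never skip the next real buffer binding at that index.
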
*/
+  this->cached_fragment_buffer_bindings[index].is_bytes = true;
+  this->cached_fragment_buffer_bindings[index].metal_buffer = nil;
+  this->cached_fragment_buffer_bindings[index].offset = -1;
 }

 /** \} */
diff --git a/source/blender/gpu/metal/mtl_common.hh b/source/blender/gpu/metal/mtl_common.hh
index b6f9c0050a9..5c322efa3f9 100644
--- a/source/blender/gpu/metal/mtl_common.hh
+++ b/source/blender/gpu/metal/mtl_common.hh
@@ -3,7 +3,9 @@
 #ifndef __MTL_COMMON
 #define __MTL_COMMON

-// -- Renderer Options --
+/** -- Renderer Options -- */
+/* Number of frames over which rolling averages are taken. */
+#define MTL_FRAME_AVERAGE_COUNT 5
 #define MTL_MAX_DRAWABLES 3
 #define MTL_MAX_SET_BYTES_SIZE 4096
 #define MTL_FORCE_WAIT_IDLE 0
diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh
index 577438667d6..5991fe2bc3e 100644
--- a/source/blender/gpu/metal/mtl_context.hh
+++ b/source/blender/gpu/metal/mtl_context.hh
@@ -12,6 +12,10 @@
 #include "GPU_common_types.h"
 #include "GPU_context.h"

+#include "intern/GHOST_Context.h"
+#include "intern/GHOST_ContextCGL.h"
+#include "intern/GHOST_Window.h"
+
 #include "mtl_backend.hh"
 #include "mtl_capabilities.hh"
 #include "mtl_common.hh"
@@ -570,12 +574,44 @@ class MTLCommandBufferManager {
 class MTLContext : public Context {
   friend class MTLBackend;
+  friend class MTLRenderPassState;
+
+ public:
+  /* Swapchain and latency management. */
+  static std::atomic<int> max_drawables_in_flight;
+  static std::atomic<int64_t> avg_drawable_latency_us;
+  static int64_t frame_latency[MTL_FRAME_AVERAGE_COUNT];
+
+ public:
+  /* Shaders and Pipeline state. */
+  MTLContextGlobalShaderPipelineState pipeline_state;
+
+  /* Metal API Resource Handles. */
+  id<MTLCommandQueue> queue = nil;
+  id<MTLDevice> device = nil;
+
+#ifndef NDEBUG
+  /* Label for Context debug name assignment. */
+  NSString *label = nil;
+#endif
+
+  /* Memory Management. */
+  MTLScratchBufferManager memory_manager;
+  static MTLBufferPool global_memory_manager;
+
+  /* CommandBuffer managers. */
+  MTLCommandBufferManager main_command_buffer;

  private:
-  /* Null buffers for empty/uninitialized bindings.
-   * Null attribute buffer follows default attribute format of OpenGL Back-end. */
-  id<MTLBuffer> null_buffer_;           /* All zero's. */
-  id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */
+  /* Parent Context. */
+  GHOST_ContextCGL *ghost_context_;
+
+  /* Render Passes and Framebuffers. */
+  id<MTLTexture> default_fbo_mtltexture_ = nil;
+  gpu::MTLTexture *default_fbo_gputexture_ = nullptr;
+
+  /* Depth-stencil state cache. */
+  blender::Map<MTLContextDepthStencilState, id<MTLDepthStencilState>> depth_stencil_state_cache;

   /* Compute and specialization caches. */
   MTLContextTextureUtils texture_utils_;
@@ -601,23 +637,20 @@ class MTLContext : public Context {
   gpu::MTLBuffer *visibility_buffer_ = nullptr;
   bool visibility_is_dirty_ = false;

- public:
-  /* Shaders and Pipeline state. */
-  MTLContextGlobalShaderPipelineState pipeline_state;
-
-  /* Metal API Resource Handles. */
-  id<MTLCommandQueue> queue = nil;
-  id<MTLDevice> device = nil;
-
-  /* Memory Management */
-  MTLScratchBufferManager memory_manager;
-  static MTLBufferPool global_memory_manager;
+  /* Null buffers for empty/uninitialized bindings.
+   * Null attribute buffer follows default attribute format of OpenGL Backend. */
+  id<MTLBuffer> null_buffer_;           /* All zero's. */
+  id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */

-  /* CommandBuffer managers. 
*/ - MTLCommandBufferManager main_command_buffer; + /** Dummy Resources */ + /* Maximum of 32 texture types. Though most combinations invalid. */ + gpu::MTLTexture *dummy_textures_[GPU_TEXTURE_BUFFER] = {nullptr}; + GPUVertFormat dummy_vertformat_; + GPUVertBuf *dummy_verts_ = nullptr; + public: /* GPUContext interface. */ - MTLContext(void *ghost_window); + MTLContext(void *ghost_window, void *ghost_context); ~MTLContext(); static void check_error(const char *info); @@ -673,6 +706,35 @@ class MTLContext : public Context { void pipeline_state_init(); MTLShader *get_active_shader(); + /* These functions ensure that the current RenderCommandEncoder has + * the correct global state assigned. This should be called prior + * to every draw call, to ensure that all state is applied and up + * to date. We handle: + * + * - Buffer bindings (Vertex buffers, Uniforms, UBOs, transform feedback) + * - Texture bindings + * - Sampler bindings (+ argument buffer bindings) + * - Dynamic Render pipeline state (on encoder) + * - Baking Pipeline State Objects (PSOs) for current shader, based + * on final pipeline state. + * + * `ensure_render_pipeline_state` will return false if the state is + * invalid and cannot be applied. This should cancel a draw call. */ + bool ensure_render_pipeline_state(MTLPrimitiveType prim_type); + bool ensure_uniform_buffer_bindings( + id<MTLRenderCommandEncoder> rec, + const MTLShaderInterface *shader_interface, + const MTLRenderPipelineStateInstance *pipeline_state_instance); + void ensure_texture_bindings(id<MTLRenderCommandEncoder> rec, + MTLShaderInterface *shader_interface, + const MTLRenderPipelineStateInstance *pipeline_state_instance); + void ensure_depth_stencil_state(MTLPrimitiveType prim_type); + + id<MTLBuffer> get_null_buffer(); + id<MTLBuffer> get_null_attribute_buffer(); + gpu::MTLTexture *get_dummy_texture(eGPUTextureType type); + void free_dummy_resources(); + /* State assignment. */ void set_viewport(int origin_x, int origin_y, int width, int height); void set_scissor(int scissor_x, int scissor_y, int scissor_width, int scissor_height); @@ -720,9 +782,37 @@ class MTLContext : public Context { { return MTLContext::global_memory_manager; } - /* Uniform Buffer Bindings to command encoders. */ - id<MTLBuffer> get_null_buffer(); - id<MTLBuffer> get_null_attribute_buffer(); + + /* Swapchain and latency management. */ + static void latency_resolve_average(int64_t frame_latency_us) + { + int64_t avg = 0; + int64_t frame_c = 0; + for (int i = MTL_FRAME_AVERAGE_COUNT - 1; i > 0; i--) { + MTLContext::frame_latency[i] = MTLContext::frame_latency[i - 1]; + avg += MTLContext::frame_latency[i]; + frame_c += (MTLContext::frame_latency[i] > 0) ? 1 : 0; + } + MTLContext::frame_latency[0] = frame_latency_us; + avg += MTLContext::frame_latency[0]; + if (frame_c > 0) { + avg /= frame_c; + } + else { + avg = 0; + } + MTLContext::avg_drawable_latency_us = avg; + } + + private: + void set_ghost_context(GHOST_ContextHandle ghostCtxHandle); + void set_ghost_window(GHOST_WindowHandle ghostWinHandle); }; +/* GHOST Context callback and present. 
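
A worked pass through `latency_resolve_average()` above, with `MTL_FRAME_AVERAGE_COUNT = 5` and invented sample values in microseconds:

  // Before: frame_latency = {16000, 17000, 15000, 0, 0}; new sample = 18000.
  // Shift:  frame_latency = {18000, 16000, 17000, 15000, 0}
  // Sum:    18000 + 16000 + 17000 + 15000 + 0 = 66000
  // Count:  frame_c = 3 -- only the shifted, non-zero previous samples are counted;
  //         the incoming sample is added to the sum but not to the count.
  // Result: avg_drawable_latency_us = 66000 / 3 = 22000

Note the divisor quirk this exposes: until the window fills, the rolling average is biased slightly upward because the newest sample contributes to the sum but never to `frame_c`.
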
*/ +void present(MTLRenderPassDescriptor *blit_descriptor, + id<MTLRenderPipelineState> blit_pso, + id<MTLTexture> swapchain_texture, + id<CAMetalDrawable> drawable); + } // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm index 1302cf0dabd..a89339d0d14 100644 --- a/source/blender/gpu/metal/mtl_context.mm +++ b/source/blender/gpu/metal/mtl_context.mm @@ -5,13 +5,29 @@ */ #include "mtl_context.hh" #include "mtl_debug.hh" +#include "mtl_framebuffer.hh" +#include "mtl_immediate.hh" +#include "mtl_memory.hh" +#include "mtl_primitive.hh" #include "mtl_shader.hh" #include "mtl_shader_interface.hh" #include "mtl_state.hh" +#include "mtl_uniform_buffer.hh" #include "DNA_userdef_types.h" #include "GPU_capabilities.h" +#include "GPU_matrix.h" +#include "GPU_shader.h" +#include "GPU_texture.h" +#include "GPU_uniform_buffer.h" +#include "GPU_vertex_buffer.h" +#include "intern/gpu_matrix_private.h" + +#include "PIL_time.h" + +#include <fstream> +#include <string> using namespace blender; using namespace blender::gpu; @@ -21,21 +37,118 @@ namespace blender::gpu { /* Global memory manager. */ MTLBufferPool MTLContext::global_memory_manager; +/* Swapchain and latency management. */ +std::atomic<int> MTLContext::max_drawables_in_flight = 0; +std::atomic<int64_t> MTLContext::avg_drawable_latency_us = 0; +int64_t MTLContext::frame_latency[MTL_FRAME_AVERAGE_COUNT] = {0}; + +/* -------------------------------------------------------------------- */ +/** \name GHOST Context interaction. + * \{ */ + +void MTLContext::set_ghost_context(GHOST_ContextHandle ghostCtxHandle) +{ + GHOST_Context *ghost_ctx = reinterpret_cast<GHOST_Context *>(ghostCtxHandle); + BLI_assert(ghost_ctx != nullptr); + + /* Release old MTLTexture handle */ + if (default_fbo_mtltexture_) { + [default_fbo_mtltexture_ release]; + default_fbo_mtltexture_ = nil; + } + + /* Release Framebuffer attachments */ + MTLFrameBuffer *mtl_front_left = static_cast<MTLFrameBuffer *>(this->front_left); + MTLFrameBuffer *mtl_back_left = static_cast<MTLFrameBuffer *>(this->back_left); + mtl_front_left->remove_all_attachments(); + mtl_back_left->remove_all_attachments(); + + GHOST_ContextCGL *ghost_cgl_ctx = dynamic_cast<GHOST_ContextCGL *>(ghost_ctx); + if (ghost_cgl_ctx != NULL) { + default_fbo_mtltexture_ = ghost_cgl_ctx->metalOverlayTexture(); + + MTL_LOG_INFO( + "Binding GHOST context CGL %p to GPU context %p. 
(Device: %p, queue: %p, texture: %p)\n", + ghost_cgl_ctx, + this, + this->device, + this->queue, + default_fbo_gputexture_); + + /* Check if the GHOST Context provides a default framebuffer: */ + if (default_fbo_mtltexture_) { + + /* Release old GPUTexture handle */ + if (default_fbo_gputexture_) { + GPU_texture_free(wrap(static_cast<Texture *>(default_fbo_gputexture_))); + default_fbo_gputexture_ = nullptr; + } + + /* Retain handle */ + [default_fbo_mtltexture_ retain]; + + /*** Create front and back-buffers ***/ + /* Create gpu::MTLTexture objects */ + default_fbo_gputexture_ = new gpu::MTLTexture( + "MTL_BACKBUFFER", GPU_RGBA16F, GPU_TEXTURE_2D, default_fbo_mtltexture_); + + /* Update framebuffers with new texture attachments */ + mtl_front_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0); + mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0); +#ifndef NDEBUG + this->label = default_fbo_mtltexture_.label; +#endif + } + else { + + /* Add default texture for cases where no other framebuffer is bound */ + if (!default_fbo_gputexture_) { + default_fbo_gputexture_ = static_cast<gpu::MTLTexture *>( + unwrap(GPU_texture_create_2d(__func__, 16, 16, 1, GPU_RGBA16F, nullptr))); + } + mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0); + + MTL_LOG_INFO( + "-- Bound context %p for GPU context: %p is offscreen and does not have a default " + "framebuffer\n", + ghost_cgl_ctx, + this); +#ifndef NDEBUG + this->label = @"Offscreen Metal Context"; +#endif + } + } + else { + MTL_LOG_INFO( + "[ERROR] Failed to bind GHOST context to MTLContext -- GHOST_ContextCGL is null " + "(GhostContext: %p, GhostContext_CGL: %p)\n", + ghost_ctx, + ghost_cgl_ctx); + BLI_assert(false); + } +} + +void MTLContext::set_ghost_window(GHOST_WindowHandle ghostWinHandle) +{ + GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghostWinHandle); + this->set_ghost_context((GHOST_ContextHandle)(ghostWin ? ghostWin->getContext() : NULL)); +} + +/** \} */ + /* -------------------------------------------------------------------- */ /** \name MTLContext * \{ */ /* Placeholder functions */ -MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command_buffer(*this) +MTLContext::MTLContext(void *ghost_window, void *ghost_context) + : memory_manager(*this), main_command_buffer(*this) { /* Init debug. */ debug::mtl_debug_init(); - /* Device creation. - * TODO(Metal): This is a temporary initialization path to enable testing of features - * and shader compilation tests. Future functionality should fetch the existing device - * from GHOST_ContextCGL.mm. Plumbing to be updated in future. */ - this->device = MTLCreateSystemDefaultDevice(); + /* Initialise Renderpass and Framebuffer State */ + this->back_left = nullptr; /* Initialize command buffer state. */ this->main_command_buffer.prepare(); @@ -47,10 +160,35 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command is_inside_frame_ = false; current_frame_index_ = 0; - /* Prepare null data buffer */ + /* Prepare null data buffer. */ null_buffer_ = nil; null_attribute_buffer_ = nil; + /* Zero-initialise MTL Textures. */ + default_fbo_mtltexture_ = nil; + default_fbo_gputexture_ = nullptr; + + /** Fetch GHOSTContext and fetch Metal device/queue. */ + ghost_window_ = ghost_window; + if (ghost_window_ && ghost_context == NULL) { + /* NOTE(Metal): Fetch ghost_context from ghost_window if it is not provided. 
+ * Regardless of whether windowed or not, we need access to the GhostContext + * for presentation, and device/queue access. */ + GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghost_window_); + ghost_context = (ghostWin ? ghostWin->getContext() : NULL); + } + BLI_assert(ghost_context); + this->ghost_context_ = static_cast<GHOST_ContextCGL *>(ghost_context); + this->queue = (id<MTLCommandQueue>)this->ghost_context_->metalCommandQueue(); + this->device = (id<MTLDevice>)this->ghost_context_->metalDevice(); + BLI_assert(this->queue); + BLI_assert(this->device); + [this->queue retain]; + [this->device retain]; + + /* Register present callback. */ + this->ghost_context_->metalRegisterPresentCallback(&present); + /* Create FrameBuffer handles. */ MTLFrameBuffer *mtl_front_left = new MTLFrameBuffer(this, "front_left"); MTLFrameBuffer *mtl_back_left = new MTLFrameBuffer(this, "back_left"); @@ -66,6 +204,7 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command /* Initialize Metal modules. */ this->memory_manager.init(); this->state_manager = new MTLStateManager(this); + this->imm = new MTLImmediate(this); /* Ensure global memory manager is initialized. */ MTLContext::global_memory_manager.init(this->device); @@ -99,9 +238,29 @@ MTLContext::~MTLContext() this->end_frame(); } } + + /* Release Memory Manager */ + this->get_scratchbuffer_manager().free(); + /* Release update/blit shaders. */ this->get_texture_utils().cleanup(); + /* Detach resource references */ + GPU_texture_unbind_all(); + + /* Unbind UBOs */ + for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) { + if (this->pipeline_state.ubo_bindings[i].bound && + this->pipeline_state.ubo_bindings[i].ubo != nullptr) { + GPUUniformBuf *ubo = wrap( + static_cast<UniformBuf *>(this->pipeline_state.ubo_bindings[i].ubo)); + GPU_uniformbuf_unbind(ubo); + } + } + + /* Release Dummy resources */ + this->free_dummy_resources(); + /* Release Sampler States. */ for (int i = 0; i < GPU_SAMPLER_MAX; i++) { if (sampler_state_cache_[i] != nil) { @@ -109,12 +268,28 @@ MTLContext::~MTLContext() sampler_state_cache_[i] = nil; } } + + /* Empty cached sampler argument buffers. */ + for (auto entry : cached_sampler_buffers_.values()) { + entry->free(); + } + cached_sampler_buffers_.clear(); + + /* Free null buffers. */ if (null_buffer_) { [null_buffer_ release]; } if (null_attribute_buffer_) { [null_attribute_buffer_ release]; } + + /* Free Metal objects. */ + if (this->queue) { + [this->queue release]; + } + if (this->device) { + [this->device release]; + } } void MTLContext::begin_frame() @@ -146,20 +321,49 @@ void MTLContext::check_error(const char *info) void MTLContext::activate() { - /* TODO(Metal): Implement. */ + /* Make sure no other context is already bound to this thread. */ + BLI_assert(is_active_ == false); + is_active_ = true; + thread_ = pthread_self(); + + /* Re-apply ghost window/context for resizing */ + if (ghost_window_) { + this->set_ghost_window((GHOST_WindowHandle)ghost_window_); + } + else if (ghost_context_) { + this->set_ghost_context((GHOST_ContextHandle)ghost_context_); + } + + /* Reset UBO bind state. */ + for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) { + if (this->pipeline_state.ubo_bindings[i].bound && + this->pipeline_state.ubo_bindings[i].ubo != nullptr) { + this->pipeline_state.ubo_bindings[i].bound = false; + this->pipeline_state.ubo_bindings[i].ubo = nullptr; + } + } + + /* Ensure imm active. */ + immActivate(); } + void MTLContext::deactivate() { - /* TODO(Metal): Implement. 
*/ + BLI_assert(this->is_active_on_thread()); + /* Flush context on deactivate. */ + this->flush(); + is_active_ = false; + immDeactivate(); } void MTLContext::flush() { - /* TODO(Metal): Implement. */ + this->main_command_buffer.submit(false); } + void MTLContext::finish() { - /* TODO(Metal): Implement. */ + this->main_command_buffer.submit(true); } void MTLContext::memory_statistics_get(int *total_mem, int *free_mem) @@ -200,10 +404,8 @@ id<MTLRenderCommandEncoder> MTLContext::ensure_begin_render_pass() /* Ensure command buffer workload submissions are optimal -- * Though do not split a batch mid-IMM recording. */ - /* TODO(Metal): Add IMM Check once MTLImmediate has been implemented. */ - if (this->main_command_buffer.do_break_submission() - // && !((MTLImmediate *)(this->imm))->imm_is_recording() - ) { + if (this->main_command_buffer.do_break_submission() && + !((MTLImmediate *)(this->imm))->imm_is_recording()) { this->flush(); } @@ -294,6 +496,72 @@ id<MTLBuffer> MTLContext::get_null_attribute_buffer() return null_attribute_buffer_; } +gpu::MTLTexture *MTLContext::get_dummy_texture(eGPUTextureType type) +{ + /* Decrement 1 from texture type as they start from 1 and go to 32 (inclusive). Remap to 0..31 */ + gpu::MTLTexture *dummy_tex = dummy_textures_[type - 1]; + if (dummy_tex != nullptr) { + return dummy_tex; + } + else { + GPUTexture *tex = nullptr; + switch (type) { + case GPU_TEXTURE_1D: + tex = GPU_texture_create_1d("Dummy 1D", 128, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_1D_ARRAY: + tex = GPU_texture_create_1d_array("Dummy 1DArray", 128, 1, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_2D: + tex = GPU_texture_create_2d("Dummy 2D", 128, 128, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_2D_ARRAY: + tex = GPU_texture_create_2d_array("Dummy 2DArray", 128, 128, 1, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_3D: + tex = GPU_texture_create_3d( + "Dummy 3D", 128, 128, 1, 1, GPU_RGBA8, GPU_DATA_UBYTE, nullptr); + break; + case GPU_TEXTURE_CUBE: + tex = GPU_texture_create_cube("Dummy Cube", 128, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_CUBE_ARRAY: + tex = GPU_texture_create_cube_array("Dummy CubeArray", 128, 1, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_BUFFER: + if (!dummy_verts_) { + GPU_vertformat_clear(&dummy_vertformat_); + GPU_vertformat_attr_add(&dummy_vertformat_, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + dummy_verts_ = GPU_vertbuf_create_with_format_ex(&dummy_vertformat_, GPU_USAGE_STATIC); + GPU_vertbuf_data_alloc(dummy_verts_, 64); + } + tex = GPU_texture_create_from_vertbuf("Dummy TextureBuffer", dummy_verts_); + break; + default: + BLI_assert_msg(false, "Unrecognised texture type"); + return nullptr; + } + gpu::MTLTexture *metal_tex = static_cast<gpu::MTLTexture *>(reinterpret_cast<Texture *>(tex)); + dummy_textures_[type - 1] = metal_tex; + return metal_tex; + } + return nullptr; +} + +void MTLContext::free_dummy_resources() +{ + for (int tex = 0; tex < GPU_TEXTURE_BUFFER; tex++) { + if (dummy_textures_[tex]) { + GPU_texture_free( + reinterpret_cast<GPUTexture *>(static_cast<Texture *>(dummy_textures_[tex]))); + dummy_textures_[tex] = nullptr; + } + } + if (dummy_verts_) { + GPU_vertbuf_discard(dummy_verts_); + } +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -440,6 +708,755 @@ void MTLContext::set_scissor_enabled(bool scissor_enabled) /** \} */ /* -------------------------------------------------------------------- */ +/** \name Command Encoder and pipeline state + * These 
utilities ensure that all of the globally bound resources and state have been
+ * correctly encoded within the current RenderCommandEncoder. This involves managing
+ * buffer bindings, texture bindings, depth stencil state and dynamic pipeline state.
+ *
+ * We will also trigger compilation of new PSOs where the input state has changed
+ * and is required.
+ * All of this setup is required in order to perform a valid draw call.
+ * \{ */
+
+bool MTLContext::ensure_render_pipeline_state(MTLPrimitiveType mtl_prim_type)
+{
+  BLI_assert(this->pipeline_state.initialised);
+
+  /* Check if an active shader is bound. */
+  if (!this->pipeline_state.active_shader) {
+    MTL_LOG_WARNING("No Metal shader for bound GL shader\n");
+    return false;
+  }
+
+  /* Also ensure active shader is valid. */
+  if (!this->pipeline_state.active_shader->is_valid()) {
+    MTL_LOG_WARNING(
+        "Bound active shader is not valid (Missing/invalid implementation for Metal).\n");
+    return false;
+  }
+
+  /* Apply global state. */
+  this->state_manager->apply_state();
+
+  /* Main command buffer tracks the current state of the render pass, based on bound
+   * MTLFrameBuffer. */
+  MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  /* Debug Check: Ensure Framebuffer instance is not dirty. */
+  BLI_assert(!this->main_command_buffer.get_active_framebuffer()->get_dirty());
+
+  /* Fetch shader interface. */
+  MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
+  if (shader_interface == nullptr) {
+    MTL_LOG_WARNING("Bound active shader does not have a valid shader interface!\n");
+    return false;
+  }
+
+  /* Fetch shader and bake valid PipelineStateObject (PSO) based on current
+   * shader and state combination. This PSO represents the final GPU-executable
+   * permutation of the shader. */
+  MTLRenderPipelineStateInstance *pipeline_state_instance =
+      this->pipeline_state.active_shader->bake_current_pipeline_state(
+          this, mtl_prim_type_to_topology_class(mtl_prim_type));
+  if (!pipeline_state_instance) {
+    MTL_LOG_ERROR("Failed to bake Metal pipeline state for shader: %s\n",
+                  shader_interface->get_name());
+    return false;
+  }
+
+  bool result = false;
+  if (pipeline_state_instance->pso) {
+
+    /* Fetch render command encoder. A render pass should already be active.
+     * This will be NULL if invalid. */
+    id<MTLRenderCommandEncoder> rec =
+        this->main_command_buffer.get_active_render_command_encoder();
+    BLI_assert(rec);
+    if (rec == nil) {
+      MTL_LOG_ERROR("ensure_render_pipeline_state called while render pass is not active.\n");
+      return false;
+    }
+
+    /* Bind Render Pipeline State. */
+    BLI_assert(pipeline_state_instance->pso);
+    if (rps.bound_pso != pipeline_state_instance->pso) {
+      [rec setRenderPipelineState:pipeline_state_instance->pso];
+      rps.bound_pso = pipeline_state_instance->pso;
+    }
+
+    /** Ensure resource bindings. */
+    /* Texture Bindings. */
+    /* We will iterate through all texture bindings on the context and determine if any of the
+     * active slots match those in our shader interface. If so, textures will be bound. */
+    if (shader_interface->get_total_textures() > 0) {
+      this->ensure_texture_bindings(rec, shader_interface, pipeline_state_instance);
+    }
+
+    /* Transform feedback buffer binding. */
+    /* TODO(Metal): Include this code once MTLVertBuf is merged. We bind the vertex buffer to
+     * which transform feedback data will be written.
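 *
 * [Editor's note] Metal has no native transform-feedback primitive; the sketch below
 * shows the intended shape of the binding once `MTLVertBuf` lands, mirroring the
 * commented-out block that follows (names are taken from that block, not a final API):
 *
 *   int tf_offset = 0;
 *   id<MTLBuffer> tf_buf = tf_vbo_mtl->get_metal_buffer(&tf_offset);
 *   [rec setVertexBuffer:tf_buf
 *                 offset:tf_offset
 *                atIndex:pipeline_state_instance->transform_feedback_buffer_index];
 *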
*/
+    // GPUVertBuf *tf_vbo =
+    //     this->pipeline_state.active_shader->get_transform_feedback_active_buffer();
+    // if (tf_vbo != nullptr && pipeline_state_instance->transform_feedback_buffer_index >= 0) {
+
+    //   /* Ensure primitive type is either GPU_LINES, GPU_TRIANGLES or GPU_POINT */
+    //   BLI_assert(mtl_prim_type == MTLPrimitiveTypeLine ||
+    //              mtl_prim_type == MTLPrimitiveTypeTriangle ||
+    //              mtl_prim_type == MTLPrimitiveTypePoint);
+
+    //   /* Fetch active transform feedback buffer from vertbuf */
+    //   MTLVertBuf *tf_vbo_mtl = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(tf_vbo));
+    //   int tf_buffer_offset = 0;
+    //   id<MTLBuffer> tf_buffer_mtl = tf_vbo_mtl->get_metal_buffer(&tf_buffer_offset);
+
+    //   if (tf_buffer_mtl != nil && tf_buffer_offset >= 0) {
+    //     [rec setVertexBuffer:tf_buffer_mtl
+    //                   offset:tf_buffer_offset
+    //                  atIndex:pipeline_state_instance->transform_feedback_buffer_index];
+    //     printf("Successfully bound VBO: %p for transform feedback (MTL Buffer: %p)\n",
+    //            tf_vbo_mtl,
+    //            tf_buffer_mtl);
+    //   }
+    // }
+
+    /* Matrix Bindings. */
+    /* This is now called upon shader bind. We may need to re-evaluate this though,
+     * as was done here to ensure uniform changes between draws were tracked.
+     * NOTE(Metal): We may be able to remove this. */
+    GPU_matrix_bind(reinterpret_cast<struct GPUShader *>(
+        static_cast<Shader *>(this->pipeline_state.active_shader)));
+
+    /* Bind Uniforms */
+    this->ensure_uniform_buffer_bindings(rec, shader_interface, pipeline_state_instance);
+
+    /* Bind Null attribute buffer, if needed. */
+    if (pipeline_state_instance->null_attribute_buffer_index >= 0) {
+      if (G.debug & G_DEBUG_GPU) {
+        MTL_LOG_INFO("Binding null attribute buffer at index: %d\n",
+                     pipeline_state_instance->null_attribute_buffer_index);
+      }
+      rps.bind_vertex_buffer(this->get_null_attribute_buffer(),
+                             0,
+                             pipeline_state_instance->null_attribute_buffer_index);
+    }
+
+    /** Dynamic Per-draw Render State on RenderCommandEncoder. */
+    /* State: Viewport. */
+    if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_VIEWPORT_FLAG) {
+      MTLViewport viewport;
+      viewport.originX = (double)this->pipeline_state.viewport_offset_x;
+      viewport.originY = (double)this->pipeline_state.viewport_offset_y;
+      viewport.width = (double)this->pipeline_state.viewport_width;
+      viewport.height = (double)this->pipeline_state.viewport_height;
+      viewport.znear = this->pipeline_state.depth_stencil_state.depth_range_near;
+      viewport.zfar = this->pipeline_state.depth_stencil_state.depth_range_far;
+      [rec setViewport:viewport];
+
+      this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+                                          ~MTL_PIPELINE_STATE_VIEWPORT_FLAG);
+    }
+
+    /* State: Scissor. */
+    if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_SCISSOR_FLAG) {
+
+      /* Get FrameBuffer associated with active RenderCommandEncoder. */
+      MTLFrameBuffer *render_fb = this->main_command_buffer.get_active_framebuffer();
+
+      MTLScissorRect scissor;
+      if (this->pipeline_state.scissor_enabled) {
+        scissor.x = this->pipeline_state.scissor_x;
+        scissor.y = this->pipeline_state.scissor_y;
+        scissor.width = this->pipeline_state.scissor_width;
+        scissor.height = this->pipeline_state.scissor_height;
+
+        /* Some scissor assignments exceed the bounds of the viewport due to implicitly added
+         * padding to the width/height - Clamp width/height.
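 *
 * [Editor's note] A worked example of the clamp below: with a 1920x1080 framebuffer
 * and a requested scissor of x = 1900, width = 40, the clamped width becomes
 * min_ii(40, 1920 - 1900) = 20, keeping x + width within the framebuffer bounds.
 *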
*/
+        BLI_assert(scissor.x >= 0 && scissor.x < render_fb->get_width());
+        BLI_assert(scissor.y >= 0 && scissor.y < render_fb->get_height());
+        scissor.width = min_ii(scissor.width, render_fb->get_width() - scissor.x);
+        scissor.height = min_ii(scissor.height, render_fb->get_height() - scissor.y);
+        BLI_assert(scissor.width > 0 && (scissor.x + scissor.width <= render_fb->get_width()));
+        BLI_assert(scissor.height > 0 && (scissor.height <= render_fb->get_height()));
+      }
+      else {
+        /* Scissor is disabled, reset to default size as scissor state may have been previously
+         * assigned on this encoder. */
+        scissor.x = 0;
+        scissor.y = 0;
+        scissor.width = render_fb->get_width();
+        scissor.height = render_fb->get_height();
+      }
+
+      /* Scissor state can still be flagged as changed if it is toggled on and off, without
+       * parameters changing between draws. */
+      if (memcmp(&scissor, &rps.last_scissor_rect, sizeof(MTLScissorRect))) {
+        [rec setScissorRect:scissor];
+        rps.last_scissor_rect = scissor;
+      }
+      this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+                                          ~MTL_PIPELINE_STATE_SCISSOR_FLAG);
+    }
+
+    /* State: Face winding. */
+    if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_FRONT_FACING_FLAG) {
+      /* We need to invert the face winding in Metal, to account for the inverted-Y coordinate
+       * system. */
+      MTLWinding winding = (this->pipeline_state.front_face == GPU_CLOCKWISE) ?
+                               MTLWindingClockwise :
+                               MTLWindingCounterClockwise;
+      [rec setFrontFacingWinding:winding];
+      this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+                                          ~MTL_PIPELINE_STATE_FRONT_FACING_FLAG);
+    }
+
+    /* State: cullmode. */
+    if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_CULLMODE_FLAG) {
+
+      MTLCullMode mode = MTLCullModeNone;
+      if (this->pipeline_state.culling_enabled) {
+        switch (this->pipeline_state.cull_mode) {
+          case GPU_CULL_NONE:
+            mode = MTLCullModeNone;
+            break;
+          case GPU_CULL_FRONT:
+            mode = MTLCullModeFront;
+            break;
+          case GPU_CULL_BACK:
+            mode = MTLCullModeBack;
+            break;
+          default:
+            BLI_assert_unreachable();
+            break;
+        }
+      }
+      [rec setCullMode:mode];
+      this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+                                          ~MTL_PIPELINE_STATE_CULLMODE_FLAG);
+    }
+
+    /* Pipeline state is now good. */
+    result = true;
+  }
+  return result;
+}
+
+/* Bind uniform buffers to an active render command encoder using the rendering state of the
+ * current context (active shader, bound UBOs). */
+bool MTLContext::ensure_uniform_buffer_bindings(
+    id<MTLRenderCommandEncoder> rec,
+    const MTLShaderInterface *shader_interface,
+    const MTLRenderPipelineStateInstance *pipeline_state_instance)
+{
+  /* Fetch Render Pass state. */
+  MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  /* Shader owned push constant block for uniforms. */
+  bool active_shader_changed = (rps.last_bound_shader_state.shader_ !=
+                                    this->pipeline_state.active_shader ||
+                                rps.last_bound_shader_state.shader_ == nullptr ||
+                                rps.last_bound_shader_state.pso_index_ !=
+                                    pipeline_state_instance->shader_pso_index);
+
+  const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
+  if (push_constant_block.size > 0) {
+
+    /* Fetch uniform buffer base binding index from pipeline_state_instance - The buffer index
+     * will be offset by the number of bound VBOs.
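 *
 * [Editor's note] Illustrative layout of the shared Metal buffer argument table that
 * this offsetting produces (a sketch, indices vary per PSO):
 *
 *   [0 .. N-1]                                -> vertex buffer bindings (N = bound VBOs)
 *   [base + ubo.buffer_index]                 -> uniform blocks
 *   [base + push_constant_block.buffer_index] -> push-constant bytes
 *
 * where base = pipeline_state_instance->base_uniform_buffer_index.
 *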
*/ + uint32_t block_size = push_constant_block.size; + uint32_t buffer_index = pipeline_state_instance->base_uniform_buffer_index + + push_constant_block.buffer_index; + + /* Only need to rebind block if push constants have been modified -- or if no data is bound for + * the current RenderCommandEncoder. */ + if (this->pipeline_state.active_shader->get_push_constant_is_dirty() || + active_shader_changed || !rps.cached_vertex_buffer_bindings[buffer_index].is_bytes || + !rps.cached_fragment_buffer_bindings[buffer_index].is_bytes || true) { + + /* Bind push constant data. */ + BLI_assert(this->pipeline_state.active_shader->get_push_constant_data() != nullptr); + rps.bind_vertex_bytes( + this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index); + rps.bind_fragment_bytes( + this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index); + + /* Only need to rebind block if it has been modified. */ + this->pipeline_state.active_shader->push_constant_bindstate_mark_dirty(false); + } + } + rps.last_bound_shader_state.set(this->pipeline_state.active_shader, + pipeline_state_instance->shader_pso_index); + + /* Bind Global GPUUniformBuffers */ + /* Iterate through expected UBOs in the shader interface, and check if the globally bound ones + * match. This is used to support the gpu_uniformbuffer module, where the uniform data is global, + * and not owned by the shader instance. */ + for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) { + const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index); + + if (ubo.buffer_index >= 0) { + + const uint32_t buffer_index = ubo.buffer_index; + int ubo_offset = 0; + id<MTLBuffer> ubo_buffer = nil; + int ubo_size = 0; + + bool bind_dummy_buffer = false; + if (this->pipeline_state.ubo_bindings[buffer_index].bound) { + + /* Fetch UBO global-binding properties from slot. */ + ubo_offset = 0; + ubo_buffer = this->pipeline_state.ubo_bindings[buffer_index].ubo->get_metal_buffer( + &ubo_offset); + ubo_size = this->pipeline_state.ubo_bindings[buffer_index].ubo->get_size(); + + /* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid + * allocating zero buffers. */ + if (ubo_buffer == nil) { + bind_dummy_buffer = true; + } + else { + BLI_assert(ubo_buffer != nil); + BLI_assert(ubo_size > 0); + + if (pipeline_state_instance->reflection_data_available) { + /* NOTE: While the vertex and fragment stages have different UBOs, the indices in each + * case will be the same for the same UBO. + * We also determine expected size and then ensure buffer of the correct size + * exists in one of the vertex/fragment shader binding tables. This path is used + * to verify that the size of the bound UBO matches what is expected in the shader. */ + uint32_t expected_size = + (buffer_index < + pipeline_state_instance->buffer_bindings_reflection_data_vert.size()) ? + pipeline_state_instance->buffer_bindings_reflection_data_vert[buffer_index] + .size : + 0; + if (expected_size == 0) { + expected_size = + (buffer_index < + pipeline_state_instance->buffer_bindings_reflection_data_frag.size()) ? 
+ pipeline_state_instance->buffer_bindings_reflection_data_frag[buffer_index] + .size : + 0; + } + BLI_assert_msg( + expected_size > 0, + "Shader interface expects UBO, but shader reflection data reports that it " + "is not present"); + + /* If ubo size is smaller than the size expected by the shader, we need to bind the + * dummy buffer, which will be big enough, to avoid an OOB error. */ + if (ubo_size < expected_size) { + MTL_LOG_INFO( + "[Error][UBO] UBO (UBO Name: %s) bound at index: %d with size %d (Expected size " + "%d) (Shader Name: %s) is too small -- binding NULL buffer. This is likely an " + "over-binding, which is not used, but we need this to avoid validation " + "issues\n", + shader_interface->get_name_at_offset(ubo.name_offset), + buffer_index, + ubo_size, + expected_size, + shader_interface->get_name()); + bind_dummy_buffer = true; + } + } + } + } + else { + MTL_LOG_INFO( + "[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- but " + "nothing was bound -- binding dummy buffer\n", + shader_interface->get_name(), + shader_interface->get_name_at_offset(ubo.name_offset), + buffer_index); + bind_dummy_buffer = true; + } + + if (bind_dummy_buffer) { + /* Perform Dummy binding. */ + ubo_offset = 0; + ubo_buffer = this->get_null_buffer(); + ubo_size = [ubo_buffer length]; + } + + if (ubo_buffer != nil) { + + uint32_t buffer_bind_index = pipeline_state_instance->base_uniform_buffer_index + + buffer_index; + + /* Bind Vertex UBO. */ + if (bool(ubo.stage_mask & ShaderStage::VERTEX)) { + BLI_assert(buffer_bind_index >= 0 && + buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + rps.bind_vertex_buffer(ubo_buffer, ubo_offset, buffer_bind_index); + } + + /* Bind Fragment UBOs. */ + if (bool(ubo.stage_mask & ShaderStage::FRAGMENT)) { + BLI_assert(buffer_bind_index >= 0 && + buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + rps.bind_fragment_buffer(ubo_buffer, ubo_offset, buffer_bind_index); + } + } + else { + MTL_LOG_WARNING( + "[UBO] Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer " + "is NULL!\n", + shader_interface->get_name(), + shader_interface->get_name_at_offset(ubo.name_offset), + buffer_index); + } + } + } + return true; +} + +/* Ensure texture bindings are correct and up to date for current draw call. */ +void MTLContext::ensure_texture_bindings( + id<MTLRenderCommandEncoder> rec, + MTLShaderInterface *shader_interface, + const MTLRenderPipelineStateInstance *pipeline_state_instance) +{ + BLI_assert(shader_interface != nil); + BLI_assert(rec != nil); + + /* Fetch Render Pass state. */ + MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state(); + + @autoreleasepool { + int vertex_arg_buffer_bind_index = -1; + int fragment_arg_buffer_bind_index = -1; + + /* Argument buffers are used for samplers, when the limit of 16 is exceeded. */ + bool use_argument_buffer_for_samplers = shader_interface->get_use_argument_buffer_for_samplers( + &vertex_arg_buffer_bind_index, &fragment_arg_buffer_bind_index); + + /* Loop through expected textures in shader interface and resolve bindings with currently + * bound textures.. */ + for (const uint t : IndexRange(shader_interface->get_max_texture_index() + 1)) { + /* Ensure the bound texture is compatible with the shader interface. If the + * shader does not expect a texture to be bound for the current slot, we skip + * binding. + * NOTE: Global texture bindings may be left over from prior draw calls. 
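 *
 * [Editor's note] Example of the stale-binding case handled here: draw A binds a
 * texture to slot 0, then draw B uses a shader that only samples slot 1. Slot 0 still
 * holds A's texture on the context, but because B's interface marks slot 0 as unused,
 * the slot is skipped rather than validated against B's shader.
 *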
*/ + const MTLShaderTexture &shader_texture_info = shader_interface->get_texture(t); + if (!shader_texture_info.used) { + /* Skip unused binding points if explicit indices are specified. */ + continue; + } + + int slot = shader_texture_info.slot_index; + if (slot >= 0 && slot < GPU_max_textures()) { + bool bind_dummy_texture = true; + if (this->pipeline_state.texture_bindings[slot].used) { + gpu::MTLTexture *bound_texture = + this->pipeline_state.texture_bindings[slot].texture_resource; + MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot]; + BLI_assert(bound_texture); + BLI_assert(bound_sampler.used); + + if (shader_texture_info.type == bound_texture->type_) { + /* Bind texture and sampler if the bound texture matches the type expected by the + * shader. */ + id<MTLTexture> tex = bound_texture->get_metal_handle(); + + if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) { + rps.bind_vertex_texture(tex, slot); + rps.bind_vertex_sampler(bound_sampler, use_argument_buffer_for_samplers, slot); + } + + if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) { + rps.bind_fragment_texture(tex, slot); + rps.bind_fragment_sampler(bound_sampler, use_argument_buffer_for_samplers, slot); + } + + /* Texture state resolved, no need to bind dummy texture */ + bind_dummy_texture = false; + } + else { + /* Texture type for bound texture (e.g. Texture2DArray) does not match what was + * expected in the shader interface. This is a problem and we will need to bind + * a dummy texture to ensure correct API usage. */ + MTL_LOG_WARNING( + "(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong " + "texture target type. (Expecting type %d, actual type %d) (binding " + "name:'%s')(texture name:'%s')\n", + shader_interface->get_name(), + bound_texture, + slot, + shader_texture_info.type, + bound_texture->type_, + shader_interface->get_name_at_offset(shader_texture_info.name_offset), + bound_texture->get_name()); + } + } + else { + MTL_LOG_WARNING( + "Shader '%s' expected texture to be bound to slot %d -- No texture was " + "bound. (name:'%s')\n", + shader_interface->get_name(), + slot, + shader_interface->get_name_at_offset(shader_texture_info.name_offset)); + } + + /* Bind Dummy texture -- will temporarily resolve validation issues while incorrect formats + * are provided -- as certain configurations may not need any binding. These issues should + * be fixed in the high-level, if problems crop up. */ + if (bind_dummy_texture) { + if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) { + rps.bind_vertex_texture( + get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot); + + /* Bind default sampler state. */ + MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE}; + rps.bind_vertex_sampler(default_binding, use_argument_buffer_for_samplers, slot); + } + if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) { + rps.bind_fragment_texture( + get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot); + + /* Bind default sampler state. */ + MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE}; + rps.bind_fragment_sampler(default_binding, use_argument_buffer_for_samplers, slot); + } + } + } + else { + MTL_LOG_WARNING( + "Shader %p expected texture to be bound to slot %d -- Slot exceeds the " + "hardware/API limit of '%d'. 
(name:'%s')\n",
+          this->pipeline_state.active_shader,
+          slot,
+          GPU_max_textures(),
+          shader_interface->get_name_at_offset(shader_texture_info.name_offset));
+      }
+    }
+
+    /* Construct and Bind argument buffer.
+     * NOTE(Metal): Samplers use an argument buffer when the limit of 16 samplers is exceeded. */
+    if (use_argument_buffer_for_samplers) {
+#ifndef NDEBUG
+      /* Debug check to validate each expected texture in the shader interface has a valid
+       * sampler object bound to the context. We will need all of these to be valid
+       * when constructing the sampler argument buffer. */
+      for (const uint i : IndexRange(shader_interface->get_max_texture_index() + 1)) {
+        const MTLShaderTexture &texture = shader_interface->get_texture(i);
+        if (texture.used) {
+          BLI_assert(this->samplers_.mtl_sampler[i] != nil);
+        }
+      }
+#endif
+
+      /* Check to ensure the buffer binding index for the argument buffer has been assigned.
+       * This PSO property will be set if we expect to use argument buffers, and the shader
+       * uses any amount of textures. */
+      BLI_assert(vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0);
+      if (vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0) {
+        /* Offset binding index to be relative to the start of static uniform buffer binding
+         * slots. The first N slots, prior to `pipeline_state_instance->base_uniform_buffer_index`
+         * are used by vertex and index buffer bindings, and the number of buffers present will
+         * vary between PSOs. */
+        int arg_buffer_idx = (pipeline_state_instance->base_uniform_buffer_index +
+                              vertex_arg_buffer_bind_index);
+        assert(arg_buffer_idx < 32);
+        id<MTLArgumentEncoder> argument_encoder = shader_interface->find_argument_encoder(
+            arg_buffer_idx);
+        if (argument_encoder == nil) {
+          argument_encoder = [pipeline_state_instance->vert
+              newArgumentEncoderWithBufferIndex:arg_buffer_idx];
+          shader_interface->insert_argument_encoder(arg_buffer_idx, argument_encoder);
+        }
+
+        /* Generate or Fetch argument buffer sampler configuration.
+         * NOTE(Metal): we need to base sampler counts off of the maximal texture
+         * index. This is not the most optimal approach, but in practice this is not a
+         * use-case when argument buffers are required.
+         * This is because with explicit texture indices, the binding indices
+         * should match across draws, to allow the high-level to optimize bind-points. */
+        gpu::MTLBuffer *encoder_buffer = nullptr;
+        this->samplers_.num_samplers = shader_interface->get_max_texture_index() + 1;
+
+        gpu::MTLBuffer **cached_smp_buffer_search = this->cached_sampler_buffers_.lookup_ptr(
+            this->samplers_);
+        if (cached_smp_buffer_search != nullptr) {
+          encoder_buffer = *cached_smp_buffer_search;
+        }
+        else {
+          /* Populate argument buffer with current global sampler bindings. */
+          int size = [argument_encoder encodedLength];
+          int alignment = max_uu([argument_encoder alignment], 256);
+          int size_align_delta = (size % alignment);
+          int aligned_alloc_size = ((alignment > 1) && (size_align_delta > 0)) ?
+                                       size + (alignment - (size % alignment)) :
+                                       size;
+
+          /* Allocate buffer to store encoded sampler arguments.
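 *
 * [Editor's note] A worked example of the rounding above: with size = 520 and
 * alignment = 256,
 *
 *   size_align_delta   = 520 % 256 = 8
 *   aligned_alloc_size = 520 + (256 - 8) = 768
 *
 * i.e. the allocation is padded up to the next multiple of the encoder alignment.
 *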
*/
+          encoder_buffer = MTLContext::get_global_memory_manager().allocate(aligned_alloc_size,
+                                                                            true);
+          BLI_assert(encoder_buffer);
+          BLI_assert(encoder_buffer->get_metal_buffer());
+          [argument_encoder setArgumentBuffer:encoder_buffer->get_metal_buffer() offset:0];
+          [argument_encoder
+              setSamplerStates:this->samplers_.mtl_sampler
+                     withRange:NSMakeRange(0, shader_interface->get_max_texture_index() + 1)];
+          encoder_buffer->flush();
+
+          /* Insert into cache. */
+          this->cached_sampler_buffers_.add_new(this->samplers_, encoder_buffer);
+        }
+
+        BLI_assert(encoder_buffer != nullptr);
+        int vert_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
+                                 vertex_arg_buffer_bind_index);
+        rps.bind_vertex_buffer(encoder_buffer->get_metal_buffer(), 0, vert_buffer_index);
+
+        /* The fragment shader shares its argument buffer binding with the vertex shader, so no
+         * need to re-encode. We can use the same argument buffer. */
+        if (fragment_arg_buffer_bind_index >= 0) {
+          BLI_assert(fragment_arg_buffer_bind_index >= 0);
+          int frag_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
+                                   fragment_arg_buffer_bind_index);
+          rps.bind_fragment_buffer(encoder_buffer->get_metal_buffer(), 0, frag_buffer_index);
+        }
+      }
+    }
+  }
+}
+
+/* Encode latest depth-stencil state. */
+void MTLContext::ensure_depth_stencil_state(MTLPrimitiveType prim_type)
+{
+  /* Check if we need to update state. */
+  if (!(this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG)) {
+    return;
+  }
+
+  /* Fetch render command encoder. */
+  id<MTLRenderCommandEncoder> rec = this->main_command_buffer.get_active_render_command_encoder();
+  BLI_assert(rec);
+
+  /* Fetch Render Pass state. */
+  MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  /** Prepare Depth-stencil state based on current global pipeline state. */
+  MTLFrameBuffer *fb = this->get_current_framebuffer();
+  bool hasDepthTarget = fb->has_depth_attachment();
+  bool hasStencilTarget = fb->has_stencil_attachment();
+
+  if (hasDepthTarget || hasStencilTarget) {
+    /* Update FrameBuffer State. */
+    this->pipeline_state.depth_stencil_state.has_depth_target = hasDepthTarget;
+    this->pipeline_state.depth_stencil_state.has_stencil_target = hasStencilTarget;
+
+    /* Check if current MTLContextDepthStencilState maps to an existing state object in
+     * the Depth-stencil state cache. */
+    id<MTLDepthStencilState> ds_state = nil;
+    id<MTLDepthStencilState> *depth_stencil_state_lookup =
+        this->depth_stencil_state_cache.lookup_ptr(this->pipeline_state.depth_stencil_state);
+
+    /* If not, populate DepthStencil state descriptor. */
+    if (depth_stencil_state_lookup == nullptr) {
+
+      MTLDepthStencilDescriptor *ds_state_desc = [[[MTLDepthStencilDescriptor alloc] init]
+          autorelease];
+
+      if (hasDepthTarget) {
+        ds_state_desc.depthWriteEnabled =
+            this->pipeline_state.depth_stencil_state.depth_write_enable;
+        ds_state_desc.depthCompareFunction =
+            this->pipeline_state.depth_stencil_state.depth_test_enabled ?
+ this->pipeline_state.depth_stencil_state.depth_function : + MTLCompareFunctionAlways; + } + + if (hasStencilTarget) { + ds_state_desc.backFaceStencil.readMask = + this->pipeline_state.depth_stencil_state.stencil_read_mask; + ds_state_desc.backFaceStencil.writeMask = + this->pipeline_state.depth_stencil_state.stencil_write_mask; + ds_state_desc.backFaceStencil.stencilFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_stencil_fail; + ds_state_desc.backFaceStencil.depthFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_depth_fail; + ds_state_desc.backFaceStencil.depthStencilPassOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_depthstencil_pass; + ds_state_desc.backFaceStencil.stencilCompareFunction = + (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ? + this->pipeline_state.depth_stencil_state.stencil_func : + MTLCompareFunctionAlways; + + ds_state_desc.frontFaceStencil.readMask = + this->pipeline_state.depth_stencil_state.stencil_read_mask; + ds_state_desc.frontFaceStencil.writeMask = + this->pipeline_state.depth_stencil_state.stencil_write_mask; + ds_state_desc.frontFaceStencil.stencilFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_stencil_fail; + ds_state_desc.frontFaceStencil.depthFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_depth_fail; + ds_state_desc.frontFaceStencil.depthStencilPassOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_depthstencil_pass; + ds_state_desc.frontFaceStencil.stencilCompareFunction = + (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ? + this->pipeline_state.depth_stencil_state.stencil_func : + MTLCompareFunctionAlways; + } + + /* Bake new DS state. */ + ds_state = [this->device newDepthStencilStateWithDescriptor:ds_state_desc]; + + /* Store state in cache. */ + BLI_assert(ds_state != nil); + this->depth_stencil_state_cache.add_new(this->pipeline_state.depth_stencil_state, ds_state); + } + else { + ds_state = *depth_stencil_state_lookup; + BLI_assert(ds_state != nil); + } + + /* Bind Depth Stencil State to render command encoder. */ + BLI_assert(ds_state != nil); + if (ds_state != nil) { + if (rps.bound_ds_state != ds_state) { + [rec setDepthStencilState:ds_state]; + rps.bound_ds_state = ds_state; + } + } + + /* Apply dynamic depth-stencil state on encoder. */ + if (hasStencilTarget) { + uint32_t stencil_ref_value = + (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ? + this->pipeline_state.depth_stencil_state.stencil_ref : + 0; + if (stencil_ref_value != rps.last_used_stencil_ref_value) { + [rec setStencilReferenceValue:stencil_ref_value]; + rps.last_used_stencil_ref_value = stencil_ref_value; + } + } + + if (hasDepthTarget) { + bool doBias = false; + switch (prim_type) { + case MTLPrimitiveTypeTriangle: + case MTLPrimitiveTypeTriangleStrip: + doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_tris; + break; + case MTLPrimitiveTypeLine: + case MTLPrimitiveTypeLineStrip: + doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_lines; + break; + case MTLPrimitiveTypePoint: + doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_points; + break; + } + [rec setDepthBias:(doBias) ? this->pipeline_state.depth_stencil_state.depth_bias : 0 + slopeScale:(doBias) ? 
this->pipeline_state.depth_stencil_state.depth_slope_scale : 0
+               clamp:0];
+    }
+  }
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
 /** \name Visibility buffer control for MTLQueryPool.
  * \{ */

@@ -606,4 +1623,148 @@ id<MTLSamplerState> MTLContext::get_default_sampler_state()

 /** \} */

+/* -------------------------------------------------------------------- */
+/** \name Swapchain management and Metal presentation.
+ * \{ */
+
+void present(MTLRenderPassDescriptor *blit_descriptor,
+             id<MTLRenderPipelineState> blit_pso,
+             id<MTLTexture> swapchain_texture,
+             id<CAMetalDrawable> drawable)
+{
+
+  MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
+  BLI_assert(ctx);
+
+  /* Flush any outstanding work. */
+  ctx->flush();
+
+  /* Always pace CPU to maximum of 3 drawables in flight.
+   * nextDrawable may have more in flight if backing swapchain
+   * textures are re-allocated, such as during resize events.
+   *
+   * Determine frames in flight based on current latency. If
+   * we are in a high-latency situation, limit frames in flight
+   * to increase app responsiveness and keep GPU execution under control.
+   * If latency improves, increase frames in flight to improve overall
+   * performance. */
+  int perf_max_drawables = MTL_MAX_DRAWABLES;
+  if (MTLContext::avg_drawable_latency_us > 185000) {
+    perf_max_drawables = 1;
+  }
+  else if (MTLContext::avg_drawable_latency_us > 85000) {
+    perf_max_drawables = 2;
+  }
+
+  while (MTLContext::max_drawables_in_flight > min_ii(perf_max_drawables, MTL_MAX_DRAWABLES)) {
+    PIL_sleep_ms(2);
+  }
+
+  /* Present is submitted in its own command buffer to ensure the drawable reference is released
+   * as early as possible. This command buffer is separate as it does not utilize the global
+   * state for rendering as the main context does. */
+  id<MTLCommandBuffer> cmdbuf = [ctx->queue commandBuffer];
+  MTLCommandBufferManager::num_active_cmd_bufs++;
+
+  if (MTLCommandBufferManager::sync_event != nil) {
+    /* Ensure command buffer ordering. */
+    [cmdbuf encodeWaitForEvent:MTLCommandBufferManager::sync_event
+                         value:MTLCommandBufferManager::event_signal_val];
+  }
+
+  /* Do Present Call and final Blit to MTLDrawable. */
+  id<MTLRenderCommandEncoder> enc = [cmdbuf renderCommandEncoderWithDescriptor:blit_descriptor];
+  [enc setRenderPipelineState:blit_pso];
+  [enc setFragmentTexture:swapchain_texture atIndex:0];
+  [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3];
+  [enc endEncoding];
+
+  /* Present drawable. */
+  BLI_assert(drawable);
+  [cmdbuf presentDrawable:drawable];
+
+  /* Ensure freed buffers have usage tracked against active CommandBuffer submissions. */
+  MTLSafeFreeList *cmd_free_buffer_list =
+      MTLContext::get_global_memory_manager().get_current_safe_list();
+  BLI_assert(cmd_free_buffer_list);
+
+  id<MTLCommandBuffer> cmd_buffer_ref = cmdbuf;
+  [cmd_buffer_ref retain];
+
+  /* Increment drawables in flight limiter. */
+  MTLContext::max_drawables_in_flight++;
+  std::chrono::time_point submission_time = std::chrono::high_resolution_clock::now();
+
+  /* Increment free pool reference and decrement upon command buffer completion. */
+  cmd_free_buffer_list->increment_reference();
+  [cmdbuf addCompletedHandler:^(id<MTLCommandBuffer> cb) {
+    /* Flag freed buffers associated with this CMD buffer as ready to be freed.
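 *
 * [Editor's note] The lifetime pattern used here, in sketch form: each submission pins
 * the current safe-free list before commit, and pooled buffers are only truly released
 * once every pinned command buffer has completed:
 *
 *   list->increment_reference();    // before [cmdbuf commit]
 *   [cmdbuf addCompletedHandler:^(id<MTLCommandBuffer> cb) {
 *     list->decrement_reference();  // final decrement frees the flagged buffers
 *   }];
 *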
*/
+    cmd_free_buffer_list->decrement_reference();
+    [cmd_buffer_ref release];
+
+    /* Decrement count. */
+    MTLCommandBufferManager::num_active_cmd_bufs--;
+    MTL_LOG_INFO("[Metal] Active command buffers: %d\n",
+                 MTLCommandBufferManager::num_active_cmd_bufs);
+
+    /* Drawable count and latency management. */
+    MTLContext::max_drawables_in_flight--;
+    std::chrono::time_point completion_time = std::chrono::high_resolution_clock::now();
+    int64_t microseconds_per_frame = std::chrono::duration_cast<std::chrono::microseconds>(
+                                         completion_time - submission_time)
+                                         .count();
+    MTLContext::latency_resolve_average(microseconds_per_frame);
+
+    MTL_LOG_INFO("Frame Latency: %f ms (Rolling avg: %f ms Drawables: %d)\n",
+                 ((float)microseconds_per_frame) / 1000.0f,
+                 ((float)MTLContext::avg_drawable_latency_us) / 1000.0f,
+                 perf_max_drawables);
+  }];
+
+  if (MTLCommandBufferManager::sync_event == nil) {
+    MTLCommandBufferManager::sync_event = [ctx->device newEvent];
+    BLI_assert(MTLCommandBufferManager::sync_event);
+    [MTLCommandBufferManager::sync_event retain];
+  }
+  BLI_assert(MTLCommandBufferManager::sync_event != nil);
+
+  MTLCommandBufferManager::event_signal_val++;
+  [cmdbuf encodeSignalEvent:MTLCommandBufferManager::sync_event
+                      value:MTLCommandBufferManager::event_signal_val];
+
+  [cmdbuf commit];
+
+  /* When debugging, fetch advanced command buffer errors. */
+  if (G.debug & G_DEBUG_GPU) {
+    [cmdbuf waitUntilCompleted];
+    NSError *error = [cmdbuf error];
+    if (error != nil) {
+      NSLog(@"%@", error);
+      BLI_assert(false);
+
+      @autoreleasepool {
+        const char *stringAsChar = [[NSString stringWithFormat:@"%@", error] UTF8String];
+
+        std::ofstream outfile;
+        outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
+        outfile << stringAsChar;
+        outfile.close();
+      }
+    }
+    else {
+      @autoreleasepool {
+        NSString *str = @"Command buffer completed successfully!\n";
+        const char *stringAsChar = [str UTF8String];
+
+        std::ofstream outfile;
+        outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
+        outfile << stringAsChar;
+        outfile.close();
+      }
+    }
+  }
+}
+
+/** \} */
+
 } // blender::gpu
diff --git a/source/blender/gpu/metal/mtl_drawlist.hh b/source/blender/gpu/metal/mtl_drawlist.hh
new file mode 100644
index 00000000000..9eb465b26a0
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_drawlist.hh
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Implementation of Multi Draw Indirect for the Metal backend.
+ * Currently a stub, pending the full MTLDrawList implementation.
+ */
+
+#pragma once
+
+#include "gpu_drawlist_private.hh"
+
+namespace blender {
+namespace gpu {
+
+/**
+ * TODO(Metal): MTLDrawList Implementation. Included as temporary stub.
+ */
+class MTLDrawList : public DrawList {
+ public:
+  MTLDrawList(int length) {}
+  ~MTLDrawList() {}
+
+  void append(GPUBatch *batch, int i_first, int i_count) override {}
+  void submit() override {}
+
+  MEM_CXX_CLASS_ALLOC_FUNCS("MTLDrawList");
+};
+
+} // namespace gpu
+} // namespace blender
diff --git a/source/blender/gpu/metal/mtl_immediate.hh b/source/blender/gpu/metal/mtl_immediate.hh
new file mode 100644
index 00000000000..b743efb397d
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_immediate.hh
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Mimics old style OpenGL immediate mode drawing.
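 *
 * [Editor's note] For context, the high-level usage this class backs looks like the
 * following (standard GPU-module immediate API, cf. the temporary blit fallback added
 * in mtl_texture.mm further below):
 *
 *   immBindShader(shader);
 *   immBegin(GPU_PRIM_TRI_STRIP, 4);
 *   immVertex2f(pos, 1, 0);
 *   // ... remaining vertices ...
 *   immEnd();  // MTLImmediate::end() encodes and submits the draw
 *   immUnbindProgram();
 *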
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+#include "gpu_immediate_private.hh"
+
+#include <Cocoa/Cocoa.h>
+#include <Metal/Metal.h>
+#include <QuartzCore/QuartzCore.h>
+
+namespace blender::gpu {
+
+class MTLImmediate : public Immediate {
+ private:
+  MTLContext *context_ = nullptr;
+  MTLTemporaryBuffer current_allocation_;
+  MTLPrimitiveTopologyClass metal_primitive_mode_;
+  MTLPrimitiveType metal_primitive_type_;
+  bool has_begun_ = false;
+
+ public:
+  MTLImmediate(MTLContext *ctx);
+  ~MTLImmediate();
+
+  uchar *begin() override;
+  void end() override;
+  bool imm_is_recording()
+  {
+    return has_begun_;
+  }
+};
+
+} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_immediate.mm b/source/blender/gpu/metal/mtl_immediate.mm
new file mode 100644
index 00000000000..41632e39092
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_immediate.mm
@@ -0,0 +1,397 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Mimics old style OpenGL immediate mode drawing.
+ */
+
+#include "BKE_global.h"
+
+#include "GPU_vertex_format.h"
+#include "gpu_context_private.hh"
+#include "gpu_shader_private.hh"
+#include "gpu_vertex_format_private.h"
+
+#include "mtl_context.hh"
+#include "mtl_debug.hh"
+#include "mtl_immediate.hh"
+#include "mtl_primitive.hh"
+#include "mtl_shader.hh"
+
+namespace blender::gpu {
+
+MTLImmediate::MTLImmediate(MTLContext *ctx)
+{
+  context_ = ctx;
+}
+
+MTLImmediate::~MTLImmediate()
+{
+}
+
+uchar *MTLImmediate::begin()
+{
+  BLI_assert(!has_begun_);
+
+  /* Determine primitive type. */
+  metal_primitive_type_ = gpu_prim_type_to_metal(this->prim_type);
+  metal_primitive_mode_ = mtl_prim_type_to_topology_class(metal_primitive_type_);
+  has_begun_ = true;
+
+  /* Allocate a range of data and return host-accessible pointer. */
+  const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_len);
+  current_allocation_ = context_->get_scratchbuffer_manager()
+                            .scratch_buffer_allocate_range_aligned(bytes_needed, 256);
+  [current_allocation_.metal_buffer retain];
+  return reinterpret_cast<uchar *>(current_allocation_.data);
+}
+
+void MTLImmediate::end()
+{
+  /* Ensure we are between an imm::begin/imm::end pair. */
+  BLI_assert(has_begun_);
+  BLI_assert(prim_type != GPU_PRIM_NONE);
+
+  /* Verify context is valid, vertex data is written and a valid shader is bound. */
+  if (context_ && this->vertex_idx > 0 && this->shader) {
+
+    MTLShader *active_mtl_shader = static_cast<MTLShader *>(unwrap(shader));
+
+    /* Skip draw if Metal shader is not valid. */
+    if (active_mtl_shader == nullptr || !active_mtl_shader->is_valid() ||
+        active_mtl_shader->get_interface() == nullptr) {
+
+      const char *ptr = (active_mtl_shader) ? active_mtl_shader->name_get() : nullptr;
+      MTL_LOG_WARNING(
+          "MTLImmediate::end -- cannot perform draw as active shader is NULL or invalid (likely "
+          "unimplemented) (shader %p '%s')\n",
+          active_mtl_shader,
+          ptr);
+      return;
+    }
+
+    /* Ensure we are inside a render pass and fetch active RenderCommandEncoder. */
+    id<MTLRenderCommandEncoder> rec = context_->ensure_begin_render_pass();
+    BLI_assert(rec != nil);
+
+    /* Fetch active render pipeline state. */
+    MTLRenderPassState &rps = context_->main_command_buffer.get_render_pass_state();
+
+    /* Bind Shader. */
+    GPU_shader_bind(this->shader);
+
+    /* Debug markers for frame-capture and detailed error messages.
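 *
 * [Editor's note] These labels show up as collapsible scopes in Xcode's GPU frame
 * capture, e.g. (shader name illustrative):
 *
 *   immEnd(verts: 4, shader: gpu_shader_2D_uniform_color)
 *     |- setRenderPipelineState
 *     |- drawPrimitives
 *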
*/ + if (G.debug & G_DEBUG_GPU) { + [rec pushDebugGroup:[NSString + stringWithFormat:@"immEnd(verts: %d, shader: %s)", + this->vertex_idx, + active_mtl_shader->get_interface()->get_name()]]; + [rec insertDebugSignpost:[NSString stringWithFormat:@"immEnd(verts: %d, shader: %s)", + this->vertex_idx, + active_mtl_shader->get_interface() + ->get_name()]]; + } + + /* Populate pipeline state vertex descriptor. */ + MTLStateManager *state_manager = static_cast<MTLStateManager *>( + MTLContext::get()->state_manager); + MTLRenderPipelineStateDescriptor &desc = state_manager->get_pipeline_descriptor(); + const MTLShaderInterface *interface = active_mtl_shader->get_interface(); + + desc.vertex_descriptor.num_attributes = interface->get_total_attributes(); + desc.vertex_descriptor.num_vert_buffers = 1; + + for (int i = 0; i < desc.vertex_descriptor.num_attributes; i++) { + desc.vertex_descriptor.attributes[i].format = MTLVertexFormatInvalid; + } + desc.vertex_descriptor.uses_ssbo_vertex_fetch = + active_mtl_shader->get_uses_ssbo_vertex_fetch(); + desc.vertex_descriptor.num_ssbo_attributes = 0; + + /* SSBO Vertex Fetch -- Verify Attributes. */ + if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) { + active_mtl_shader->ssbo_vertex_fetch_bind_attributes_begin(); + + /* Disable Indexed rendering in SSBO vertex fetch. */ + int uniform_ssbo_use_indexed = active_mtl_shader->uni_ssbo_uses_indexed_rendering; + BLI_assert_msg(uniform_ssbo_use_indexed != -1, "Expected valid uniform location for ssbo_uses_indexed_rendering."); + int uses_indexed_rendering = 0; + active_mtl_shader->uniform_int(uniform_ssbo_use_indexed, 1, 1, &uses_indexed_rendering); + } + + /* Populate Vertex descriptor and verify attributes. + * TODO(Metal): Cache this vertex state based on Vertex format and shaders. */ + for (int i = 0; i < interface->get_total_attributes(); i++) { + + /* Note: Attribute in VERTEX FORMAT does not necessarily share the same array index as + * attributes in shader interface. */ + GPUVertAttr *attr = nullptr; + const MTLShaderInputAttribute &mtl_shader_attribute = interface->get_attribute(i); + + /* Scan through vertex_format attributes until one with a name matching the shader interface + * is found. */ + for (uint32_t a_idx = 0; a_idx < this->vertex_format.attr_len && attr == nullptr; a_idx++) { + GPUVertAttr *check_attribute = &this->vertex_format.attrs[a_idx]; + + /* Attributes can have multiple name aliases associated with them. */ + for (uint32_t n_idx = 0; n_idx < check_attribute->name_len; n_idx++) { + const char *name = GPU_vertformat_attr_name_get( + &this->vertex_format, check_attribute, n_idx); + + if (strcmp(name, interface->get_name_at_offset(mtl_shader_attribute.name_offset)) == 0) { + attr = check_attribute; + break; + } + } + } + + BLI_assert_msg(attr != nullptr, + "Could not find expected attribute in immediate mode vertex format."); + if (attr == nullptr) { + MTL_LOG_ERROR( + "MTLImmediate::end Could not find matching attribute '%s' from Shader Interface in " + "Vertex Format! - TODO: Bind Dummy attribute\n", + interface->get_name_at_offset(mtl_shader_attribute.name_offset)); + return; + } + + /* Determine whether implicit type conversion between input vertex format + * and shader interface vertex format is supported. 
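 *
 * [Editor's note] Example of the distinction made below: a normalized ushort2 stream
 * feeding a float2 shader input can use Metal's implicit
 * MTLVertexFormatUShort2Normalized -> float conversion, whereas an int4 stream feeding
 * a float4 input has no implicit path and falls through to the shader-side
 * format_conversion_mode specialization in the else-branch.
 *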
*/
+      MTLVertexFormat convertedFormat;
+      bool can_use_implicit_conversion = mtl_convert_vertex_format(
+          mtl_shader_attribute.format,
+          (GPUVertCompType)attr->comp_type,
+          attr->comp_len,
+          (GPUVertFetchMode)attr->fetch_mode,
+          &convertedFormat);
+
+      if (can_use_implicit_conversion) {
+        /* Metal API can implicitly convert some formats during vertex assembly:
+         * - Converting from a normalized short2 format to float2
+         * - Type truncation e.g. Float4 to Float2.
+         * - Type expansion from Float3 to Float4.
+         * - NOTE: extra components are filled with the corresponding components of (0,0,0,1).
+         * (See
+         * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format)
+         */
+        bool is_floating_point_format = (attr->comp_type == GPU_COMP_F32);
+        desc.vertex_descriptor.attributes[i].format = convertedFormat;
+        desc.vertex_descriptor.attributes[i].format_conversion_mode =
+            (is_floating_point_format) ? (GPUVertFetchMode)GPU_FETCH_FLOAT :
+                                         (GPUVertFetchMode)GPU_FETCH_INT;
+        BLI_assert(convertedFormat != MTLVertexFormatInvalid);
+      }
+      else {
+        /* Some conversions are NOT valid, e.g. Int4 to Float4
+         * - In this case, we need to implement a conversion routine inside the shader.
+         * - This is handled using the format_conversion_mode flag
+         * - This flag is passed into the PSO as a function specialization,
+         *   and will generate an appropriate conversion function when reading the vertex
+         *   attribute value into local shader storage.
+         *   (If no explicit conversion is needed, the function specializes to a pass-through.) */
+        MTLVertexFormat converted_format;
+        bool can_convert = mtl_vertex_format_resize(
+            mtl_shader_attribute.format, attr->comp_len, &converted_format);
+        desc.vertex_descriptor.attributes[i].format = (can_convert) ? converted_format :
+                                                                      mtl_shader_attribute.format;
+        desc.vertex_descriptor.attributes[i].format_conversion_mode = (GPUVertFetchMode)
+                                                                          attr->fetch_mode;
+        BLI_assert(desc.vertex_descriptor.attributes[i].format != MTLVertexFormatInvalid);
+      }
+      /* Use the attribute offset from the vertex format, as this will be correct. */
+      desc.vertex_descriptor.attributes[i].offset = attr->offset;
+      desc.vertex_descriptor.attributes[i].buffer_index = mtl_shader_attribute.buffer_index;
+
+      /* SSBO Vertex Fetch Attribute bind. */
+      if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+        BLI_assert_msg(mtl_shader_attribute.buffer_index == 0,
+                       "All attributes should be in buffer index zero");
+        MTLSSBOAttribute ssbo_attr(
+            mtl_shader_attribute.index,
+            mtl_shader_attribute.buffer_index,
+            attr->offset,
+            this->vertex_format.stride,
+            MTLShader::ssbo_vertex_type_to_attr_type(desc.vertex_descriptor.attributes[i].format),
+            false);
+        desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] =
+            ssbo_attr;
+        desc.vertex_descriptor.num_ssbo_attributes++;
+        active_mtl_shader->ssbo_vertex_fetch_bind_attribute(ssbo_attr);
+      }
+    }
+
+    /* Buffer bindings for singular vertex buffer. */
+    desc.vertex_descriptor.buffer_layouts[0].step_function = MTLVertexStepFunctionPerVertex;
+    desc.vertex_descriptor.buffer_layouts[0].step_rate = 1;
+    desc.vertex_descriptor.buffer_layouts[0].stride = this->vertex_format.stride;
+    BLI_assert(this->vertex_format.stride > 0);
+
+    /* SSBO Vertex Fetch -- Verify Attributes. */
+    if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+      active_mtl_shader->ssbo_vertex_fetch_bind_attributes_end(rec);
+
+      /* Set Status uniforms.
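 *
 * [Editor's note] These uniforms tell the SSBO vertex-fetch shader how to expand the
 * raw vertex stream. Illustrative arithmetic (see the draw submission further below):
 * drawing GPU_PRIM_LINES with 6 input vertices yields 3 input primitives; if the
 * shader declares 6 output vertices per primitive, the emitted draw uses 3 * 6 = 18
 * vertices.
 *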
*/ + BLI_assert_msg(active_mtl_shader->uni_ssbo_input_prim_type_loc != -1, + "ssbo_input_prim_type uniform location invalid!"); + BLI_assert_msg(active_mtl_shader->uni_ssbo_input_vert_count_loc != -1, + "ssbo_input_vert_count uniform location invalid!"); + GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)), + active_mtl_shader->uni_ssbo_input_prim_type_loc, + 1, + 1, + (const int *)(&this->prim_type)); + GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)), + active_mtl_shader->uni_ssbo_input_vert_count_loc, + 1, + 1, + (const int *)(&this->vertex_idx)); + } + + MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type); + if (context_->ensure_render_pipeline_state(mtl_prim_type)) { + + /* Issue draw call. */ + BLI_assert(this->vertex_idx > 0); + + /* Metal API does not support triangle fan, so we can emulate this + * input data by generating an index buffer to re-map indices to + * a TriangleList. + * + * NOTE(Metal): Consider caching generated triangle fan index buffers. + * For immediate mode, generating these is currently very cheap, as we use + * fast scratch buffer allocations. Though we may benefit from caching of + * frequently used buffer sizes. */ + if (mtl_needs_topology_emulation(this->prim_type)) { + + /* Debug safety check for SSBO FETCH MODE. */ + if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) { + BLI_assert(false && "Topology emulation not supported with SSBO Vertex Fetch mode"); + } + + /* Emulate Tri-fan. */ + if (this->prim_type == GPU_PRIM_TRI_FAN) { + /* Prepare Triangle-Fan emulation index buffer on CPU based on number of input + * vertices. */ + uint32_t base_vert_count = this->vertex_idx; + uint32_t num_triangles = max_ii(base_vert_count - 2, 0); + uint32_t fan_index_count = num_triangles * 3; + BLI_assert(num_triangles > 0); + + uint32_t alloc_size = sizeof(uint32_t) * fan_index_count; + uint32_t *index_buffer = nullptr; + + MTLTemporaryBuffer allocation = + context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned( + alloc_size, 128); + index_buffer = (uint32_t *)allocation.data; + + int a = 0; + for (int i = 0; i < num_triangles; i++) { + index_buffer[a++] = 0; + index_buffer[a++] = i + 1; + index_buffer[a++] = i + 2; + } + + @autoreleasepool { + + id<MTLBuffer> index_buffer_mtl = nil; + uint32_t index_buffer_offset = 0; + + /* Region of scratch buffer used for topology emulation element data. + * NOTE(Metal): We do not need to manually flush as the entire scratch + * buffer for current command buffer is flushed upon submission. */ + index_buffer_mtl = allocation.metal_buffer; + index_buffer_offset = allocation.buffer_offset; + + /* Set depth stencil state (requires knowledge of primitive type). */ + context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle); + + /* Bind Vertex Buffer. */ + rps.bind_vertex_buffer( + current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0); + + /* Draw. */ + [rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle + indexCount:fan_index_count + indexType:MTLIndexTypeUInt32 + indexBuffer:index_buffer_mtl + indexBufferOffset:index_buffer_offset]; + } + } + else { + /* TODO(Metal): Topology emulation for line loop. + * NOTE(Metal): This is currently not used anywhere and modified at the high + * level for efficiency in such cases. 
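 *
 * [Editor's note] A minimal sketch of such emulation, were it ever needed here
 * (hypothetical, mirroring the triangle-fan path above): a line loop over N vertices
 * becomes an indexed line strip with one extra index closing the loop:
 *
 *   for (uint32_t i = 0; i < vert_count; i++) {
 *     index_buffer[i] = i;
 *   }
 *   index_buffer[vert_count] = 0;  // close the loop back to the first vertex
 *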
*/ + BLI_assert_msg(false, "LineLoop requires emulation support in immediate mode."); + } + } + else { + MTLPrimitiveType primitive_type = metal_primitive_type_; + int vertex_count = this->vertex_idx; + + /* Bind Vertex Buffer. */ + rps.bind_vertex_buffer( + current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0); + + /* Set depth stencil state (requires knowledge of primitive type). */ + context_->ensure_depth_stencil_state(primitive_type); + + if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) { + + /* Bind Null Buffers for empty/missing bind slots. */ + id<MTLBuffer> null_buffer = context_->get_null_buffer(); + BLI_assert(null_buffer != nil); + for (int i = 1; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) { + + /* We only need to ensure a buffer is bound to the context, its contents do not matter + * as it will not be used. */ + if (rps.cached_vertex_buffer_bindings[i].metal_buffer == nil) { + rps.bind_vertex_buffer(null_buffer, 0, i); + } + } + + /* SSBO vertex fetch - Nullify elements buffer. */ + if (rps.cached_vertex_buffer_bindings[MTL_SSBO_VERTEX_FETCH_IBO_INDEX].metal_buffer == + nil) { + rps.bind_vertex_buffer(null_buffer, 0, MTL_SSBO_VERTEX_FETCH_IBO_INDEX); + } + + /* Submit draw call with modified vertex count, which reflects vertices per primitive + * defined in the USE_SSBO_VERTEX_FETCH pragma. */ + int num_input_primitives = gpu_get_prim_count_from_type(vertex_count, this->prim_type); + int output_num_verts = num_input_primitives * + active_mtl_shader->get_ssbo_vertex_fetch_output_num_verts(); +#ifndef NDEBUG + BLI_assert( + mtl_vertex_count_fits_primitive_type( + output_num_verts, active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type()) && + "Output Vertex count is not compatible with the requested output vertex primitive " + "type"); +#endif + [rec drawPrimitives:active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type() + vertexStart:0 + vertexCount:output_num_verts]; + context_->main_command_buffer.register_draw_counters(output_num_verts); + } + else { + /* Regular draw. */ + [rec drawPrimitives:primitive_type vertexStart:0 vertexCount:vertex_count]; + context_->main_command_buffer.register_draw_counters(vertex_count); + } + } + } + if (G.debug & G_DEBUG_GPU) { + [rec popDebugGroup]; + } + } + + /* Reset allocation after draw submission. */ + has_begun_ = false; + if (current_allocation_.metal_buffer) { + [current_allocation_.metal_buffer release]; + current_allocation_.metal_buffer = nil; + } +} + +} // blender::gpu diff --git a/source/blender/gpu/metal/mtl_memory.hh b/source/blender/gpu/metal/mtl_memory.hh index df80df6543f..bd354376b12 100644 --- a/source/blender/gpu/metal/mtl_memory.hh +++ b/source/blender/gpu/metal/mtl_memory.hh @@ -340,13 +340,13 @@ class MTLBufferPool { private: /* Memory statistics. */ - long long int total_allocation_bytes_ = 0; + int64_t total_allocation_bytes_ = 0; #if MTL_DEBUG_MEMORY_STATISTICS == 1 /* Debug statistics. */ std::atomic<int> per_frame_allocation_count_; - std::atomic<long long int> allocations_in_pool_; - std::atomic<long long int> buffers_in_pool_; + std::atomic<int64_t> allocations_in_pool_; + std::atomic<int64_t> buffers_in_pool_; #endif /* Metal resources. 
*/ diff --git a/source/blender/gpu/metal/mtl_shader.hh b/source/blender/gpu/metal/mtl_shader.hh index 64d9d1cf849..5485b32dd31 100644 --- a/source/blender/gpu/metal/mtl_shader.hh +++ b/source/blender/gpu/metal/mtl_shader.hh @@ -261,8 +261,6 @@ class MTLShader : public Shader { bool get_push_constant_is_dirty(); void push_constant_bindstate_mark_dirty(bool is_dirty); - void vertformat_from_shader(GPUVertFormat *format) const override; - /* DEPRECATED: Kept only because of BGL API. (Returning -1 in METAL). */ int program_handle_get() const override { diff --git a/source/blender/gpu/metal/mtl_shader.mm b/source/blender/gpu/metal/mtl_shader.mm index 23097f312f0..3b27b60bca0 100644 --- a/source/blender/gpu/metal/mtl_shader.mm +++ b/source/blender/gpu/metal/mtl_shader.mm @@ -129,6 +129,7 @@ MTLShader::~MTLShader() if (shd_builder_ != nullptr) { delete shd_builder_; + shd_builder_ = nullptr; } } @@ -209,6 +210,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info) /* Release temporary compilation resources. */ delete shd_builder_; + shd_builder_ = nullptr; return false; } } @@ -279,6 +281,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info) /* Release temporary compilation resources. */ delete shd_builder_; + shd_builder_ = nullptr; return false; } } @@ -324,6 +327,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info) /* Release temporary compilation resources. */ delete shd_builder_; + shd_builder_ = nullptr; return true; } @@ -535,28 +539,6 @@ void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty) { push_constant_modified_ = is_dirty; } - -void MTLShader::vertformat_from_shader(GPUVertFormat *format) const -{ - GPU_vertformat_clear(format); - - const MTLShaderInterface *mtl_interface = static_cast<const MTLShaderInterface *>(interface); - for (const uint attr_id : IndexRange(mtl_interface->get_total_attributes())) { - const MTLShaderInputAttribute &attr = mtl_interface->get_attribute(attr_id); - - /* Extract type parameters from Metal type. */ - GPUVertCompType comp_type = comp_type_from_vert_format(attr.format); - uint comp_len = comp_count_from_vert_format(attr.format); - GPUVertFetchMode fetch_mode = fetchmode_from_vert_format(attr.format); - - GPU_vertformat_attr_add(format, - mtl_interface->get_name_at_offset(attr.name_offset), - comp_type, - comp_len, - fetch_mode); - } -} - /** \} */ /* -------------------------------------------------------------------- */ @@ -1167,6 +1149,7 @@ void MTLShader::ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_at MTLShaderInterface *mtl_interface = this->get_interface(); BLI_assert(ssbo_attr.mtl_attribute_index >= 0 && ssbo_attr.mtl_attribute_index < mtl_interface->get_total_attributes()); + UNUSED_VARS_NDEBUG(mtl_interface); /* Update bind-mask to verify this attribute has been used. 
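 *
 * [Editor's note] The bind-mask is a bitfield with one bit per shader attribute; e.g.
 * binding attributes 0, 1 and 2 in turn takes it 0b000 -> 0b001 -> 0b011 -> 0b111,
 * and the assert below fires if an attribute's bit is already set (a double bind).
 *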
*/ BLI_assert((ssbo_vertex_attribute_bind_mask_ & (1 << ssbo_attr.mtl_attribute_index)) == diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm index 977e97dbd82..4a2be0753bb 100644 --- a/source/blender/gpu/metal/mtl_shader_generator.mm +++ b/source/blender/gpu/metal/mtl_shader_generator.mm @@ -724,10 +724,6 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info) } if (msl_iface.uses_ssbo_vertex_fetch_mode) { ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl; - ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_MAX_VBOS " << MTL_SSBO_VERTEX_FETCH_MAX_VBOS - << std::endl; - ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_IBO_INDEX " << MTL_SSBO_VERTEX_FETCH_IBO_INDEX - << std::endl; for (const MSLVertexInputAttribute &attr : msl_iface.vertex_input_attributes) { ss_vertex << "#define SSBO_ATTR_TYPE_" << attr.name << " " << attr.type << std::endl; } diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm index 4af46c13751..b4e913e5be6 100644 --- a/source/blender/gpu/metal/mtl_texture.mm +++ b/source/blender/gpu/metal/mtl_texture.mm @@ -12,6 +12,7 @@ #include "GPU_batch_presets.h" #include "GPU_capabilities.h" #include "GPU_framebuffer.h" +#include "GPU_immediate.h" #include "GPU_platform.h" #include "GPU_state.h" @@ -303,7 +304,6 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst, /* Execute graphics draw call to perform the blit. */ GPUBatch *quad = GPU_batch_preset_quad(); - GPU_batch_set_shader(quad, shader); float w = dst->width_get(); @@ -337,6 +337,20 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst, GPU_batch_draw(quad); + /* TMP draw with IMM TODO(Metal): Remove this once GPUBatch is supported. */ + GPUVertFormat *imm_format = immVertexFormat(); + uint pos = GPU_vertformat_attr_add(imm_format, "pos", GPU_COMP_F32, 2, GPU_FETCH_FLOAT); + + immBindShader(shader); + immBegin(GPU_PRIM_TRI_STRIP, 4); + immVertex2f(pos, 1, 0); + immVertex2f(pos, 0, 0); + immVertex2f(pos, 1, 1); + immVertex2f(pos, 0, 1); + immEnd(); + immUnbindProgram(); + /**********************/ + /* restoring old pipeline state. */ GPU_depth_mask(depth_write_prev); GPU_stencil_write_mask_set(stencil_mask_prev); @@ -1463,79 +1477,6 @@ bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo) BLI_assert_msg(this->format_ != GPU_DEPTH24_STENCIL8, "Apple silicon does not support GPU_DEPTH24_S8"); - MTLPixelFormat mtl_format = gpu_texture_format_to_metal(this->format_); - mtl_max_mips_ = 1; - mipmaps_ = 0; - this->mip_range_set(0, 0); - - /* Create texture from GPUVertBuf's buffer. */ - MTLVertBuf *mtl_vbo = static_cast<MTLVertBuf *>(unwrap(vbo)); - mtl_vbo->bind(); - mtl_vbo->flag_used(); - - /* Get Metal Buffer. */ - id<MTLBuffer> source_buffer = mtl_vbo->get_metal_buffer(); - BLI_assert(source_buffer); - - /* Verify size. */ - if (w_ <= 0) { - MTL_LOG_WARNING("Allocating texture buffer of width 0!\n"); - w_ = 1; - } - - /* Verify Texture and vertex buffer alignment. */ - int bytes_per_pixel = get_mtl_format_bytesize(mtl_format); - int bytes_per_row = bytes_per_pixel * w_; - - MTLContext *mtl_ctx = MTLContext::get(); - uint align_requirement = static_cast<uint>( - [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]); - - /* Verify per-vertex size aligns with texture size. 
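 *
 * [Editor's note] Context for the block removed here: a GPU_RGBA8 buffer-texture is
 * 4 bytes per pixel, so the backing vertex buffer had to use a matching 4-byte stride;
 * Metal's minimum 4-byte per-vertex padding is what could make a smaller declared
 * stride diverge from the pixel stride and trip the assert.
 *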
@@ -1463,79 +1477,6 @@ bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo)
   BLI_assert_msg(this->format_ != GPU_DEPTH24_STENCIL8,
                  "Apple silicon does not support GPU_DEPTH24_S8");
 
-  MTLPixelFormat mtl_format = gpu_texture_format_to_metal(this->format_);
-  mtl_max_mips_ = 1;
-  mipmaps_ = 0;
-  this->mip_range_set(0, 0);
-
-  /* Create texture from GPUVertBuf's buffer. */
-  MTLVertBuf *mtl_vbo = static_cast<MTLVertBuf *>(unwrap(vbo));
-  mtl_vbo->bind();
-  mtl_vbo->flag_used();
-
-  /* Get Metal Buffer. */
-  id<MTLBuffer> source_buffer = mtl_vbo->get_metal_buffer();
-  BLI_assert(source_buffer);
-
-  /* Verify size. */
-  if (w_ <= 0) {
-    MTL_LOG_WARNING("Allocating texture buffer of width 0!\n");
-    w_ = 1;
-  }
-
-  /* Verify Texture and vertex buffer alignment. */
-  int bytes_per_pixel = get_mtl_format_bytesize(mtl_format);
-  int bytes_per_row = bytes_per_pixel * w_;
-
-  MTLContext *mtl_ctx = MTLContext::get();
-  uint align_requirement = static_cast<uint>(
-      [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]);
-
-  /* Verify per-vertex size aligns with texture size. */
-  const GPUVertFormat *format = GPU_vertbuf_get_format(vbo);
-  BLI_assert(bytes_per_pixel == format->stride &&
-             "Pixel format stride MUST match the texture format stride -- These being different "
-             "is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex");
-  UNUSED_VARS_NDEBUG(format);
-
-  /* Create texture descriptor. */
-  BLI_assert(type_ == GPU_TEXTURE_BUFFER);
-  texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
-  texture_descriptor_.pixelFormat = mtl_format;
-  texture_descriptor_.textureType = MTLTextureTypeTextureBuffer;
-  texture_descriptor_.width = w_;
-  texture_descriptor_.height = 1;
-  texture_descriptor_.depth = 1;
-  texture_descriptor_.arrayLength = 1;
-  texture_descriptor_.mipmapLevelCount = mtl_max_mips_;
-  texture_descriptor_.usage =
-      MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite |
-      MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. */
-  texture_descriptor_.storageMode = [source_buffer storageMode];
-  texture_descriptor_.sampleCount = 1;
-  texture_descriptor_.cpuCacheMode = [source_buffer cpuCacheMode];
-  texture_descriptor_.hazardTrackingMode = [source_buffer hazardTrackingMode];
-
-  texture_ = [source_buffer
-      newTextureWithDescriptor:texture_descriptor_
-                         offset:0
-                    bytesPerRow:ceil_to_multiple_u(bytes_per_row, align_requirement)];
-  aligned_w_ = bytes_per_row / bytes_per_pixel;
-
-  BLI_assert(texture_);
-  texture_.label = [NSString stringWithUTF8String:this->get_name()];
-  is_baked_ = true;
-  is_dirty_ = false;
-  resource_mode_ = MTL_TEXTURE_MODE_VBO;
-
-  /* Track Status. */
-  vert_buffer_ = mtl_vbo;
-  vert_buffer_mtl_ = source_buffer;
-
-  /* Cleanup. */
-  [texture_descriptor_ release];
-  texture_descriptor_ = nullptr;
-
   return true;
 }
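The removed `init_internal(GPUVertBuf *)` path wrapped a vertex buffer in a linear texture, which is why it rounded the row stride up with `ceil_to_multiple_u()`: Metal requires the `bytesPerRow` of a buffer-backed texture to be a multiple of the device's linear-texture alignment. A sketch of that rounding, assuming a valid `id<MTLDevice>` (names mirror the removed locals; this is illustrative, not part of the patch):

    #import <Metal/Metal.h>
    #include <cstdint>

    /* Equivalent of ceil_to_multiple_u(bytes_per_row, align_requirement)
     * as used by the removed code. */
    static uint32_t aligned_row_stride(id<MTLDevice> device,
                                       MTLPixelFormat mtl_format,
                                       uint32_t bytes_per_row)
    {
      const uint32_t align_requirement = static_cast<uint32_t>(
          [device minimumLinearTextureAlignmentForPixelFormat:mtl_format]);
      /* Round up to the next multiple, e.g. a 12-byte row with a 16-byte
       * alignment requirement is padded to 16 bytes. */
      return ((bytes_per_row + align_requirement - 1) / align_requirement) * align_requirement;
    }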
diff --git a/source/blender/gpu/metal/mtl_texture_util.mm b/source/blender/gpu/metal/mtl_texture_util.mm
index 928393fb39e..5ed7659f260 100644
--- a/source/blender/gpu/metal/mtl_texture_util.mm
+++ b/source/blender/gpu/metal/mtl_texture_util.mm
@@ -22,13 +22,7 @@
 /* Utility file for secondary functionality which supports mtl_texture.mm. */
 
 extern char datatoc_compute_texture_update_msl[];
-extern char datatoc_depth_2d_update_vert_glsl[];
-extern char datatoc_depth_2d_update_float_frag_glsl[];
-extern char datatoc_depth_2d_update_int24_frag_glsl[];
-extern char datatoc_depth_2d_update_int32_frag_glsl[];
 extern char datatoc_compute_texture_read_msl[];
-extern char datatoc_gpu_shader_fullscreen_blit_vert_glsl[];
-extern char datatoc_gpu_shader_fullscreen_blit_frag_glsl[];
 
 namespace blender::gpu {
 
@@ -447,42 +441,34 @@ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get(
     return *result;
   }
 
-  const char *fragment_source = nullptr;
+  const char *depth_2d_info_variant = nullptr;
   switch (specialization.data_mode) {
     case MTL_DEPTH_UPDATE_MODE_FLOAT:
-      fragment_source = datatoc_depth_2d_update_float_frag_glsl;
+      depth_2d_info_variant = "depth_2d_update_float";
       break;
     case MTL_DEPTH_UPDATE_MODE_INT24:
-      fragment_source = datatoc_depth_2d_update_int24_frag_glsl;
+      depth_2d_info_variant = "depth_2d_update_int24";
      break;
    case MTL_DEPTH_UPDATE_MODE_INT32:
-      fragment_source = datatoc_depth_2d_update_int32_frag_glsl;
+      depth_2d_info_variant = "depth_2d_update_int32";
      break;
    default:
      BLI_assert(false && "Invalid format mode\n");
      return nullptr;
  }
 
-  GPUShader *shader = GPU_shader_create(datatoc_depth_2d_update_vert_glsl,
-                                        fragment_source,
-                                        nullptr,
-                                        nullptr,
-                                        nullptr,
-                                        "depth_2d_update_sh_get");
+  GPUShader *shader = GPU_shader_create_from_info_name(depth_2d_info_variant);
   mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialization, shader);
   return shader;
 }
 
 GPUShader *gpu::MTLTexture::fullscreen_blit_sh_get()
 {
-
   MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
   BLI_assert(mtl_context != nullptr);
   if (mtl_context->get_texture_utils().fullscreen_blit_shader == nullptr) {
-    const char *vertex_source = datatoc_gpu_shader_fullscreen_blit_vert_glsl;
-    const char *fragment_source = datatoc_gpu_shader_fullscreen_blit_frag_glsl;
-    GPUShader *shader = GPU_shader_create(
-        vertex_source, fragment_source, nullptr, nullptr, nullptr, "fullscreen_blit");
+    GPUShader *shader = GPU_shader_create_from_info_name("fullscreen_blit");
+    mtl_context->get_texture_utils().fullscreen_blit_shader = shader;
   }
 
   return mtl_context->get_texture_utils().fullscreen_blit_shader;
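`GPU_shader_create_from_info_name()` looks a shader up by the name of a registered `ShaderCreateInfo` descriptor instead of compiling raw GLSL strings, which lets each backend generate its own source (GLSL or MSL) from one declaration. The actual `fullscreen_blit` and `depth_2d_update_*` infos live in files not shown in this patch; a hypothetical sketch of what such a declaration looks like, with illustrative fields:

    #include "gpu_shader_create_info.hh"

    /* Hypothetical: the real "fullscreen_blit" info is defined elsewhere in
     * the tree; attribute slots and source file names here are assumptions. */
    GPU_SHADER_CREATE_INFO(fullscreen_blit)
        .vertex_in(0, Type::VEC2, "pos")
        .fragment_out(0, Type::VEC4, "fragColor")
        .vertex_source("gpu_shader_fullscreen_blit_vert.glsl")
        .fragment_source("gpu_shader_fullscreen_blit_frag.glsl")
        .do_static_compilation(true);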
@@ -614,7 +600,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
         stringWithUTF8String:datatoc_compute_texture_read_msl];
 
     /* Defensive Debug Checks. */
-    long long int depth_scale_factor = 1;
+    int64_t depth_scale_factor = 1;
     if (specialization_params.depth_format_mode > 0) {
       BLI_assert(specialization_params.component_count_input == 1);
       BLI_assert(specialization_params.component_count_output == 1);
diff --git a/source/blender/gpu/opengl/gl_backend.hh b/source/blender/gpu/opengl/gl_backend.hh
index 8646d94e2fd..14fca9f061d 100644
--- a/source/blender/gpu/opengl/gl_backend.hh
+++ b/source/blender/gpu/opengl/gl_backend.hh
@@ -61,7 +61,7 @@ class GLBackend : public GPUBackend {
     GLTexture::samplers_update();
   };
 
-  Context *context_alloc(void *ghost_window) override
+  Context *context_alloc(void *ghost_window, void *ghost_context) override
   {
     return new GLContext(ghost_window, shared_orphan_list_);
   };
diff --git a/source/blender/gpu/tests/gpu_testing.cc b/source/blender/gpu/tests/gpu_testing.cc
index 224a9afcf59..67e296b11d5 100644
--- a/source/blender/gpu/tests/gpu_testing.cc
+++ b/source/blender/gpu/tests/gpu_testing.cc
@@ -19,7 +19,7 @@ void GPUTest::SetUp()
 
   ghost_system = GHOST_CreateSystem();
   ghost_context = GHOST_CreateOpenGLContext(ghost_system, glSettings);
   GHOST_ActivateOpenGLContext(ghost_context);
-  context = GPU_context_create(nullptr);
+  context = GPU_context_create(nullptr, ghost_context);
   GPU_init();
 }
diff --git a/source/blender/render/intern/pipeline.cc b/source/blender/render/intern/pipeline.cc
index 4b52fb62bee..3cac9063b8e 100644
--- a/source/blender/render/intern/pipeline.cc
+++ b/source/blender/render/intern/pipeline.cc
@@ -926,7 +926,7 @@ void *RE_gl_context_get(Render *re)
 void *RE_gpu_context_get(Render *re)
 {
   if (re->gpu_context == nullptr) {
-    re->gpu_context = GPU_context_create(nullptr);
+    re->gpu_context = GPU_context_create(NULL, re->gl_context);
   }
   return re->gpu_context;
 }
diff --git a/source/blender/windowmanager/intern/wm_playanim.c b/source/blender/windowmanager/intern/wm_playanim.c
index 7c6650922a4..e768d18960b 100644
--- a/source/blender/windowmanager/intern/wm_playanim.c
+++ b/source/blender/windowmanager/intern/wm_playanim.c
@@ -1549,7 +1549,7 @@ static char *wm_main_playanim_intern(int argc, const char **argv)
   //  GHOST_ActivateWindowDrawingContext(g_WS.ghost_window);
 
   /* initialize OpenGL immediate mode */
-  g_WS.gpu_context = GPU_context_create(g_WS.ghost_window);
+  g_WS.gpu_context = GPU_context_create(g_WS.ghost_window, NULL);
   GPU_init();
 
   /* initialize the font */
diff --git a/source/blender/windowmanager/intern/wm_window.c b/source/blender/windowmanager/intern/wm_window.c
index b61ebdd11be..dea875becb1 100644
--- a/source/blender/windowmanager/intern/wm_window.c
+++ b/source/blender/windowmanager/intern/wm_window.c
@@ -579,7 +579,7 @@ static void wm_window_ghostwindow_add(wmWindowManager *wm,
                                       glSettings);
 
   if (ghostwin) {
-    win->gpuctx = GPU_context_create(ghostwin);
+    win->gpuctx = GPU_context_create(ghostwin, NULL);
 
     /* needed so we can detect the graphics card below */
     GPU_init();
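After this change `GPU_context_create()` takes both a GHOST window and an explicit GHOST context, and every call site above passes exactly one of the two: window-backed contexts derive their drawing context from the window, while windowless users (the render pipeline, GPU tests) supply a GHOST context directly. A sketch of the two patterns, assuming the caller already owns the relevant GHOST handles (the wrapper function is illustrative, not from this patch):

    #include "GPU_context.h"

    static GPUContext *create_gpu_context(void *ghost_window, void *ghost_context)
    {
      if (ghost_window != NULL) {
        /* Window-backed: the GPU context is derived from the window's own
         * drawing context, so no explicit GHOST context is needed. */
        return GPU_context_create(ghost_window, NULL);
      }
      /* Windowless: an explicit GHOST context must be supplied. */
      return GPU_context_create(NULL, ghost_context);
    }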