Diffstat (limited to 'source/blender/gpu/metal/mtl_context.mm')
-rw-r--r-- | source/blender/gpu/metal/mtl_context.mm | 1342
1 file changed, 1283 insertions, 59 deletions
diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm index 26cfe6632ef..50576379f0d 100644 --- a/source/blender/gpu/metal/mtl_context.mm +++ b/source/blender/gpu/metal/mtl_context.mm @@ -5,11 +5,29 @@ */ #include "mtl_context.hh" #include "mtl_debug.hh" +#include "mtl_framebuffer.hh" +#include "mtl_immediate.hh" +#include "mtl_memory.hh" +#include "mtl_primitive.hh" +#include "mtl_shader.hh" +#include "mtl_shader_interface.hh" #include "mtl_state.hh" +#include "mtl_uniform_buffer.hh" #include "DNA_userdef_types.h" #include "GPU_capabilities.h" +#include "GPU_matrix.h" +#include "GPU_shader.h" +#include "GPU_texture.h" +#include "GPU_uniform_buffer.h" +#include "GPU_vertex_buffer.h" +#include "intern/gpu_matrix_private.h" + +#include "PIL_time.h" + +#include <fstream> +#include <string> using namespace blender; using namespace blender::gpu; @@ -19,29 +37,165 @@ namespace blender::gpu { /* Global memory manager. */ MTLBufferPool MTLContext::global_memory_manager; +/* Swap-chain and latency management. */ +std::atomic<int> MTLContext::max_drawables_in_flight = 0; +std::atomic<int64_t> MTLContext::avg_drawable_latency_us = 0; +int64_t MTLContext::frame_latency[MTL_FRAME_AVERAGE_COUNT] = {0}; + +/* -------------------------------------------------------------------- */ +/** \name GHOST Context interaction. + * \{ */ + +void MTLContext::set_ghost_context(GHOST_ContextHandle ghostCtxHandle) +{ + GHOST_Context *ghost_ctx = reinterpret_cast<GHOST_Context *>(ghostCtxHandle); + BLI_assert(ghost_ctx != nullptr); + + /* Release old MTLTexture handle */ + if (default_fbo_mtltexture_) { + [default_fbo_mtltexture_ release]; + default_fbo_mtltexture_ = nil; + } + + /* Release Framebuffer attachments */ + MTLFrameBuffer *mtl_front_left = static_cast<MTLFrameBuffer *>(this->front_left); + MTLFrameBuffer *mtl_back_left = static_cast<MTLFrameBuffer *>(this->back_left); + mtl_front_left->remove_all_attachments(); + mtl_back_left->remove_all_attachments(); + + GHOST_ContextCGL *ghost_cgl_ctx = dynamic_cast<GHOST_ContextCGL *>(ghost_ctx); + if (ghost_cgl_ctx != NULL) { + default_fbo_mtltexture_ = ghost_cgl_ctx->metalOverlayTexture(); + + MTL_LOG_INFO( + "Binding GHOST context CGL %p to GPU context %p. (Device: %p, queue: %p, texture: %p)\n", + ghost_cgl_ctx, + this, + this->device, + this->queue, + default_fbo_gputexture_); + + /* Check if the GHOST Context provides a default framebuffer: */ + if (default_fbo_mtltexture_) { + + /* Release old GPUTexture handle */ + if (default_fbo_gputexture_) { + GPU_texture_free(wrap(static_cast<Texture *>(default_fbo_gputexture_))); + default_fbo_gputexture_ = nullptr; + } + + /* Retain handle */ + [default_fbo_mtltexture_ retain]; + + /*** Create front and back-buffers ***/ + /* Create gpu::MTLTexture objects */ + default_fbo_gputexture_ = new gpu::MTLTexture( + "MTL_BACKBUFFER", GPU_RGBA16F, GPU_TEXTURE_2D, default_fbo_mtltexture_); + + /* Update frame-buffers with new texture attachments. 
*/ + mtl_front_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0); + mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0); +#ifndef NDEBUG + this->label = default_fbo_mtltexture_.label; +#endif + } + else { + + /* Add default texture for cases where no other framebuffer is bound */ + if (!default_fbo_gputexture_) { + default_fbo_gputexture_ = static_cast<gpu::MTLTexture *>( + unwrap(GPU_texture_create_2d(__func__, 16, 16, 1, GPU_RGBA16F, nullptr))); + } + mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0); + + MTL_LOG_INFO( + "-- Bound context %p for GPU context: %p is offscreen and does not have a default " + "framebuffer\n", + ghost_cgl_ctx, + this); +#ifndef NDEBUG + this->label = @"Offscreen Metal Context"; +#endif + } + } + else { + MTL_LOG_INFO( + "[ERROR] Failed to bind GHOST context to MTLContext -- GHOST_ContextCGL is null " + "(GhostContext: %p, GhostContext_CGL: %p)\n", + ghost_ctx, + ghost_cgl_ctx); + BLI_assert(false); + } +} + +void MTLContext::set_ghost_window(GHOST_WindowHandle ghostWinHandle) +{ + GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghostWinHandle); + this->set_ghost_context((GHOST_ContextHandle)(ghostWin ? ghostWin->getContext() : NULL)); +} + +/** \} */ + /* -------------------------------------------------------------------- */ /** \name MTLContext * \{ */ /* Placeholder functions */ -MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command_buffer(*this) +MTLContext::MTLContext(void *ghost_window, void *ghost_context) + : memory_manager(*this), main_command_buffer(*this) { /* Init debug. */ debug::mtl_debug_init(); + /* Initialize Render-pass and Frame-buffer State. */ + this->back_left = nullptr; + /* Initialize command buffer state. */ this->main_command_buffer.prepare(); + /* Initialize IMM and pipeline state */ + this->pipeline_state.initialised = false; + /* Frame management. */ is_inside_frame_ = false; current_frame_index_ = 0; + /* Prepare null data buffer. */ + null_buffer_ = nil; + null_attribute_buffer_ = nil; + + /* Zero-initialize MTL textures. */ + default_fbo_mtltexture_ = nil; + default_fbo_gputexture_ = nullptr; + + /** Fetch GHOSTContext and fetch Metal device/queue. */ + ghost_window_ = ghost_window; + if (ghost_window_ && ghost_context == NULL) { + /* NOTE(Metal): Fetch ghost_context from ghost_window if it is not provided. + * Regardless of whether windowed or not, we need access to the GhostContext + * for presentation, and device/queue access. */ + GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghost_window_); + ghost_context = (ghostWin ? ghostWin->getContext() : NULL); + } + BLI_assert(ghost_context); + this->ghost_context_ = static_cast<GHOST_ContextCGL *>(ghost_context); + this->queue = (id<MTLCommandQueue>)this->ghost_context_->metalCommandQueue(); + this->device = (id<MTLDevice>)this->ghost_context_->metalDevice(); + BLI_assert(this->queue); + BLI_assert(this->device); + [this->queue retain]; + [this->device retain]; + + /* Register present callback. */ + this->ghost_context_->metalRegisterPresentCallback(&present); + /* Create FrameBuffer handles. */ MTLFrameBuffer *mtl_front_left = new MTLFrameBuffer(this, "front_left"); MTLFrameBuffer *mtl_back_left = new MTLFrameBuffer(this, "back_left"); this->front_left = mtl_front_left; this->back_left = mtl_back_left; this->active_fb = this->back_left; + /* Prepare platform and capabilities. (NOTE: With METAL, this needs to be done after CTX * initialization). 
*/ MTLBackend::platform_init(this); @@ -50,6 +204,7 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command /* Initialize Metal modules. */ this->memory_manager.init(); this->state_manager = new MTLStateManager(this); + this->imm = new MTLImmediate(this); /* Ensure global memory manager is initialized. */ MTLContext::global_memory_manager.init(this->device); @@ -83,9 +238,29 @@ MTLContext::~MTLContext() this->end_frame(); } } + + /* Release Memory Manager */ + this->get_scratchbuffer_manager().free(); + /* Release update/blit shaders. */ this->get_texture_utils().cleanup(); + /* Detach resource references */ + GPU_texture_unbind_all(); + + /* Unbind UBOs */ + for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) { + if (this->pipeline_state.ubo_bindings[i].bound && + this->pipeline_state.ubo_bindings[i].ubo != nullptr) { + GPUUniformBuf *ubo = wrap( + static_cast<UniformBuf *>(this->pipeline_state.ubo_bindings[i].ubo)); + GPU_uniformbuf_unbind(ubo); + } + } + + /* Release Dummy resources */ + this->free_dummy_resources(); + /* Release Sampler States. */ for (int i = 0; i < GPU_SAMPLER_MAX; i++) { if (sampler_state_cache_[i] != nil) { @@ -93,6 +268,28 @@ MTLContext::~MTLContext() sampler_state_cache_[i] = nil; } } + + /* Empty cached sampler argument buffers. */ + for (auto entry : cached_sampler_buffers_.values()) { + entry->free(); + } + cached_sampler_buffers_.clear(); + + /* Free null buffers. */ + if (null_buffer_) { + [null_buffer_ release]; + } + if (null_attribute_buffer_) { + [null_attribute_buffer_ release]; + } + + /* Free Metal objects. */ + if (this->queue) { + [this->queue release]; + } + if (this->device) { + [this->device release]; + } } void MTLContext::begin_frame() @@ -124,20 +321,49 @@ void MTLContext::check_error(const char *info) void MTLContext::activate() { - /* TODO(Metal): Implement. */ + /* Make sure no other context is already bound to this thread. */ + BLI_assert(is_active_ == false); + is_active_ = true; + thread_ = pthread_self(); + + /* Re-apply ghost window/context for resizing */ + if (ghost_window_) { + this->set_ghost_window((GHOST_WindowHandle)ghost_window_); + } + else if (ghost_context_) { + this->set_ghost_context((GHOST_ContextHandle)ghost_context_); + } + + /* Reset UBO bind state. */ + for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) { + if (this->pipeline_state.ubo_bindings[i].bound && + this->pipeline_state.ubo_bindings[i].ubo != nullptr) { + this->pipeline_state.ubo_bindings[i].bound = false; + this->pipeline_state.ubo_bindings[i].ubo = nullptr; + } + } + + /* Ensure imm active. */ + immActivate(); } + void MTLContext::deactivate() { - /* TODO(Metal): Implement. */ + BLI_assert(this->is_active_on_thread()); + /* Flush context on deactivate. */ + this->flush(); + is_active_ = false; + immDeactivate(); } void MTLContext::flush() { - /* TODO(Metal): Implement. */ + this->main_command_buffer.submit(false); } + void MTLContext::finish() { - /* TODO(Metal): Implement. */ + this->main_command_buffer.submit(true); } void MTLContext::memory_statistics_get(int *total_mem, int *free_mem) @@ -177,10 +403,9 @@ id<MTLRenderCommandEncoder> MTLContext::ensure_begin_render_pass() } /* Ensure command buffer workload submissions are optimal -- - * Though do not split a batch mid-IMM recording */ - /* TODO(Metal): Add IMM Check once MTLImmediate has been implemented. 
*/ - if (this->main_command_buffer.do_break_submission()/*&& - !((MTLImmediate *)(this->imm))->imm_is_recording()*/) { + * Though do not split a batch mid-IMM recording. */ + if (this->main_command_buffer.do_break_submission() && + !((MTLImmediate *)(this->imm))->imm_is_recording()) { this->flush(); } @@ -227,6 +452,116 @@ MTLFrameBuffer *MTLContext::get_default_framebuffer() return static_cast<MTLFrameBuffer *>(this->back_left); } +MTLShader *MTLContext::get_active_shader() +{ + return this->pipeline_state.active_shader; +} + +id<MTLBuffer> MTLContext::get_null_buffer() +{ + if (null_buffer_ != nil) { + return null_buffer_; + } + + static const int null_buffer_size = 4096; + null_buffer_ = [this->device newBufferWithLength:null_buffer_size + options:MTLResourceStorageModeManaged]; + [null_buffer_ retain]; + uint32_t *null_data = (uint32_t *)calloc(1, null_buffer_size); + memcpy([null_buffer_ contents], null_data, null_buffer_size); + [null_buffer_ didModifyRange:NSMakeRange(0, null_buffer_size)]; + free(null_data); + + BLI_assert(null_buffer_ != nil); + return null_buffer_; +} + +id<MTLBuffer> MTLContext::get_null_attribute_buffer() +{ + if (null_attribute_buffer_ != nil) { + return null_attribute_buffer_; + } + + /* Allocate Null buffer if it has not yet been created. + * Min buffer size is 256 bytes -- though we only need 64 bytes of data. */ + static const int null_buffer_size = 256; + null_attribute_buffer_ = [this->device newBufferWithLength:null_buffer_size + options:MTLResourceStorageModeManaged]; + BLI_assert(null_attribute_buffer_ != nil); + [null_attribute_buffer_ retain]; + float data[4] = {0.0f, 0.0f, 0.0f, 1.0f}; + memcpy([null_attribute_buffer_ contents], data, sizeof(float) * 4); + [null_attribute_buffer_ didModifyRange:NSMakeRange(0, null_buffer_size)]; + + return null_attribute_buffer_; +} + +gpu::MTLTexture *MTLContext::get_dummy_texture(eGPUTextureType type) +{ + /* Decrement 1 from texture type as they start from 1 and go to 32 (inclusive).
Remap to 0..31 */ + gpu::MTLTexture *dummy_tex = dummy_textures_[type - 1]; + if (dummy_tex != nullptr) { + return dummy_tex; + } + else { + GPUTexture *tex = nullptr; + switch (type) { + case GPU_TEXTURE_1D: + tex = GPU_texture_create_1d("Dummy 1D", 128, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_1D_ARRAY: + tex = GPU_texture_create_1d_array("Dummy 1DArray", 128, 1, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_2D: + tex = GPU_texture_create_2d("Dummy 2D", 128, 128, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_2D_ARRAY: + tex = GPU_texture_create_2d_array("Dummy 2DArray", 128, 128, 1, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_3D: + tex = GPU_texture_create_3d( + "Dummy 3D", 128, 128, 1, 1, GPU_RGBA8, GPU_DATA_UBYTE, nullptr); + break; + case GPU_TEXTURE_CUBE: + tex = GPU_texture_create_cube("Dummy Cube", 128, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_CUBE_ARRAY: + tex = GPU_texture_create_cube_array("Dummy CubeArray", 128, 1, 1, GPU_RGBA8, nullptr); + break; + case GPU_TEXTURE_BUFFER: + if (!dummy_verts_) { + GPU_vertformat_clear(&dummy_vertformat_); + GPU_vertformat_attr_add(&dummy_vertformat_, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT); + dummy_verts_ = GPU_vertbuf_create_with_format_ex(&dummy_vertformat_, GPU_USAGE_STATIC); + GPU_vertbuf_data_alloc(dummy_verts_, 64); + } + tex = GPU_texture_create_from_vertbuf("Dummy TextureBuffer", dummy_verts_); + break; + default: + BLI_assert_msg(false, "Unrecognised texture type"); + return nullptr; + } + gpu::MTLTexture *metal_tex = static_cast<gpu::MTLTexture *>(reinterpret_cast<Texture *>(tex)); + dummy_textures_[type - 1] = metal_tex; + return metal_tex; + } + return nullptr; +} + +void MTLContext::free_dummy_resources() +{ + for (int tex = 0; tex < GPU_TEXTURE_BUFFER; tex++) { + if (dummy_textures_[tex]) { + GPU_texture_free( + reinterpret_cast<GPUTexture *>(static_cast<Texture *>(dummy_textures_[tex]))); + dummy_textures_[tex] = nullptr; + } + } + if (dummy_verts_) { + GPU_vertbuf_discard(dummy_verts_); + } +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -239,20 +574,20 @@ void MTLContext::pipeline_state_init() /*** Initialize state only once. ***/ if (!this->pipeline_state.initialised) { this->pipeline_state.initialised = true; - this->pipeline_state.active_shader = NULL; + this->pipeline_state.active_shader = nullptr; /* Clear bindings state. */ for (int t = 0; t < GPU_max_textures(); t++) { this->pipeline_state.texture_bindings[t].used = false; - this->pipeline_state.texture_bindings[t].texture_slot_index = t; - this->pipeline_state.texture_bindings[t].texture_resource = NULL; + this->pipeline_state.texture_bindings[t].slot_index = -1; + this->pipeline_state.texture_bindings[t].texture_resource = nullptr; } for (int s = 0; s < MTL_MAX_SAMPLER_SLOTS; s++) { this->pipeline_state.sampler_bindings[s].used = false; } for (int u = 0; u < MTL_MAX_UNIFORM_BUFFER_BINDINGS; u++) { this->pipeline_state.ubo_bindings[u].bound = false; - this->pipeline_state.ubo_bindings[u].ubo = NULL; + this->pipeline_state.ubo_bindings[u].ubo = nullptr; } } @@ -373,6 +708,757 @@ void MTLContext::set_scissor_enabled(bool scissor_enabled) /** \} */ /* -------------------------------------------------------------------- */ +/** \name Command Encoder and pipeline state + * These utilities ensure that all of the globally bound resources and state have been + * correctly encoded within the current RenderCommandEncoder. 
This involves managing + buffer bindings, texture bindings, depth stencil state and dynamic pipeline state. + * + * We will also trigger compilation of new PSOs where the input state has changed + * and is required. + * All of this setup is required in order to perform a valid draw call. + * \{ */ + +bool MTLContext::ensure_render_pipeline_state(MTLPrimitiveType mtl_prim_type) +{ + BLI_assert(this->pipeline_state.initialised); + + /* Check if an active shader is bound. */ + if (!this->pipeline_state.active_shader) { + MTL_LOG_WARNING("No Metal shader for bound GL shader\n"); + return false; + } + + /* Also ensure active shader is valid. */ + if (!this->pipeline_state.active_shader->is_valid()) { + MTL_LOG_WARNING( + "Bound active shader is not valid (Missing/invalid implementation for Metal).\n"); + return false; + } + + /* Apply global state. */ + this->state_manager->apply_state(); + + /* Main command buffer tracks the current state of the render pass, based on bound + * MTLFrameBuffer. */ + MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state(); + + /* Debug Check: Ensure Framebuffer instance is not dirty. */ + BLI_assert(!this->main_command_buffer.get_active_framebuffer()->get_dirty()); + + /* Fetch shader interface. */ + MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface(); + if (shader_interface == nullptr) { + MTL_LOG_WARNING("Bound active shader does not have a valid shader interface!\n"); + return false; + } + + /* Fetch shader and bake valid PipelineStateObject (PSO) based on current + * shader and state combination. This PSO represents the final GPU-executable + * permutation of the shader. */ + MTLRenderPipelineStateInstance *pipeline_state_instance = + this->pipeline_state.active_shader->bake_current_pipeline_state( + this, mtl_prim_type_to_topology_class(mtl_prim_type)); + if (!pipeline_state_instance) { + MTL_LOG_ERROR("Failed to bake Metal pipeline state for shader: %s\n", + shader_interface->get_name()); + return false; + } + + bool result = false; + if (pipeline_state_instance->pso) { + + /* Fetch render command encoder. A render pass should already be active. + * This will be NULL if invalid. */ + id<MTLRenderCommandEncoder> rec = + this->main_command_buffer.get_active_render_command_encoder(); + BLI_assert(rec); + if (rec == nil) { + MTL_LOG_ERROR("ensure_render_pipeline_state called while render pass is not active.\n"); + return false; + } + + /* Bind Render Pipeline State. */ + BLI_assert(pipeline_state_instance->pso); + if (rps.bound_pso != pipeline_state_instance->pso) { + [rec setRenderPipelineState:pipeline_state_instance->pso]; + rps.bound_pso = pipeline_state_instance->pso; + } + + /** Ensure resource bindings. */ + /* Texture Bindings. */ + /* We will iterate through all texture bindings on the context and determine if any of the + * active slots match those in our shader interface. If so, textures will be bound. */ + if (shader_interface->get_total_textures() > 0) { + this->ensure_texture_bindings(rec, shader_interface, pipeline_state_instance); + } + + /* Transform feedback buffer binding. */ + /* TODO(Metal): Include this code once MTLVertBuf is merged. We bind the vertex buffer to which + * transform feedback data will be written.
*/ + // GPUVertBuf *tf_vbo = + // this->pipeline_state.active_shader->get_transform_feedback_active_buffer(); + // if (tf_vbo != nullptr && pipeline_state_instance->transform_feedback_buffer_index >= 0) { + + // /* Ensure primitive type is either GPU_LINES, GPU_TRIANGLES or GPU_POINT */ + // BLI_assert(mtl_prim_type == MTLPrimitiveTypeLine || + // mtl_prim_type == MTLPrimitiveTypeTriangle || + // mtl_prim_type == MTLPrimitiveTypePoint); + + // /* Fetch active transform feedback buffer from vertbuf */ + // MTLVertBuf *tf_vbo_mtl = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(tf_vbo)); + // int tf_buffer_offset = 0; + // id<MTLBuffer> tf_buffer_mtl = tf_vbo_mtl->get_metal_buffer(&tf_buffer_offset); + + // if (tf_buffer_mtl != nil && tf_buffer_offset >= 0) { + // [rec setVertexBuffer:tf_buffer_mtl + // offset:tf_buffer_offset + // atIndex:pipeline_state_instance->transform_feedback_buffer_index]; + // printf("Successfully bound VBO: %p for transform feedback (MTL Buffer: %p)\n", + // tf_vbo_mtl, + // tf_buffer_mtl); + // } + // } + + /* Matrix Bindings. */ + /* This is now called upon shader bind. We may need to re-evaluate this though, + * as was done here to ensure uniform changes between draws were tracked. + * NOTE(Metal): We may be able to remove this. */ + GPU_matrix_bind(reinterpret_cast<struct GPUShader *>( + static_cast<Shader *>(this->pipeline_state.active_shader))); + + /* Bind Uniforms */ + this->ensure_uniform_buffer_bindings(rec, shader_interface, pipeline_state_instance); + + /* Bind Null attribute buffer, if needed. */ + if (pipeline_state_instance->null_attribute_buffer_index >= 0) { + if (G.debug & G_DEBUG_GPU) { + MTL_LOG_INFO("Binding null attribute buffer at index: %d\n", + pipeline_state_instance->null_attribute_buffer_index); + } + rps.bind_vertex_buffer(this->get_null_attribute_buffer(), + 0, + pipeline_state_instance->null_attribute_buffer_index); + } + + /** Dynamic Per-draw Render State on RenderCommandEncoder. */ + /* State: Viewport. */ + if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_VIEWPORT_FLAG) { + MTLViewport viewport; + viewport.originX = (double)this->pipeline_state.viewport_offset_x; + viewport.originY = (double)this->pipeline_state.viewport_offset_y; + viewport.width = (double)this->pipeline_state.viewport_width; + viewport.height = (double)this->pipeline_state.viewport_height; + viewport.znear = this->pipeline_state.depth_stencil_state.depth_range_near; + viewport.zfar = this->pipeline_state.depth_stencil_state.depth_range_far; + [rec setViewport:viewport]; + + this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags & + ~MTL_PIPELINE_STATE_VIEWPORT_FLAG); + } + + /* State: Scissor. */ + if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_SCISSOR_FLAG) { + + /* Get FrameBuffer associated with active RenderCommandEncoder. */ + MTLFrameBuffer *render_fb = this->main_command_buffer.get_active_framebuffer(); + + MTLScissorRect scissor; + if (this->pipeline_state.scissor_enabled) { + scissor.x = this->pipeline_state.scissor_x; + scissor.y = this->pipeline_state.scissor_y; + scissor.width = this->pipeline_state.scissor_width; + scissor.height = this->pipeline_state.scissor_height; + + /* Some scissor assignments exceed the bounds of the viewport due to implicitly added + * padding to the width/height - Clamp width/height. 
*/ + BLI_assert(scissor.x >= 0 && scissor.x < render_fb->get_width()); + BLI_assert(scissor.y >= 0 && scissor.y < render_fb->get_height()); + scissor.width = min_ii(scissor.width, render_fb->get_width() - scissor.x); + scissor.height = min_ii(scissor.height, render_fb->get_height() - scissor.y); + BLI_assert(scissor.width > 0 && (scissor.x + scissor.width <= render_fb->get_width())); + BLI_assert(scissor.height > 0 && (scissor.y + scissor.height <= render_fb->get_height())); + } + else { + /* Scissor is disabled, reset to default size as scissor state may have been previously + * assigned on this encoder. */ + scissor.x = 0; + scissor.y = 0; + scissor.width = render_fb->get_width(); + scissor.height = render_fb->get_height(); + } + + /* Scissor state can still be flagged as changed if it is toggled on and off, without + * parameters changing between draws. */ + if (memcmp(&scissor, &rps.last_scissor_rect, sizeof(MTLScissorRect))) { + [rec setScissorRect:scissor]; + rps.last_scissor_rect = scissor; + } + this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags & + ~MTL_PIPELINE_STATE_SCISSOR_FLAG); + } + + /* State: Face winding. */ + if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_FRONT_FACING_FLAG) { + /* We need to invert the face winding in Metal, to account for the inverted-Y coordinate + * system. */ + MTLWinding winding = (this->pipeline_state.front_face == GPU_CLOCKWISE) ? + MTLWindingClockwise : + MTLWindingCounterClockwise; + [rec setFrontFacingWinding:winding]; + this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags & + ~MTL_PIPELINE_STATE_FRONT_FACING_FLAG); + } + + /* State: cull-mode. */ + if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_CULLMODE_FLAG) { + + MTLCullMode mode = MTLCullModeNone; + if (this->pipeline_state.culling_enabled) { + switch (this->pipeline_state.cull_mode) { + case GPU_CULL_NONE: + mode = MTLCullModeNone; + break; + case GPU_CULL_FRONT: + mode = MTLCullModeFront; + break; + case GPU_CULL_BACK: + mode = MTLCullModeBack; + break; + default: + BLI_assert_unreachable(); + break; + } + } + [rec setCullMode:mode]; + this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags & + ~MTL_PIPELINE_STATE_CULLMODE_FLAG); + } + + /* Pipeline state is now good. */ + result = true; + } + return result; +} + +/* Bind uniform buffers to an active render command encoder using the rendering state of the + * current context (active shader, bound UBOs). */ +bool MTLContext::ensure_uniform_buffer_bindings( + id<MTLRenderCommandEncoder> rec, + const MTLShaderInterface *shader_interface, + const MTLRenderPipelineStateInstance *pipeline_state_instance) +{ + /* Fetch Render Pass state. */ + MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state(); + + /* Shader-owned push constant block for uniforms. */ + bool active_shader_changed = (rps.last_bound_shader_state.shader_ != + this->pipeline_state.active_shader || + rps.last_bound_shader_state.shader_ == nullptr || + rps.last_bound_shader_state.pso_index_ != + pipeline_state_instance->shader_pso_index); + + const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block(); + if (push_constant_block.size > 0) { + + /* Fetch uniform buffer base binding index from pipeline_state_instance - The buffer index + * will be offset by the number of bound VBOs.
*/ + uint32_t block_size = push_constant_block.size; + uint32_t buffer_index = pipeline_state_instance->base_uniform_buffer_index + + push_constant_block.buffer_index; + + /* Only need to rebind block if push constants have been modified -- or if no data is bound for + * the current RenderCommandEncoder. */ + if (this->pipeline_state.active_shader->get_push_constant_is_dirty() || + active_shader_changed || !rps.cached_vertex_buffer_bindings[buffer_index].is_bytes || + !rps.cached_fragment_buffer_bindings[buffer_index].is_bytes || true) { + + /* Bind push constant data. */ + BLI_assert(this->pipeline_state.active_shader->get_push_constant_data() != nullptr); + rps.bind_vertex_bytes( + this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index); + rps.bind_fragment_bytes( + this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index); + + /* Only need to rebind block if it has been modified. */ + this->pipeline_state.active_shader->push_constant_bindstate_mark_dirty(false); + } + } + rps.last_bound_shader_state.set(this->pipeline_state.active_shader, + pipeline_state_instance->shader_pso_index); + + /* Bind Global GPUUniformBuffers */ + /* Iterate through expected UBOs in the shader interface, and check if the globally bound ones + * match. This is used to support the gpu_uniformbuffer module, where the uniform data is global, + * and not owned by the shader instance. */ + for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) { + const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index); + + if (ubo.buffer_index >= 0) { + + /* Uniform Buffer index offset by 1 as the first shader buffer binding slot is reserved for + * the uniform PushConstantBlock. */ + const uint32_t buffer_index = ubo.buffer_index + 1; + int ubo_offset = 0; + id<MTLBuffer> ubo_buffer = nil; + int ubo_size = 0; + + bool bind_dummy_buffer = false; + if (this->pipeline_state.ubo_bindings[ubo_index].bound) { + + /* Fetch UBO global-binding properties from slot. */ + ubo_offset = 0; + ubo_buffer = this->pipeline_state.ubo_bindings[ubo_index].ubo->get_metal_buffer( + &ubo_offset); + ubo_size = this->pipeline_state.ubo_bindings[ubo_index].ubo->get_size(); + + /* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid + * allocating zero buffers. */ + if (ubo_buffer == nil) { + bind_dummy_buffer = true; + } + else { + BLI_assert(ubo_buffer != nil); + BLI_assert(ubo_size > 0); + + if (pipeline_state_instance->reflection_data_available) { + /* NOTE: While the vertex and fragment stages have different UBOs, the indices in each + * case will be the same for the same UBO. + * We also determine expected size and then ensure buffer of the correct size + * exists in one of the vertex/fragment shader binding tables. This path is used + * to verify that the size of the bound UBO matches what is expected in the shader. */ + uint32_t expected_size = + (buffer_index < + pipeline_state_instance->buffer_bindings_reflection_data_vert.size()) ? + pipeline_state_instance->buffer_bindings_reflection_data_vert[buffer_index] + .size : + 0; + if (expected_size == 0) { + expected_size = + (buffer_index < + pipeline_state_instance->buffer_bindings_reflection_data_frag.size()) ? 
+ pipeline_state_instance->buffer_bindings_reflection_data_frag[buffer_index] + .size : + 0; + } + BLI_assert_msg( + expected_size > 0, + "Shader interface expects UBO, but shader reflection data reports that it " + "is not present"); + + /* If ubo size is smaller than the size expected by the shader, we need to bind the + * dummy buffer, which will be big enough, to avoid an OOB error. */ + if (ubo_size < expected_size) { + MTL_LOG_INFO( + "[Error][UBO] UBO (UBO Name: %s) bound at index: %d with size %d (Expected size " + "%d) (Shader Name: %s) is too small -- binding NULL buffer. This is likely an " + "over-binding, which is not used, but we need this to avoid validation " + "issues\n", + shader_interface->get_name_at_offset(ubo.name_offset), + buffer_index, + ubo_size, + expected_size, + shader_interface->get_name()); + bind_dummy_buffer = true; + } + } + } + } + else { + MTL_LOG_INFO( + "[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- but " + "nothing was bound -- binding dummy buffer\n", + shader_interface->get_name(), + shader_interface->get_name_at_offset(ubo.name_offset), + buffer_index); + bind_dummy_buffer = true; + } + + if (bind_dummy_buffer) { + /* Perform Dummy binding. */ + ubo_offset = 0; + ubo_buffer = this->get_null_buffer(); + ubo_size = [ubo_buffer length]; + } + + if (ubo_buffer != nil) { + + uint32_t buffer_bind_index = pipeline_state_instance->base_uniform_buffer_index + + buffer_index; + + /* Bind Vertex UBO. */ + if (bool(ubo.stage_mask & ShaderStage::VERTEX)) { + BLI_assert(buffer_bind_index >= 0 && + buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + rps.bind_vertex_buffer(ubo_buffer, ubo_offset, buffer_bind_index); + } + + /* Bind Fragment UBOs. */ + if (bool(ubo.stage_mask & ShaderStage::FRAGMENT)) { + BLI_assert(buffer_bind_index >= 0 && + buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + rps.bind_fragment_buffer(ubo_buffer, ubo_offset, buffer_bind_index); + } + } + else { + MTL_LOG_WARNING( + "[UBO] Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer " + "is NULL!\n", + shader_interface->get_name(), + shader_interface->get_name_at_offset(ubo.name_offset), + buffer_index); + } + } + } + return true; +} + +/* Ensure texture bindings are correct and up to date for current draw call. */ +void MTLContext::ensure_texture_bindings( + id<MTLRenderCommandEncoder> rec, + MTLShaderInterface *shader_interface, + const MTLRenderPipelineStateInstance *pipeline_state_instance) +{ + BLI_assert(shader_interface != nil); + BLI_assert(rec != nil); + + /* Fetch Render Pass state. */ + MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state(); + + @autoreleasepool { + int vertex_arg_buffer_bind_index = -1; + int fragment_arg_buffer_bind_index = -1; + + /* Argument buffers are used for samplers, when the limit of 16 is exceeded. */ + bool use_argument_buffer_for_samplers = shader_interface->get_use_argument_buffer_for_samplers( + &vertex_arg_buffer_bind_index, &fragment_arg_buffer_bind_index); + + /* Loop through expected textures in shader interface and resolve bindings with currently + * bound textures.. */ + for (const uint t : IndexRange(shader_interface->get_max_texture_index() + 1)) { + /* Ensure the bound texture is compatible with the shader interface. If the + * shader does not expect a texture to be bound for the current slot, we skip + * binding. + * NOTE: Global texture bindings may be left over from prior draw calls. 
*/ + const MTLShaderTexture &shader_texture_info = shader_interface->get_texture(t); + if (!shader_texture_info.used) { + /* Skip unused binding points if explicit indices are specified. */ + continue; + } + + int slot = shader_texture_info.slot_index; + if (slot >= 0 && slot < GPU_max_textures()) { + bool bind_dummy_texture = true; + if (this->pipeline_state.texture_bindings[slot].used) { + gpu::MTLTexture *bound_texture = + this->pipeline_state.texture_bindings[slot].texture_resource; + MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot]; + BLI_assert(bound_texture); + BLI_assert(bound_sampler.used); + + if (shader_texture_info.type == bound_texture->type_) { + /* Bind texture and sampler if the bound texture matches the type expected by the + * shader. */ + id<MTLTexture> tex = bound_texture->get_metal_handle(); + + if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) { + rps.bind_vertex_texture(tex, slot); + rps.bind_vertex_sampler(bound_sampler, use_argument_buffer_for_samplers, slot); + } + + if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) { + rps.bind_fragment_texture(tex, slot); + rps.bind_fragment_sampler(bound_sampler, use_argument_buffer_for_samplers, slot); + } + + /* Texture state resolved, no need to bind dummy texture */ + bind_dummy_texture = false; + } + else { + /* Texture type for bound texture (e.g. Texture2DArray) does not match what was + * expected in the shader interface. This is a problem and we will need to bind + * a dummy texture to ensure correct API usage. */ + MTL_LOG_WARNING( + "(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong " + "texture target type. (Expecting type %d, actual type %d) (binding " + "name:'%s')(texture name:'%s')\n", + shader_interface->get_name(), + bound_texture, + slot, + shader_texture_info.type, + bound_texture->type_, + shader_interface->get_name_at_offset(shader_texture_info.name_offset), + bound_texture->get_name()); + } + } + else { + MTL_LOG_WARNING( + "Shader '%s' expected texture to be bound to slot %d -- No texture was " + "bound. (name:'%s')\n", + shader_interface->get_name(), + slot, + shader_interface->get_name_at_offset(shader_texture_info.name_offset)); + } + + /* Bind Dummy texture -- will temporarily resolve validation issues while incorrect formats + * are provided -- as certain configurations may not need any binding. These issues should + * be fixed in the high-level, if problems crop up. */ + if (bind_dummy_texture) { + if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) { + rps.bind_vertex_texture( + get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot); + + /* Bind default sampler state. */ + MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE}; + rps.bind_vertex_sampler(default_binding, use_argument_buffer_for_samplers, slot); + } + if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) { + rps.bind_fragment_texture( + get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot); + + /* Bind default sampler state. */ + MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE}; + rps.bind_fragment_sampler(default_binding, use_argument_buffer_for_samplers, slot); + } + } + } + else { + MTL_LOG_WARNING( + "Shader %p expected texture to be bound to slot %d -- Slot exceeds the " + "hardware/API limit of '%d'. 
(name:'%s')\n", + this->pipeline_state.active_shader, + slot, + GPU_max_textures(), + shader_interface->get_name_at_offset(shader_texture_info.name_offset)); + } + } + + /* Construct and Bind argument buffer. + * NOTE(Metal): Samplers use an argument buffer when the limit of 16 samplers is exceeded. */ + if (use_argument_buffer_for_samplers) { +#ifndef NDEBUG + /* Debug check to validate each expected texture in the shader interface has a valid + * sampler object bound to the context. We will need all of these to be valid + * when constructing the sampler argument buffer. */ + for (const uint i : IndexRange(shader_interface->get_max_texture_index() + 1)) { + const MTLShaderTexture &texture = shader_interface->get_texture(i); + if (texture.used) { + BLI_assert(this->samplers_.mtl_sampler[i] != nil); + } + } +#endif + + /* Check to ensure the buffer binding index for the argument buffer has been assigned. + * This PSO property will be set if we expect to use argument buffers, and the shader + * uses any amount of textures. */ + BLI_assert(vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0); + if (vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0) { + /* Offset binding index to be relative to the start of static uniform buffer binding slots. + * The first N slots, prior to `pipeline_state_instance->base_uniform_buffer_index` are + * used by vertex and index buffer bindings, and the number of buffers present will vary + * between PSOs. */ + int arg_buffer_idx = (pipeline_state_instance->base_uniform_buffer_index + + vertex_arg_buffer_bind_index); + assert(arg_buffer_idx < 32); + id<MTLArgumentEncoder> argument_encoder = shader_interface->find_argument_encoder( + arg_buffer_idx); + if (argument_encoder == nil) { + argument_encoder = [pipeline_state_instance->vert + newArgumentEncoderWithBufferIndex:arg_buffer_idx]; + shader_interface->insert_argument_encoder(arg_buffer_idx, argument_encoder); + } + + /* Generate or Fetch argument buffer sampler configuration. + * NOTE(Metal): we need to base sampler counts off of the maximal texture + * index. This is not the most optimal, but in practice, not a use-case + * when argument buffers are required. + * This is because with explicit texture indices, the binding indices + * should match across draws, to allow the high-level to optimize bind-points. */ + gpu::MTLBuffer *encoder_buffer = nullptr; + this->samplers_.num_samplers = shader_interface->get_max_texture_index() + 1; + + gpu::MTLBuffer **cached_smp_buffer_search = this->cached_sampler_buffers_.lookup_ptr( + this->samplers_); + if (cached_smp_buffer_search != nullptr) { + encoder_buffer = *cached_smp_buffer_search; + } + else { + /* Populate argument buffer with current global sampler bindings. */ + int size = [argument_encoder encodedLength]; + int alignment = max_uu([argument_encoder alignment], 256); + int size_align_delta = (size % alignment); + int aligned_alloc_size = ((alignment > 1) && (size_align_delta > 0)) ? + size + (alignment - (size % alignment)) : + size; + + /* Allocate buffer to store encoded sampler arguments. 
*/ + encoder_buffer = MTLContext::get_global_memory_manager().allocate(aligned_alloc_size, + true); + BLI_assert(encoder_buffer); + BLI_assert(encoder_buffer->get_metal_buffer()); + [argument_encoder setArgumentBuffer:encoder_buffer->get_metal_buffer() offset:0]; + [argument_encoder + setSamplerStates:this->samplers_.mtl_sampler + withRange:NSMakeRange(0, shader_interface->get_max_texture_index() + 1)]; + encoder_buffer->flush(); + + /* Insert into cache. */ + this->cached_sampler_buffers_.add_new(this->samplers_, encoder_buffer); + } + + BLI_assert(encoder_buffer != nullptr); + int vert_buffer_index = (pipeline_state_instance->base_uniform_buffer_index + + vertex_arg_buffer_bind_index); + rps.bind_vertex_buffer(encoder_buffer->get_metal_buffer(), 0, vert_buffer_index); + + /* Fragment shader shares its argument buffer binding with the vertex shader, So no need to + * re-encode. We can use the same argument buffer. */ + if (fragment_arg_buffer_bind_index >= 0) { + BLI_assert(fragment_arg_buffer_bind_index); + int frag_buffer_index = (pipeline_state_instance->base_uniform_buffer_index + + fragment_arg_buffer_bind_index); + rps.bind_fragment_buffer(encoder_buffer->get_metal_buffer(), 0, frag_buffer_index); + } + } + } + } +} + +/* Encode latest depth-stencil state. */ +void MTLContext::ensure_depth_stencil_state(MTLPrimitiveType prim_type) +{ + /* Check if we need to update state. */ + if (!(this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG)) { + return; + } + + /* Fetch render command encoder. */ + id<MTLRenderCommandEncoder> rec = this->main_command_buffer.get_active_render_command_encoder(); + BLI_assert(rec); + + /* Fetch Render Pass state. */ + MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state(); + + /** Prepare Depth-stencil state based on current global pipeline state. */ + MTLFrameBuffer *fb = this->get_current_framebuffer(); + bool hasDepthTarget = fb->has_depth_attachment(); + bool hasStencilTarget = fb->has_stencil_attachment(); + + if (hasDepthTarget || hasStencilTarget) { + /* Update FrameBuffer State. */ + this->pipeline_state.depth_stencil_state.has_depth_target = hasDepthTarget; + this->pipeline_state.depth_stencil_state.has_stencil_target = hasStencilTarget; + + /* Check if current MTLContextDepthStencilState maps to an existing state object in + * the Depth-stencil state cache. */ + id<MTLDepthStencilState> ds_state = nil; + id<MTLDepthStencilState> *depth_stencil_state_lookup = + this->depth_stencil_state_cache.lookup_ptr(this->pipeline_state.depth_stencil_state); + + /* If not, populate DepthStencil state descriptor. */ + if (depth_stencil_state_lookup == nullptr) { + + MTLDepthStencilDescriptor *ds_state_desc = [[[MTLDepthStencilDescriptor alloc] init] + autorelease]; + + if (hasDepthTarget) { + ds_state_desc.depthWriteEnabled = + this->pipeline_state.depth_stencil_state.depth_write_enable; + ds_state_desc.depthCompareFunction = + this->pipeline_state.depth_stencil_state.depth_test_enabled ? 
+ this->pipeline_state.depth_stencil_state.depth_function : + MTLCompareFunctionAlways; + } + + if (hasStencilTarget) { + ds_state_desc.backFaceStencil.readMask = + this->pipeline_state.depth_stencil_state.stencil_read_mask; + ds_state_desc.backFaceStencil.writeMask = + this->pipeline_state.depth_stencil_state.stencil_write_mask; + ds_state_desc.backFaceStencil.stencilFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_stencil_fail; + ds_state_desc.backFaceStencil.depthFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_depth_fail; + ds_state_desc.backFaceStencil.depthStencilPassOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_depthstencil_pass; + ds_state_desc.backFaceStencil.stencilCompareFunction = + (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ? + this->pipeline_state.depth_stencil_state.stencil_func : + MTLCompareFunctionAlways; + + ds_state_desc.frontFaceStencil.readMask = + this->pipeline_state.depth_stencil_state.stencil_read_mask; + ds_state_desc.frontFaceStencil.writeMask = + this->pipeline_state.depth_stencil_state.stencil_write_mask; + ds_state_desc.frontFaceStencil.stencilFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_stencil_fail; + ds_state_desc.frontFaceStencil.depthFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_depth_fail; + ds_state_desc.frontFaceStencil.depthStencilPassOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_depthstencil_pass; + ds_state_desc.frontFaceStencil.stencilCompareFunction = + (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ? + this->pipeline_state.depth_stencil_state.stencil_func : + MTLCompareFunctionAlways; + } + + /* Bake new DS state. */ + ds_state = [this->device newDepthStencilStateWithDescriptor:ds_state_desc]; + + /* Store state in cache. */ + BLI_assert(ds_state != nil); + this->depth_stencil_state_cache.add_new(this->pipeline_state.depth_stencil_state, ds_state); + } + else { + ds_state = *depth_stencil_state_lookup; + BLI_assert(ds_state != nil); + } + + /* Bind Depth Stencil State to render command encoder. */ + BLI_assert(ds_state != nil); + if (ds_state != nil) { + if (rps.bound_ds_state != ds_state) { + [rec setDepthStencilState:ds_state]; + rps.bound_ds_state = ds_state; + } + } + + /* Apply dynamic depth-stencil state on encoder. */ + if (hasStencilTarget) { + uint32_t stencil_ref_value = + (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ? + this->pipeline_state.depth_stencil_state.stencil_ref : + 0; + if (stencil_ref_value != rps.last_used_stencil_ref_value) { + [rec setStencilReferenceValue:stencil_ref_value]; + rps.last_used_stencil_ref_value = stencil_ref_value; + } + } + + if (hasDepthTarget) { + bool doBias = false; + switch (prim_type) { + case MTLPrimitiveTypeTriangle: + case MTLPrimitiveTypeTriangleStrip: + doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_tris; + break; + case MTLPrimitiveTypeLine: + case MTLPrimitiveTypeLineStrip: + doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_lines; + break; + case MTLPrimitiveTypePoint: + doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_points; + break; + } + [rec setDepthBias:(doBias) ? this->pipeline_state.depth_stencil_state.depth_bias : 0 + slopeScale:(doBias) ? 
this->pipeline_state.depth_stencil_state.depth_slope_scale : 0 + clamp:0]; + } + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name Visibility buffer control for MTLQueryPool. * \{ */ @@ -487,52 +1573,46 @@ id<MTLSamplerState> MTLContext::get_sampler_from_state(MTLSamplerState sampler_s id<MTLSamplerState> MTLContext::generate_sampler_from_state(MTLSamplerState sampler_state) { /* Check if sampler already exists for given state. */ - id<MTLSamplerState> st = sampler_state_cache_[(uint)sampler_state]; - if (st != nil) { - return st; - } - else { - MTLSamplerDescriptor *descriptor = [[MTLSamplerDescriptor alloc] init]; - descriptor.normalizedCoordinates = true; - - MTLSamplerAddressMode clamp_type = (sampler_state.state & GPU_SAMPLER_CLAMP_BORDER) ? - MTLSamplerAddressModeClampToBorderColor : - MTLSamplerAddressModeClampToEdge; - descriptor.rAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_R) ? - MTLSamplerAddressModeRepeat : - clamp_type; - descriptor.sAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_S) ? - MTLSamplerAddressModeRepeat : - clamp_type; - descriptor.tAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_T) ? - MTLSamplerAddressModeRepeat : - clamp_type; - descriptor.borderColor = MTLSamplerBorderColorTransparentBlack; - descriptor.minFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ? - MTLSamplerMinMagFilterLinear : - MTLSamplerMinMagFilterNearest; - descriptor.magFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ? - MTLSamplerMinMagFilterLinear : - MTLSamplerMinMagFilterNearest; - descriptor.mipFilter = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? - MTLSamplerMipFilterLinear : - MTLSamplerMipFilterNotMipmapped; - descriptor.lodMinClamp = -1000; - descriptor.lodMaxClamp = 1000; - float aniso_filter = max_ff(16, U.anisotropic_filter); - descriptor.maxAnisotropy = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? aniso_filter : 1; - descriptor.compareFunction = (sampler_state.state & GPU_SAMPLER_COMPARE) ? - MTLCompareFunctionLessEqual : - MTLCompareFunctionAlways; - descriptor.supportArgumentBuffers = true; - - id<MTLSamplerState> state = [this->device newSamplerStateWithDescriptor:descriptor]; - sampler_state_cache_[(uint)sampler_state] = state; - - BLI_assert(state != nil); - [descriptor autorelease]; - return state; - } + MTLSamplerDescriptor *descriptor = [[MTLSamplerDescriptor alloc] init]; + descriptor.normalizedCoordinates = true; + + MTLSamplerAddressMode clamp_type = (sampler_state.state & GPU_SAMPLER_CLAMP_BORDER) ? + MTLSamplerAddressModeClampToBorderColor : + MTLSamplerAddressModeClampToEdge; + descriptor.rAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_R) ? + MTLSamplerAddressModeRepeat : + clamp_type; + descriptor.sAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_S) ? + MTLSamplerAddressModeRepeat : + clamp_type; + descriptor.tAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_T) ? + MTLSamplerAddressModeRepeat : + clamp_type; + descriptor.borderColor = MTLSamplerBorderColorTransparentBlack; + descriptor.minFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ? + MTLSamplerMinMagFilterLinear : + MTLSamplerMinMagFilterNearest; + descriptor.magFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ? + MTLSamplerMinMagFilterLinear : + MTLSamplerMinMagFilterNearest; + descriptor.mipFilter = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? 
+ MTLSamplerMipFilterLinear : + MTLSamplerMipFilterNotMipmapped; + descriptor.lodMinClamp = -1000; + descriptor.lodMaxClamp = 1000; + float aniso_filter = max_ff(16, U.anisotropic_filter); + descriptor.maxAnisotropy = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? aniso_filter : 1; + descriptor.compareFunction = (sampler_state.state & GPU_SAMPLER_COMPARE) ? + MTLCompareFunctionLessEqual : + MTLCompareFunctionAlways; + descriptor.supportArgumentBuffers = true; + + id<MTLSamplerState> state = [this->device newSamplerStateWithDescriptor:descriptor]; + sampler_state_cache_[(uint)sampler_state] = state; + + BLI_assert(state != nil); + [descriptor autorelease]; + return state; } id<MTLSamplerState> MTLContext::get_default_sampler_state() @@ -545,4 +1625,148 @@ id<MTLSamplerState> MTLContext::get_default_sampler_state() /** \} */ +/* -------------------------------------------------------------------- */ +/** \name Swap-chain management and Metal presentation. + * \{ */ + +void present(MTLRenderPassDescriptor *blit_descriptor, + id<MTLRenderPipelineState> blit_pso, + id<MTLTexture> swapchain_texture, + id<CAMetalDrawable> drawable) +{ + + MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(ctx); + + /* Flush any outstanding work. */ + ctx->flush(); + + /* Always pace CPU to maximum of 3 drawables in flight. + * nextDrawable may have more in flight if backing swapchain + * textures are re-allocated, such as during resize events. + * + * Determine frames in flight based on current latency. If + * we are in a high-latency situation, limit frames in flight + * to increase app responsiveness and keep GPU execution under control. + * If latency improves, increase frames in flight to improve overall + * performance. */ + int perf_max_drawables = MTL_MAX_DRAWABLES; + if (MTLContext::avg_drawable_latency_us > 185000) { + perf_max_drawables = 1; + } + else if (MTLContext::avg_drawable_latency_us > 85000) { + perf_max_drawables = 2; + } + + while (MTLContext::max_drawables_in_flight > min_ii(perf_max_drawables, MTL_MAX_DRAWABLES)) { + PIL_sleep_ms(2); + } + + /* Present is submitted in its own CMD Buffer to ensure drawable reference released as early as + * possible. This command buffer is separate as it does not utilize the global state + * for rendering as the main context does. */ + id<MTLCommandBuffer> cmdbuf = [ctx->queue commandBuffer]; + MTLCommandBufferManager::num_active_cmd_bufs++; + + if (MTLCommandBufferManager::sync_event != nil) { + /* Ensure command buffer ordering. */ + [cmdbuf encodeWaitForEvent:MTLCommandBufferManager::sync_event + value:MTLCommandBufferManager::event_signal_val]; + } + + /* Do Present Call and final Blit to MTLDrawable. */ + id<MTLRenderCommandEncoder> enc = [cmdbuf renderCommandEncoderWithDescriptor:blit_descriptor]; + [enc setRenderPipelineState:blit_pso]; + [enc setFragmentTexture:swapchain_texture atIndex:0]; + [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3]; + [enc endEncoding]; + + /* Present drawable. */ + BLI_assert(drawable); + [cmdbuf presentDrawable:drawable]; + + /* Ensure freed buffers have usage tracked against active CommandBuffer submissions. */ + MTLSafeFreeList *cmd_free_buffer_list = + MTLContext::get_global_memory_manager().get_current_safe_list(); + BLI_assert(cmd_free_buffer_list); + + id<MTLCommandBuffer> cmd_buffer_ref = cmdbuf; + [cmd_buffer_ref retain]; + + /* Increment drawables in flight limiter.
*/ + MTLContext::max_drawables_in_flight++; + std::chrono::time_point submission_time = std::chrono::high_resolution_clock::now(); + + /* Increment free pool reference and decrement upon command buffer completion. */ + cmd_free_buffer_list->increment_reference(); + [cmdbuf addCompletedHandler:^(id<MTLCommandBuffer> cb) { + /* Flag freed buffers associated with this CMD buffer as ready to be freed. */ + cmd_free_buffer_list->decrement_reference(); + [cmd_buffer_ref release]; + + /* Decrement count */ + MTLCommandBufferManager::num_active_cmd_bufs--; + MTL_LOG_INFO("[Metal] Active command buffers: %d\n", + MTLCommandBufferManager::num_active_cmd_bufs); + + /* Drawable count and latency management. */ + MTLContext::max_drawables_in_flight--; + std::chrono::time_point completion_time = std::chrono::high_resolution_clock::now(); + int64_t microseconds_per_frame = std::chrono::duration_cast<std::chrono::microseconds>( + completion_time - submission_time) + .count(); + MTLContext::latency_resolve_average(microseconds_per_frame); + + MTL_LOG_INFO("Frame Latency: %f ms (Rolling avg: %f ms Drawables: %d)\n", + ((float)microseconds_per_frame) / 1000.0f, + ((float)MTLContext::avg_drawable_latency_us) / 1000.0f, + perf_max_drawables); + }]; + + if (MTLCommandBufferManager::sync_event == nil) { + MTLCommandBufferManager::sync_event = [ctx->device newEvent]; + BLI_assert(MTLCommandBufferManager::sync_event); + [MTLCommandBufferManager::sync_event retain]; + } + BLI_assert(MTLCommandBufferManager::sync_event != nil); + + MTLCommandBufferManager::event_signal_val++; + [cmdbuf encodeSignalEvent:MTLCommandBufferManager::sync_event + value:MTLCommandBufferManager::event_signal_val]; + + [cmdbuf commit]; + + /* When debugging, fetch advanced command buffer errors. */ + if (G.debug & G_DEBUG_GPU) { + [cmdbuf waitUntilCompleted]; + NSError *error = [cmdbuf error]; + if (error != nil) { + NSLog(@"%@", error); + BLI_assert(false); + + @autoreleasepool { + const char *stringAsChar = [[NSString stringWithFormat:@"%@", error] UTF8String]; + + std::ofstream outfile; + outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app); + outfile << stringAsChar; + outfile.close(); + } + } + else { + @autoreleasepool { + NSString *str = @"Command buffer completed successfully!\n"; + const char *stringAsChar = [str UTF8String]; + + std::ofstream outfile; + outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app); + outfile << stringAsChar; + outfile.close(); + } + } + } +} + +/** \} */ + } // blender::gpu
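
NOTE: In ensure_uniform_buffer_bindings() above, the 4096-byte zero-filled buffer from get_null_buffer() is substituted whenever a UBO slot has nothing valid bound, or the bound buffer is smaller than the size reported by shader reflection; binding an undersized buffer would otherwise trip Metal's validation layer at draw time. The decision logic in miniature -- needs_dummy_ubo is a hypothetical standalone helper for illustration, not part of this patch:

#include <cstdint>

/* Returns true when the zero-filled fallback buffer should replace the UBO binding. */
static bool needs_dummy_ubo(bool slot_bound,
                            const void *bound_buffer,
                            uint32_t bound_size,
                            uint32_t expected_size)
{
  if (!slot_bound || bound_buffer == nullptr) {
    /* Nothing (valid) is bound at the slot the shader interface expects. */
    return true;
  }
  /* An undersized buffer would produce an out-of-bounds access at draw time. */
  return bound_size < expected_size;
}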
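NOTE: The sampler argument-buffer allocation in ensure_texture_bindings() rounds the encoder's encodedLength up to its required alignment (clamped to a minimum of 256 bytes). The ternary used there is the usual align-up idiom; a self-contained restatement with compile-time checks:

constexpr int align_up(int size, int alignment)
{
  return ((alignment > 1) && (size % alignment) > 0) ? size + (alignment - (size % alignment)) :
                                                       size;
}

static_assert(align_up(72, 256) == 256, "a partial block rounds up to one alignment unit");
static_assert(align_up(512, 256) == 512, "already-aligned sizes are returned unchanged");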
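NOTE: The drawable pacing in present() is a back-pressure scheme: an atomic in-flight counter is incremented at submission, decremented in the command buffer's completion handler, and the submitting thread sleeps while the count exceeds the latency-derived budget. The same pattern in a minimal standalone form, with a detached thread standing in for the GPU completion callback (illustrative only, not backend code):

#include <atomic>
#include <chrono>
#include <thread>

static std::atomic<int> drawables_in_flight = 0;

static void submit_frame(int perf_max_drawables)
{
  /* Pace the CPU: stall while too many frames are still executing on the GPU. */
  while (drawables_in_flight >= perf_max_drawables) {
    std::this_thread::sleep_for(std::chrono::milliseconds(2)); /* Mirrors PIL_sleep_ms(2). */
  }

  drawables_in_flight++;
  /* Encoding and commit would happen here; the completion handler later runs: */
  std::thread([] {
    std::this_thread::sleep_for(std::chrono::milliseconds(16)); /* Stand-in for GPU work. */
    drawables_in_flight--;
  }).detach();
}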
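NOTE: MTLContext::latency_resolve_average() is called from the completion handler above, but its definition falls outside this file's hunks. A minimal sketch of a rolling mean consistent with the frame_latency[MTL_FRAME_AVERAGE_COUNT] ring and the avg_drawable_latency_us atomic declared at the top of the file -- the window size and free-function form here are assumptions, not the committed implementation:

#include <atomic>
#include <cstdint>

static constexpr int MTL_FRAME_AVERAGE_COUNT = 5; /* Assumed; the real constant lives elsewhere. */

static int64_t frame_latency[MTL_FRAME_AVERAGE_COUNT] = {0};
static std::atomic<int64_t> avg_drawable_latency_us = 0;

/* Record the latest frame time and refresh the rolling average that present()
 * compares against its 85 ms / 185 ms pacing thresholds. */
static void latency_resolve_average(int64_t frame_latency_us)
{
  static int cursor = 0;
  frame_latency[cursor] = frame_latency_us;
  cursor = (cursor + 1) % MTL_FRAME_AVERAGE_COUNT;

  int64_t sum = 0;
  for (int64_t sample : frame_latency) {
    sum += sample;
  }
  avg_drawable_latency_us = sum / MTL_FRAME_AVERAGE_COUNT;
}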