Diffstat (limited to 'source/blender/gpu/metal')
25 files changed, 4011 insertions, 179 deletions
diff --git a/source/blender/gpu/metal/mtl_backend.hh b/source/blender/gpu/metal/mtl_backend.hh index 214a5d738a9..082fab24ba4 100644 --- a/source/blender/gpu/metal/mtl_backend.hh +++ b/source/blender/gpu/metal/mtl_backend.hh @@ -63,7 +63,7 @@ class MTLBackend : public GPUBackend { /* MTL Allocators need to be implemented in separate .mm files, due to allocation of Objective-C * objects. */ - Context *context_alloc(void *ghost_window) override; + Context *context_alloc(void *ghost_window, void *ghost_context) override; Batch *batch_alloc() override; DrawList *drawlist_alloc(int list_length) override; FrameBuffer *framebuffer_alloc(const char *name) override; diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm index 3cd7794f6c9..240951c1ebd 100644 --- a/source/blender/gpu/metal/mtl_backend.mm +++ b/source/blender/gpu/metal/mtl_backend.mm @@ -8,12 +8,16 @@ #include "gpu_backend.hh" #include "mtl_backend.hh" +#include "mtl_batch.hh" #include "mtl_context.hh" +#include "mtl_drawlist.hh" #include "mtl_framebuffer.hh" +#include "mtl_immediate.hh" #include "mtl_index_buffer.hh" #include "mtl_query.hh" #include "mtl_shader.hh" #include "mtl_uniform_buffer.hh" +#include "mtl_vertex_buffer.hh" #include "gpu_capabilities_private.hh" #include "gpu_platform_private.hh" @@ -36,21 +40,19 @@ void MTLBackend::samplers_update(){ /* Placeholder -- Handled in MTLContext. */ }; -Context *MTLBackend::context_alloc(void *ghost_window) +Context *MTLBackend::context_alloc(void *ghost_window, void *ghost_context) { - return new MTLContext(ghost_window); + return new MTLContext(ghost_window, ghost_context); }; Batch *MTLBackend::batch_alloc() { - /* TODO(Metal): Implement MTLBatch. */ - return nullptr; + return new MTLBatch(); }; DrawList *MTLBackend::drawlist_alloc(int list_length) { - /* TODO(Metal): Implement MTLDrawList. */ - return nullptr; + return new MTLDrawList(list_length); }; FrameBuffer *MTLBackend::framebuffer_alloc(const char *name) @@ -94,8 +96,7 @@ StorageBuf *MTLBackend::storagebuf_alloc(int size, GPUUsageType usage, const cha VertBuf *MTLBackend::vertbuf_alloc() { - /* TODO(Metal): Implement MTLVertBuf. */ - return nullptr; + return new MTLVertBuf(); } void MTLBackend::render_begin() @@ -417,6 +418,7 @@ void MTLBackend::capabilities_init(MTLContext *ctx) GCaps.depth_blitting_workaround = false; GCaps.use_main_context_workaround = false; GCaps.broken_amd_driver = false; + GCaps.clear_viewport_workaround = true; /* Metal related workarounds. */ /* Minimum per-vertex stride is 4 bytes in Metal. diff --git a/source/blender/gpu/metal/mtl_batch.hh b/source/blender/gpu/metal/mtl_batch.hh new file mode 100644 index 00000000000..9e179e662b5 --- /dev/null +++ b/source/blender/gpu/metal/mtl_batch.hh @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * GPU geometry batch + * Contains VAOs + VBOs + Shader representing a drawable entity. + */ + +#pragma once + +#include "MEM_guardedalloc.h" +#include "gpu_batch_private.hh" +#include "mtl_index_buffer.hh" +#include "mtl_primitive.hh" +#include "mtl_shader.hh" +#include "mtl_vertex_buffer.hh" + +namespace blender::gpu { + +class MTLContext; +class MTLShaderInterface; + +#define GPU_VAO_STATIC_LEN 64 + +struct VertexBufferID { + uint32_t id : 16; + uint32_t is_instance : 15; + uint32_t used : 1; +}; + +class MTLBatch : public Batch { + + /* Vertex Bind-state Caching for a given shader interface used with the Batch. 
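+ * Each cached entry pairs a ShaderInterface with the vertex descriptor and buffer-binding
+ * list resolved for it, so repeated draws with an unchanged batch/shader pairing skip
+ * attribute re-resolution. Entries are invalidated in bulk by bumping the batch's cache
+ * life index whenever the batch is flagged dirty.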
*/ + struct VertexDescriptorShaderInterfacePair { + MTLVertexDescriptor vertex_descriptor{}; + const ShaderInterface *interface = nullptr; + uint16_t attr_mask{}; + int num_buffers{}; + VertexBufferID bufferIds[GPU_BATCH_VBO_MAX_LEN] = {}; + /* Cache life index compares a cache entry with the active MTLBatch state. + * This is initially set to the cache life index of MTLBatch. If the batch has been modified, + * this index is incremented to cheaply invalidate existing cache entries. */ + uint32_t cache_life_index = 0; + }; + + class MTLVertexDescriptorCache { + + private: + MTLBatch *batch_; + + VertexDescriptorShaderInterfacePair cache_[GPU_VAO_STATIC_LEN] = {}; + MTLContext *cache_context_ = nullptr; + uint32_t cache_life_index_ = 0; + + public: + MTLVertexDescriptorCache(MTLBatch *batch) : batch_(batch){}; + VertexDescriptorShaderInterfacePair *find(const ShaderInterface *interface); + bool insert(VertexDescriptorShaderInterfacePair &data); + + private: + void vertex_descriptor_cache_init(MTLContext *ctx); + void vertex_descriptor_cache_clear(); + void vertex_descriptor_cache_ensure(); + }; + + private: + MTLShader *active_shader_ = nullptr; + bool shader_in_use_ = false; + MTLVertexDescriptorCache vao_cache = {this}; + + /* Topology emulation. */ + gpu::MTLBuffer *emulated_topology_buffer_ = nullptr; + GPUPrimType emulated_topology_type_; + uint32_t topology_buffer_input_v_count_ = 0; + uint32_t topology_buffer_output_v_count_ = 0; + + public: + MTLBatch(){}; + ~MTLBatch(){}; + + void draw(int v_first, int v_count, int i_first, int i_count) override; + void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override + { + /* TODO(Metal): Support indirect draw commands. */ + } + void multi_draw_indirect(GPUStorageBuf *indirect_buf, + int count, + intptr_t offset, + intptr_t stride) override + { + /* TODO(Metal): Support indirect draw commands. */ + } + + /* Returns an initialized RenderComandEncoder for drawing if all is good. + * Otherwise, nil. */ + id<MTLRenderCommandEncoder> bind(uint v_first, uint v_count, uint i_first, uint i_count); + void unbind(); + + /* Convenience getters. */ + MTLIndexBuf *elem_() const + { + return static_cast<MTLIndexBuf *>(unwrap(elem)); + } + MTLVertBuf *verts_(const int index) const + { + return static_cast<MTLVertBuf *>(unwrap(verts[index])); + } + MTLVertBuf *inst_(const int index) const + { + return static_cast<MTLVertBuf *>(unwrap(inst[index])); + } + MTLShader *active_shader_get() const + { + return active_shader_; + } + + private: + void shader_bind(); + void draw_advanced(int v_first, int v_count, int i_first, int i_count); + int prepare_vertex_binding(MTLVertBuf *verts, + MTLRenderPipelineStateDescriptor &desc, + const MTLShaderInterface *interface, + uint16_t &attr_mask, + bool instanced); + + id<MTLBuffer> get_emulated_toplogy_buffer(GPUPrimType &in_out_prim_type, uint32_t &v_count); + + void prepare_vertex_descriptor_and_bindings( + MTLVertBuf **buffers, int &num_buffers, int v_first, int v_count, int i_first, int i_count); + + MEM_CXX_CLASS_ALLOC_FUNCS("MTLBatch"); +}; + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_batch.mm b/source/blender/gpu/metal/mtl_batch.mm new file mode 100644 index 00000000000..988fb9b793b --- /dev/null +++ b/source/blender/gpu/metal/mtl_batch.mm @@ -0,0 +1,998 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * Metal implementation of GPUBatch. 
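+ * Resolves a per-shader-interface vertex descriptor (cached in a small VAO-style cache),
+ * binds vertex/index buffers, then encodes draws onto the active render command encoder.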
+ */ + +#include "BLI_assert.h" +#include "BLI_span.hh" + +#include "BKE_global.h" + +#include "GPU_common.h" +#include "gpu_batch_private.hh" +#include "gpu_shader_private.hh" + +#include "mtl_batch.hh" +#include "mtl_context.hh" +#include "mtl_debug.hh" +#include "mtl_index_buffer.hh" +#include "mtl_shader.hh" +#include "mtl_vertex_buffer.hh" + +#include <string> + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Creation & Deletion + * \{ */ +void MTLBatch::draw(int v_first, int v_count, int i_first, int i_count) +{ + if (this->flag & GPU_BATCH_INVALID) { + this->shader_in_use_ = false; + } + this->draw_advanced(v_first, v_count, i_first, i_count); +} + +void MTLBatch::shader_bind() +{ + if (active_shader_ && active_shader_->is_valid()) { + active_shader_->bind(); + shader_in_use_ = true; + } +} + +void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_init(MTLContext *ctx) +{ + BLI_assert(ctx != nullptr); + this->vertex_descriptor_cache_clear(); + cache_context_ = ctx; +} + +void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_clear() +{ + cache_life_index_++; + cache_context_ = nullptr; +} + +void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_ensure() +{ + if (this->cache_context_ != nullptr) { + + /* Invalidate vertex descriptor bindings cache if batch has changed. */ + if (batch_->flag & GPU_BATCH_DIRTY) { + batch_->flag &= ~GPU_BATCH_DIRTY; + this->vertex_descriptor_cache_clear(); + } + } + + /* Initialize cache if not ready. */ + if (cache_context_ == nullptr) { + this->vertex_descriptor_cache_init(MTLContext::get()); + } +} + +MTLBatch::VertexDescriptorShaderInterfacePair *MTLBatch::MTLVertexDescriptorCache::find( + const ShaderInterface *interface) +{ + this->vertex_descriptor_cache_ensure(); + for (int i = 0; i < GPU_VAO_STATIC_LEN; ++i) { + if (cache_[i].interface == interface && cache_[i].cache_life_index == cache_life_index_) { + return &cache_[i]; + } + } + return nullptr; +} + +bool MTLBatch::MTLVertexDescriptorCache::insert( + MTLBatch::VertexDescriptorShaderInterfacePair &data) +{ + vertex_descriptor_cache_ensure(); + for (int i = 0; i < GPU_VAO_STATIC_LEN; ++i) { + if (cache_[i].interface == nullptr || cache_[i].cache_life_index != cache_life_index_) { + cache_[i] = data; + cache_[i].cache_life_index = cache_life_index_; + return true; + } + } + return false; +} + +int MTLBatch::prepare_vertex_binding(MTLVertBuf *verts, + MTLRenderPipelineStateDescriptor &desc, + const MTLShaderInterface *interface, + uint16_t &attr_mask, + bool instanced) +{ + + const GPUVertFormat *format = &verts->format; + /* Whether the current vertex buffer has been added to the buffer layout descriptor. */ + bool buffer_added = false; + /* Per-vertex stride of current vertex buffer. */ + int buffer_stride = format->stride; + /* Buffer binding index of the vertex buffer once added to the buffer layout descriptor. */ + int buffer_index = -1; + int attribute_offset = 0; + + if (!active_shader_->get_uses_ssbo_vertex_fetch()) { + BLI_assert( + buffer_stride >= 4 && + "In Metal, Vertex buffer stride should be 4. SSBO Vertex fetch is not affected by this"); + } + + /* Iterate over GPUVertBuf vertex format and find attributes matching those in the active + * shader's interface. */ + for (uint32_t a_idx = 0; a_idx < format->attr_len; a_idx++) { + const GPUVertAttr *a = &format->attrs[a_idx]; + + if (format->deinterleaved) { + attribute_offset += ((a_idx == 0) ? 
0 : format->attrs[a_idx - 1].size) * verts->vertex_len; + buffer_stride = a->size; + } + else { + attribute_offset = a->offset; + } + + /* Find attribute with the matching name. Attributes may have multiple compatible + * name aliases. */ + for (uint32_t n_idx = 0; n_idx < a->name_len; n_idx++) { + const char *name = GPU_vertformat_attr_name_get(format, a, n_idx); + const ShaderInput *input = interface->attr_get(name); + + if (input == nullptr || input->location == -1) { + /* Vertex/instance buffers provided have attribute data for attributes which are not needed + * by this particular shader. This shader only needs binding information for the attributes + * has in the shader interface. */ + MTL_LOG_WARNING( + "MTLBatch: Could not find attribute with name '%s' (defined in active vertex format) " + "in the shader interface for shader '%s'\n", + name, + interface->get_name()); + continue; + } + + /* Fetch metal attribute information. */ + const MTLShaderInputAttribute &mtl_attr = interface->get_attribute(input->location); + BLI_assert(mtl_attr.location >= 0); + /* Verify that the attribute location from the shader interface + * matches the attribute location returned. */ + BLI_assert(mtl_attr.location == input->location); + + /* Check if attribute is already present in the given slot. */ + if ((~attr_mask) & (1 << mtl_attr.location)) { + MTL_LOG_INFO( + " -- [Batch] Skipping attribute with input location %d (As one is already bound)\n", + mtl_attr.location); + } + else { + + /* Update attribute used-slot mask. */ + attr_mask &= ~(1 << mtl_attr.location); + + /* Add buffer layout entry in descriptor if it has not yet been added + * for current vertex buffer. */ + if (!buffer_added) { + buffer_index = desc.vertex_descriptor.num_vert_buffers; + desc.vertex_descriptor.buffer_layouts[buffer_index].step_function = + (instanced) ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex; + desc.vertex_descriptor.buffer_layouts[buffer_index].step_rate = 1; + desc.vertex_descriptor.buffer_layouts[buffer_index].stride = buffer_stride; + desc.vertex_descriptor.num_vert_buffers++; + buffer_added = true; + + MTL_LOG_INFO(" -- [Batch] Adding source %s buffer (Index: %d, Stride: %d)\n", + (instanced) ? "instance" : "vertex", + buffer_index, + buffer_stride); + } + else { + /* Ensure stride is correct for de-interleaved attributes. */ + desc.vertex_descriptor.buffer_layouts[buffer_index].stride = buffer_stride; + } + + /* Handle Matrix/Array vertex attribute types. + * Metal does not natively support these as attribute types, so we handle these cases + * by stacking together compatible types (e.g. 4xVec4 for Mat4) and combining + * the data in the shader. + * The generated Metal shader will contain a generated input binding, which reads + * in individual attributes and merges them into the desired type after vertex + * assembly. e.g. a Mat4 (Float4x4) will generate 4 Float4 attributes. */ + if (a->comp_len == 16 || a->comp_len == 12 || a->comp_len == 8) { + BLI_assert_msg( + a->comp_len == 16, + "only mat4 attributes currently supported -- Not ready to handle other long " + "component length attributes yet"); + + /* SSBO Vertex Fetch Attribute safety checks. */ + if (active_shader_->get_uses_ssbo_vertex_fetch()) { + /* When using SSBO vertex fetch, we do not need to expose split attributes, + * A matrix can be read directly as a whole block of contiguous data. 
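+ * e.g. a mat4 occupies 64 contiguous bytes (4 x float4), so a single
+ * GPU_SHADER_ATTR_TYPE_MAT4 binding replaces the four split float4 attribute
+ * locations used in the non-SSBO path below.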
*/ + MTLSSBOAttribute ssbo_attr(mtl_attr.index, + buffer_index, + attribute_offset, + buffer_stride, + GPU_SHADER_ATTR_TYPE_MAT4, + instanced); + active_shader_->ssbo_vertex_fetch_bind_attribute(ssbo_attr); + desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] = + ssbo_attr; + desc.vertex_descriptor.num_ssbo_attributes++; + } + else { + + /* Handle Mat4 attributes. */ + if (a->comp_len == 16) { + /* Debug safety checks. */ + BLI_assert_msg(mtl_attr.matrix_element_count == 4, + "mat4 type expected but there are fewer components"); + BLI_assert_msg(mtl_attr.size == 16, "Expecting subtype 'vec4' with 16 bytes"); + BLI_assert_msg( + mtl_attr.format == MTLVertexFormatFloat4, + "Per-attribute vertex format MUST be float4 for an input type of 'mat4'"); + + /* We have found the 'ROOT' attribute. A mat4 contains 4 consecutive float4 attribute + * locations we must map to. */ + for (int i = 0; i < a->comp_len / 4; i++) { + desc.vertex_descriptor.attributes[mtl_attr.location + i].format = + MTLVertexFormatFloat4; + /* Data is consecutive in the buffer for the whole matrix, each float4 will shift + * the offset by 16 bytes. */ + desc.vertex_descriptor.attributes[mtl_attr.location + i].offset = + attribute_offset + i * 16; + /* All source data for a matrix is in the same singular buffer. */ + desc.vertex_descriptor.attributes[mtl_attr.location + i].buffer_index = + buffer_index; + + /* Update total attribute account. */ + desc.vertex_descriptor.num_attributes = max_ii( + mtl_attr.location + i + 1, desc.vertex_descriptor.num_attributes); + MTL_LOG_INFO("-- Sub-Attrib Location: %d, offset: %d, buffer index: %d\n", + mtl_attr.location + i, + attribute_offset + i * 16, + buffer_index); + } + MTL_LOG_INFO( + "Float4x4 attribute type added for '%s' at attribute locations: %d to %d\n", + name, + mtl_attr.location, + mtl_attr.location + 3); + } + + /* Ensure we are not exceeding the attribute limit. */ + BLI_assert(desc.vertex_descriptor.num_attributes <= MTL_MAX_VERTEX_INPUT_ATTRIBUTES); + } + } + else { + + /* Handle Any required format conversions. + * NOTE(Metal): If there is a mis-match between the format of an attribute + * in the shader interface, and the specified format in the VertexBuffer VertexFormat, + * we need to perform a format conversion. + * + * The Metal API can perform certain conversions internally during vertex assembly: + * - Type Normalization e.g short2 to float2 between 0.0 to 1.0. + * - Type Truncation e.g. Float4 to Float2. + * - Type expansion e,g, Float3 to Float4 (Following 0,0,0,1 for assignment to empty + * elements). + * + * Certain conversion cannot be performed however, and in these cases, we need to + * instruct the shader to generate a specialized version with a conversion routine upon + * attribute read. + * - This handles cases such as conversion between types e.g. Integer to float without + * normalization. 
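+ * e.g. integer source data fetched with GPU_FETCH_INT_TO_FLOAT has no matching
+ * native MTLVertexFormat conversion, so the specialized read routine performs the
+ * cast in-shader.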
+ * + * For more information on the supported and unsupported conversions, see: + * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc + */ + MTLVertexFormat converted_format; + bool can_use_internal_conversion = mtl_convert_vertex_format( + mtl_attr.format, + (GPUVertCompType)a->comp_type, + a->comp_len, + (GPUVertFetchMode)a->fetch_mode, + &converted_format); + bool is_floating_point_format = (a->comp_type == GPU_COMP_F32); + + if (can_use_internal_conversion) { + desc.vertex_descriptor.attributes[mtl_attr.location].format = converted_format; + desc.vertex_descriptor.attributes[mtl_attr.location].format_conversion_mode = + is_floating_point_format ? (GPUVertFetchMode)GPU_FETCH_FLOAT : + (GPUVertFetchMode)GPU_FETCH_INT; + BLI_assert(converted_format != MTLVertexFormatInvalid); + } + else { + /* The internal implicit conversion is not supported. + * In this case, we need to handle conversion inside the shader. + * This is handled using `format_conversion_mode`. + * `format_conversion_mode` is assigned the blender-specified fetch mode (GPU_FETCH_*). + * This then controls how a given attribute is interpreted. The data will be read + * as specified and then converted appropriately to the correct form. + * + * e.g. if `GPU_FETCH_INT_TO_FLOAT` is specified, the specialized read-routine + * in the shader will read the data as an int, and cast this to floating point + * representation. (Rather than reading the source data as float). + * + * NOTE: Even if full conversion is not supported, we may still partially perform an + * implicit conversion where possible, such as vector truncation or expansion. */ + MTLVertexFormat converted_format; + bool can_convert = mtl_vertex_format_resize( + mtl_attr.format, a->comp_len, &converted_format); + desc.vertex_descriptor.attributes[mtl_attr.location].format = can_convert ? + converted_format : + mtl_attr.format; + desc.vertex_descriptor.attributes[mtl_attr.location].format_conversion_mode = + (GPUVertFetchMode)a->fetch_mode; + BLI_assert(desc.vertex_descriptor.attributes[mtl_attr.location].format != + MTLVertexFormatInvalid); + } + desc.vertex_descriptor.attributes[mtl_attr.location].offset = attribute_offset; + desc.vertex_descriptor.attributes[mtl_attr.location].buffer_index = buffer_index; + desc.vertex_descriptor.num_attributes = ((mtl_attr.location + 1) > + desc.vertex_descriptor.num_attributes) ? + (mtl_attr.location + 1) : + desc.vertex_descriptor.num_attributes; + + /* SSBO Vertex Fetch attribute bind. */ + if (active_shader_->get_uses_ssbo_vertex_fetch()) { + BLI_assert_msg(desc.vertex_descriptor.attributes[mtl_attr.location].format == + mtl_attr.format, + "SSBO Vertex Fetch does not support attribute conversion."); + + MTLSSBOAttribute ssbo_attr( + mtl_attr.index, + buffer_index, + attribute_offset, + buffer_stride, + MTLShader::ssbo_vertex_type_to_attr_type( + desc.vertex_descriptor.attributes[mtl_attr.location].format), + instanced); + + active_shader_->ssbo_vertex_fetch_bind_attribute(ssbo_attr); + desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] = + ssbo_attr; + desc.vertex_descriptor.num_ssbo_attributes++; + } + + /* NOTE: We are setting num_attributes to be up to the maximum found index, because of + * this, it is possible that we may skip over certain attributes if they were not in the + * source GPUVertFormat. 
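+ * Skipped slots remain flagged as MTLVertexFormatInvalid and are bound to a NULL
+ * buffer during PSO creation.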
*/ + MTL_LOG_INFO( + " -- Batch Attribute(%d): ORIG Shader Format: %d, ORIG Vert format: %d, Vert " + "components: %d, Fetch Mode %d --> FINAL FORMAT: %d\n", + mtl_attr.location, + (int)mtl_attr.format, + (int)a->comp_type, + (int)a->comp_len, + (int)a->fetch_mode, + (int)desc.vertex_descriptor.attributes[mtl_attr.location].format); + + MTL_LOG_INFO( + " -- [Batch] matching %s attribute '%s' (Attribute Index: %d, Buffer index: %d, " + "offset: %d)\n", + (instanced) ? "instance" : "vertex", + name, + mtl_attr.location, + buffer_index, + attribute_offset); + } + } + } + } + if (buffer_added) { + return buffer_index; + } + return -1; +} + +id<MTLRenderCommandEncoder> MTLBatch::bind(uint v_first, uint v_count, uint i_first, uint i_count) +{ + /* Setup draw call and render pipeline state here. Called by every draw, but setup here so that + * MTLDrawList only needs to perform setup a single time. */ + BLI_assert(this); + + /* Fetch Metal device. */ + MTLContext *ctx = MTLContext::get(); + if (!ctx) { + BLI_assert_msg(false, "No context available for rendering."); + return nil; + } + + /* Verify Shader. */ + active_shader_ = (shader) ? static_cast<MTLShader *>(unwrap(shader)) : nullptr; + + if (active_shader_ == nullptr || !active_shader_->is_valid()) { + /* Skip drawing if there is no valid Metal shader. + * This will occur if the path through which the shader is prepared + * is invalid (e.g. Python without create-info), or, the source shader uses a geometry pass. */ + BLI_assert_msg(false, "No valid Metal shader!"); + return nil; + } + + /* Check if using SSBO Fetch Mode. + * This is an alternative drawing mode to geometry shaders, wherein vertex buffers + * are bound as readable (random-access) GPU buffers and certain descriptor properties + * are passed using Shader uniforms. */ + bool uses_ssbo_fetch = active_shader_->get_uses_ssbo_vertex_fetch(); + + /* Prepare Vertex Descriptor and extract VertexBuffers to bind. */ + MTLVertBuf *buffers[GPU_BATCH_VBO_MAX_LEN] = {nullptr}; + int num_buffers = 0; + + /* Ensure Index Buffer is ready. */ + MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem)); + if (mtl_elem != NULL) { + mtl_elem->upload_data(); + } + + /* Populate vertex descriptor with attribute binding information. + * The vertex descriptor and buffer layout descriptors describe + * how vertex data from bound vertex buffers maps to the + * shader's input. + * A unique vertex descriptor will result in a new PipelineStateObject + * being generated for the currently bound shader. */ + prepare_vertex_descriptor_and_bindings(buffers, num_buffers, v_first, v_count, i_first, i_count); + + /* Prepare Vertex Buffers - Run before RenderCommandEncoder in case BlitCommandEncoder buffer + * data operations are required. */ + for (int i = 0; i < num_buffers; i++) { + MTLVertBuf *buf_at_index = buffers[i]; + if (buf_at_index == NULL) { + BLI_assert_msg( + false, + "Total buffer count does not match highest buffer index, could be gaps in bindings"); + continue; + } + + MTLVertBuf *mtlvbo = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(buf_at_index)); + mtlvbo->bind(); + } + + /* Ensure render pass is active and fetch active RenderCommandEncoder. */ + id<MTLRenderCommandEncoder> rec = ctx->ensure_begin_render_pass(); + + /* Fetch RenderPassState to enable resource binding for active pass. */ + MTLRenderPassState &rps = ctx->main_command_buffer.get_render_pass_state(); + + /* Debug Check: Ensure Frame-buffer instance is not dirty. 
*/ + BLI_assert(!ctx->main_command_buffer.get_active_framebuffer()->get_dirty()); + + /* Bind Shader. */ + this->shader_bind(); + + /* GPU debug markers. */ + if (G.debug & G_DEBUG_GPU) { + [rec pushDebugGroup:[NSString stringWithFormat:@"batch_bind%@(shader: %s)", + this->elem ? @"(indexed)" : @"", + active_shader_->get_interface()->get_name()]]; + [rec insertDebugSignpost:[NSString + stringWithFormat:@"batch_bind%@(shader: %s)", + this->elem ? @"(indexed)" : @"", + active_shader_->get_interface()->get_name()]]; + } + + /* Ensure Context Render Pipeline State is fully setup and ready to execute the draw. */ + MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type); + if (!ctx->ensure_render_pipeline_state(mtl_prim_type)) { + printf("FAILED TO ENSURE RENDER PIPELINE STATE"); + BLI_assert(false); + + if (G.debug & G_DEBUG_GPU) { + [rec popDebugGroup]; + } + return nil; + } + + /*** Bind Vertex Buffers and Index Buffers **/ + + /* SSBO Vertex Fetch Buffer bindings. */ + if (uses_ssbo_fetch) { + + /* SSBO Vertex Fetch - Bind Index Buffer to appropriate slot -- if used. */ + id<MTLBuffer> idx_buffer = nil; + GPUPrimType final_prim_type = this->prim_type; + + if (mtl_elem != nullptr) { + + /* Fetch index buffer. This function can situationally return an optimized + * index buffer of a different primitive type. If this is the case, `final_prim_type` + * and `v_count` will be updated with the new format. + * NOTE: For indexed rendering, v_count represents the number of indices. */ + idx_buffer = mtl_elem->get_index_buffer(final_prim_type, v_count); + BLI_assert(idx_buffer != nil); + + /* Update uniforms for SSBO-vertex-fetch-mode indexed rendering to flag usage. */ + int &uniform_ssbo_index_mode_u16 = active_shader_->uni_ssbo_uses_index_mode_u16; + BLI_assert(uniform_ssbo_index_mode_u16 != -1); + int uses_index_mode_u16 = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 1 : 0; + active_shader_->uniform_int(uniform_ssbo_index_mode_u16, 1, 1, &uses_index_mode_u16); + } + else { + idx_buffer = ctx->get_null_buffer(); + } + rps.bind_vertex_buffer(idx_buffer, 0, MTL_SSBO_VERTEX_FETCH_IBO_INDEX); + + /* Ensure all attributes are set */ + active_shader_->ssbo_vertex_fetch_bind_attributes_end(rec); + + /* Bind NULL Buffers for unused vertex data slots. */ + id<MTLBuffer> null_buffer = ctx->get_null_buffer(); + BLI_assert(null_buffer != nil); + for (int i = num_buffers; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) { + if (rps.cached_vertex_buffer_bindings[i].metal_buffer == nil) { + rps.bind_vertex_buffer(null_buffer, 0, i); + } + } + + /* Flag whether Indexed rendering is used or not. */ + int &uniform_ssbo_use_indexed = active_shader_->uni_ssbo_uses_indexed_rendering; + BLI_assert(uniform_ssbo_use_indexed != -1); + int uses_indexed_rendering = (mtl_elem != NULL) ? 1 : 0; + active_shader_->uniform_int(uniform_ssbo_use_indexed, 1, 1, &uses_indexed_rendering); + + /* Set SSBO-fetch-mode status uniforms. */ + BLI_assert(active_shader_->uni_ssbo_input_prim_type_loc != -1); + BLI_assert(active_shader_->uni_ssbo_input_vert_count_loc != -1); + GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_shader_)), + active_shader_->uni_ssbo_input_prim_type_loc, + 1, + 1, + (const int *)(&final_prim_type)); + GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_shader_)), + active_shader_->uni_ssbo_input_vert_count_loc, + 1, + 1, + (const int *)(&v_count)); + } + + /* Bind Vertex Buffers. 
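+ * NOTE: Buffer data was prepared before the render pass began (see the bind()
+ * preamble above); here each MTLBuffer handle is only attached to its encoder slot
+ * and flagged as in-use.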
*/ + for (int i = 0; i < num_buffers; i++) { + MTLVertBuf *buf_at_index = buffers[i]; + if (buf_at_index == NULL) { + BLI_assert_msg( + false, + "Total buffer count does not match highest buffer index, could be gaps in bindings"); + continue; + } + /* Buffer handle. */ + MTLVertBuf *mtlvbo = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(buf_at_index)); + mtlvbo->flag_used(); + + /* Fetch buffer from MTLVertexBuffer and bind. */ + id<MTLBuffer> mtl_buffer = mtlvbo->get_metal_buffer(); + + BLI_assert(mtl_buffer != nil); + rps.bind_vertex_buffer(mtl_buffer, 0, i); + } + + if (G.debug & G_DEBUG_GPU) { + [rec popDebugGroup]; + } + + /* Return Render Command Encoder used with setup. */ + return rec; +} + +void MTLBatch::unbind() +{ +} + +void MTLBatch::prepare_vertex_descriptor_and_bindings( + MTLVertBuf **buffers, int &num_buffers, int v_first, int v_count, int i_first, int i_count) +{ + + /* Here we populate the MTLContext vertex descriptor and resolve which buffers need to be bound. + */ + MTLStateManager *state_manager = static_cast<MTLStateManager *>( + MTLContext::get()->state_manager); + MTLRenderPipelineStateDescriptor &desc = state_manager->get_pipeline_descriptor(); + const MTLShaderInterface *interface = active_shader_->get_interface(); + uint16_t attr_mask = interface->get_enabled_attribute_mask(); + + /* Reset vertex descriptor to default state. */ + desc.reset_vertex_descriptor(); + + /* Fetch Vertex and Instance Buffers. */ + Span<MTLVertBuf *> mtl_verts(reinterpret_cast<MTLVertBuf **>(this->verts), + GPU_BATCH_VBO_MAX_LEN); + Span<MTLVertBuf *> mtl_inst(reinterpret_cast<MTLVertBuf **>(this->inst), + GPU_BATCH_INST_VBO_MAX_LEN); + + /* SSBO Vertex fetch also passes vertex descriptor information into the shader. */ + if (active_shader_->get_uses_ssbo_vertex_fetch()) { + active_shader_->ssbo_vertex_fetch_bind_attributes_begin(); + } + + /* Resolve Metal vertex buffer bindings. */ + /* Vertex Descriptors + * ------------------ + * Vertex Descriptors are required to generate a pipeline state, based on the current Batch's + * buffer bindings. These bindings are a unique matching, depending on what input attributes a + * batch has in its buffers, and those which are supported by the shader interface. + + * We iterate through the buffers and resolve which attributes satisfy the requirements of the + * currently bound shader. We cache this data, for a given Batch<->ShderInterface pairing in a + * VAO cache to avoid the need to recalculate this data. */ + bool buffer_is_instanced[GPU_BATCH_VBO_MAX_LEN] = {false}; + + VertexDescriptorShaderInterfacePair *descriptor = this->vao_cache.find(interface); + if (descriptor) { + desc.vertex_descriptor = descriptor->vertex_descriptor; + attr_mask = descriptor->attr_mask; + num_buffers = descriptor->num_buffers; + + for (int bid = 0; bid < GPU_BATCH_VBO_MAX_LEN; ++bid) { + if (descriptor->bufferIds[bid].used) { + if (descriptor->bufferIds[bid].is_instance) { + buffers[bid] = mtl_inst[descriptor->bufferIds[bid].id]; + buffer_is_instanced[bid] = true; + } + else { + buffers[bid] = mtl_verts[descriptor->bufferIds[bid].id]; + buffer_is_instanced[bid] = false; + } + } + } + + /* Use cached ssbo attribute binding data. 
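+ * Replaying the cached MTLSSBOAttribute bindings keeps the shader's vertex-fetch
+ * state consistent with the cached vertex descriptor without re-walking the
+ * vertex formats.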
*/ + if (active_shader_->get_uses_ssbo_vertex_fetch()) { + BLI_assert(desc.vertex_descriptor.uses_ssbo_vertex_fetch); + for (int attr_id = 0; attr_id < desc.vertex_descriptor.num_ssbo_attributes; attr_id++) { + active_shader_->ssbo_vertex_fetch_bind_attribute( + desc.vertex_descriptor.ssbo_attributes[attr_id]); + } + } + } + else { + VertexDescriptorShaderInterfacePair pair{}; + pair.interface = interface; + + for (int i = 0; i < GPU_BATCH_VBO_MAX_LEN; ++i) { + pair.bufferIds[i].id = -1; + pair.bufferIds[i].is_instance = 0; + pair.bufferIds[i].used = 0; + } + /* NOTE: Attribute extraction order from buffer is the reverse of the OpenGL as we flag once an + * attribute is found, rather than pre-setting the mask. */ + /* Extract Instance attributes (These take highest priority). */ + for (int v = 0; v < GPU_BATCH_INST_VBO_MAX_LEN; v++) { + if (mtl_inst[v]) { + MTL_LOG_INFO(" -- [Batch] Checking bindings for bound instance buffer %p\n", mtl_inst[v]); + int buffer_ind = this->prepare_vertex_binding( + mtl_inst[v], desc, interface, attr_mask, true); + if (buffer_ind >= 0) { + buffers[buffer_ind] = mtl_inst[v]; + buffer_is_instanced[buffer_ind] = true; + + pair.bufferIds[buffer_ind].id = v; + pair.bufferIds[buffer_ind].used = 1; + pair.bufferIds[buffer_ind].is_instance = 1; + num_buffers = ((buffer_ind + 1) > num_buffers) ? (buffer_ind + 1) : num_buffers; + } + } + } + + /* Extract Vertex attributes (First-bound vertex buffer takes priority). */ + for (int v = 0; v < GPU_BATCH_VBO_MAX_LEN; v++) { + if (mtl_verts[v] != NULL) { + MTL_LOG_INFO(" -- [Batch] Checking bindings for bound vertex buffer %p\n", mtl_verts[v]); + int buffer_ind = this->prepare_vertex_binding( + mtl_verts[v], desc, interface, attr_mask, false); + if (buffer_ind >= 0) { + buffers[buffer_ind] = mtl_verts[v]; + buffer_is_instanced[buffer_ind] = false; + + pair.bufferIds[buffer_ind].id = v; + pair.bufferIds[buffer_ind].used = 1; + pair.bufferIds[buffer_ind].is_instance = 0; + num_buffers = ((buffer_ind + 1) > num_buffers) ? (buffer_ind + 1) : num_buffers; + } + } + } + + /* Add to VertexDescriptor cache */ + desc.vertex_descriptor.uses_ssbo_vertex_fetch = active_shader_->get_uses_ssbo_vertex_fetch(); + pair.attr_mask = attr_mask; + pair.vertex_descriptor = desc.vertex_descriptor; + pair.num_buffers = num_buffers; + if (!this->vao_cache.insert(pair)) { + printf( + "[Performance Warning] cache is full (Size: %d), vertex descriptor will not be cached\n", + GPU_VAO_STATIC_LEN); + } + } + +/* DEBUG: verify if our attribute bindings have been fully provided as expected. */ +#if MTL_DEBUG_SHADER_ATTRIBUTES == 1 + if (attr_mask != 0) { + for (uint16_t mask = 1, a = 0; a < 16; a++, mask <<= 1) { + if (attr_mask & mask) { + /* Fallback for setting default attributes, for missed slots. Attributes flagged with + * 'MTLVertexFormatInvalid' in the vertex descriptor are bound to a NULL buffer during PSO + * creation. */ + MTL_LOG_WARNING("MTLBatch: Missing expected attribute '%s' at index '%d' for shader: %s\n", + this->active_shader->interface->attributes[a].name, + a, + interface->name); + /* Ensure any assigned attribute has not been given an invalid format. This should not + * occur and may be the result of an unsupported attribute type conversion. */ + BLI_assert(desc.attributes[a].format == MTLVertexFormatInvalid); + } + } + } +#endif +} + +void MTLBatch::draw_advanced(int v_first, int v_count, int i_first, int i_count) +{ + +#if TRUST_NO_ONE + BLI_assert(v_count > 0 && i_count > 0); +#endif + + /* Setup RenderPipelineState for batch. 
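+ * bind() returns nil if no valid context, shader or render pipeline state is
+ * available; the draw is skipped in that case.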
*/ + MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get()); + id<MTLRenderCommandEncoder> rec = this->bind(v_first, v_count, i_first, i_count); + if (rec == nil) { + return; + } + + /* Fetch IndexBuffer and resolve primitive type. */ + MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem)); + MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type); + + /* Render using SSBO Vertex Fetch. */ + if (active_shader_->get_uses_ssbo_vertex_fetch()) { + + /* Submit draw call with modified vertex count, which reflects vertices per primitive defined + * in the USE_SSBO_VERTEX_FETCH pragma. */ + int num_input_primitives = gpu_get_prim_count_from_type(v_count, this->prim_type); + int output_num_verts = num_input_primitives * + active_shader_->get_ssbo_vertex_fetch_output_num_verts(); + BLI_assert_msg( + mtl_vertex_count_fits_primitive_type( + output_num_verts, active_shader_->get_ssbo_vertex_fetch_output_prim_type()), + "Output Vertex count is not compatible with the requested output vertex primitive type"); + [rec drawPrimitives:active_shader_->get_ssbo_vertex_fetch_output_prim_type() + vertexStart:0 + vertexCount:output_num_verts + instanceCount:i_count + baseInstance:i_first]; + ctx->main_command_buffer.register_draw_counters(output_num_verts * i_count); + } + /* Perform regular draw. */ + else if (mtl_elem == NULL) { + + /* Primitive Type toplogy emulation. */ + if (mtl_needs_topology_emulation(this->prim_type)) { + + /* Generate index buffer for primitive types requiring emulation. */ + GPUPrimType emulated_prim_type = this->prim_type; + uint32_t emulated_v_count = v_count; + id<MTLBuffer> generated_index_buffer = this->get_emulated_toplogy_buffer(emulated_prim_type, + emulated_v_count); + BLI_assert(generated_index_buffer != nil); + + MTLPrimitiveType emulated_mtl_prim_type = gpu_prim_type_to_metal(emulated_prim_type); + + /* Temp: Disable culling for emulated primitive types. + * TODO(Metal): Support face winding in topology buffer. */ + [rec setCullMode:MTLCullModeNone]; + + if (generated_index_buffer != nil) { + BLI_assert(emulated_mtl_prim_type == MTLPrimitiveTypeTriangle || + emulated_mtl_prim_type == MTLPrimitiveTypeLine); + if (emulated_mtl_prim_type == MTLPrimitiveTypeTriangle) { + BLI_assert(emulated_v_count % 3 == 0); + } + if (emulated_mtl_prim_type == MTLPrimitiveTypeLine) { + BLI_assert(emulated_v_count % 2 == 0); + } + + /* Set depth stencil state (requires knowledge of primitive type). */ + ctx->ensure_depth_stencil_state(emulated_mtl_prim_type); + + [rec drawIndexedPrimitives:emulated_mtl_prim_type + indexCount:emulated_v_count + indexType:MTLIndexTypeUInt32 + indexBuffer:generated_index_buffer + indexBufferOffset:0 + instanceCount:i_count + baseVertex:v_first + baseInstance:i_first]; + } + else { + printf("[Note] Cannot draw batch -- Emulated Topology mode: %u not yet supported\n", + this->prim_type); + } + } + else { + /* Set depth stencil state (requires knowledge of primitive type). */ + ctx->ensure_depth_stencil_state(mtl_prim_type); + + /* Issue draw call. */ + [rec drawPrimitives:mtl_prim_type + vertexStart:v_first + vertexCount:v_count + instanceCount:i_count + baseInstance:i_first]; + } + ctx->main_command_buffer.register_draw_counters(v_count * i_count); + } + /* Perform indexed draw. 
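+ * NOTE: v_first is folded into the index-buffer byte offset (v_first_ofs), while
+ * index_base_ is passed as the baseVertex.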
*/ + else { + + MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_); + uint32_t base_index = mtl_elem->index_base_; + uint32_t index_size = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 2 : 4; + uint32_t v_first_ofs = ((v_first + mtl_elem->index_start_) * index_size); + BLI_assert_msg((v_first_ofs % index_size) == 0, + "Index offset is not 2/4-byte aligned as per METAL spec"); + + /* Fetch index buffer. May return an index buffer of a differing format, + * if index buffer optimization is used. In these cases, final_prim_type and + * index_count get updated with the new properties. */ + GPUPrimType final_prim_type = this->prim_type; + uint index_count = v_count; + + id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count); + mtl_prim_type = gpu_prim_type_to_metal(final_prim_type); + BLI_assert(index_buffer != nil); + + if (index_buffer != nil) { + + /* Set depth stencil state (requires knowledge of primitive type). */ + ctx->ensure_depth_stencil_state(mtl_prim_type); + + /* Issue draw call. */ + [rec drawIndexedPrimitives:mtl_prim_type + indexCount:index_count + indexType:index_type + indexBuffer:index_buffer + indexBufferOffset:v_first_ofs + instanceCount:i_count + baseVertex:base_index + baseInstance:i_first]; + ctx->main_command_buffer.register_draw_counters(index_count * i_count); + } + else { + BLI_assert_msg(false, "Index buffer does not have backing Metal buffer"); + } + } + + /* End of draw. */ + this->unbind(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Topology emulation and optimization + * \{ */ + +id<MTLBuffer> MTLBatch::get_emulated_toplogy_buffer(GPUPrimType &in_out_prim_type, + uint32_t &in_out_v_count) +{ + + BLI_assert(in_out_v_count > 0); + /* Determine emulated primitive types. */ + GPUPrimType input_prim_type = in_out_prim_type; + uint32_t v_count = in_out_v_count; + GPUPrimType output_prim_type; + switch (input_prim_type) { + case GPU_PRIM_POINTS: + case GPU_PRIM_LINES: + case GPU_PRIM_TRIS: + BLI_assert_msg(false, "Optimal primitive types should not reach here."); + return nil; + break; + case GPU_PRIM_LINES_ADJ: + case GPU_PRIM_TRIS_ADJ: + BLI_assert_msg(false, "Adjacency primitive types should not reach here."); + return nil; + break; + case GPU_PRIM_LINE_STRIP: + case GPU_PRIM_LINE_LOOP: + case GPU_PRIM_LINE_STRIP_ADJ: + output_prim_type = GPU_PRIM_LINES; + break; + case GPU_PRIM_TRI_STRIP: + case GPU_PRIM_TRI_FAN: + output_prim_type = GPU_PRIM_TRIS; + break; + default: + BLI_assert_msg(false, "Invalid primitive type."); + return nil; + } + + /* Check if topology buffer exists and is valid. */ + if (this->emulated_topology_buffer_ != nullptr && + (emulated_topology_type_ != input_prim_type || topology_buffer_input_v_count_ != v_count)) { + + /* Release existing topology buffer. */ + emulated_topology_buffer_->free(); + emulated_topology_buffer_ = nullptr; + } + + /* Generate new topology index buffer. */ + if (this->emulated_topology_buffer_ == nullptr) { + /* Calculate IB len. 
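+ * e.g. a line strip with v_count = 5 emits 4 lines (8 indices); a triangle fan
+ * with v_count = 6 emits 4 triangles (12 indices).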
*/ + uint32_t output_prim_count = 0; + switch (input_prim_type) { + case GPU_PRIM_LINE_STRIP: + case GPU_PRIM_LINE_STRIP_ADJ: + output_prim_count = v_count - 1; + break; + case GPU_PRIM_LINE_LOOP: + output_prim_count = v_count; + break; + case GPU_PRIM_TRI_STRIP: + case GPU_PRIM_TRI_FAN: + output_prim_count = v_count - 2; + break; + default: + BLI_assert_msg(false, "Cannot generate optimized topology buffer for other types."); + break; + } + uint32_t output_IB_elems = output_prim_count * ((output_prim_type == GPU_PRIM_TRIS) ? 3 : 2); + + /* Allocate buffer. */ + uint32_t buffer_bytes = output_IB_elems * 4; + BLI_assert(buffer_bytes > 0); + this->emulated_topology_buffer_ = MTLContext::get_global_memory_manager().allocate( + buffer_bytes, true); + + /* Populate. */ + uint32_t *data = (uint32_t *)this->emulated_topology_buffer_->get_host_ptr(); + BLI_assert(data != nullptr); + + /* TODO(Metal): Support inverse winding modes. */ + bool winding_clockwise = false; + UNUSED_VARS(winding_clockwise); + + switch (input_prim_type) { + /* Line Loop. */ + case GPU_PRIM_LINE_LOOP: { + int line = 0; + for (line = 0; line < output_prim_count - 1; line++) { + data[line * 3 + 0] = line + 0; + data[line * 3 + 1] = line + 1; + } + /* Closing line. */ + data[line * 2 + 0] = line + 0; + data[line * 2 + 1] = 0; + } break; + + /* Triangle Fan. */ + case GPU_PRIM_TRI_FAN: { + for (int triangle = 0; triangle < output_prim_count; triangle++) { + data[triangle * 3 + 0] = 0; /* Always 0 */ + data[triangle * 3 + 1] = triangle + 1; + data[triangle * 3 + 2] = triangle + 2; + } + } break; + + default: + BLI_assert_msg(false, "Other primitive types do not require emulation."); + return nil; + } + + /* Flush. */ + this->emulated_topology_buffer_->flush(); + /* Assign members relating to current cached IB. */ + topology_buffer_input_v_count_ = v_count; + topology_buffer_output_v_count_ = output_IB_elems; + emulated_topology_type_ = input_prim_type; + } + + /* Return. */ + in_out_v_count = topology_buffer_output_v_count_; + in_out_prim_type = output_prim_type; + return (emulated_topology_buffer_) ? emulated_topology_buffer_->get_metal_buffer() : nil; +} + +/** \} */ + +} // blender::gpu diff --git a/source/blender/gpu/metal/mtl_command_buffer.mm b/source/blender/gpu/metal/mtl_command_buffer.mm index 0e13e8d4690..a9cabbb111f 100644 --- a/source/blender/gpu/metal/mtl_command_buffer.mm +++ b/source/blender/gpu/metal/mtl_command_buffer.mm @@ -54,6 +54,7 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin() MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init]; desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus; desc.retainedReferences = YES; + BLI_assert(context_.queue != nil); active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc]; } else { @@ -498,7 +499,7 @@ bool MTLCommandBufferManager::insert_memory_barrier(eGPUBarrier barrier_bits, /* Rendering. */ case MTL_RENDER_COMMAND_ENCODER: { /* Currently flagging both stages -- can use bits above to filter on stage type -- - * though full barrier is safe for now*/ + * though full barrier is safe for now. */ MTLRenderStages before_stage_flags = 0; MTLRenderStages after_stage_flags = 0; if (before_stages & GPU_BARRIER_STAGE_VERTEX && @@ -611,40 +612,187 @@ void MTLRenderPassState::bind_vertex_sampler(MTLSamplerBinding &sampler_binding, bool use_argument_buffer_for_samplers, uint slot) { - /* TODO(Metal): Implement RenderCommandEncoder vertex sampler binding utility. 
This will be - * implemented alongside MTLShader. */ + /* Range check. */ + const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface(); + BLI_assert(slot >= 0); + BLI_assert(slot <= shader_interface->get_max_texture_index()); + BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS); + UNUSED_VARS_NDEBUG(shader_interface); + + /* If sampler state has not changed for the given slot, we do not need to fetch. */ + if (this->cached_vertex_sampler_state_bindings[slot].sampler_state == nil || + !(this->cached_vertex_sampler_state_bindings[slot].binding_state == sampler_binding.state) || + use_argument_buffer_for_samplers) { + + id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ? + ctx.get_default_sampler_state() : + ctx.get_sampler_from_state(sampler_binding.state); + if (!use_argument_buffer_for_samplers) { + /* Update binding and cached state. */ + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + BLI_assert(rec != nil); + [rec setVertexSamplerState:sampler_state atIndex:slot]; + this->cached_vertex_sampler_state_bindings[slot].binding_state = sampler_binding.state; + this->cached_vertex_sampler_state_bindings[slot].sampler_state = sampler_state; + } + + /* Flag last binding type. */ + this->cached_vertex_sampler_state_bindings[slot].is_arg_buffer_binding = + use_argument_buffer_for_samplers; + + /* Always assign to argument buffer samplers binding array - Efficiently ensures the value in + * the samplers array is always up to date. */ + ctx.samplers_.mtl_sampler[slot] = sampler_state; + ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state; + } } void MTLRenderPassState::bind_fragment_sampler(MTLSamplerBinding &sampler_binding, bool use_argument_buffer_for_samplers, uint slot) { - /* TODO(Metal): Implement RenderCommandEncoder fragment sampler binding utility. This will be - * implemented alongside MTLShader. */ + /* Range check. */ + const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface(); + BLI_assert(slot >= 0); + BLI_assert(slot <= shader_interface->get_max_texture_index()); + BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS); + UNUSED_VARS_NDEBUG(shader_interface); + + /* If sampler state has not changed for the given slot, we do not need to fetch*/ + if (this->cached_fragment_sampler_state_bindings[slot].sampler_state == nil || + !(this->cached_fragment_sampler_state_bindings[slot].binding_state == + sampler_binding.state) || + use_argument_buffer_for_samplers) { + + id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ? + ctx.get_default_sampler_state() : + ctx.get_sampler_from_state(sampler_binding.state); + if (!use_argument_buffer_for_samplers) { + /* Update binding and cached state. */ + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + BLI_assert(rec != nil); + [rec setFragmentSamplerState:sampler_state atIndex:slot]; + this->cached_fragment_sampler_state_bindings[slot].binding_state = sampler_binding.state; + this->cached_fragment_sampler_state_bindings[slot].sampler_state = sampler_state; + } + + /* Flag last binding type */ + this->cached_fragment_sampler_state_bindings[slot].is_arg_buffer_binding = + use_argument_buffer_for_samplers; + + /* Always assign to argument buffer samplers binding array - Efficiently ensures the value in + * the samplers array is always up to date. 
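+ * This mirror allows a subsequent argument-buffer bind to consume the complete
+ * sampler set in one step, rather than re-binding each slot individually.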
*/ + ctx.samplers_.mtl_sampler[slot] = sampler_state; + ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state; + } } void MTLRenderPassState::bind_vertex_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index) { - /* TODO(Metal): Implement RenderCommandEncoder vertex buffer binding utility. This will be - * implemented alongside the full MTLMemoryManager. */ + BLI_assert(index >= 0); + BLI_assert(buffer_offset >= 0); + BLI_assert(buffer != nil); + + BufferBindingCached ¤t_vert_ubo_binding = this->cached_vertex_buffer_bindings[index]; + if (current_vert_ubo_binding.offset != buffer_offset || + current_vert_ubo_binding.metal_buffer != buffer || current_vert_ubo_binding.is_bytes) { + + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + BLI_assert(rec != nil); + + if (current_vert_ubo_binding.metal_buffer == buffer) { + /* If buffer is the same, but offset has changed. */ + [rec setVertexBufferOffset:buffer_offset atIndex:index]; + } + else { + /* Bind Vertex Buffer. */ + [rec setVertexBuffer:buffer offset:buffer_offset atIndex:index]; + } + + /* Update Bind-state cache. */ + this->cached_vertex_buffer_bindings[index].is_bytes = false; + this->cached_vertex_buffer_bindings[index].metal_buffer = buffer; + this->cached_vertex_buffer_bindings[index].offset = buffer_offset; + } } void MTLRenderPassState::bind_fragment_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index) { - /* TODO(Metal): Implement RenderCommandEncoder fragment buffer binding utility. This will be - * implemented alongside the full MTLMemoryManager. */ + BLI_assert(index >= 0); + BLI_assert(buffer_offset >= 0); + BLI_assert(buffer != nil); + + BufferBindingCached ¤t_frag_ubo_binding = this->cached_fragment_buffer_bindings[index]; + if (current_frag_ubo_binding.offset != buffer_offset || + current_frag_ubo_binding.metal_buffer != buffer || current_frag_ubo_binding.is_bytes) { + + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + BLI_assert(rec != nil); + + if (current_frag_ubo_binding.metal_buffer == buffer) { + /* If buffer is the same, but offset has changed. */ + [rec setFragmentBufferOffset:buffer_offset atIndex:index]; + } + else { + /* Bind Fragment Buffer */ + [rec setFragmentBuffer:buffer offset:buffer_offset atIndex:index]; + } + + /* Update Bind-state cache */ + this->cached_fragment_buffer_bindings[index].is_bytes = false; + this->cached_fragment_buffer_bindings[index].metal_buffer = buffer; + this->cached_fragment_buffer_bindings[index].offset = buffer_offset; + } } void MTLRenderPassState::bind_vertex_bytes(void *bytes, uint length, uint index) { - /* TODO(Metal): Implement RenderCommandEncoder vertex bytes binding utility. This will be - * implemented alongside the full MTLMemoryManager. */ + /* Bytes always updated as source data may have changed. */ + BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + BLI_assert(length > 0); + BLI_assert(bytes != nullptr); + + if (length < MTL_MAX_SET_BYTES_SIZE) { + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + [rec setVertexBytes:bytes length:length atIndex:index]; + } + else { + /* We have run over the setBytes limit, bind buffer instead. 
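+ * setBytes is capped at MTL_MAX_SET_BYTES_SIZE (4 KB); larger uploads are copied
+ * into a 256-byte-aligned scratch-buffer range and bound as a regular buffer.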
*/ + MTLTemporaryBuffer range = + ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256); + memcpy(range.data, bytes, length); + this->bind_vertex_buffer(range.metal_buffer, range.buffer_offset, index); + } + + /* Update Bind-state cache */ + this->cached_vertex_buffer_bindings[index].is_bytes = true; + this->cached_vertex_buffer_bindings[index].metal_buffer = nil; + this->cached_vertex_buffer_bindings[index].offset = -1; } void MTLRenderPassState::bind_fragment_bytes(void *bytes, uint length, uint index) { - /* TODO(Metal): Implement RenderCommandEncoder fragment bytes binding utility. This will be - * implemented alongside the full MTLMemoryManager. */ + /* Bytes always updated as source data may have changed. */ + BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + BLI_assert(length > 0); + BLI_assert(bytes != nullptr); + + if (length < MTL_MAX_SET_BYTES_SIZE) { + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + [rec setFragmentBytes:bytes length:length atIndex:index]; + } + else { + /* We have run over the setBytes limit, bind buffer instead. */ + MTLTemporaryBuffer range = + ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256); + memcpy(range.data, bytes, length); + this->bind_fragment_buffer(range.metal_buffer, range.buffer_offset, index); + } + + /* Update Bind-state cache. */ + this->cached_fragment_buffer_bindings[index].is_bytes = true; + this->cached_fragment_buffer_bindings[index].metal_buffer = nil; + this->cached_fragment_buffer_bindings[index].offset = -1; } /** \} */ diff --git a/source/blender/gpu/metal/mtl_common.hh b/source/blender/gpu/metal/mtl_common.hh index b6f9c0050a9..5c322efa3f9 100644 --- a/source/blender/gpu/metal/mtl_common.hh +++ b/source/blender/gpu/metal/mtl_common.hh @@ -3,7 +3,9 @@ #ifndef __MTL_COMMON #define __MTL_COMMON -// -- Renderer Options -- +/** -- Renderer Options -- */ +/* Number of frames over which rolling averages are taken. */ +#define MTL_FRAME_AVERAGE_COUNT 5 #define MTL_MAX_DRAWABLES 3 #define MTL_MAX_SET_BYTES_SIZE 4096 #define MTL_FORCE_WAIT_IDLE 0 diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh index e996193e722..6229afcef79 100644 --- a/source/blender/gpu/metal/mtl_context.hh +++ b/source/blender/gpu/metal/mtl_context.hh @@ -12,6 +12,10 @@ #include "GPU_common_types.h" #include "GPU_context.h" +#include "intern/GHOST_Context.h" +#include "intern/GHOST_ContextCGL.h" +#include "intern/GHOST_Window.h" + #include "mtl_backend.hh" #include "mtl_capabilities.hh" #include "mtl_common.hh" @@ -248,7 +252,7 @@ struct MTLContextTextureUtils { /* Depth texture updates are not directly supported with Blit operations, similarly, we cannot * use a compute shader to write to depth, so we must instead render to a depth target. * These processes use vertex/fragment shaders to render texture data from an intermediate - * source, in order to prime the depth buffer*/ + * source, in order to prime the depth buffer. 
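+ * One update shader is cached per DepthTextureUpdateRoutineSpecialisation in the
+ * map below.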
*/ blender::Map<DepthTextureUpdateRoutineSpecialisation, GPUShader *> depth_2d_update_shaders; GPUShader *fullscreen_blit_shader = nullptr; @@ -348,7 +352,7 @@ struct MTLSamplerArray { { uint32_t hash = this->num_samplers; for (int i = 0; i < this->num_samplers; i++) { - hash ^= (uint32_t)this->mtl_sampler_flags[i] << (i % 3); + hash ^= uint32_t(this->mtl_sampler_flags[i]) << (i % 3); } return hash; } @@ -570,12 +574,44 @@ class MTLCommandBufferManager { class MTLContext : public Context { friend class MTLBackend; + friend class MTLRenderPassState; + + public: + /* Swap-chain and latency management. */ + static std::atomic<int> max_drawables_in_flight; + static std::atomic<int64_t> avg_drawable_latency_us; + static int64_t frame_latency[MTL_FRAME_AVERAGE_COUNT]; + + public: + /* Shaders and Pipeline state. */ + MTLContextGlobalShaderPipelineState pipeline_state; + + /* Metal API Resource Handles. */ + id<MTLCommandQueue> queue = nil; + id<MTLDevice> device = nil; + +#ifndef NDEBUG + /* Label for Context debug name assignment. */ + NSString *label = nil; +#endif + + /* Memory Management. */ + MTLScratchBufferManager memory_manager; + static MTLBufferPool global_memory_manager; + + /* CommandBuffer managers. */ + MTLCommandBufferManager main_command_buffer; private: - /* Null buffers for empty/uninitialized bindings. - * Null attribute buffer follows default attribute format of OpenGL Back-end. */ - id<MTLBuffer> null_buffer_; /* All zero's. */ - id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */ + /* Parent Context. */ + GHOST_ContextCGL *ghost_context_; + + /* Render Passes and Frame-buffers. */ + id<MTLTexture> default_fbo_mtltexture_ = nil; + gpu::MTLTexture *default_fbo_gputexture_ = nullptr; + + /* Depth-stencil state cache. */ + blender::Map<MTLContextDepthStencilState, id<MTLDepthStencilState>> depth_stencil_state_cache; /* Compute and specialization caches. */ MTLContextTextureUtils texture_utils_; @@ -601,23 +637,20 @@ class MTLContext : public Context { gpu::MTLBuffer *visibility_buffer_ = nullptr; bool visibility_is_dirty_ = false; - public: - /* Shaders and Pipeline state. */ - MTLContextGlobalShaderPipelineState pipeline_state; - - /* Metal API Resource Handles. */ - id<MTLCommandQueue> queue = nil; - id<MTLDevice> device = nil; - - /* Memory Management */ - MTLScratchBufferManager memory_manager; - static MTLBufferPool global_memory_manager; + /* Null buffers for empty/uninitialized bindings. + * Null attribute buffer follows default attribute format of OpenGL Backend. */ + id<MTLBuffer> null_buffer_; /* All zero's. */ + id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */ - /* CommandBuffer managers. */ - MTLCommandBufferManager main_command_buffer; + /** Dummy Resources */ + /* Maximum of 32 texture types. Though most combinations invalid. */ + gpu::MTLTexture *dummy_textures_[GPU_TEXTURE_BUFFER] = {nullptr}; + GPUVertFormat dummy_vertformat_; + GPUVertBuf *dummy_verts_ = nullptr; + public: /* GPUContext interface. */ - MTLContext(void *ghost_window); + MTLContext(void *ghost_window, void *ghost_context); ~MTLContext(); static void check_error(const char *info); @@ -673,6 +706,35 @@ class MTLContext : public Context { void pipeline_state_init(); MTLShader *get_active_shader(); + /* These functions ensure that the current RenderCommandEncoder has + * the correct global state assigned. This should be called prior + * to every draw call, to ensure that all state is applied and up + * to date. 
We handle:
+   *
+   * - Buffer bindings (Vertex buffers, Uniforms, UBOs, transform feedback)
+   * - Texture bindings
+   * - Sampler bindings (+ argument buffer bindings)
+   * - Dynamic Render pipeline state (on encoder)
+   * - Baking Pipeline State Objects (PSOs) for current shader, based
+   *   on final pipeline state.
+   *
+   * `ensure_render_pipeline_state` will return false if the state is
+   * invalid and cannot be applied. This should cancel a draw call. */
+  bool ensure_render_pipeline_state(MTLPrimitiveType prim_type);
+  bool ensure_uniform_buffer_bindings(
+      id<MTLRenderCommandEncoder> rec,
+      const MTLShaderInterface *shader_interface,
+      const MTLRenderPipelineStateInstance *pipeline_state_instance);
+  void ensure_texture_bindings(id<MTLRenderCommandEncoder> rec,
+                               MTLShaderInterface *shader_interface,
+                               const MTLRenderPipelineStateInstance *pipeline_state_instance);
+  void ensure_depth_stencil_state(MTLPrimitiveType prim_type);
+
+  id<MTLBuffer> get_null_buffer();
+  id<MTLBuffer> get_null_attribute_buffer();
+  gpu::MTLTexture *get_dummy_texture(eGPUTextureType type);
+  void free_dummy_resources();
+
   /* State assignment. */
   void set_viewport(int origin_x, int origin_y, int width, int height);
   void set_scissor(int scissor_x, int scissor_y, int scissor_width, int scissor_height);
@@ -720,9 +782,37 @@ class MTLContext : public Context {
   {
     return MTLContext::global_memory_manager;
   }
-  /* Uniform Buffer Bindings to command encoders. */
-  id<MTLBuffer> get_null_buffer();
-  id<MTLBuffer> get_null_attribute_buffer();
+
+  /* Swap-chain and latency management. */
+  static void latency_resolve_average(int64_t frame_latency_us)
+  {
+    int64_t avg = 0;
+    int64_t frame_c = 0;
+    for (int i = MTL_FRAME_AVERAGE_COUNT - 1; i > 0; i--) {
+      MTLContext::frame_latency[i] = MTLContext::frame_latency[i - 1];
+      avg += MTLContext::frame_latency[i];
+      frame_c += (MTLContext::frame_latency[i] > 0) ? 1 : 0;
+    }
+    MTLContext::frame_latency[0] = frame_latency_us;
+    avg += MTLContext::frame_latency[0];
+    if (frame_c > 0) {
+      avg /= frame_c;
+    }
+    else {
+      avg = 0;
+    }
+    MTLContext::avg_drawable_latency_us = avg;
+  }
+
+ private:
+  void set_ghost_context(GHOST_ContextHandle ghostCtxHandle);
+  void set_ghost_window(GHOST_WindowHandle ghostWinHandle);
 };
 
+/* GHOST Context callback and present. */
+void present(MTLRenderPassDescriptor *blit_descriptor,
+             id<MTLRenderPipelineState> blit_pso,
+             id<MTLTexture> swapchain_texture,
+             id<CAMetalDrawable> drawable);
+
 }  // namespace blender::gpu
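Taken together, the `ensure_*` entry points declared above define the per-draw validation flow. A hedged sketch of how a draw routine might drive them; the parameter values are illustrative, and the real call sites live in MTLBatch and MTLImmediate:

/* Per-draw validation sketch, assuming an active MTLContext *ctx and a vertex count
 * vertex_len. A false return from ensure_render_pipeline_state() means the state could
 * not be applied and the draw must be skipped, per the contract documented above. */
MTLPrimitiveType prim = MTLPrimitiveTypeTriangle;
id<MTLRenderCommandEncoder> enc = ctx->ensure_begin_render_pass();
if (ctx->ensure_render_pipeline_state(prim)) {
  /* Depth-stencil state is encoder-level and depends on the primitive type. */
  ctx->ensure_depth_stencil_state(prim);
  [enc drawPrimitives:prim vertexStart:0 vertexCount:vertex_len];
}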
diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm
index a66645e5fb5..50576379f0d 100644
--- a/source/blender/gpu/metal/mtl_context.mm
+++ b/source/blender/gpu/metal/mtl_context.mm
@@ -5,13 +5,29 @@
  */
 #include "mtl_context.hh"
 #include "mtl_debug.hh"
+#include "mtl_framebuffer.hh"
+#include "mtl_immediate.hh"
+#include "mtl_memory.hh"
+#include "mtl_primitive.hh"
 #include "mtl_shader.hh"
 #include "mtl_shader_interface.hh"
 #include "mtl_state.hh"
+#include "mtl_uniform_buffer.hh"
 
 #include "DNA_userdef_types.h"
 
 #include "GPU_capabilities.h"
+#include "GPU_matrix.h"
+#include "GPU_shader.h"
+#include "GPU_texture.h"
+#include "GPU_uniform_buffer.h"
+#include "GPU_vertex_buffer.h"
+#include "intern/gpu_matrix_private.h"
+
+#include "PIL_time.h"
+
+#include <fstream>
+#include <string>
 
 using namespace blender;
 using namespace blender::gpu;
@@ -21,21 +37,118 @@ namespace blender::gpu {
 
 /* Global memory manager. */
 MTLBufferPool MTLContext::global_memory_manager;
 
+/* Swap-chain and latency management. */
+std::atomic<int> MTLContext::max_drawables_in_flight = 0;
+std::atomic<int64_t> MTLContext::avg_drawable_latency_us = 0;
+int64_t MTLContext::frame_latency[MTL_FRAME_AVERAGE_COUNT] = {0};
+
+/* -------------------------------------------------------------------- */
+/** \name GHOST Context interaction.
+ * \{ */
+
+void MTLContext::set_ghost_context(GHOST_ContextHandle ghostCtxHandle)
+{
+  GHOST_Context *ghost_ctx = reinterpret_cast<GHOST_Context *>(ghostCtxHandle);
+  BLI_assert(ghost_ctx != nullptr);
+
+  /* Release old MTLTexture handle. */
+  if (default_fbo_mtltexture_) {
+    [default_fbo_mtltexture_ release];
+    default_fbo_mtltexture_ = nil;
+  }
+
+  /* Release Framebuffer attachments. */
+  MTLFrameBuffer *mtl_front_left = static_cast<MTLFrameBuffer *>(this->front_left);
+  MTLFrameBuffer *mtl_back_left = static_cast<MTLFrameBuffer *>(this->back_left);
+  mtl_front_left->remove_all_attachments();
+  mtl_back_left->remove_all_attachments();
+
+  GHOST_ContextCGL *ghost_cgl_ctx = dynamic_cast<GHOST_ContextCGL *>(ghost_ctx);
+  if (ghost_cgl_ctx != NULL) {
+    default_fbo_mtltexture_ = ghost_cgl_ctx->metalOverlayTexture();
+
+    MTL_LOG_INFO(
+        "Binding GHOST context CGL %p to GPU context %p. (Device: %p, queue: %p, texture: %p)\n",
+        ghost_cgl_ctx,
+        this,
+        this->device,
+        this->queue,
+        default_fbo_gputexture_);
+
+    /* Check if the GHOST Context provides a default framebuffer: */
+    if (default_fbo_mtltexture_) {
+
+      /* Release old GPUTexture handle. */
+      if (default_fbo_gputexture_) {
+        GPU_texture_free(wrap(static_cast<Texture *>(default_fbo_gputexture_)));
+        default_fbo_gputexture_ = nullptr;
+      }
+
+      /* Retain handle. */
+      [default_fbo_mtltexture_ retain];
+
+      /*** Create front and back-buffers ***/
+      /* Create gpu::MTLTexture objects. */
+      default_fbo_gputexture_ = new gpu::MTLTexture(
+          "MTL_BACKBUFFER", GPU_RGBA16F, GPU_TEXTURE_2D, default_fbo_mtltexture_);
+
+      /* Update frame-buffers with new texture attachments. */
+      mtl_front_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+      mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+#ifndef NDEBUG
+      this->label = default_fbo_mtltexture_.label;
+#endif
+    }
+    else {
+
+      /* Add default texture for cases where no other framebuffer is bound. */
+      if (!default_fbo_gputexture_) {
+        default_fbo_gputexture_ = static_cast<gpu::MTLTexture *>(
+            unwrap(GPU_texture_create_2d(__func__, 16, 16, 1, GPU_RGBA16F, nullptr)));
+      }
+      mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+
+      MTL_LOG_INFO(
+          "-- Bound context %p for GPU context: %p is offscreen and does not have a default "
+          "framebuffer\n",
+          ghost_cgl_ctx,
+          this);
+#ifndef NDEBUG
+      this->label = @"Offscreen Metal Context";
+#endif
+    }
+  }
+  else {
+    MTL_LOG_INFO(
+        "[ERROR] Failed to bind GHOST context to MTLContext -- GHOST_ContextCGL is null "
+        "(GhostContext: %p, GhostContext_CGL: %p)\n",
+        ghost_ctx,
+        ghost_cgl_ctx);
+    BLI_assert(false);
+  }
+}
+
+void MTLContext::set_ghost_window(GHOST_WindowHandle ghostWinHandle)
+{
+  GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghostWinHandle);
+  this->set_ghost_context((GHOST_ContextHandle)(ghostWin ?
ghostWin->getContext() : NULL));
+}
+
+/** \} */
+
 /* -------------------------------------------------------------------- */
 /** \name MTLContext
  * \{ */
 
 /* Placeholder functions */
-MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command_buffer(*this)
+MTLContext::MTLContext(void *ghost_window, void *ghost_context)
+    : memory_manager(*this), main_command_buffer(*this)
 {
   /* Init debug. */
   debug::mtl_debug_init();
 
-  /* Device creation.
-   * TODO(Metal): This is a temporary initialization path to enable testing of features
-   * and shader compilation tests. Future functionality should fetch the existing device
-   * from GHOST_ContextCGL.mm. Plumbing to be updated in future. */
-  this->device = MTLCreateSystemDefaultDevice();
+  /* Initialize Render-pass and Frame-buffer State. */
+  this->back_left = nullptr;
 
   /* Initialize command buffer state. */
   this->main_command_buffer.prepare();
@@ -47,10 +160,35 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command
   is_inside_frame_ = false;
   current_frame_index_ = 0;
 
-  /* Prepare null data buffer */
+  /* Prepare null data buffer. */
   null_buffer_ = nil;
   null_attribute_buffer_ = nil;
 
+  /* Zero-initialize MTL textures. */
+  default_fbo_mtltexture_ = nil;
+  default_fbo_gputexture_ = nullptr;
+
+  /** Fetch the GHOST context and retrieve the Metal device/queue. */
+  ghost_window_ = ghost_window;
+  if (ghost_window_ && ghost_context == NULL) {
+    /* NOTE(Metal): Fetch ghost_context from ghost_window if it is not provided.
+     * Regardless of whether windowed or not, we need access to the GhostContext
+     * for presentation, and device/queue access. */
+    GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghost_window_);
+    ghost_context = (ghostWin ? ghostWin->getContext() : NULL);
+  }
+  BLI_assert(ghost_context);
+  this->ghost_context_ = static_cast<GHOST_ContextCGL *>(ghost_context);
+  this->queue = (id<MTLCommandQueue>)this->ghost_context_->metalCommandQueue();
+  this->device = (id<MTLDevice>)this->ghost_context_->metalDevice();
+  BLI_assert(this->queue);
+  BLI_assert(this->device);
+  [this->queue retain];
+  [this->device retain];
+
+  /* Register present callback. */
+  this->ghost_context_->metalRegisterPresentCallback(&present);
+
   /* Create FrameBuffer handles. */
   MTLFrameBuffer *mtl_front_left = new MTLFrameBuffer(this, "front_left");
   MTLFrameBuffer *mtl_back_left = new MTLFrameBuffer(this, "back_left");
@@ -66,6 +204,7 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command
   /* Initialize Metal modules. */
   this->memory_manager.init();
   this->state_manager = new MTLStateManager(this);
+  this->imm = new MTLImmediate(this);
 
   /* Ensure global memory manager is initialized. */
   MTLContext::global_memory_manager.init(this->device);
@@ -99,9 +238,29 @@ MTLContext::~MTLContext()
       this->end_frame();
     }
   }
+
+  /* Release memory manager. */
+  this->get_scratchbuffer_manager().free();
+
   /* Release update/blit shaders. */
   this->get_texture_utils().cleanup();
 
+  /* Detach resource references. */
+  GPU_texture_unbind_all();
+
+  /* Unbind UBOs. */
+  for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
+    if (this->pipeline_state.ubo_bindings[i].bound &&
+        this->pipeline_state.ubo_bindings[i].ubo != nullptr) {
+      GPUUniformBuf *ubo = wrap(
+          static_cast<UniformBuf *>(this->pipeline_state.ubo_bindings[i].ubo));
+      GPU_uniformbuf_unbind(ubo);
+    }
+  }
+
+  /* Release dummy resources. */
+  this->free_dummy_resources();
+
+  /* Release Sampler States.
*/
   for (int i = 0; i < GPU_SAMPLER_MAX; i++) {
     if (sampler_state_cache_[i] != nil) {
@@ -109,12 +268,28 @@ MTLContext::~MTLContext()
       sampler_state_cache_[i] = nil;
     }
   }
+
+  /* Empty cached sampler argument buffers. */
+  for (auto entry : cached_sampler_buffers_.values()) {
+    entry->free();
+  }
+  cached_sampler_buffers_.clear();
+
+  /* Free null buffers. */
   if (null_buffer_) {
     [null_buffer_ release];
   }
   if (null_attribute_buffer_) {
     [null_attribute_buffer_ release];
   }
+
+  /* Free Metal objects. */
+  if (this->queue) {
+    [this->queue release];
+  }
+  if (this->device) {
+    [this->device release];
+  }
 }
 
 void MTLContext::begin_frame()
@@ -146,20 +321,49 @@ void MTLContext::check_error(const char *info)
 
 void MTLContext::activate()
 {
-  /* TODO(Metal): Implement. */
+  /* Make sure no other context is already bound to this thread. */
+  BLI_assert(is_active_ == false);
+  is_active_ = true;
+  thread_ = pthread_self();
+
+  /* Re-apply ghost window/context for resizing. */
+  if (ghost_window_) {
+    this->set_ghost_window((GHOST_WindowHandle)ghost_window_);
+  }
+  else if (ghost_context_) {
+    this->set_ghost_context((GHOST_ContextHandle)ghost_context_);
+  }
+
+  /* Reset UBO bind state. */
+  for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
+    if (this->pipeline_state.ubo_bindings[i].bound &&
+        this->pipeline_state.ubo_bindings[i].ubo != nullptr) {
+      this->pipeline_state.ubo_bindings[i].bound = false;
+      this->pipeline_state.ubo_bindings[i].ubo = nullptr;
+    }
+  }
+
+  /* Ensure imm active. */
+  immActivate();
 }
+
 void MTLContext::deactivate()
 {
-  /* TODO(Metal): Implement. */
+  BLI_assert(this->is_active_on_thread());
+  /* Flush context on deactivate. */
+  this->flush();
+  is_active_ = false;
+  immDeactivate();
 }
 
 void MTLContext::flush()
 {
-  /* TODO(Metal): Implement. */
+  this->main_command_buffer.submit(false);
}
+
 void MTLContext::finish()
 {
-  /* TODO(Metal): Implement. */
+  this->main_command_buffer.submit(true);
 }
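/* Note: the pair above follows the usual split -- flush() submits the accumulated command
 * buffer without blocking, while finish() submits and waits for completion. From the GPU
 * module front-end this is reached via the generic entry points:
 *
 *   GPU_flush();   -- kick pending work, keep recording
 *   GPU_finish();  -- submit and wait until the GPU has executed prior work
 */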
 
 void MTLContext::memory_statistics_get(int *total_mem, int *free_mem)
@@ -200,9 +404,8 @@ id<MTLRenderCommandEncoder> MTLContext::ensure_begin_render_pass()
 
   /* Ensure command buffer workload submissions are optimal --
    * Though do not split a batch mid-IMM recording. */
-  /* TODO(Metal): Add IMM Check once MTLImmediate has been implemented. */
-  if (this->main_command_buffer.do_break_submission()/*&&
-      !((MTLImmediate *)(this->imm))->imm_is_recording()*/) {
+  if (this->main_command_buffer.do_break_submission() &&
+      !((MTLImmediate *)(this->imm))->imm_is_recording()) {
     this->flush();
   }
@@ -293,6 +496,72 @@ id<MTLBuffer> MTLContext::get_null_attribute_buffer()
   return null_attribute_buffer_;
 }
 
+gpu::MTLTexture *MTLContext::get_dummy_texture(eGPUTextureType type)
+{
+  /* Decrement 1 from texture type as they start from 1 and go to 32 (inclusive).
+   * Remap to 0..31. */
+  gpu::MTLTexture *dummy_tex = dummy_textures_[type - 1];
+  if (dummy_tex != nullptr) {
+    return dummy_tex;
+  }
+  else {
+    GPUTexture *tex = nullptr;
+    switch (type) {
+      case GPU_TEXTURE_1D:
+        tex = GPU_texture_create_1d("Dummy 1D", 128, 1, GPU_RGBA8, nullptr);
+        break;
+      case GPU_TEXTURE_1D_ARRAY:
+        tex = GPU_texture_create_1d_array("Dummy 1DArray", 128, 1, 1, GPU_RGBA8, nullptr);
+        break;
+      case GPU_TEXTURE_2D:
+        tex = GPU_texture_create_2d("Dummy 2D", 128, 128, 1, GPU_RGBA8, nullptr);
+        break;
+      case GPU_TEXTURE_2D_ARRAY:
+        tex = GPU_texture_create_2d_array("Dummy 2DArray", 128, 128, 1, 1, GPU_RGBA8, nullptr);
+        break;
+      case GPU_TEXTURE_3D:
+        tex = GPU_texture_create_3d(
+            "Dummy 3D", 128, 128, 1, 1, GPU_RGBA8, GPU_DATA_UBYTE, nullptr);
+        break;
+      case GPU_TEXTURE_CUBE:
+        tex = GPU_texture_create_cube("Dummy Cube", 128, 1, GPU_RGBA8, nullptr);
+        break;
+      case GPU_TEXTURE_CUBE_ARRAY:
+        tex = GPU_texture_create_cube_array("Dummy CubeArray", 128, 1, 1, GPU_RGBA8, nullptr);
+        break;
+      case GPU_TEXTURE_BUFFER:
+        if (!dummy_verts_) {
+          GPU_vertformat_clear(&dummy_vertformat_);
+          GPU_vertformat_attr_add(&dummy_vertformat_, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+          dummy_verts_ = GPU_vertbuf_create_with_format_ex(&dummy_vertformat_, GPU_USAGE_STATIC);
+          GPU_vertbuf_data_alloc(dummy_verts_, 64);
+        }
+        tex = GPU_texture_create_from_vertbuf("Dummy TextureBuffer", dummy_verts_);
+        break;
+      default:
+        BLI_assert_msg(false, "Unrecognised texture type");
+        return nullptr;
+    }
+    gpu::MTLTexture *metal_tex = static_cast<gpu::MTLTexture *>(reinterpret_cast<Texture *>(tex));
+    dummy_textures_[type - 1] = metal_tex;
+    return metal_tex;
+  }
+  return nullptr;
+}
+
+void MTLContext::free_dummy_resources()
+{
+  for (int tex = 0; tex < GPU_TEXTURE_BUFFER; tex++) {
+    if (dummy_textures_[tex]) {
+      GPU_texture_free(
+          reinterpret_cast<GPUTexture *>(static_cast<Texture *>(dummy_textures_[tex])));
+      dummy_textures_[tex] = nullptr;
+    }
+  }
+  if (dummy_verts_) {
+    GPU_vertbuf_discard(dummy_verts_);
+  }
+}
+
 /** \} */
 
 /* -------------------------------------------------------------------- */
@@ -439,6 +708,757 @@ void MTLContext::set_scissor_enabled(bool scissor_enabled)
 /** \} */
 
 /* -------------------------------------------------------------------- */
+/** \name Command Encoder and pipeline state
+ * These utilities ensure that all of the globally bound resources and state have been
+ * correctly encoded within the current RenderCommandEncoder. This involves managing
+ * buffer bindings, texture bindings, depth stencil state and dynamic pipeline state.
+ *
+ * We will also trigger compilation of new PSOs where the input state has changed
+ * and a new PSO is required.
+ * All of this setup is required in order to perform a valid draw call.
+ * \{ */
+
+bool MTLContext::ensure_render_pipeline_state(MTLPrimitiveType mtl_prim_type)
+{
+  BLI_assert(this->pipeline_state.initialised);
+
+  /* Check if an active shader is bound. */
+  if (!this->pipeline_state.active_shader) {
+    MTL_LOG_WARNING("No Metal shader for bound GL shader\n");
+    return false;
+  }
+
+  /* Also ensure active shader is valid. */
+  if (!this->pipeline_state.active_shader->is_valid()) {
+    MTL_LOG_WARNING(
+        "Bound active shader is not valid (Missing/invalid implementation for Metal).\n");
+    return false;
+  }
+
+  /* Apply global state. */
+  this->state_manager->apply_state();
+
+  /* Main command buffer tracks the current state of the render pass, based on bound
+   * MTLFrameBuffer.
*/
+  MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  /* Debug Check: Ensure Framebuffer instance is not dirty. */
+  BLI_assert(!this->main_command_buffer.get_active_framebuffer()->get_dirty());
+
+  /* Fetch shader interface. */
+  MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
+  if (shader_interface == nullptr) {
+    MTL_LOG_WARNING("Bound active shader does not have a valid shader interface!\n");
+    return false;
+  }
+
+  /* Fetch shader and bake valid PipelineStateObject (PSO) based on current
+   * shader and state combination. This PSO represents the final GPU-executable
+   * permutation of the shader. */
+  MTLRenderPipelineStateInstance *pipeline_state_instance =
+      this->pipeline_state.active_shader->bake_current_pipeline_state(
+          this, mtl_prim_type_to_topology_class(mtl_prim_type));
+  if (!pipeline_state_instance) {
+    MTL_LOG_ERROR("Failed to bake Metal pipeline state for shader: %s\n",
+                  shader_interface->get_name());
+    return false;
+  }
+
+  bool result = false;
+  if (pipeline_state_instance->pso) {
+
+    /* Fetch render command encoder. A render pass should already be active.
+     * This will be NULL if invalid. */
+    id<MTLRenderCommandEncoder> rec =
+        this->main_command_buffer.get_active_render_command_encoder();
+    BLI_assert(rec);
+    if (rec == nil) {
+      MTL_LOG_ERROR("ensure_render_pipeline_state called while render pass is not active.\n");
+      return false;
+    }
+
+    /* Bind Render Pipeline State. */
+    BLI_assert(pipeline_state_instance->pso);
+    if (rps.bound_pso != pipeline_state_instance->pso) {
+      [rec setRenderPipelineState:pipeline_state_instance->pso];
+      rps.bound_pso = pipeline_state_instance->pso;
+    }
+
+    /** Ensure resource bindings. */
+    /* Texture Bindings. */
+    /* We will iterate through all texture bindings on the context and determine if any of the
+     * active slots match those in our shader interface. If so, textures will be bound. */
+    if (shader_interface->get_total_textures() > 0) {
+      this->ensure_texture_bindings(rec, shader_interface, pipeline_state_instance);
+    }
+
+    /* Transform feedback buffer binding. */
+    /* TODO(Metal): Include this code once MTLVertBuf is merged. We bind the vertex buffer to
+     * which transform feedback data will be written. */
+    // GPUVertBuf *tf_vbo =
+    //     this->pipeline_state.active_shader->get_transform_feedback_active_buffer();
+    // if (tf_vbo != nullptr && pipeline_state_instance->transform_feedback_buffer_index >= 0) {
+
+    //   /* Ensure primitive type is either GPU_LINES, GPU_TRIANGLES or GPU_POINT */
+    //   BLI_assert(mtl_prim_type == MTLPrimitiveTypeLine ||
+    //              mtl_prim_type == MTLPrimitiveTypeTriangle ||
+    //              mtl_prim_type == MTLPrimitiveTypePoint);
+
+    //   /* Fetch active transform feedback buffer from vertbuf */
+    //   MTLVertBuf *tf_vbo_mtl = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(tf_vbo));
+    //   int tf_buffer_offset = 0;
+    //   id<MTLBuffer> tf_buffer_mtl = tf_vbo_mtl->get_metal_buffer(&tf_buffer_offset);
+
+    //   if (tf_buffer_mtl != nil && tf_buffer_offset >= 0) {
+    //     [rec setVertexBuffer:tf_buffer_mtl
+    //                   offset:tf_buffer_offset
+    //                  atIndex:pipeline_state_instance->transform_feedback_buffer_index];
+    //     printf("Successfully bound VBO: %p for transform feedback (MTL Buffer: %p)\n",
+    //            tf_vbo_mtl,
+    //            tf_buffer_mtl);
+    //   }
+    // }
+
+    /* Matrix Bindings. */
+    /* This is now called upon shader bind. We may need to re-evaluate this though,
+     * as it was done here to ensure uniform changes between draws were tracked.
+     * NOTE(Metal): We may be able to remove this. */
+    GPU_matrix_bind(reinterpret_cast<struct GPUShader *>(
+        static_cast<Shader *>(this->pipeline_state.active_shader)));
+
+    /* Bind uniforms. */
+    this->ensure_uniform_buffer_bindings(rec, shader_interface, pipeline_state_instance);
+
+    /* Bind Null attribute buffer, if needed. */
+    if (pipeline_state_instance->null_attribute_buffer_index >= 0) {
+      if (G.debug & G_DEBUG_GPU) {
+        MTL_LOG_INFO("Binding null attribute buffer at index: %d\n",
+                     pipeline_state_instance->null_attribute_buffer_index);
+      }
+      rps.bind_vertex_buffer(this->get_null_attribute_buffer(),
+                             0,
+                             pipeline_state_instance->null_attribute_buffer_index);
+    }
+
+    /** Dynamic Per-draw Render State on RenderCommandEncoder. */
+    /* State: Viewport. */
+    if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_VIEWPORT_FLAG) {
+      MTLViewport viewport;
+      viewport.originX = (double)this->pipeline_state.viewport_offset_x;
+      viewport.originY = (double)this->pipeline_state.viewport_offset_y;
+      viewport.width = (double)this->pipeline_state.viewport_width;
+      viewport.height = (double)this->pipeline_state.viewport_height;
+      viewport.znear = this->pipeline_state.depth_stencil_state.depth_range_near;
+      viewport.zfar = this->pipeline_state.depth_stencil_state.depth_range_far;
+      [rec setViewport:viewport];
+
+      this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+                                          ~MTL_PIPELINE_STATE_VIEWPORT_FLAG);
+    }
+
+    /* State: Scissor. */
+    if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_SCISSOR_FLAG) {
+
+      /* Get FrameBuffer associated with active RenderCommandEncoder. */
+      MTLFrameBuffer *render_fb = this->main_command_buffer.get_active_framebuffer();
+
+      MTLScissorRect scissor;
+      if (this->pipeline_state.scissor_enabled) {
+        scissor.x = this->pipeline_state.scissor_x;
+        scissor.y = this->pipeline_state.scissor_y;
+        scissor.width = this->pipeline_state.scissor_width;
+        scissor.height = this->pipeline_state.scissor_height;
+
+        /* Some scissor assignments exceed the bounds of the viewport due to implicitly added
+         * padding to the width/height - Clamp width/height. */
+        BLI_assert(scissor.x >= 0 && scissor.x < render_fb->get_width());
+        BLI_assert(scissor.y >= 0 && scissor.y < render_fb->get_height());
+        scissor.width = min_ii(scissor.width, render_fb->get_width() - scissor.x);
+        scissor.height = min_ii(scissor.height, render_fb->get_height() - scissor.y);
+        BLI_assert(scissor.width > 0 && (scissor.x + scissor.width <= render_fb->get_width()));
+        BLI_assert(scissor.height > 0 && (scissor.height <= render_fb->get_height()));
+      }
+      else {
+        /* Scissor is disabled, reset to default size as scissor state may have been previously
+         * assigned on this encoder. */
+        scissor.x = 0;
+        scissor.y = 0;
+        scissor.width = render_fb->get_width();
+        scissor.height = render_fb->get_height();
+      }
+
+      /* Scissor state can still be flagged as changed if it is toggled on and off, without
+       * parameters changing between draws. */
+      if (memcmp(&scissor, &rps.last_scissor_rect, sizeof(MTLScissorRect))) {
+        [rec setScissorRect:scissor];
+        rps.last_scissor_rect = scissor;
+      }
+      this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+                                          ~MTL_PIPELINE_STATE_SCISSOR_FLAG);
+    }
+
+    /* State: Face winding. */
+    if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_FRONT_FACING_FLAG) {
+      /* We need to invert the face winding in Metal, to account for the inverted-Y coordinate
+       * system. */
+      MTLWinding winding = (this->pipeline_state.front_face == GPU_CLOCKWISE) ?
+                               MTLWindingClockwise :
+                               MTLWindingCounterClockwise;
+      [rec setFrontFacingWinding:winding];
+      this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+                                          ~MTL_PIPELINE_STATE_FRONT_FACING_FLAG);
+    }
+
+    /* State: cull-mode. */
+    if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_CULLMODE_FLAG) {
+
+      MTLCullMode mode = MTLCullModeNone;
+      if (this->pipeline_state.culling_enabled) {
+        switch (this->pipeline_state.cull_mode) {
+          case GPU_CULL_NONE:
+            mode = MTLCullModeNone;
+            break;
+          case GPU_CULL_FRONT:
+            mode = MTLCullModeFront;
+            break;
+          case GPU_CULL_BACK:
+            mode = MTLCullModeBack;
+            break;
+          default:
+            BLI_assert_unreachable();
+            break;
+        }
+      }
+      [rec setCullMode:mode];
+      this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+                                          ~MTL_PIPELINE_STATE_CULLMODE_FLAG);
+    }
+
+    /* Pipeline state is now good. */
+    result = true;
+  }
+  return result;
+}
+
+/* Bind uniform buffers to an active render command encoder using the rendering state of the
+ * current context (active shader, bound UBOs). */
+bool MTLContext::ensure_uniform_buffer_bindings(
+    id<MTLRenderCommandEncoder> rec,
+    const MTLShaderInterface *shader_interface,
+    const MTLRenderPipelineStateInstance *pipeline_state_instance)
+{
+  /* Fetch Render Pass state. */
+  MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  /* Shader-owned push constant block for uniforms. */
+  bool active_shader_changed = (rps.last_bound_shader_state.shader_ !=
+                                    this->pipeline_state.active_shader ||
+                                rps.last_bound_shader_state.shader_ == nullptr ||
+                                rps.last_bound_shader_state.pso_index_ !=
+                                    pipeline_state_instance->shader_pso_index);
+
+  const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
+  if (push_constant_block.size > 0) {
+
+    /* Fetch uniform buffer base binding index from pipeline_state_instance -- the buffer index
+     * will be offset by the number of bound VBOs. */
+    uint32_t block_size = push_constant_block.size;
+    uint32_t buffer_index = pipeline_state_instance->base_uniform_buffer_index +
+                            push_constant_block.buffer_index;
+
+    /* Only need to rebind block if push constants have been modified -- or if no data is bound
+     * for the current RenderCommandEncoder. */
+    if (this->pipeline_state.active_shader->get_push_constant_is_dirty() ||
+        active_shader_changed || !rps.cached_vertex_buffer_bindings[buffer_index].is_bytes ||
+        !rps.cached_fragment_buffer_bindings[buffer_index].is_bytes || true) {
+
+      /* Bind push constant data. */
+      BLI_assert(this->pipeline_state.active_shader->get_push_constant_data() != nullptr);
+      rps.bind_vertex_bytes(
+          this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index);
+      rps.bind_fragment_bytes(
+          this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index);
+
+      /* Only need to rebind block if it has been modified. */
+      this->pipeline_state.active_shader->push_constant_bindstate_mark_dirty(false);
+    }
+  }
+  rps.last_bound_shader_state.set(this->pipeline_state.active_shader,
+                                  pipeline_state_instance->shader_pso_index);
+
+  /* Bind Global GPUUniformBuffers. */
+  /* Iterate through expected UBOs in the shader interface, and check if the globally bound ones
+   * match. This is used to support the gpu_uniformbuffer module, where the uniform data is
+   * global, and not owned by the shader instance. */
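/* For orientation: the "global" UBOs matched below are created and bound by the high level
 * through Blender's gpu_uniformbuffer module. A hedged sketch of that caller-side flow, which
 * populates pipeline_state.ubo_bindings (slot value illustrative):
 *
 *   GPUUniformBuf *ubo = GPU_uniformbuf_create_ex(sizeof(data), &data, "my_ubo");
 *   GPU_uniformbuf_bind(ubo, slot);  -- records the binding on the context
 *   ...draw...                       -- the loop below resolves it against the interface
 *   GPU_uniformbuf_unbind(ubo);
 */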
+  for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) {
+    const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index);
+
+    if (ubo.buffer_index >= 0) {
+
+      /* Uniform Buffer index offset by 1 as the first shader buffer binding slot is reserved
+       * for the uniform PushConstantBlock. */
+      const uint32_t buffer_index = ubo.buffer_index + 1;
+      int ubo_offset = 0;
+      id<MTLBuffer> ubo_buffer = nil;
+      int ubo_size = 0;
+
+      bool bind_dummy_buffer = false;
+      if (this->pipeline_state.ubo_bindings[ubo_index].bound) {
+
+        /* Fetch UBO global-binding properties from slot. */
+        ubo_offset = 0;
+        ubo_buffer = this->pipeline_state.ubo_bindings[ubo_index].ubo->get_metal_buffer(
+            &ubo_offset);
+        ubo_size = this->pipeline_state.ubo_bindings[ubo_index].ubo->get_size();
+
+        /* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid
+         * allocating zero buffers. */
+        if (ubo_buffer == nil) {
+          bind_dummy_buffer = true;
+        }
+        else {
+          BLI_assert(ubo_buffer != nil);
+          BLI_assert(ubo_size > 0);
+
+          if (pipeline_state_instance->reflection_data_available) {
+            /* NOTE: While the vertex and fragment stages have different UBOs, the indices in
+             * each case will be the same for the same UBO.
+             * We also determine the expected size and then ensure a buffer of the correct size
+             * exists in one of the vertex/fragment shader binding tables. This path is used
+             * to verify that the size of the bound UBO matches what is expected in the shader. */
+            uint32_t expected_size =
+                (buffer_index <
+                 pipeline_state_instance->buffer_bindings_reflection_data_vert.size()) ?
+                    pipeline_state_instance->buffer_bindings_reflection_data_vert[buffer_index]
+                        .size :
+                    0;
+            if (expected_size == 0) {
+              expected_size =
+                  (buffer_index <
+                   pipeline_state_instance->buffer_bindings_reflection_data_frag.size()) ?
+                      pipeline_state_instance->buffer_bindings_reflection_data_frag[buffer_index]
+                          .size :
+                      0;
+            }
+            BLI_assert_msg(
+                expected_size > 0,
+                "Shader interface expects UBO, but shader reflection data reports that it "
+                "is not present");
+
+            /* If the UBO size is smaller than the size expected by the shader, we need to bind
+             * the dummy buffer, which will be big enough, to avoid an OOB error. */
+            if (ubo_size < expected_size) {
+              MTL_LOG_INFO(
+                  "[Error][UBO] UBO (UBO Name: %s) bound at index: %d with size %d (Expected "
+                  "size %d) (Shader Name: %s) is too small -- binding NULL buffer. This is "
+                  "likely an over-binding, which is not used, but we need this to avoid "
+                  "validation issues\n",
+                  shader_interface->get_name_at_offset(ubo.name_offset),
+                  buffer_index,
+                  ubo_size,
+                  expected_size,
+                  shader_interface->get_name());
+              bind_dummy_buffer = true;
+            }
+          }
+        }
+      }
+      else {
+        MTL_LOG_INFO(
+            "[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- "
+            "but nothing was bound -- binding dummy buffer\n",
+            shader_interface->get_name(),
+            shader_interface->get_name_at_offset(ubo.name_offset),
+            buffer_index);
+        bind_dummy_buffer = true;
+      }
+
+      if (bind_dummy_buffer) {
+        /* Perform Dummy binding. */
+        ubo_offset = 0;
+        ubo_buffer = this->get_null_buffer();
+        ubo_size = [ubo_buffer length];
+      }
+
+      if (ubo_buffer != nil) {
+
+        uint32_t buffer_bind_index = pipeline_state_instance->base_uniform_buffer_index +
+                                     buffer_index;
+
+        /* Bind Vertex UBO. */
+        if (bool(ubo.stage_mask & ShaderStage::VERTEX)) {
+          BLI_assert(buffer_bind_index >= 0 &&
+                     buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+          rps.bind_vertex_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
+        }
+
+        /* Bind Fragment UBO. */
+        if (bool(ubo.stage_mask & ShaderStage::FRAGMENT)) {
+          BLI_assert(buffer_bind_index >= 0 &&
+                     buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+          rps.bind_fragment_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
+        }
+      }
+      else {
+        MTL_LOG_WARNING(
+            "[UBO] Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer "
+            "is NULL!\n",
+            shader_interface->get_name(),
+            shader_interface->get_name_at_offset(ubo.name_offset),
+            buffer_index);
+      }
+    }
+  }
+  return true;
+}
+
+/* Ensure texture bindings are correct and up to date for the current draw call. */
+void MTLContext::ensure_texture_bindings(
+    id<MTLRenderCommandEncoder> rec,
+    MTLShaderInterface *shader_interface,
+    const MTLRenderPipelineStateInstance *pipeline_state_instance)
+{
+  BLI_assert(shader_interface != nil);
+  BLI_assert(rec != nil);
+
+  /* Fetch Render Pass state. */
+  MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  @autoreleasepool {
+    int vertex_arg_buffer_bind_index = -1;
+    int fragment_arg_buffer_bind_index = -1;
+
+    /* Argument buffers are used for samplers, when the limit of 16 is exceeded. */
+    bool use_argument_buffer_for_samplers = shader_interface->get_use_argument_buffer_for_samplers(
+        &vertex_arg_buffer_bind_index, &fragment_arg_buffer_bind_index);
+
+    /* Loop through expected textures in shader interface and resolve bindings with currently
+     * bound textures. */
+    for (const uint t : IndexRange(shader_interface->get_max_texture_index() + 1)) {
+      /* Ensure the bound texture is compatible with the shader interface. If the
+       * shader does not expect a texture to be bound for the current slot, we skip
+       * binding.
+       * NOTE: Global texture bindings may be left over from prior draw calls. */
+      const MTLShaderTexture &shader_texture_info = shader_interface->get_texture(t);
+      if (!shader_texture_info.used) {
+        /* Skip unused binding points if explicit indices are specified. */
+        continue;
+      }
+
+      int slot = shader_texture_info.slot_index;
+      if (slot >= 0 && slot < GPU_max_textures()) {
+        bool bind_dummy_texture = true;
+        if (this->pipeline_state.texture_bindings[slot].used) {
+          gpu::MTLTexture *bound_texture =
+              this->pipeline_state.texture_bindings[slot].texture_resource;
+          MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot];
+          BLI_assert(bound_texture);
+          BLI_assert(bound_sampler.used);
+
+          if (shader_texture_info.type == bound_texture->type_) {
+            /* Bind texture and sampler if the bound texture matches the type expected by the
+             * shader. */
+            id<MTLTexture> tex = bound_texture->get_metal_handle();
+
+            if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) {
+              rps.bind_vertex_texture(tex, slot);
+              rps.bind_vertex_sampler(bound_sampler, use_argument_buffer_for_samplers, slot);
+            }
+
+            if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) {
+              rps.bind_fragment_texture(tex, slot);
+              rps.bind_fragment_sampler(bound_sampler, use_argument_buffer_for_samplers, slot);
+            }
+
+            /* Texture state resolved, no need to bind dummy texture. */
+            bind_dummy_texture = false;
+          }
+          else {
+            /* Texture type for bound texture (e.g. Texture2DArray) does not match what was
+             * expected in the shader interface.
This is a problem and we will need to bind + * a dummy texture to ensure correct API usage. */ + MTL_LOG_WARNING( + "(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong " + "texture target type. (Expecting type %d, actual type %d) (binding " + "name:'%s')(texture name:'%s')\n", + shader_interface->get_name(), + bound_texture, + slot, + shader_texture_info.type, + bound_texture->type_, + shader_interface->get_name_at_offset(shader_texture_info.name_offset), + bound_texture->get_name()); + } + } + else { + MTL_LOG_WARNING( + "Shader '%s' expected texture to be bound to slot %d -- No texture was " + "bound. (name:'%s')\n", + shader_interface->get_name(), + slot, + shader_interface->get_name_at_offset(shader_texture_info.name_offset)); + } + + /* Bind Dummy texture -- will temporarily resolve validation issues while incorrect formats + * are provided -- as certain configurations may not need any binding. These issues should + * be fixed in the high-level, if problems crop up. */ + if (bind_dummy_texture) { + if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) { + rps.bind_vertex_texture( + get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot); + + /* Bind default sampler state. */ + MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE}; + rps.bind_vertex_sampler(default_binding, use_argument_buffer_for_samplers, slot); + } + if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) { + rps.bind_fragment_texture( + get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot); + + /* Bind default sampler state. */ + MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE}; + rps.bind_fragment_sampler(default_binding, use_argument_buffer_for_samplers, slot); + } + } + } + else { + MTL_LOG_WARNING( + "Shader %p expected texture to be bound to slot %d -- Slot exceeds the " + "hardware/API limit of '%d'. (name:'%s')\n", + this->pipeline_state.active_shader, + slot, + GPU_max_textures(), + shader_interface->get_name_at_offset(shader_texture_info.name_offset)); + } + } + + /* Construct and Bind argument buffer. + * NOTE(Metal): Samplers use an argument buffer when the limit of 16 samplers is exceeded. */ + if (use_argument_buffer_for_samplers) { +#ifndef NDEBUG + /* Debug check to validate each expected texture in the shader interface has a valid + * sampler object bound to the context. We will need all of these to be valid + * when constructing the sampler argument buffer. */ + for (const uint i : IndexRange(shader_interface->get_max_texture_index() + 1)) { + const MTLShaderTexture &texture = shader_interface->get_texture(i); + if (texture.used) { + BLI_assert(this->samplers_.mtl_sampler[i] != nil); + } + } +#endif + + /* Check to ensure the buffer binding index for the argument buffer has been assigned. + * This PSO property will be set if we expect to use argument buffers, and the shader + * uses any amount of textures. */ + BLI_assert(vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0); + if (vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0) { + /* Offset binding index to be relative to the start of static uniform buffer binding slots. + * The first N slots, prior to `pipeline_state_instance->base_uniform_buffer_index` are + * used by vertex and index buffer bindings, and the number of buffers present will vary + * between PSOs. 
*/
+        int arg_buffer_idx = (pipeline_state_instance->base_uniform_buffer_index +
+                              vertex_arg_buffer_bind_index);
+        assert(arg_buffer_idx < 32);
+        id<MTLArgumentEncoder> argument_encoder = shader_interface->find_argument_encoder(
+            arg_buffer_idx);
+        if (argument_encoder == nil) {
+          argument_encoder = [pipeline_state_instance->vert
+              newArgumentEncoderWithBufferIndex:arg_buffer_idx];
+          shader_interface->insert_argument_encoder(arg_buffer_idx, argument_encoder);
+        }
+
+        /* Generate or Fetch argument buffer sampler configuration.
+         * NOTE(Metal): we need to base sampler counts off of the maximal texture
+         * index. This is not the most optimal, but in practice, not a use-case
+         * when argument buffers are required.
+         * This is because with explicit texture indices, the binding indices
+         * should match across draws, to allow the high-level to optimize bind-points. */
+        gpu::MTLBuffer *encoder_buffer = nullptr;
+        this->samplers_.num_samplers = shader_interface->get_max_texture_index() + 1;
+
+        gpu::MTLBuffer **cached_smp_buffer_search = this->cached_sampler_buffers_.lookup_ptr(
+            this->samplers_);
+        if (cached_smp_buffer_search != nullptr) {
+          encoder_buffer = *cached_smp_buffer_search;
+        }
+        else {
+          /* Populate argument buffer with current global sampler bindings. */
+          int size = [argument_encoder encodedLength];
+          int alignment = max_uu([argument_encoder alignment], 256);
+          int size_align_delta = (size % alignment);
+          int aligned_alloc_size = ((alignment > 1) && (size_align_delta > 0)) ?
+                                       size + (alignment - (size % alignment)) :
+                                       size;
+
+          /* Allocate buffer to store encoded sampler arguments. */
+          encoder_buffer = MTLContext::get_global_memory_manager().allocate(aligned_alloc_size,
+                                                                            true);
+          BLI_assert(encoder_buffer);
+          BLI_assert(encoder_buffer->get_metal_buffer());
+          [argument_encoder setArgumentBuffer:encoder_buffer->get_metal_buffer() offset:0];
+          [argument_encoder
+              setSamplerStates:this->samplers_.mtl_sampler
+                     withRange:NSMakeRange(0, shader_interface->get_max_texture_index() + 1)];
+          encoder_buffer->flush();
+
+          /* Insert into cache. */
+          this->cached_sampler_buffers_.add_new(this->samplers_, encoder_buffer);
+        }
+
+        BLI_assert(encoder_buffer != nullptr);
+        int vert_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
+                                 vertex_arg_buffer_bind_index);
+        rps.bind_vertex_buffer(encoder_buffer->get_metal_buffer(), 0, vert_buffer_index);
+
+        /* The fragment shader shares its argument buffer binding with the vertex shader, so no
+         * need to re-encode. We can use the same argument buffer. */
+        if (fragment_arg_buffer_bind_index >= 0) {
+          BLI_assert(fragment_arg_buffer_bind_index);
+          int frag_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
+                                   fragment_arg_buffer_bind_index);
+          rps.bind_fragment_buffer(encoder_buffer->get_metal_buffer(), 0, frag_buffer_index);
+        }
+      }
+    }
+  }
+}
+
+/* Encode latest depth-stencil state. */
+void MTLContext::ensure_depth_stencil_state(MTLPrimitiveType prim_type)
+{
+  /* Check if we need to update state. */
+  if (!(this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG)) {
+    return;
+  }
+
+  /* Fetch render command encoder. */
+  id<MTLRenderCommandEncoder> rec = this->main_command_buffer.get_active_render_command_encoder();
+  BLI_assert(rec);
+
+  /* Fetch Render Pass state. */
+  MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  /** Prepare Depth-stencil state based on current global pipeline state.
*/ + MTLFrameBuffer *fb = this->get_current_framebuffer(); + bool hasDepthTarget = fb->has_depth_attachment(); + bool hasStencilTarget = fb->has_stencil_attachment(); + + if (hasDepthTarget || hasStencilTarget) { + /* Update FrameBuffer State. */ + this->pipeline_state.depth_stencil_state.has_depth_target = hasDepthTarget; + this->pipeline_state.depth_stencil_state.has_stencil_target = hasStencilTarget; + + /* Check if current MTLContextDepthStencilState maps to an existing state object in + * the Depth-stencil state cache. */ + id<MTLDepthStencilState> ds_state = nil; + id<MTLDepthStencilState> *depth_stencil_state_lookup = + this->depth_stencil_state_cache.lookup_ptr(this->pipeline_state.depth_stencil_state); + + /* If not, populate DepthStencil state descriptor. */ + if (depth_stencil_state_lookup == nullptr) { + + MTLDepthStencilDescriptor *ds_state_desc = [[[MTLDepthStencilDescriptor alloc] init] + autorelease]; + + if (hasDepthTarget) { + ds_state_desc.depthWriteEnabled = + this->pipeline_state.depth_stencil_state.depth_write_enable; + ds_state_desc.depthCompareFunction = + this->pipeline_state.depth_stencil_state.depth_test_enabled ? + this->pipeline_state.depth_stencil_state.depth_function : + MTLCompareFunctionAlways; + } + + if (hasStencilTarget) { + ds_state_desc.backFaceStencil.readMask = + this->pipeline_state.depth_stencil_state.stencil_read_mask; + ds_state_desc.backFaceStencil.writeMask = + this->pipeline_state.depth_stencil_state.stencil_write_mask; + ds_state_desc.backFaceStencil.stencilFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_stencil_fail; + ds_state_desc.backFaceStencil.depthFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_depth_fail; + ds_state_desc.backFaceStencil.depthStencilPassOperation = + this->pipeline_state.depth_stencil_state.stencil_op_back_depthstencil_pass; + ds_state_desc.backFaceStencil.stencilCompareFunction = + (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ? + this->pipeline_state.depth_stencil_state.stencil_func : + MTLCompareFunctionAlways; + + ds_state_desc.frontFaceStencil.readMask = + this->pipeline_state.depth_stencil_state.stencil_read_mask; + ds_state_desc.frontFaceStencil.writeMask = + this->pipeline_state.depth_stencil_state.stencil_write_mask; + ds_state_desc.frontFaceStencil.stencilFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_stencil_fail; + ds_state_desc.frontFaceStencil.depthFailureOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_depth_fail; + ds_state_desc.frontFaceStencil.depthStencilPassOperation = + this->pipeline_state.depth_stencil_state.stencil_op_front_depthstencil_pass; + ds_state_desc.frontFaceStencil.stencilCompareFunction = + (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ? + this->pipeline_state.depth_stencil_state.stencil_func : + MTLCompareFunctionAlways; + } + + /* Bake new DS state. */ + ds_state = [this->device newDepthStencilStateWithDescriptor:ds_state_desc]; + + /* Store state in cache. */ + BLI_assert(ds_state != nil); + this->depth_stencil_state_cache.add_new(this->pipeline_state.depth_stencil_state, ds_state); + } + else { + ds_state = *depth_stencil_state_lookup; + BLI_assert(ds_state != nil); + } + + /* Bind Depth Stencil State to render command encoder. 
*/
+  BLI_assert(ds_state != nil);
+  if (ds_state != nil) {
+    if (rps.bound_ds_state != ds_state) {
+      [rec setDepthStencilState:ds_state];
+      rps.bound_ds_state = ds_state;
+    }
+  }
+
+  /* Apply dynamic depth-stencil state on encoder. */
+  if (hasStencilTarget) {
+    uint32_t stencil_ref_value =
+        (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
+            this->pipeline_state.depth_stencil_state.stencil_ref :
+            0;
+    if (stencil_ref_value != rps.last_used_stencil_ref_value) {
+      [rec setStencilReferenceValue:stencil_ref_value];
+      rps.last_used_stencil_ref_value = stencil_ref_value;
+    }
+  }
+
+  if (hasDepthTarget) {
+    bool doBias = false;
+    switch (prim_type) {
+      case MTLPrimitiveTypeTriangle:
+      case MTLPrimitiveTypeTriangleStrip:
+        doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_tris;
+        break;
+      case MTLPrimitiveTypeLine:
+      case MTLPrimitiveTypeLineStrip:
+        doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_lines;
+        break;
+      case MTLPrimitiveTypePoint:
+        doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_points;
+        break;
+    }
+    [rec setDepthBias:(doBias) ? this->pipeline_state.depth_stencil_state.depth_bias : 0
+           slopeScale:(doBias) ? this->pipeline_state.depth_stencil_state.depth_slope_scale : 0
+                clamp:0];
+  }
+  }
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
 /** \name Visibility buffer control for MTLQueryPool.
  * \{ */
@@ -605,4 +1625,148 @@ id<MTLSamplerState> MTLContext::get_default_sampler_state()
 
 /** \} */
 
+/* -------------------------------------------------------------------- */
+/** \name Swap-chain management and Metal presentation.
+ * \{ */
+
+void present(MTLRenderPassDescriptor *blit_descriptor,
+             id<MTLRenderPipelineState> blit_pso,
+             id<MTLTexture> swapchain_texture,
+             id<CAMetalDrawable> drawable)
+{
+
+  MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
+  BLI_assert(ctx);
+
+  /* Flush any outstanding work. */
+  ctx->flush();
+
+  /* Always pace CPU to maximum of 3 drawables in flight.
+   * nextDrawable may have more in flight if backing swapchain
+   * textures are re-allocated, such as during resize events.
+   *
+   * Determine frames in flight based on current latency. If
+   * we are in a high-latency situation, limit frames in flight
+   * to increase app responsiveness and keep GPU execution under control.
+   * If latency improves, increase frames in flight to improve overall
+   * performance. */
+  int perf_max_drawables = MTL_MAX_DRAWABLES;
+  if (MTLContext::avg_drawable_latency_us > 185000) {
+    perf_max_drawables = 1;
+  }
+  else if (MTLContext::avg_drawable_latency_us > 85000) {
+    perf_max_drawables = 2;
+  }
+
+  while (MTLContext::max_drawables_in_flight > min_ii(perf_max_drawables, MTL_MAX_DRAWABLES)) {
+    PIL_sleep_ms(2);
+  }
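/* Worked example of the thresholds above: a rolling average of 90 ms (90000 us) allows two
 * drawables in flight; above 185 ms the swap-chain throttles to one; below 85 ms the full
 * MTL_MAX_DRAWABLES budget (3) applies. The wait loop above sleeps in 2 ms slices until a
 * command buffer completion handler decrements max_drawables_in_flight. */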
+
+  /* Present is submitted in its own command buffer to ensure the drawable reference is
+   * released as early as possible. This command buffer is separate as it does not utilize the
+   * global state for rendering as the main context does. */
+  id<MTLCommandBuffer> cmdbuf = [ctx->queue commandBuffer];
+  MTLCommandBufferManager::num_active_cmd_bufs++;
+
+  if (MTLCommandBufferManager::sync_event != nil) {
+    /* Ensure command buffer ordering. */
+    [cmdbuf encodeWaitForEvent:MTLCommandBufferManager::sync_event
+                         value:MTLCommandBufferManager::event_signal_val];
+  }
+
+  /* Do present call and final blit to MTLDrawable. */
+  id<MTLRenderCommandEncoder> enc = [cmdbuf renderCommandEncoderWithDescriptor:blit_descriptor];
+  [enc setRenderPipelineState:blit_pso];
+  [enc setFragmentTexture:swapchain_texture atIndex:0];
+  [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3];
+  [enc endEncoding];
+
+  /* Present drawable. */
+  BLI_assert(drawable);
+  [cmdbuf presentDrawable:drawable];
+
+  /* Ensure freed buffers have usage tracked against active CommandBuffer submissions. */
+  MTLSafeFreeList *cmd_free_buffer_list =
+      MTLContext::get_global_memory_manager().get_current_safe_list();
+  BLI_assert(cmd_free_buffer_list);
+
+  id<MTLCommandBuffer> cmd_buffer_ref = cmdbuf;
+  [cmd_buffer_ref retain];
+
+  /* Increment drawables in flight limiter. */
+  MTLContext::max_drawables_in_flight++;
+  std::chrono::time_point submission_time = std::chrono::high_resolution_clock::now();
+
+  /* Increment free pool reference and decrement upon command buffer completion. */
+  cmd_free_buffer_list->increment_reference();
+  [cmdbuf addCompletedHandler:^(id<MTLCommandBuffer> cb) {
+    /* Flag freed buffers associated with this CMD buffer as ready to be freed. */
+    cmd_free_buffer_list->decrement_reference();
+    [cmd_buffer_ref release];
+
+    /* Decrement count. */
+    MTLCommandBufferManager::num_active_cmd_bufs--;
+    MTL_LOG_INFO("[Metal] Active command buffers: %d\n",
+                 MTLCommandBufferManager::num_active_cmd_bufs);
+
+    /* Drawable count and latency management. */
+    MTLContext::max_drawables_in_flight--;
+    std::chrono::time_point completion_time = std::chrono::high_resolution_clock::now();
+    int64_t microseconds_per_frame = std::chrono::duration_cast<std::chrono::microseconds>(
+                                         completion_time - submission_time)
+                                         .count();
+    MTLContext::latency_resolve_average(microseconds_per_frame);
+
+    MTL_LOG_INFO("Frame Latency: %f ms  (Rolling avg: %f ms  Drawables: %d)\n",
+                 ((float)microseconds_per_frame) / 1000.0f,
+                 ((float)MTLContext::avg_drawable_latency_us) / 1000.0f,
+                 perf_max_drawables);
+  }];
+
+  if (MTLCommandBufferManager::sync_event == nil) {
+    MTLCommandBufferManager::sync_event = [ctx->device newEvent];
+    BLI_assert(MTLCommandBufferManager::sync_event);
+    [MTLCommandBufferManager::sync_event retain];
+  }
+  BLI_assert(MTLCommandBufferManager::sync_event != nil);
+
+  MTLCommandBufferManager::event_signal_val++;
+  [cmdbuf encodeSignalEvent:MTLCommandBufferManager::sync_event
+                      value:MTLCommandBufferManager::event_signal_val];
+
+  [cmdbuf commit];
+
+  /* When debugging, fetch advanced command buffer errors. */
+  if (G.debug & G_DEBUG_GPU) {
+    [cmdbuf waitUntilCompleted];
+    NSError *error = [cmdbuf error];
+    if (error != nil) {
+      NSLog(@"%@", error);
+      BLI_assert(false);
+
+      @autoreleasepool {
+        const char *stringAsChar = [[NSString stringWithFormat:@"%@", error] UTF8String];
+
+        std::ofstream outfile;
+        outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
+        outfile << stringAsChar;
+        outfile.close();
+      }
+    }
+    else {
+      @autoreleasepool {
+        NSString *str = @"Command buffer completed successfully!\n";
+        const char *stringAsChar = [str UTF8String];
+
+        std::ofstream outfile;
+        outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
+        outfile << stringAsChar;
+        outfile.close();
+      }
+    }
+  }
+}
+
+/** \} */
+
 }  // blender::gpu
diff --git a/source/blender/gpu/metal/mtl_drawlist.hh b/source/blender/gpu/metal/mtl_drawlist.hh
new file mode 100644
index 00000000000..47055f3d7f4
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_drawlist.hh
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Implementation of Multi Draw Indirect for the Metal backend.
+ * Metal has no host-side multi-draw-indirect, so recorded commands are encoded as
+ * individual draws on submission.
+ */
+
+#pragma once
+
+#include "BLI_sys_types.h"
+#include "GPU_batch.h"
+#include "MEM_guardedalloc.h"
+#include "gpu_drawlist_private.hh"
+
+#include "mtl_batch.hh"
+#include "mtl_context.hh"
+
+namespace blender::gpu {
+
+/**
+ * Emulation of Multi Draw Indirect for the Metal backend.
+ **/
+class MTLDrawList : public DrawList {
+
+ private:
+  /** Batch for which we are recording commands. */
+  MTLBatch *batch_;
+  /** Mapped memory bounds. */
+  void *data_;
+  /** Length of the mapped buffer (in bytes). */
+  size_t data_size_;
+  /** Current offset inside the mapped buffer (in bytes). */
+  size_t command_offset_;
+  /** Current number of commands recorded inside the mapped buffer. */
+  uint32_t command_len_;
+  /** Is UINT_MAX if not drawing indexed geometry. Also avoids dereferencing batch. */
+  uint32_t base_index_;
+  /** Also avoids dereferencing batch. */
+  uint32_t v_first_, v_count_;
+  /** Length of the whole buffer (in bytes). */
+  uint32_t buffer_size_;
+
+ public:
+  MTLDrawList(int length);
+  ~MTLDrawList();
+
+  void append(GPUBatch *batch, int i_first, int i_count) override;
+  void submit() override;
+
+ private:
+  void init();
+
+  MEM_CXX_CLASS_ALLOC_FUNCS("MTLDrawList");
+};
+
+}  // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_drawlist.mm b/source/blender/gpu/metal/mtl_drawlist.mm
new file mode 100644
index 00000000000..99194d2b72c
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_drawlist.mm
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Implementation of Multi Draw Indirect for the Metal backend.
+ * Metal has no host-side multi-draw-indirect, so recorded commands are encoded as
+ * individual draws on submission.
+ */
+
+#include "BLI_assert.h"
+
+#include "GPU_batch.h"
+#include "mtl_common.hh"
+#include "mtl_drawlist.hh"
+#include "mtl_primitive.hh"
+
+using namespace blender::gpu;
+
+namespace blender::gpu {
+
+/* Indirect draw call structure for reference. */
+/* MTLDrawPrimitivesIndirectArguments --
+ * https://developer.apple.com/documentation/metal/mtldrawprimitivesindirectarguments?language=objc
+ */
+/* struct MTLDrawPrimitivesIndirectArguments {
+ *   uint32_t vertexCount;
+ *   uint32_t instanceCount;
+ *   uint32_t vertexStart;
+ *   uint32_t baseInstance;
+ * }; */
+
+/* MTLDrawIndexedPrimitivesIndirectArguments --
+ * https://developer.apple.com/documentation/metal/mtldrawindexedprimitivesindirectarguments?language=objc
+ */
+/* struct MTLDrawIndexedPrimitivesIndirectArguments {
+ *   uint32_t indexCount;
+ *   uint32_t instanceCount;
+ *   uint32_t indexStart;
+ *   uint32_t baseVertex;
+ *   uint32_t baseInstance;
+ * }; */
+
+#define MDI_ENABLED (buffer_size_ != 0)
+#define MDI_DISABLED (buffer_size_ == 0)
+#define MDI_INDEXED (base_index_ != UINT_MAX)
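/* For reference, the argument structs above are laid out exactly as Metal's indirect-draw
 * APIs expect, so a buffer filled with them could be consumed GPU-side directly. A hedged
 * sketch (assumes the recorded commands were copied into an id<MTLBuffer> args):
 *
 *   for (uint32_t i = 0; i < command_len; i++) {
 *     [enc drawPrimitives:prim_type
 *          indirectBuffer:args
 *    indirectBufferOffset:i * sizeof(MTLDrawPrimitivesIndirectArguments)];
 *   }
 *
 * MTLDrawList::submit() below instead reads the same structs on the CPU and issues direct
 * draws, since host-side multi-draw-indirect is unavailable. */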
+
+MTLDrawList::MTLDrawList(int length)
+{
+  BLI_assert(length > 0);
+  batch_ = nullptr;
+  command_len_ = 0;
+  base_index_ = 0;
+  command_offset_ = 0;
+  data_size_ = 0;
+  buffer_size_ = sizeof(MTLDrawIndexedPrimitivesIndirectArguments) * length;
+  data_ = (void *)MEM_mallocN(buffer_size_, __func__);
+}
+
+MTLDrawList::~MTLDrawList()
+{
+  if (data_) {
+    MEM_freeN(data_);
+    data_ = nullptr;
+  }
+}
+
+void MTLDrawList::init()
+{
+  MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+  BLI_assert(ctx);
+  BLI_assert(MDI_ENABLED);
+  BLI_assert(data_ != nullptr);
+  UNUSED_VARS_NDEBUG(ctx);
+
+  batch_ = nullptr;
+  command_len_ = 0;
+  BLI_assert(data_);
+
+  command_offset_ = 0;
+}
+
+void MTLDrawList::append(GPUBatch *gpu_batch, int i_first, int i_count)
+{
+  /* Fallback when MultiDrawIndirect is not supported/enabled. */
+  MTLShader *shader = static_cast<MTLShader *>(unwrap(gpu_batch->shader));
+  bool requires_ssbo = (shader->get_uses_ssbo_vertex_fetch());
+  bool requires_emulation = mtl_needs_topology_emulation(gpu_batch->prim_type);
+  if (MDI_DISABLED || requires_ssbo || requires_emulation) {
+    GPU_batch_draw_advanced(gpu_batch, 0, 0, i_first, i_count);
+    return;
+  }
+
+  if (data_ == nullptr) {
+    this->init();
+  }
+  BLI_assert(data_);
+
+  MTLBatch *mtl_batch = static_cast<MTLBatch *>(gpu_batch);
+  BLI_assert(mtl_batch);
+  if (mtl_batch != batch_) {
+    /* Submit existing calls. */
+    this->submit();
+
+    /* Begin new batch. */
+    batch_ = mtl_batch;
+
+    /* Cached for faster access. */
+    MTLIndexBuf *el = batch_->elem_();
+    base_index_ = el ? el->index_base_ : UINT_MAX;
+    v_first_ = el ? el->index_start_ : 0;
+    v_count_ = el ? el->index_len_ : batch_->verts_(0)->vertex_len;
+  }
+
+  if (v_count_ == 0) {
+    /* Nothing to draw. */
+    return;
+  }
+
+  if (MDI_INDEXED) {
+    MTLDrawIndexedPrimitivesIndirectArguments *cmd =
+        reinterpret_cast<MTLDrawIndexedPrimitivesIndirectArguments *>((char *)data_ +
+                                                                      command_offset_);
+    cmd->indexStart = v_first_;
+    cmd->indexCount = v_count_;
+    cmd->instanceCount = i_count;
+    cmd->baseVertex = base_index_;
+    cmd->baseInstance = i_first;
+  }
+  else {
+    MTLDrawPrimitivesIndirectArguments *cmd =
+        reinterpret_cast<MTLDrawPrimitivesIndirectArguments *>((char *)data_ + command_offset_);
+    cmd->vertexStart = v_first_;
+    cmd->vertexCount = v_count_;
+    cmd->instanceCount = i_count;
+    cmd->baseInstance = i_first;
+  }
+
+  size_t command_size = MDI_INDEXED ? sizeof(MTLDrawIndexedPrimitivesIndirectArguments) :
+                                      sizeof(MTLDrawPrimitivesIndirectArguments);
+
+  command_offset_ += command_size;
+  command_len_++;
+
+  /* Check if we can fit at least one other command. */
+  if (command_offset_ + command_size > buffer_size_) {
+    this->submit();
+  }
+
+  return;
+}
+
+void MTLDrawList::submit()
+{
+  /* Metal does not support MDI from the host side, but we still benefit from only executing
+   * the batch bind a single time, rather than per-draw.
+   * NOTE(Metal): Consider using #MTLIndirectCommandBuffer to achieve similar behavior. */
+  if (command_len_ == 0) {
+    return;
+  }
+
+  /* Something's wrong if we get here without MDI support. */
+  BLI_assert(MDI_ENABLED);
+  BLI_assert(data_);
+
+  /* Host-side MDI currently unsupported on Metal. */
+  bool can_use_MDI = false;
+
+  /* Verify context. */
+  MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+  BLI_assert(ctx);
+
+  /* Execute indirect draw calls. */
+  MTLShader *shader = static_cast<MTLShader *>(unwrap(batch_->shader));
+  bool SSBO_MODE = (shader->get_uses_ssbo_vertex_fetch());
+  if (SSBO_MODE) {
+    can_use_MDI = false;
+    BLI_assert(false);
+    return;
+  }
+
+  /* Heuristic to determine whether using indirect drawing is more efficient. */
+  size_t command_size = MDI_INDEXED ? sizeof(MTLDrawIndexedPrimitivesIndirectArguments) :
+                                      sizeof(MTLDrawPrimitivesIndirectArguments);
+  const bool is_finishing_a_buffer = (command_offset_ + command_size > buffer_size_);
+  can_use_MDI = can_use_MDI && (is_finishing_a_buffer || command_len_ > 2);
+
+  /* Bind Batch to setup render pipeline state. */
+  id<MTLRenderCommandEncoder> rec = batch_->bind(0, 0, 0, 0);
+  if (!rec) {
+    BLI_assert_msg(false, "A RenderCommandEncoder should always be available!\n");
+    return;
+  }
+
+  /* Common properties. */
+  MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(batch_->prim_type);
+
+  /* Execute multi-draw indirect. */
+  if (can_use_MDI && false) {
+    /* Metal doesn't support MDI -- singular indirect draw calls are supported,
+     * but multi-draw is not.
+     * TODO(Metal): Consider using #IndirectCommandBuffers to provide similar
+     * behavior. */
+  }
+  else {
+
+    /* Execute draws manually. */
+    if (MDI_INDEXED) {
+      MTLDrawIndexedPrimitivesIndirectArguments *cmd =
+          (MTLDrawIndexedPrimitivesIndirectArguments *)data_;
+      MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(
+          reinterpret_cast<IndexBuf *>(batch_->elem));
+      BLI_assert(mtl_elem);
+      MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_);
+      uint32_t index_size = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 2 : 4;
+      uint32_t v_first_ofs = (mtl_elem->index_start_ * index_size);
+      uint32_t index_count = cmd->indexCount;
+
+      /* Fetch index buffer. May return an index buffer of a differing format,
+       * if index buffer optimization is used. In these cases, mtl_prim_type and
+       * index_count get updated with the new properties. */
+      GPUPrimType final_prim_type = batch_->prim_type;
+      id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count);
+      BLI_assert(index_buffer != nil);
+
+      /* Final primitive type. */
+      mtl_prim_type = gpu_prim_type_to_metal(final_prim_type);
+
+      if (index_buffer != nil) {
+
+        /* Set depth stencil state (requires knowledge of primitive type). */
*/ + ctx->ensure_depth_stencil_state(mtl_prim_type); + + for (int i = 0; i < command_len_; i++, cmd++) { + [rec drawIndexedPrimitives:mtl_prim_type + indexCount:index_count + indexType:index_type + indexBuffer:index_buffer + indexBufferOffset:v_first_ofs + instanceCount:cmd->instanceCount + baseVertex:cmd->baseVertex + baseInstance:cmd->baseInstance]; + ctx->main_command_buffer.register_draw_counters(cmd->indexCount * cmd->instanceCount); + } + } + else { + BLI_assert_msg(false, "Index buffer does not have backing Metal buffer"); + } + } + else { + MTLDrawPrimitivesIndirectArguments *cmd = (MTLDrawPrimitivesIndirectArguments *)data_; + + /* Verify if topology emulation is required. */ + if (mtl_needs_topology_emulation(batch_->prim_type)) { + BLI_assert_msg(false, "topology emulation cases should use fallback."); + } + else { + + /* Set depth stencil state (requires knowledge of primitive type). */ + ctx->ensure_depth_stencil_state(mtl_prim_type); + + for (int i = 0; i < command_len_; i++, cmd++) { + [rec drawPrimitives:mtl_prim_type + vertexStart:cmd->vertexStart + vertexCount:cmd->vertexCount + instanceCount:cmd->instanceCount + baseInstance:cmd->baseInstance]; + ctx->main_command_buffer.register_draw_counters(cmd->vertexCount * cmd->instanceCount); + } + } + } + } + + /* Unbind batch. */ + batch_->unbind(); + + /* Reset command offsets. */ + command_len_ = 0; + command_offset_ = 0; + + /* Avoid keeping reference to the batch. */ + batch_ = nullptr; +} + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_immediate.hh b/source/blender/gpu/metal/mtl_immediate.hh new file mode 100644 index 00000000000..8d852282ac8 --- /dev/null +++ b/source/blender/gpu/metal/mtl_immediate.hh @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * Mimics old style opengl immediate mode drawing. + */ + +#pragma once + +#include "MEM_guardedalloc.h" +#include "gpu_immediate_private.hh" + +#include <Cocoa/Cocoa.h> +#include <Metal/Metal.h> +#include <QuartzCore/QuartzCore.h> + +namespace blender::gpu { + +class MTLImmediate : public Immediate { + private: + MTLContext *context_ = nullptr; + MTLTemporaryBuffer current_allocation_; + MTLPrimitiveTopologyClass metal_primitive_mode_; + MTLPrimitiveType metal_primitive_type_; + bool has_begun_ = false; + + public: + MTLImmediate(MTLContext *ctx); + ~MTLImmediate(); + + uchar *begin() override; + void end() override; + bool imm_is_recording() + { + return has_begun_; + } +}; + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_immediate.mm b/source/blender/gpu/metal/mtl_immediate.mm new file mode 100644 index 00000000000..ee48bdd6ee1 --- /dev/null +++ b/source/blender/gpu/metal/mtl_immediate.mm @@ -0,0 +1,401 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * Mimics old style opengl immediate mode drawing. + */ + +#include "BKE_global.h" + +#include "GPU_vertex_format.h" +#include "gpu_context_private.hh" +#include "gpu_shader_private.hh" +#include "gpu_vertex_format_private.h" + +#include "mtl_context.hh" +#include "mtl_debug.hh" +#include "mtl_immediate.hh" +#include "mtl_primitive.hh" +#include "mtl_shader.hh" + +namespace blender::gpu { + +MTLImmediate::MTLImmediate(MTLContext *ctx) +{ + context_ = ctx; +} + +MTLImmediate::~MTLImmediate() +{ +} + +uchar *MTLImmediate::begin() +{ + BLI_assert(!has_begun_); + + /* Determine primitive type. 
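 *
 * [Editorial note -- illustrative usage, not part of this diff] `begin()` / `end()`
 * are driven by the generic immediate-mode API; a minimal caller, assuming `pos`
 * is an attribute id previously returned by GPU_vertformat_attr_add() for the
 * bound shader's format, looks like:
 *
 *   immBegin(GPU_PRIM_TRIS, 3);    // -> MTLImmediate::begin()
 *   immVertex3f(pos, 0.0f, 0.0f, 0.0f);
 *   immVertex3f(pos, 1.0f, 0.0f, 0.0f);
 *   immVertex3f(pos, 0.0f, 1.0f, 0.0f);
 *   immEnd();                      // -> MTLImmediate::end(), submits the draw
 *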
+   */
+  metal_primitive_type_ = gpu_prim_type_to_metal(this->prim_type);
+  metal_primitive_mode_ = mtl_prim_type_to_topology_class(metal_primitive_type_);
+  has_begun_ = true;
+
+  /* Allocate a range of data and return host-accessible pointer. */
+  const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_len);
+  current_allocation_ = context_->get_scratchbuffer_manager()
+                            .scratch_buffer_allocate_range_aligned(bytes_needed, 256);
+  [current_allocation_.metal_buffer retain];
+  return reinterpret_cast<uchar *>(current_allocation_.data);
+}
+
+void MTLImmediate::end()
+{
+  /* Ensure we are between an `imm::begin` / `imm::end` pair. */
+  BLI_assert(has_begun_);
+  BLI_assert(prim_type != GPU_PRIM_NONE);
+
+  /* Verify context is valid, vertex data is written and a valid shader is bound. */
+  if (context_ && this->vertex_idx > 0 && this->shader) {
+
+    MTLShader *active_mtl_shader = static_cast<MTLShader *>(unwrap(shader));
+
+    /* Skip draw if Metal shader is not valid. */
+    if (active_mtl_shader == nullptr || !active_mtl_shader->is_valid() ||
+        active_mtl_shader->get_interface() == nullptr) {
+
+      const char *ptr = (active_mtl_shader) ? active_mtl_shader->name_get() : nullptr;
+      MTL_LOG_WARNING(
+          "MTLImmediate::end -- cannot perform draw as active shader is NULL or invalid (likely "
+          "unimplemented) (shader %p '%s')\n",
+          active_mtl_shader,
+          ptr);
+      return;
+    }
+
+    /* Ensure we are inside a render pass and fetch active RenderCommandEncoder. */
+    id<MTLRenderCommandEncoder> rec = context_->ensure_begin_render_pass();
+    BLI_assert(rec != nil);
+
+    /* Fetch active render pipeline state. */
+    MTLRenderPassState &rps = context_->main_command_buffer.get_render_pass_state();
+
+    /* Bind Shader. */
+    GPU_shader_bind(this->shader);
+
+    /* Debug markers for frame-capture and detailed error messages. */
+    if (G.debug & G_DEBUG_GPU) {
+      [rec pushDebugGroup:[NSString
+                              stringWithFormat:@"immEnd(verts: %d, shader: %s)",
+                                               this->vertex_idx,
+                                               active_mtl_shader->get_interface()->get_name()]];
+      [rec insertDebugSignpost:[NSString stringWithFormat:@"immEnd(verts: %d, shader: %s)",
+                                                          this->vertex_idx,
+                                                          active_mtl_shader->get_interface()
+                                                              ->get_name()]];
+    }
+
+    /* Populate pipeline state vertex descriptor. */
+    MTLStateManager *state_manager = static_cast<MTLStateManager *>(
+        MTLContext::get()->state_manager);
+    MTLRenderPipelineStateDescriptor &desc = state_manager->get_pipeline_descriptor();
+    const MTLShaderInterface *interface = active_mtl_shader->get_interface();
+
+    /* Reset vertex descriptor to default state. */
+    desc.reset_vertex_descriptor();
+
+    desc.vertex_descriptor.num_attributes = interface->get_total_attributes();
+    desc.vertex_descriptor.num_vert_buffers = 1;
+
+    for (int i = 0; i < desc.vertex_descriptor.num_attributes; i++) {
+      desc.vertex_descriptor.attributes[i].format = MTLVertexFormatInvalid;
+    }
+    desc.vertex_descriptor.uses_ssbo_vertex_fetch =
+        active_mtl_shader->get_uses_ssbo_vertex_fetch();
+    desc.vertex_descriptor.num_ssbo_attributes = 0;
+
+    /* SSBO Vertex Fetch -- Verify attributes. */
+    if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+      active_mtl_shader->ssbo_vertex_fetch_bind_attributes_begin();
+
+      /* Disable indexed rendering in SSBO vertex fetch.
*/ + int uniform_ssbo_use_indexed = active_mtl_shader->uni_ssbo_uses_indexed_rendering; + BLI_assert_msg(uniform_ssbo_use_indexed != -1, + "Expected valid uniform location for ssbo_uses_indexed_rendering."); + int uses_indexed_rendering = 0; + active_mtl_shader->uniform_int(uniform_ssbo_use_indexed, 1, 1, &uses_indexed_rendering); + } + + /* Populate Vertex descriptor and verify attributes. + * TODO(Metal): Cache this vertex state based on Vertex format and shaders. */ + for (int i = 0; i < interface->get_total_attributes(); i++) { + + /* NOTE: Attribute in VERTEX FORMAT does not necessarily share the same array index as + * attributes in shader interface. */ + GPUVertAttr *attr = nullptr; + const MTLShaderInputAttribute &mtl_shader_attribute = interface->get_attribute(i); + + /* Scan through vertex_format attributes until one with a name matching the shader interface + * is found. */ + for (uint32_t a_idx = 0; a_idx < this->vertex_format.attr_len && attr == nullptr; a_idx++) { + GPUVertAttr *check_attribute = &this->vertex_format.attrs[a_idx]; + + /* Attributes can have multiple name aliases associated with them. */ + for (uint32_t n_idx = 0; n_idx < check_attribute->name_len; n_idx++) { + const char *name = GPU_vertformat_attr_name_get( + &this->vertex_format, check_attribute, n_idx); + + if (strcmp(name, interface->get_name_at_offset(mtl_shader_attribute.name_offset)) == 0) { + attr = check_attribute; + break; + } + } + } + + BLI_assert_msg(attr != nullptr, + "Could not find expected attribute in immediate mode vertex format."); + if (attr == nullptr) { + MTL_LOG_ERROR( + "MTLImmediate::end Could not find matching attribute '%s' from Shader Interface in " + "Vertex Format! - TODO: Bind Dummy attribute\n", + interface->get_name_at_offset(mtl_shader_attribute.name_offset)); + return; + } + + /* Determine whether implicit type conversion between input vertex format + * and shader interface vertex format is supported. */ + MTLVertexFormat convertedFormat; + bool can_use_implicit_conversion = mtl_convert_vertex_format( + mtl_shader_attribute.format, + (GPUVertCompType)attr->comp_type, + attr->comp_len, + (GPUVertFetchMode)attr->fetch_mode, + &convertedFormat); + + if (can_use_implicit_conversion) { + /* Metal API can implicitly convert some formats during vertex assembly: + * - Converting from a normalized short2 format to float2 + * - Type truncation e.g. Float4 to Float2. + * - Type expansion from Float3 to Float4. + * - Note: extra components are filled with the corresponding components of (0,0,0,1). + * (See + * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format) + */ + bool is_floating_point_format = (attr->comp_type == GPU_COMP_F32); + desc.vertex_descriptor.attributes[i].format = convertedFormat; + desc.vertex_descriptor.attributes[i].format_conversion_mode = + (is_floating_point_format) ? (GPUVertFetchMode)GPU_FETCH_FLOAT : + (GPUVertFetchMode)GPU_FETCH_INT; + BLI_assert(convertedFormat != MTLVertexFormatInvalid); + } + else { + /* Some conversions are NOT valid, e.g. Int4 to Float4 + * - In this case, we need to implement a conversion routine inside the shader. + * - This is handled using the format_conversion_mode flag + * - This flag is passed into the PSO as a function specialization, + * and will generate an appropriate conversion function when reading the vertex attribute + * value into local shader storage. + * (If no explicit conversion is needed, the function specialize to a pass-through). 
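 *
 * [Editorial note -- hypothetical example, not part of this diff] Assuming the
 * helper supports the pair, a normalized `ushort2` attribute feeding a `float2`
 * shader input can stay on the implicit path:
 *
 *   MTLVertexFormat fmt;
 *   bool ok = mtl_convert_vertex_format(MTLVertexFormatFloat2,  // Shader side.
 *                                       GPU_COMP_U16, 2,        // Buffer side.
 *                                       GPU_FETCH_INT_TO_FLOAT_UNIT,
 *                                       &fmt);  // e.g. UShort2Normalized.
 *
 * whereas an integer source feeding a float destination without a normalized
 * fetch mode has no implicit Metal equivalent and takes this explicit branch.
 *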
*/ + MTLVertexFormat converted_format; + bool can_convert = mtl_vertex_format_resize( + mtl_shader_attribute.format, attr->comp_len, &converted_format); + desc.vertex_descriptor.attributes[i].format = (can_convert) ? converted_format : + mtl_shader_attribute.format; + desc.vertex_descriptor.attributes[i].format_conversion_mode = (GPUVertFetchMode) + attr->fetch_mode; + BLI_assert(desc.vertex_descriptor.attributes[i].format != MTLVertexFormatInvalid); + } + /* Using attribute offset in vertex format, as this will be correct */ + desc.vertex_descriptor.attributes[i].offset = attr->offset; + desc.vertex_descriptor.attributes[i].buffer_index = mtl_shader_attribute.buffer_index; + + /* SSBO Vertex Fetch Attribute bind. */ + if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) { + BLI_assert_msg(mtl_shader_attribute.buffer_index == 0, + "All attributes should be in buffer index zero"); + MTLSSBOAttribute ssbo_attr( + mtl_shader_attribute.index, + mtl_shader_attribute.buffer_index, + attr->offset, + this->vertex_format.stride, + MTLShader::ssbo_vertex_type_to_attr_type(desc.vertex_descriptor.attributes[i].format), + false); + desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] = + ssbo_attr; + desc.vertex_descriptor.num_ssbo_attributes++; + active_mtl_shader->ssbo_vertex_fetch_bind_attribute(ssbo_attr); + } + } + + /* Buffer bindings for singular vertex buffer. */ + desc.vertex_descriptor.buffer_layouts[0].step_function = MTLVertexStepFunctionPerVertex; + desc.vertex_descriptor.buffer_layouts[0].step_rate = 1; + desc.vertex_descriptor.buffer_layouts[0].stride = this->vertex_format.stride; + BLI_assert(this->vertex_format.stride > 0); + + /* SSBO Vertex Fetch -- Verify Attributes. */ + if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) { + active_mtl_shader->ssbo_vertex_fetch_bind_attributes_end(rec); + + /* Set Status uniforms. */ + BLI_assert_msg(active_mtl_shader->uni_ssbo_input_prim_type_loc != -1, + "ssbo_input_prim_type uniform location invalid!"); + BLI_assert_msg(active_mtl_shader->uni_ssbo_input_vert_count_loc != -1, + "ssbo_input_vert_count uniform location invalid!"); + GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)), + active_mtl_shader->uni_ssbo_input_prim_type_loc, + 1, + 1, + (const int *)(&this->prim_type)); + GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)), + active_mtl_shader->uni_ssbo_input_vert_count_loc, + 1, + 1, + (const int *)(&this->vertex_idx)); + } + + MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type); + if (context_->ensure_render_pipeline_state(mtl_prim_type)) { + + /* Issue draw call. */ + BLI_assert(this->vertex_idx > 0); + + /* Metal API does not support triangle fan, so we can emulate this + * input data by generating an index buffer to re-map indices to + * a TriangleList. + * + * NOTE(Metal): Consider caching generated triangle fan index buffers. + * For immediate mode, generating these is currently very cheap, as we use + * fast scratch buffer allocations. Though we may benefit from caching of + * frequently used buffer sizes. */ + if (mtl_needs_topology_emulation(this->prim_type)) { + + /* Debug safety check for SSBO FETCH MODE. */ + if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) { + BLI_assert(false && "Topology emulation not supported with SSBO Vertex Fetch mode"); + } + + /* Emulate Tri-fan. 
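 *
 * [Editorial note -- worked example, not part of this diff] A fan over vertices
 * {0, 1, 2, 3, 4} decomposes into (5 - 2) = 3 triangles that all share vertex 0,
 * so the generated triangle-list index buffer is:
 *
 *   0, 1, 2,   0, 2, 3,   0, 3, 4
 *
 * which matches the loop below, emitting (0, i + 1, i + 2) per triangle.
 *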
*/ + if (this->prim_type == GPU_PRIM_TRI_FAN) { + /* Prepare Triangle-Fan emulation index buffer on CPU based on number of input + * vertices. */ + uint32_t base_vert_count = this->vertex_idx; + uint32_t num_triangles = max_ii(base_vert_count - 2, 0); + uint32_t fan_index_count = num_triangles * 3; + BLI_assert(num_triangles > 0); + + uint32_t alloc_size = sizeof(uint32_t) * fan_index_count; + uint32_t *index_buffer = nullptr; + + MTLTemporaryBuffer allocation = + context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned( + alloc_size, 128); + index_buffer = (uint32_t *)allocation.data; + + int a = 0; + for (int i = 0; i < num_triangles; i++) { + index_buffer[a++] = 0; + index_buffer[a++] = i + 1; + index_buffer[a++] = i + 2; + } + + @autoreleasepool { + + id<MTLBuffer> index_buffer_mtl = nil; + uint32_t index_buffer_offset = 0; + + /* Region of scratch buffer used for topology emulation element data. + * NOTE(Metal): We do not need to manually flush as the entire scratch + * buffer for current command buffer is flushed upon submission. */ + index_buffer_mtl = allocation.metal_buffer; + index_buffer_offset = allocation.buffer_offset; + + /* Set depth stencil state (requires knowledge of primitive type). */ + context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle); + + /* Bind Vertex Buffer. */ + rps.bind_vertex_buffer( + current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0); + + /* Draw. */ + [rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle + indexCount:fan_index_count + indexType:MTLIndexTypeUInt32 + indexBuffer:index_buffer_mtl + indexBufferOffset:index_buffer_offset]; + } + } + else { + /* TODO(Metal): Topology emulation for line loop. + * NOTE(Metal): This is currently not used anywhere and modified at the high + * level for efficiency in such cases. */ + BLI_assert_msg(false, "LineLoop requires emulation support in immediate mode."); + } + } + else { + MTLPrimitiveType primitive_type = metal_primitive_type_; + int vertex_count = this->vertex_idx; + + /* Bind Vertex Buffer. */ + rps.bind_vertex_buffer( + current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0); + + /* Set depth stencil state (requires knowledge of primitive type). */ + context_->ensure_depth_stencil_state(primitive_type); + + if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) { + + /* Bind Null Buffers for empty/missing bind slots. */ + id<MTLBuffer> null_buffer = context_->get_null_buffer(); + BLI_assert(null_buffer != nil); + for (int i = 1; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) { + + /* We only need to ensure a buffer is bound to the context, its contents do not matter + * as it will not be used. */ + if (rps.cached_vertex_buffer_bindings[i].metal_buffer == nil) { + rps.bind_vertex_buffer(null_buffer, 0, i); + } + } + + /* SSBO vertex fetch - Nullify elements buffer. */ + if (rps.cached_vertex_buffer_bindings[MTL_SSBO_VERTEX_FETCH_IBO_INDEX].metal_buffer == + nil) { + rps.bind_vertex_buffer(null_buffer, 0, MTL_SSBO_VERTEX_FETCH_IBO_INDEX); + } + + /* Submit draw call with modified vertex count, which reflects vertices per primitive + * defined in the USE_SSBO_VERTEX_FETCH `pragma`. 
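 *
 * [Editorial note -- hypothetical numbers, not part of this diff] For example,
 * drawing GPU_PRIM_LINES with vertex_count = 10 yields 5 input primitives; if
 * the shader's pragma declares an output of 6 vertices per input primitive, the
 * emitted draw covers 30 vertices:
 *
 *   int num_input_primitives = 10 / 2;  // 5 lines.
 *   int output_num_verts = 5 * 6;       // 30 vertices in the emitted draw.
 *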
*/ + int num_input_primitives = gpu_get_prim_count_from_type(vertex_count, this->prim_type); + int output_num_verts = num_input_primitives * + active_mtl_shader->get_ssbo_vertex_fetch_output_num_verts(); +#ifndef NDEBUG + BLI_assert( + mtl_vertex_count_fits_primitive_type( + output_num_verts, active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type()) && + "Output Vertex count is not compatible with the requested output vertex primitive " + "type"); +#endif + [rec drawPrimitives:active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type() + vertexStart:0 + vertexCount:output_num_verts]; + context_->main_command_buffer.register_draw_counters(output_num_verts); + } + else { + /* Regular draw. */ + [rec drawPrimitives:primitive_type vertexStart:0 vertexCount:vertex_count]; + context_->main_command_buffer.register_draw_counters(vertex_count); + } + } + } + if (G.debug & G_DEBUG_GPU) { + [rec popDebugGroup]; + } + } + + /* Reset allocation after draw submission. */ + has_begun_ = false; + if (current_allocation_.metal_buffer) { + [current_allocation_.metal_buffer release]; + current_allocation_.metal_buffer = nil; + } +} + +} // blender::gpu diff --git a/source/blender/gpu/metal/mtl_index_buffer.mm b/source/blender/gpu/metal/mtl_index_buffer.mm index 2195ab7538d..9712dce7b40 100644 --- a/source/blender/gpu/metal/mtl_index_buffer.mm +++ b/source/blender/gpu/metal/mtl_index_buffer.mm @@ -138,7 +138,7 @@ void MTLIndexBuf::update_sub(uint32_t start, uint32_t len, const void *data) BLI_assert(ibo_ != nullptr); /* Otherwise, we will inject a data update, using staged data, into the command stream. - * Stage update contents in temporary buffer*/ + * Stage update contents in temporary buffer. */ MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(ctx); MTLTemporaryBuffer range = ctx->get_scratchbuffer_manager().scratch_buffer_allocate_range(len); diff --git a/source/blender/gpu/metal/mtl_memory.hh b/source/blender/gpu/metal/mtl_memory.hh index df80df6543f..bd354376b12 100644 --- a/source/blender/gpu/metal/mtl_memory.hh +++ b/source/blender/gpu/metal/mtl_memory.hh @@ -340,13 +340,13 @@ class MTLBufferPool { private: /* Memory statistics. */ - long long int total_allocation_bytes_ = 0; + int64_t total_allocation_bytes_ = 0; #if MTL_DEBUG_MEMORY_STATISTICS == 1 /* Debug statistics. */ std::atomic<int> per_frame_allocation_count_; - std::atomic<long long int> allocations_in_pool_; - std::atomic<long long int> buffers_in_pool_; + std::atomic<int64_t> allocations_in_pool_; + std::atomic<int64_t> buffers_in_pool_; #endif /* Metal resources. 
*/ diff --git a/source/blender/gpu/metal/mtl_pso_descriptor_state.hh b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh index 1906350679a..04ceb5bdf03 100644 --- a/source/blender/gpu/metal/mtl_pso_descriptor_state.hh +++ b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh @@ -28,8 +28,8 @@ struct MTLVertexAttributeDescriptorPSO { uint64_t hash() const { - return (uint64_t)((uint64_t)this->format ^ (this->offset << 4) ^ (this->buffer_index << 8) ^ - (this->format_conversion_mode << 12)); + return uint64_t((uint64_t(this->format) ^ (this->offset << 4) ^ (this->buffer_index << 8) ^ + (this->format_conversion_mode << 12))); } }; @@ -46,8 +46,7 @@ struct MTLVertexBufferLayoutDescriptorPSO { uint64_t hash() const { - return (uint64_t)((uint64_t)this->step_function ^ (this->step_rate << 4) ^ - (this->stride << 8)); + return uint64_t(uint64_t(this->step_function) ^ (this->step_rate << 4) ^ (this->stride << 8)); } }; @@ -217,34 +216,46 @@ struct MTLRenderPipelineStateDescriptor { * has collisions. */ uint64_t hash = this->vertex_descriptor.hash(); - hash ^= (uint64_t)this->num_color_attachments << 16; /* up to 6 (3 bits). */ - hash ^= (uint64_t)this->depth_attachment_format << 18; /* up to 555 (9 bits). */ - hash ^= (uint64_t)this->stencil_attachment_format << 20; /* up to 555 (9 bits). */ - hash ^= (uint64_t)(*( - (uint64_t *)&this->vertex_descriptor.prim_topology_class)); /* Up to 3 (2 bits). */ + hash ^= uint64_t(this->num_color_attachments) << 16; /* up to 6 (3 bits). */ + hash ^= uint64_t(this->depth_attachment_format) << 18; /* up to 555 (9 bits). */ + hash ^= uint64_t(this->stencil_attachment_format) << 20; /* up to 555 (9 bits). */ + hash ^= uint64_t( + *((uint64_t *)&this->vertex_descriptor.prim_topology_class)); /* Up to 3 (2 bits). */ /* Only include elements in Hash if they are needed - avoids variable null assignments * influencing hash. */ if (this->num_color_attachments > 0) { - hash ^= (uint64_t)this->color_write_mask << 22; /* 4 bit bit-mask. */ - hash ^= (uint64_t)this->alpha_blend_op << 26; /* Up to 4 (3 bits). */ - hash ^= (uint64_t)this->rgb_blend_op << 29; /* Up to 4 (3 bits). */ - hash ^= (uint64_t)this->dest_alpha_blend_factor << 32; /* Up to 18 (5 bits). */ - hash ^= (uint64_t)this->dest_rgb_blend_factor << 37; /* Up to 18 (5 bits). */ - hash ^= (uint64_t)this->src_alpha_blend_factor << 42; /* Up to 18 (5 bits). */ - hash ^= (uint64_t)this->src_rgb_blend_factor << 47; /* Up to 18 (5 bits). */ + hash ^= uint64_t(this->color_write_mask) << 22; /* 4 bit bit-mask. */ + hash ^= uint64_t(this->alpha_blend_op) << 26; /* Up to 4 (3 bits). */ + hash ^= uint64_t(this->rgb_blend_op) << 29; /* Up to 4 (3 bits). */ + hash ^= uint64_t(this->dest_alpha_blend_factor) << 32; /* Up to 18 (5 bits). */ + hash ^= uint64_t(this->dest_rgb_blend_factor) << 37; /* Up to 18 (5 bits). */ + hash ^= uint64_t(this->src_alpha_blend_factor) << 42; /* Up to 18 (5 bits). */ + hash ^= uint64_t(this->src_rgb_blend_factor) << 47; /* Up to 18 (5 bits). */ } for (const uint c : IndexRange(GPU_FB_MAX_COLOR_ATTACHMENT)) { - hash ^= (uint64_t)this->color_attachment_format[c] << (c + 52); // up to 555 (9 bits) + hash ^= uint64_t(this->color_attachment_format[c]) << (c + 52); /* Up to 555 (9 bits). */ } - hash |= (uint64_t)((this->blending_enabled && (this->num_color_attachments > 0)) ? 1 : 0) - << 62; - hash ^= (uint64_t)this->point_size; + hash |= uint64_t((this->blending_enabled && (this->num_color_attachments > 0)) ? 
1 : 0) << 62; + hash ^= uint64_t(this->point_size); return hash; } + + /* Reset the Vertex Descriptor to default. */ + void reset_vertex_descriptor() + { + vertex_descriptor.num_attributes = 0; + vertex_descriptor.num_vert_buffers = 0; + for (int i = 0; i < GPU_VERT_ATTR_MAX_LEN; i++) { + vertex_descriptor.attributes[i].format = MTLVertexFormatInvalid; + vertex_descriptor.attributes[i].offset = 0; + } + vertex_descriptor.uses_ssbo_vertex_fetch = false; + vertex_descriptor.num_ssbo_attributes = 0; + } }; } // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_shader.hh b/source/blender/gpu/metal/mtl_shader.hh index 64d9d1cf849..5485b32dd31 100644 --- a/source/blender/gpu/metal/mtl_shader.hh +++ b/source/blender/gpu/metal/mtl_shader.hh @@ -261,8 +261,6 @@ class MTLShader : public Shader { bool get_push_constant_is_dirty(); void push_constant_bindstate_mark_dirty(bool is_dirty); - void vertformat_from_shader(GPUVertFormat *format) const override; - /* DEPRECATED: Kept only because of BGL API. (Returning -1 in METAL). */ int program_handle_get() const override { diff --git a/source/blender/gpu/metal/mtl_shader.mm b/source/blender/gpu/metal/mtl_shader.mm index 23097f312f0..006d3394378 100644 --- a/source/blender/gpu/metal/mtl_shader.mm +++ b/source/blender/gpu/metal/mtl_shader.mm @@ -129,6 +129,7 @@ MTLShader::~MTLShader() if (shd_builder_ != nullptr) { delete shd_builder_; + shd_builder_ = nullptr; } } @@ -209,6 +210,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info) /* Release temporary compilation resources. */ delete shd_builder_; + shd_builder_ = nullptr; return false; } } @@ -279,6 +281,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info) /* Release temporary compilation resources. */ delete shd_builder_; + shd_builder_ = nullptr; return false; } } @@ -324,6 +327,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info) /* Release temporary compilation resources. */ delete shd_builder_; + shd_builder_ = nullptr; return true; } @@ -536,27 +540,6 @@ void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty) push_constant_modified_ = is_dirty; } -void MTLShader::vertformat_from_shader(GPUVertFormat *format) const -{ - GPU_vertformat_clear(format); - - const MTLShaderInterface *mtl_interface = static_cast<const MTLShaderInterface *>(interface); - for (const uint attr_id : IndexRange(mtl_interface->get_total_attributes())) { - const MTLShaderInputAttribute &attr = mtl_interface->get_attribute(attr_id); - - /* Extract type parameters from Metal type. */ - GPUVertCompType comp_type = comp_type_from_vert_format(attr.format); - uint comp_len = comp_count_from_vert_format(attr.format); - GPUVertFetchMode fetch_mode = fetchmode_from_vert_format(attr.format); - - GPU_vertformat_attr_add(format, - mtl_interface->get_name_at_offset(attr.name_offset), - comp_type, - comp_len, - fetch_mode); - } -} - /** \} */ /* -------------------------------------------------------------------- */ @@ -1167,6 +1150,7 @@ void MTLShader::ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_at MTLShaderInterface *mtl_interface = this->get_interface(); BLI_assert(ssbo_attr.mtl_attribute_index >= 0 && ssbo_attr.mtl_attribute_index < mtl_interface->get_total_attributes()); + UNUSED_VARS_NDEBUG(mtl_interface); /* Update bind-mask to verify this attribute has been used. 
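 *
 * [Editorial note -- illustrative sketch, not part of this diff] The bind-mask
 * is a plain bit-set keyed by attribute index: binding marks a bit, and debug
 * checks can later confirm the slot's state, along the lines of:
 *
 *   ssbo_vertex_attribute_bind_mask_ |= (1 << ssbo_attr.mtl_attribute_index);
 *   ...
 *   BLI_assert(ssbo_vertex_attribute_bind_mask_ &
 *              (1 << ssbo_attr.mtl_attribute_index));  // Slot was bound.
 *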
*/ BLI_assert((ssbo_vertex_attribute_bind_mask_ & (1 << ssbo_attr.mtl_attribute_index)) == diff --git a/source/blender/gpu/metal/mtl_shader_generator.hh b/source/blender/gpu/metal/mtl_shader_generator.hh index 43890ca0170..63e2e6d5924 100644 --- a/source/blender/gpu/metal/mtl_shader_generator.hh +++ b/source/blender/gpu/metal/mtl_shader_generator.hh @@ -497,7 +497,7 @@ inline std::string get_stage_class_name(ShaderStage stage) inline bool is_builtin_type(std::string type) { /* Add Types as needed. */ - /* TODO(Metal): Consider replacing this with a switch and constexpr hash and switch. + /* TODO(Metal): Consider replacing this with a switch and `constexpr` hash and switch. * Though most efficient and maintainable approach to be determined. */ static std::map<std::string, eMTLDataType> glsl_builtin_types = { {"float", MTL_DATATYPE_FLOAT}, diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm index 977e97dbd82..4a2be0753bb 100644 --- a/source/blender/gpu/metal/mtl_shader_generator.mm +++ b/source/blender/gpu/metal/mtl_shader_generator.mm @@ -724,10 +724,6 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info) } if (msl_iface.uses_ssbo_vertex_fetch_mode) { ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl; - ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_MAX_VBOS " << MTL_SSBO_VERTEX_FETCH_MAX_VBOS - << std::endl; - ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_IBO_INDEX " << MTL_SSBO_VERTEX_FETCH_IBO_INDEX - << std::endl; for (const MSLVertexInputAttribute &attr : msl_iface.vertex_input_attributes) { ss_vertex << "#define SSBO_ATTR_TYPE_" << attr.name << " " << attr.type << std::endl; } diff --git a/source/blender/gpu/metal/mtl_shader_interface.mm b/source/blender/gpu/metal/mtl_shader_interface.mm index 3703d5b5684..97a82345761 100644 --- a/source/blender/gpu/metal/mtl_shader_interface.mm +++ b/source/blender/gpu/metal/mtl_shader_interface.mm @@ -117,9 +117,7 @@ uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset, MTLShaderUniformBlock &uni_block = ubos_[total_uniform_blocks_]; uni_block.name_offset = name_offset; - /* We offset the buffer binding index by one, as the first slot is reserved for push constant - * data. */ - uni_block.buffer_index = buffer_index + 1; + uni_block.buffer_index = buffer_index; uni_block.size = size; uni_block.current_offset = 0; uni_block.stage_mask = ShaderStage::BOTH; @@ -297,8 +295,10 @@ void MTLShaderInterface::prepare_common_shader_inputs() current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ubo.name_offset)); /* Location refers to the index in the ubos_ array. */ current_input->location = ubo_index; - /* Final binding location refers to the buffer binding index within the shader (Relative to - * MTL_uniform_buffer_base_index). */ + /* Binding location refers to the UBO bind slot in + * #MTLContextGlobalShaderPipelineState::ubo_bindings. The buffer bind index [[buffer(N)]] + * within the shader will apply an offset for bound vertex buffers and the default uniform + * PushConstantBlock. 
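 *
 * [Editorial note -- hypothetical layout, not spelled out by this diff] The
 * indirection keeps API-visible UBO slots stable while the MSL argument table
 * packs other buffers first; a plausible final layout for V vertex buffers is:
 *
 *   [[buffer(0 .. V-1)]]   -> vertex buffers
 *   [[buffer(V)]]          -> default PushConstantBlock uniforms
 *   [[buffer(V + 1 + n)]]  -> UBO with interface binding `n`
 *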
*/ current_input->binding = shd_ubo.buffer_index; current_input++; } diff --git a/source/blender/gpu/metal/mtl_texture.hh b/source/blender/gpu/metal/mtl_texture.hh index be6f3a3a02b..28b55306707 100644 --- a/source/blender/gpu/metal/mtl_texture.hh +++ b/source/blender/gpu/metal/mtl_texture.hh @@ -108,10 +108,10 @@ struct TextureReadRoutineSpecialisation { uint64_t hash() const { blender::DefaultHash<std::string> string_hasher; - return (uint64_t)string_hasher(this->input_data_type + this->output_data_type + - std::to_string((this->component_count_input << 8) + - this->component_count_output + - (this->depth_format_mode << 28))); + return uint64_t(string_hasher(this->input_data_type + this->output_data_type + + std::to_string((this->component_count_input << 8) + + this->component_count_output + + (this->depth_format_mode << 28)))); } }; @@ -138,12 +138,12 @@ struct MTLSamplerState { operator uint() const { - return (uint)state; + return uint(state); } operator uint64_t() const { - return (uint64_t)state; + return uint64_t(state); } }; @@ -200,7 +200,7 @@ class MTLTexture : public Texture { TEXTURE_VIEW_SWIZZLE_DIRTY = (1 << 0), TEXTURE_VIEW_MIP_DIRTY = (1 << 1) }; - id<MTLTexture> mip_swizzle_view_; + id<MTLTexture> mip_swizzle_view_ = nil; char tex_swizzle_mask_[4]; MTLTextureSwizzleChannels mtl_swizzle_mask_; bool mip_range_dirty_ = false; @@ -216,7 +216,6 @@ class MTLTexture : public Texture { /* VBO. */ MTLVertBuf *vert_buffer_; id<MTLBuffer> vert_buffer_mtl_; - int vert_buffer_offset_; /* Core parameters and sub-resources. */ eGPUTextureUsage gpu_image_usage_flags_; @@ -247,7 +246,7 @@ class MTLTexture : public Texture { void mip_range_set(int min, int max) override; void *read(int mip, eGPUDataFormat type) override; - /* Remove once no longer required -- will just return 0 for now in MTL path*/ + /* Remove once no longer required -- will just return 0 for now in MTL path. */ uint gl_bindcode_get() const override; bool texture_is_baked(); @@ -256,6 +255,14 @@ class MTLTexture : public Texture { return name_; } + id<MTLBuffer> get_vertex_buffer() const + { + if (resource_mode_ == MTL_TEXTURE_MODE_VBO) { + return vert_buffer_mtl_; + } + return nil; + } + protected: bool init_internal() override; bool init_internal(GPUVertBuf *vbo) override; @@ -324,8 +331,6 @@ class MTLTexture : public Texture { int height); GPUFrameBuffer *get_blit_framebuffer(uint dst_slice, uint dst_mip); - MEM_CXX_CLASS_ALLOC_FUNCS("gpu::MTLTexture") - /* Texture Update function Utilities. */ /* Metal texture updating does not provide the same range of functionality for type conversion * and format compatibility as are available in OpenGL. To achieve the same level of @@ -357,9 +362,9 @@ class MTLTexture : public Texture { */ struct TextureUpdateParams { int mip_index; - int extent[3]; /* Width, Height, Slice on 2D Array tex*/ - int offset[3]; /* Width, Height, Slice on 2D Array tex*/ - uint unpack_row_length; /* Number of pixels between bytes in input data */ + int extent[3]; /* Width, Height, Slice on 2D Array tex. */ + int offset[3]; /* Width, Height, Slice on 2D Array tex. */ + uint unpack_row_length; /* Number of pixels between bytes in input data. */ }; id<MTLComputePipelineState> texture_update_1d_get_kernel( @@ -383,7 +388,7 @@ class MTLTexture : public Texture { /* Depth texture updates are not directly supported with Blit operations, similarly, we cannot * use a compute shader to write to depth, so we must instead render to a depth target. 
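 *
 * [Editorial note -- illustrative outline, not part of this diff] In practice
 * this render-based update amounts to:
 *   1. Stage the source texels in an intermediate buffer/texture.
 *   2. Bind a framebuffer whose depth attachment covers the destination region.
 *   3. Draw a viewport-sized quad whose fragment shader writes the staged value
 *      to gl_FragDepth, with the depth test set to ALWAYS.
 *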
* These processes use vertex/fragment shaders to render texture data from an intermediate - * source, in order to prime the depth buffer*/ + * source, in order to prime the depth buffer. */ GPUShader *depth_2d_update_sh_get(DepthTextureUpdateRoutineSpecialisation specialization); void update_sub_depth_2d( @@ -392,8 +397,8 @@ class MTLTexture : public Texture { /* Texture Read function utilities -- Follows a similar mechanism to the updating routines */ struct TextureReadParams { int mip_index; - int extent[3]; /* Width, Height, Slice on 2D Array tex*/ - int offset[3]; /* Width, Height, Slice on 2D Array tex*/ + int extent[3]; /* Width, Height, Slice on 2D Array tex. */ + int offset[3]; /* Width, Height, Slice on 2D Array tex. */ }; id<MTLComputePipelineState> texture_read_1d_get_kernel( @@ -415,6 +420,8 @@ class MTLTexture : public Texture { /* fullscreen blit utilities. */ GPUShader *fullscreen_blit_sh_get(); + + MEM_CXX_CLASS_ALLOC_FUNCS("MTLTexture") }; /* Utility */ diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm index 2b7c2333bff..29dcc8d32ee 100644 --- a/source/blender/gpu/metal/mtl_texture.mm +++ b/source/blender/gpu/metal/mtl_texture.mm @@ -12,6 +12,7 @@ #include "GPU_batch_presets.h" #include "GPU_capabilities.h" #include "GPU_framebuffer.h" +#include "GPU_immediate.h" #include "GPU_platform.h" #include "GPU_state.h" @@ -20,6 +21,7 @@ #include "mtl_context.hh" #include "mtl_debug.hh" #include "mtl_texture.hh" +#include "mtl_vertex_buffer.hh" #include "GHOST_C-api.h" @@ -50,7 +52,6 @@ void gpu::MTLTexture::mtl_texture_init() /* VBO. */ vert_buffer_ = nullptr; vert_buffer_mtl_ = nil; - vert_buffer_offset_ = -1; /* Default Swizzle. */ tex_swizzle_mask_[0] = 'r'; @@ -169,26 +170,39 @@ void gpu::MTLTexture::bake_mip_swizzle_view() id<MTLTexture> gpu::MTLTexture::get_metal_handle() { - /* ensure up to date and baked. */ - this->ensure_baked(); - /* Verify VBO texture shares same buffer. */ if (resource_mode_ == MTL_TEXTURE_MODE_VBO) { - int r_offset = -1; + id<MTLBuffer> buf = vert_buffer_->get_metal_buffer(); + + /* Source vertex buffer has been re-generated, require re-initialization. */ + if (buf != vert_buffer_mtl_) { + MTL_LOG_INFO( + "MTLTexture '%p' using MTL_TEXTURE_MODE_VBO requires re-generation due to updated " + "Vertex-Buffer.\n", + this); + /* Clear state. */ + this->reset(); + + /* Re-initialize. */ + this->init_internal(wrap(vert_buffer_)); + + /* Update for assertion check below. */ + buf = vert_buffer_->get_metal_buffer(); + } - /* TODO(Metal): Fetch buffer from MTLVertBuf when implemented. */ - id<MTLBuffer> buf = nil; /*vert_buffer_->get_metal_buffer(&r_offset);*/ + /* Ensure buffer is valid. + * Fetch-vert buffer handle directly in-case it changed above. */ BLI_assert(vert_buffer_mtl_ != nil); - BLI_assert(buf == vert_buffer_mtl_ && r_offset == vert_buffer_offset_); - - UNUSED_VARS(buf); - UNUSED_VARS_NDEBUG(r_offset); + BLI_assert(vert_buffer_->get_metal_buffer() == vert_buffer_mtl_); } + /* ensure up to date and baked. */ + this->ensure_baked(); + if (is_baked_) { /* For explicit texture views, ensure we always return the texture view. 
*/ if (resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) { - BLI_assert(mip_swizzle_view_ && "Texture view should always have a valid handle."); + BLI_assert_msg(mip_swizzle_view_, "Texture view should always have a valid handle."); } if (mip_swizzle_view_ != nil || texture_view_dirty_flags_) { @@ -208,7 +222,7 @@ id<MTLTexture> gpu::MTLTexture::get_metal_handle_base() /* For explicit texture views, always return the texture view. */ if (resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) { - BLI_assert(mip_swizzle_view_ && "Texture view should always have a valid handle."); + BLI_assert_msg(mip_swizzle_view_, "Texture view should always have a valid handle."); if (mip_swizzle_view_ != nil || texture_view_dirty_flags_) { bake_mip_swizzle_view(); } @@ -290,7 +304,6 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst, /* Execute graphics draw call to perform the blit. */ GPUBatch *quad = GPU_batch_preset_quad(); - GPU_batch_set_shader(quad, shader); float w = dst->width_get(); @@ -915,7 +928,7 @@ void gpu::MTLTexture::generate_mipmap() /* Ensure texture is baked. */ this->ensure_baked(); - BLI_assert(is_baked_ && texture_ && "MTLTexture is not valid"); + BLI_assert_msg(is_baked_ && texture_, "MTLTexture is not valid"); if (mipmaps_ == 1 || mtl_max_mips_ == 1) { MTL_LOG_WARNING("Call to generate mipmaps on texture with 'mipmaps_=1\n'"); @@ -1231,7 +1244,7 @@ void gpu::MTLTexture::read_internal(int mip, depth_format_mode = 4; break; default: - BLI_assert(false && "Unhandled depth read format case"); + BLI_assert_msg(false, "Unhandled depth read format case"); break; } } @@ -1445,11 +1458,84 @@ bool gpu::MTLTexture::init_internal() bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo) { - /* Zero initialize. */ - this->prepare_internal(); + if (this->format_ == GPU_DEPTH24_STENCIL8) { + /* Apple Silicon requires GPU_DEPTH32F_STENCIL8 instead of GPU_DEPTH24_STENCIL8. */ + this->format_ = GPU_DEPTH32F_STENCIL8; + } + + MTLPixelFormat mtl_format = gpu_texture_format_to_metal(this->format_); + mtl_max_mips_ = 1; + mipmaps_ = 0; + this->mip_range_set(0, 0); + + /* Create texture from GPUVertBuf's buffer. */ + MTLVertBuf *mtl_vbo = static_cast<MTLVertBuf *>(unwrap(vbo)); + mtl_vbo->bind(); + mtl_vbo->flag_used(); + + /* Get Metal Buffer. */ + id<MTLBuffer> source_buffer = mtl_vbo->get_metal_buffer(); + BLI_assert(source_buffer); + + /* Verify size. */ + if (w_ <= 0) { + MTL_LOG_WARNING("Allocating texture buffer of width 0!\n"); + w_ = 1; + } + + /* Verify Texture and vertex buffer alignment. */ + int bytes_per_pixel = get_mtl_format_bytesize(mtl_format); + int bytes_per_row = bytes_per_pixel * w_; + + MTLContext *mtl_ctx = MTLContext::get(); + uint32_t align_requirement = static_cast<uint32_t>( + [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]); + + /* Verify per-vertex size aligns with texture size. */ + const GPUVertFormat *format = GPU_vertbuf_get_format(vbo); + BLI_assert(bytes_per_pixel == format->stride && + "Pixel format stride MUST match the texture format stride -- These being different " + "is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex"); + UNUSED_VARS_NDEBUG(format); + + /* Create texture descriptor. 
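 *
 * [Editorial note -- worked example, not part of this diff] For a hypothetical
 * GPU_RGBA32F buffer texture of width 1000:
 *
 *   bytes_per_pixel = 16
 *   bytes_per_row = 16 * 1000 = 16000
 *   align_requirement = 256 (a typical device value)
 *   ceil_to_multiple_u(16000, 256) = 16128  // bytesPerRow handed to Metal.
 *
 * The padding only affects the row stride of the texture view, not the vertex
 * buffer contents themselves.
 *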
*/ + BLI_assert(type_ == GPU_TEXTURE_BUFFER); + texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; + texture_descriptor_.pixelFormat = mtl_format; + texture_descriptor_.textureType = MTLTextureTypeTextureBuffer; + texture_descriptor_.width = w_; + texture_descriptor_.height = 1; + texture_descriptor_.depth = 1; + texture_descriptor_.arrayLength = 1; + texture_descriptor_.mipmapLevelCount = mtl_max_mips_; + texture_descriptor_.usage = + MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite | + MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. */ + texture_descriptor_.storageMode = [source_buffer storageMode]; + texture_descriptor_.sampleCount = 1; + texture_descriptor_.cpuCacheMode = [source_buffer cpuCacheMode]; + texture_descriptor_.hazardTrackingMode = [source_buffer hazardTrackingMode]; + + texture_ = [source_buffer + newTextureWithDescriptor:texture_descriptor_ + offset:0 + bytesPerRow:ceil_to_multiple_u(bytes_per_row, align_requirement)]; + aligned_w_ = bytes_per_row / bytes_per_pixel; + + BLI_assert(texture_); + texture_.label = [NSString stringWithUTF8String:this->get_name()]; + is_baked_ = true; + is_dirty_ = false; + resource_mode_ = MTL_TEXTURE_MODE_VBO; - /* TODO(Metal): Add implementation for GPU Vert buf. */ - return false; + /* Track Status. */ + vert_buffer_ = mtl_vbo; + vert_buffer_mtl_ = source_buffer; + /* Cleanup. */ + [texture_descriptor_ release]; + texture_descriptor_ = nullptr; + + return true; } bool gpu::MTLTexture::init_internal(const GPUTexture *src, int mip_offset, int layer_offset) @@ -1494,7 +1580,6 @@ bool gpu::MTLTexture::texture_is_baked() /* Prepare texture parameters after initialization, but before baking. */ void gpu::MTLTexture::prepare_internal() { - /* Derive implicit usage flags for Depth/Stencil attachments. */ if (format_flag_ & GPU_FORMAT_DEPTH || format_flag_ & GPU_FORMAT_STENCIL) { gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_ATTACHMENT; @@ -1659,7 +1744,7 @@ void gpu::MTLTexture::ensure_baked() /* Determine Resource Mode. */ resource_mode_ = MTL_TEXTURE_MODE_DEFAULT; - /* Create texture. */ + /* Standard texture allocation. */ texture_ = [ctx->device newTextureWithDescriptor:texture_descriptor_]; [texture_descriptor_ release]; diff --git a/source/blender/gpu/metal/mtl_texture_util.mm b/source/blender/gpu/metal/mtl_texture_util.mm index 928393fb39e..33a62e2e3ef 100644 --- a/source/blender/gpu/metal/mtl_texture_util.mm +++ b/source/blender/gpu/metal/mtl_texture_util.mm @@ -22,13 +22,7 @@ /* Utility file for secondary functionality which supports mtl_texture.mm. */ extern char datatoc_compute_texture_update_msl[]; -extern char datatoc_depth_2d_update_vert_glsl[]; -extern char datatoc_depth_2d_update_float_frag_glsl[]; -extern char datatoc_depth_2d_update_int24_frag_glsl[]; -extern char datatoc_depth_2d_update_int32_frag_glsl[]; extern char datatoc_compute_texture_read_msl[]; -extern char datatoc_gpu_shader_fullscreen_blit_vert_glsl[]; -extern char datatoc_gpu_shader_fullscreen_blit_frag_glsl[]; namespace blender::gpu { @@ -40,7 +34,7 @@ MTLPixelFormat gpu_texture_format_to_metal(eGPUTextureFormat tex_format) { switch (tex_format) { - /* Formats texture & renderbuffer. */ + /* Formats texture & render-buffer. 
*/ case GPU_RGBA8UI: return MTLPixelFormatRGBA8Uint; case GPU_RGBA8I: @@ -447,42 +441,34 @@ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get( return *result; } - const char *fragment_source = nullptr; + const char *depth_2d_info_variant = nullptr; switch (specialization.data_mode) { case MTL_DEPTH_UPDATE_MODE_FLOAT: - fragment_source = datatoc_depth_2d_update_float_frag_glsl; + depth_2d_info_variant = "depth_2d_update_float"; break; case MTL_DEPTH_UPDATE_MODE_INT24: - fragment_source = datatoc_depth_2d_update_int24_frag_glsl; + depth_2d_info_variant = "depth_2d_update_int24"; break; case MTL_DEPTH_UPDATE_MODE_INT32: - fragment_source = datatoc_depth_2d_update_int32_frag_glsl; + depth_2d_info_variant = "depth_2d_update_int32"; break; default: BLI_assert(false && "Invalid format mode\n"); return nullptr; } - GPUShader *shader = GPU_shader_create(datatoc_depth_2d_update_vert_glsl, - fragment_source, - nullptr, - nullptr, - nullptr, - "depth_2d_update_sh_get"); + GPUShader *shader = GPU_shader_create_from_info_name(depth_2d_info_variant); mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialization, shader); return shader; } GPUShader *gpu::MTLTexture::fullscreen_blit_sh_get() { - MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); if (mtl_context->get_texture_utils().fullscreen_blit_shader == nullptr) { - const char *vertex_source = datatoc_gpu_shader_fullscreen_blit_vert_glsl; - const char *fragment_source = datatoc_gpu_shader_fullscreen_blit_frag_glsl; - GPUShader *shader = GPU_shader_create( - vertex_source, fragment_source, nullptr, nullptr, nullptr, "fullscreen_blit"); + GPUShader *shader = GPU_shader_create_from_info_name("fullscreen_blit"); + mtl_context->get_texture_utils().fullscreen_blit_shader = shader; } return mtl_context->get_texture_utils().fullscreen_blit_shader; @@ -614,7 +600,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl( stringWithUTF8String:datatoc_compute_texture_read_msl]; /* Defensive Debug Checks. */ - long long int depth_scale_factor = 1; + int64_t depth_scale_factor = 1; if (specialization_params.depth_format_mode > 0) { BLI_assert(specialization_params.component_count_input == 1); BLI_assert(specialization_params.component_count_output == 1); diff --git a/source/blender/gpu/metal/mtl_vertex_buffer.hh b/source/blender/gpu/metal/mtl_vertex_buffer.hh new file mode 100644 index 00000000000..2cc8b0a9636 --- /dev/null +++ b/source/blender/gpu/metal/mtl_vertex_buffer.hh @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include <Cocoa/Cocoa.h> +#include <Metal/Metal.h> +#include <QuartzCore/QuartzCore.h> + +#include "MEM_guardedalloc.h" + +#include "GPU_vertex_buffer.h" +#include "gpu_vertex_buffer_private.hh" +#include "mtl_context.hh" + +namespace blender::gpu { + +class MTLVertBuf : public VertBuf { + friend class gpu::MTLTexture; /* For buffer texture. */ + friend class MTLShader; /* For transform feedback. */ + friend class MTLBatch; + friend class MTLContext; /* For transform feedback. */ + + private: + /** Metal buffer allocation. **/ + gpu::MTLBuffer *vbo_ = nullptr; + /** Texture used if the buffer is bound as buffer texture. Init on first use. */ + struct ::GPUTexture *buffer_texture_ = nullptr; + /** Defines whether the buffer handle is wrapped by this MTLVertBuf, i.e. we do not own it and + * should not free it. 
*/ + bool is_wrapper_ = false; + /** Requested allocation size for Metal buffer. + * Differs from raw buffer size as alignment is not included. */ + uint64_t alloc_size_ = 0; + /** Whether existing allocation has been submitted for use by the GPU. */ + bool contents_in_flight_ = false; + + /* Fetch Metal buffer and offset into allocation if necessary. + * Access limited to friend classes. */ + id<MTLBuffer> get_metal_buffer() + { + vbo_->debug_ensure_used(); + return vbo_->get_metal_buffer(); + } + + public: + MTLVertBuf(); + ~MTLVertBuf(); + + void bind(); + void flag_used(); + + void update_sub(uint start, uint len, const void *data) override; + + const void *read() const override; + void *unmap(const void *mapped_data) const override; + + void wrap_handle(uint64_t handle) override; + + protected: + void acquire_data() override; + void resize_data() override; + void release_data() override; + void upload_data() override; + void duplicate_data(VertBuf *dst) override; + void bind_as_ssbo(uint binding) override; + void bind_as_texture(uint binding) override; + + MEM_CXX_CLASS_ALLOC_FUNCS("MTLVertBuf"); +}; + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_vertex_buffer.mm b/source/blender/gpu/metal/mtl_vertex_buffer.mm new file mode 100644 index 00000000000..1c7201ce5f9 --- /dev/null +++ b/source/blender/gpu/metal/mtl_vertex_buffer.mm @@ -0,0 +1,368 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ +#include "mtl_vertex_buffer.hh" +#include "mtl_debug.hh" + +namespace blender::gpu { + +MTLVertBuf::MTLVertBuf() : VertBuf() +{ +} + +MTLVertBuf::~MTLVertBuf() +{ + this->release_data(); +} + +void MTLVertBuf::acquire_data() +{ + /* Discard previous data, if any. */ + MEM_SAFE_FREE(data); + if (usage_ == GPU_USAGE_DEVICE_ONLY) { + data = nullptr; + } + else { + data = (uchar *)MEM_mallocN(sizeof(uchar) * this->size_alloc_get(), __func__); + } +} + +void MTLVertBuf::resize_data() +{ + if (usage_ == GPU_USAGE_DEVICE_ONLY) { + data = nullptr; + } + else { + data = (uchar *)MEM_reallocN(data, sizeof(uchar) * this->size_alloc_get()); + } +} + +void MTLVertBuf::release_data() +{ + if (vbo_ != nullptr) { + vbo_->free(); + vbo_ = nullptr; + is_wrapper_ = false; + } + + GPU_TEXTURE_FREE_SAFE(buffer_texture_); + + MEM_SAFE_FREE(data); +} + +void MTLVertBuf::duplicate_data(VertBuf *dst_) +{ + BLI_assert(MTLContext::get() != NULL); + MTLVertBuf *src = this; + MTLVertBuf *dst = static_cast<MTLVertBuf *>(dst_); + + /* Ensure buffer has been initialized. */ + src->bind(); + + if (src->vbo_) { + + /* Fetch active context. */ + MTLContext *ctx = MTLContext::get(); + BLI_assert(ctx); + + /* Ensure destination does not have an active VBO. */ + BLI_assert(dst->vbo_ == nullptr); + + /* Allocate VBO for destination vertbuf. */ + uint length = src->vbo_->get_size(); + dst->vbo_ = MTLContext::get_global_memory_manager().allocate( + length, (dst->get_usage_type() != GPU_USAGE_DEVICE_ONLY)); + dst->alloc_size_ = length; + + /* Fetch Metal buffer handles. */ + id<MTLBuffer> src_buffer = src->vbo_->get_metal_buffer(); + id<MTLBuffer> dest_buffer = dst->vbo_->get_metal_buffer(); + + /* Use blit encoder to copy data to duplicate buffer allocation. 
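 *
 * [Editorial note -- not part of this diff] On MTLStorageModeManaged resources
 * the CPU and GPU copies are distinct, so after the GPU-side blit the encoder
 * must schedule a sync before the host may read the result:
 *
 *   [enc synchronizeResource:dest_buffer];  // GPU copy -> host copy.
 *
 * On shared-storage (Apple silicon) buffers this is unnecessary, which is why
 * the `synchronizeResource` call below is wrapped in a storageMode check.
 *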
*/ + id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder(); + if (G.debug & G_DEBUG_GPU) { + [enc insertDebugSignpost:@"VertexBufferDuplicate"]; + } + [enc copyFromBuffer:src_buffer + sourceOffset:0 + toBuffer:dest_buffer + destinationOffset:0 + size:length]; + + /* Flush results back to host buffer, if one exists. */ + if (dest_buffer.storageMode == MTLStorageModeManaged) { + [enc synchronizeResource:dest_buffer]; + } + + if (G.debug & G_DEBUG_GPU) { + [enc insertDebugSignpost:@"VertexBufferDuplicateEnd"]; + } + + /* Mark as in-use, as contents are updated via GPU command. */ + src->flag_used(); + } + + /* Copy raw CPU data. */ + if (data != nullptr) { + dst->data = (uchar *)MEM_dupallocN(src->data); + } +} + +void MTLVertBuf::upload_data() +{ + this->bind(); +} + +void MTLVertBuf::bind() +{ + /* Determine allocation size. Set minimum allocation size to be + * the maximal of a single attribute to avoid validation and + * correctness errors. */ + uint64_t required_size_raw = sizeof(uchar) * this->size_used_get(); + uint64_t required_size = max_ulul(required_size_raw, 128); + + if (required_size_raw == 0) { + MTL_LOG_WARNING("Warning: Vertex buffer required_size = 0\n"); + } + + /* If the vertex buffer has already been allocated, but new data is ready, + * or the usage size has changed, we release the existing buffer and + * allocate a new buffer to ensure we do not overwrite in-use GPU resources. + * + * NOTE: We only need to free the existing allocation if contents have been + * submitted to the GPU. Otherwise we can simply upload new data to the + * existing buffer, if it will fit. + * + * NOTE: If a buffer is re-sized, but no new data is provided, the previous + * contents are copied into the newly allocated buffer. */ + bool requires_reallocation = (vbo_ != nullptr) && (alloc_size_ != required_size); + bool new_data_ready = (this->flag & GPU_VERTBUF_DATA_DIRTY) && this->data; + + gpu::MTLBuffer *prev_vbo = nullptr; + GPUVertBufStatus prev_flag = this->flag; + + if (vbo_ != nullptr) { + if (requires_reallocation || (new_data_ready && contents_in_flight_)) { + /* Track previous VBO to copy data from. */ + prev_vbo = vbo_; + + /* Reset current allocation status. */ + vbo_ = nullptr; + is_wrapper_ = false; + alloc_size_ = 0; + + /* Flag as requiring data upload. */ + if (requires_reallocation) { + this->flag &= ~GPU_VERTBUF_DATA_UPLOADED; + } + } + } + + /* Create MTLBuffer of requested size. */ + if (vbo_ == nullptr) { + vbo_ = MTLContext::get_global_memory_manager().allocate( + required_size, (this->get_usage_type() != GPU_USAGE_DEVICE_ONLY)); + vbo_->set_label(@"Vertex Buffer"); + BLI_assert(vbo_ != nullptr); + BLI_assert(vbo_->get_metal_buffer() != nil); + + is_wrapper_ = false; + alloc_size_ = required_size; + contents_in_flight_ = false; + } + + /* Upload new data, if provided. */ + if (new_data_ready) { + + /* Only upload data if usage size is greater than zero. + * Do not upload data for device-only buffers. */ + if (required_size_raw > 0 && usage_ != GPU_USAGE_DEVICE_ONLY) { + + /* Debug: Verify allocation is large enough. */ + BLI_assert(vbo_->get_size() >= required_size_raw); + + /* Fetch mapped buffer host ptr and upload data. */ + void *dst_data = vbo_->get_host_ptr(); + memcpy((uint8_t *)dst_data, this->data, required_size_raw); + vbo_->flush_range(0, required_size_raw); + } + + /* If static usage, free host-side data. */ + if (usage_ == GPU_USAGE_STATIC) { + MEM_SAFE_FREE(data); + } + + /* Flag data as having been uploaded. 
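 *
 * [Editorial note -- illustrative summary, not part of this diff] The flag
 * transitions in `bind()` form a small state machine:
 *
 *   GPU_VERTBUF_DATA_DIRTY    : set by the generic VertBuf API when host data
 *                               changes; cleared here once the upload lands.
 *   GPU_VERTBUF_DATA_UPLOADED : set here after upload or GPU-side copy; cleared
 *                               when a re-size forces a fresh allocation.
 *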
*/ + this->flag &= ~GPU_VERTBUF_DATA_DIRTY; + this->flag |= GPU_VERTBUF_DATA_UPLOADED; + } + else if (requires_reallocation) { + + /* If buffer has been re-sized, copy existing data if host + * data had been previously uploaded. */ + BLI_assert(prev_vbo != nullptr); + + if (prev_flag & GPU_VERTBUF_DATA_UPLOADED) { + + /* Fetch active context. */ + MTLContext *ctx = MTLContext::get(); + BLI_assert(ctx); + + id<MTLBuffer> copy_prev_buffer = prev_vbo->get_metal_buffer(); + id<MTLBuffer> copy_new_buffer = vbo_->get_metal_buffer(); + BLI_assert(copy_prev_buffer != nil); + BLI_assert(copy_new_buffer != nil); + + /* Ensure a blit command encoder is active for buffer copy operation. */ + id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder(); + [enc copyFromBuffer:copy_prev_buffer + sourceOffset:0 + toBuffer:copy_new_buffer + destinationOffset:0 + size:min_ii([copy_new_buffer length], [copy_prev_buffer length])]; + + /* Flush newly copied data back to host-side buffer, if one exists. + * Ensures data and cache coherency for managed MTLBuffers. */ + if (copy_new_buffer.storageMode == MTLStorageModeManaged) { + [enc synchronizeResource:copy_new_buffer]; + } + + /* For VBOs flagged as static, release host data as it will no longer be needed. */ + if (usage_ == GPU_USAGE_STATIC) { + MEM_SAFE_FREE(data); + } + + /* Flag data as uploaded. */ + this->flag |= GPU_VERTBUF_DATA_UPLOADED; + + /* Flag as in-use, as contents have been updated via GPU commands. */ + this->flag_used(); + } + } + + /* Release previous buffer if re-allocated. */ + if (prev_vbo != nullptr) { + prev_vbo->free(); + } + + /* Ensure buffer has been created. */ + BLI_assert(vbo_ != nullptr); +} + +/* Update Sub currently only used by hair */ +void MTLVertBuf::update_sub(uint start, uint len, const void *data) +{ + /* Fetch and verify active context. */ + MTLContext *ctx = reinterpret_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(ctx); + BLI_assert(ctx->device); + + /* Ensure vertbuf has been created. */ + this->bind(); + BLI_assert(start + len <= alloc_size_); + + /* Create temporary scratch buffer allocation for sub-range of data. */ + MTLTemporaryBuffer scratch_allocation = + ctx->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(len, 256); + memcpy(scratch_allocation.data, data, len); + [scratch_allocation.metal_buffer + didModifyRange:NSMakeRange(scratch_allocation.buffer_offset, len)]; + id<MTLBuffer> data_buffer = scratch_allocation.metal_buffer; + uint data_buffer_offset = scratch_allocation.buffer_offset; + + BLI_assert(vbo_ != nullptr && data != nullptr); + BLI_assert((start + len) <= vbo_->get_size()); + + /* Fetch destination buffer. */ + id<MTLBuffer> dst_buffer = vbo_->get_metal_buffer(); + + /* Ensure blit command encoder for copying data. */ + id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder(); + [enc copyFromBuffer:data_buffer + sourceOffset:data_buffer_offset + toBuffer:dst_buffer + destinationOffset:start + size:len]; + + /* Flush modified buffer back to host buffer, if one exists. */ + if (dst_buffer.storageMode == MTLStorageModeManaged) { + [enc synchronizeResource:dst_buffer]; + } +} + +void MTLVertBuf::bind_as_ssbo(uint binding) +{ + /* TODO(Metal): Support binding of buffers as SSBOs. + * Pending overall compute support for Metal backend. 
*/ + MTL_LOG_WARNING("MTLVertBuf::bind_as_ssbo not yet implemented!\n"); + this->flag_used(); +} + +void MTLVertBuf::bind_as_texture(uint binding) +{ + /* Ensure allocations are ready, and data uploaded. */ + this->bind(); + BLI_assert(vbo_ != nullptr); + + /* If vertex buffer updated, release existing texture and re-create. */ + id<MTLBuffer> buf = this->get_metal_buffer(); + if (buffer_texture_ != nullptr) { + gpu::MTLTexture *mtl_buffer_tex = static_cast<gpu::MTLTexture *>( + unwrap(this->buffer_texture_)); + id<MTLBuffer> tex_buf = mtl_buffer_tex->get_vertex_buffer(); + if (tex_buf != buf) { + GPU_TEXTURE_FREE_SAFE(buffer_texture_); + buffer_texture_ = nullptr; + } + } + + /* Create texture from vertex buffer. */ + if (buffer_texture_ == nullptr) { + buffer_texture_ = GPU_texture_create_from_vertbuf("vertbuf_as_texture", wrap(this)); + } + + /* Verify successful creation and bind. */ + BLI_assert(buffer_texture_ != nullptr); + GPU_texture_bind(buffer_texture_, binding); +} + +const void *MTLVertBuf::read() const +{ + BLI_assert(vbo_ != nullptr); + BLI_assert(usage_ != GPU_USAGE_DEVICE_ONLY); + void *return_ptr = vbo_->get_host_ptr(); + BLI_assert(return_ptr != nullptr); + + return return_ptr; +} + +void *MTLVertBuf::unmap(const void *mapped_data) const +{ + void *result = MEM_mallocN(alloc_size_, __func__); + memcpy(result, mapped_data, alloc_size_); + return result; +} + +void MTLVertBuf::wrap_handle(uint64_t handle) +{ + BLI_assert(vbo_ == nullptr); + + /* Attempt to cast to Metal buffer handle. */ + BLI_assert(handle != 0); + id<MTLBuffer> buffer = reinterpret_cast<id<MTLBuffer>>((void *)handle); + + is_wrapper_ = true; + vbo_ = new gpu::MTLBuffer(buffer); + + /* We assume the data is already on the device, so no need to allocate or send it. */ + flag = GPU_VERTBUF_DATA_UPLOADED; +} + +void MTLVertBuf::flag_used() +{ + contents_in_flight_ = true; +} + +} // namespace blender::gpu |