/* SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup gpu
 *
 * Metal implementation of GPUBatch.
 */

#include "BLI_assert.h"
#include "BLI_span.hh"

#include "BKE_global.h"

#include "GPU_common.h"
#include "gpu_batch_private.hh"
#include "gpu_shader_private.hh"

#include "mtl_batch.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_index_buffer.hh"
#include "mtl_shader.hh"
#include "mtl_vertex_buffer.hh"

#include <string>

namespace blender::gpu {

/* -------------------------------------------------------------------- */
/** \name Creation & Deletion
 * \{ */

void MTLBatch::draw(int v_first, int v_count, int i_first, int i_count)
{
  if (this->flag & GPU_BATCH_INVALID) {
    this->shader_in_use_ = false;
  }
  this->draw_advanced(v_first, v_count, i_first, i_count);
}

void MTLBatch::shader_bind()
{
  if (active_shader_ && active_shader_->is_valid()) {
    active_shader_->bind();
    shader_in_use_ = true;
  }
}

void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_init(MTLContext *ctx)
{
  BLI_assert(ctx != nullptr);
  this->vertex_descriptor_cache_clear();
  cache_context_ = ctx;
}

void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_clear()
{
  cache_life_index_++;
  cache_context_ = nullptr;
}

void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_ensure()
{
  if (this->cache_context_ != nullptr) {
    /* Invalidate vertex descriptor bindings cache if batch has changed. */
    if (batch_->flag & GPU_BATCH_DIRTY) {
      batch_->flag &= ~GPU_BATCH_DIRTY;
      this->vertex_descriptor_cache_clear();
    }
  }

  /* Initialize cache if not ready. */
  if (cache_context_ == nullptr) {
    this->vertex_descriptor_cache_init(MTLContext::get());
  }
}

MTLBatch::VertexDescriptorShaderInterfacePair *MTLBatch::MTLVertexDescriptorCache::find(
    const ShaderInterface *interface)
{
  this->vertex_descriptor_cache_ensure();
  for (int i = 0; i < GPU_VAO_STATIC_LEN; ++i) {
    if (cache_[i].interface == interface && cache_[i].cache_life_index == cache_life_index_) {
      return &cache_[i];
    }
  }
  return nullptr;
}

bool MTLBatch::MTLVertexDescriptorCache::insert(
    MTLBatch::VertexDescriptorShaderInterfacePair &data)
{
  vertex_descriptor_cache_ensure();
  for (int i = 0; i < GPU_VAO_STATIC_LEN; ++i) {
    if (cache_[i].interface == nullptr || cache_[i].cache_life_index != cache_life_index_) {
      cache_[i] = data;
      cache_[i].cache_life_index = cache_life_index_;
      return true;
    }
  }
  return false;
}
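/* Example (illustrative): the `attr_mask` parameter used below is a bitmask with one bit per
 * shader attribute location, seeded from `ShaderInterface::get_enabled_attribute_mask()`. For a
 * hypothetical interface exposing attributes at locations 0, 1 and 3, the initial mask would be
 * 0b1011; each bit is cleared as a matching vertex-buffer attribute is bound, so any bit still
 * set after binding marks an attribute slot the bound buffers did not satisfy. */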
int MTLBatch::prepare_vertex_binding(MTLVertBuf *verts,
                                     MTLRenderPipelineStateDescriptor &desc,
                                     const MTLShaderInterface *interface,
                                     uint16_t &attr_mask,
                                     bool instanced)
{
  const GPUVertFormat *format = &verts->format;
  /* Whether the current vertex buffer has been added to the buffer layout descriptor. */
  bool buffer_added = false;
  /* Per-vertex stride of current vertex buffer. */
  int buffer_stride = format->stride;
  /* Buffer binding index of the vertex buffer once added to the buffer layout descriptor. */
  int buffer_index = -1;
  int attribute_offset = 0;

  if (!active_shader_->get_uses_ssbo_vertex_fetch()) {
    BLI_assert(buffer_stride >= 4 &&
               "In Metal, vertex buffer stride should be at least 4 bytes. SSBO vertex fetch is "
               "not affected by this");
  }

  /* Iterate over the GPUVertBuf vertex format and find attributes matching those in the active
   * shader's interface. */
  for (uint32_t a_idx = 0; a_idx < format->attr_len; a_idx++) {
    const GPUVertAttr *a = &format->attrs[a_idx];

    if (format->deinterleaved) {
      attribute_offset += ((a_idx == 0) ? 0 : format->attrs[a_idx - 1].size) * verts->vertex_len;
      buffer_stride = a->size;
    }
    else {
      attribute_offset = a->offset;
    }
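    /* Worked example (hypothetical format): for a deinterleaved buffer with vertex_len = 100
     * and attributes `pos` (float3, 12 bytes) followed by `nor` (float3, 12 bytes):
     *   - `pos`: attribute_offset = 0,    buffer_stride = 12.
     *   - `nor`: attribute_offset = 1200, buffer_stride = 12.
     * For an interleaved format, each attribute instead uses its `a->offset` within the shared
     * per-vertex stride. */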
    /* Find attribute with the matching name. Attributes may have multiple compatible
     * name aliases. */
    for (uint32_t n_idx = 0; n_idx < a->name_len; n_idx++) {
      const char *name = GPU_vertformat_attr_name_get(format, a, n_idx);
      const ShaderInput *input = interface->attr_get(name);

      if (input == nullptr || input->location == -1) {
        /* The provided vertex/instance buffers may contain data for attributes which are not
         * needed by this particular shader. We only need binding information for the attributes
         * present in the shader interface. */
        MTL_LOG_WARNING(
            "MTLBatch: Could not find attribute with name '%s' (defined in active vertex format) "
            "in the shader interface for shader '%s'\n",
            name,
            interface->get_name());
        continue;
      }

      /* Fetch Metal attribute information. */
      const MTLShaderInputAttribute &mtl_attr = interface->get_attribute(input->location);
      BLI_assert(mtl_attr.location >= 0);
      /* Verify that the attribute location from the shader interface
       * matches the attribute location returned. */
      BLI_assert(mtl_attr.location == input->location);

      /* Check if attribute is already present in the given slot. */
      if ((~attr_mask) & (1 << mtl_attr.location)) {
        MTL_LOG_INFO(
            " -- [Batch] Skipping attribute with input location %d (as one is already bound)\n",
            mtl_attr.location);
      }
      else {
        /* Update attribute used-slot mask. */
        attr_mask &= ~(1 << mtl_attr.location);

        /* Add a buffer layout entry in the descriptor if one has not yet been added
         * for the current vertex buffer. */
        if (!buffer_added) {
          buffer_index = desc.vertex_descriptor.num_vert_buffers;
          desc.vertex_descriptor.buffer_layouts[buffer_index].step_function =
              (instanced) ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex;
          desc.vertex_descriptor.buffer_layouts[buffer_index].step_rate = 1;
          desc.vertex_descriptor.buffer_layouts[buffer_index].stride = buffer_stride;
          desc.vertex_descriptor.num_vert_buffers++;
          buffer_added = true;

          MTL_LOG_INFO(" -- [Batch] Adding source %s buffer (Index: %d, Stride: %d)\n",
                       (instanced) ? "instance" : "vertex",
                       buffer_index,
                       buffer_stride);
        }
        else {
          /* Ensure stride is correct for de-interleaved attributes. */
          desc.vertex_descriptor.buffer_layouts[buffer_index].stride = buffer_stride;
        }

        /* Handle matrix/array vertex attribute types.
         * Metal does not natively support these as attribute types, so we handle these cases
         * by stacking together compatible types (e.g. 4x Vec4 for Mat4) and combining
         * the data in the shader.
         * The generated Metal shader will contain a generated input binding, which reads
         * in individual attributes and merges them into the desired type after vertex
         * assembly. e.g. a Mat4 (Float4x4) will generate 4 Float4 attributes. */
        if (a->comp_len == 16 || a->comp_len == 12 || a->comp_len == 8) {
          BLI_assert_msg(
              a->comp_len == 16,
              "Only mat4 attributes are currently supported -- not ready to handle other long "
              "component-length attributes yet");

          /* SSBO Vertex Fetch attribute safety checks. */
          if (active_shader_->get_uses_ssbo_vertex_fetch()) {
            /* When using SSBO vertex fetch, we do not need to expose split attributes;
             * a matrix can be read directly as a whole block of contiguous data. */
            MTLSSBOAttribute ssbo_attr(mtl_attr.index,
                                       buffer_index,
                                       attribute_offset,
                                       buffer_stride,
                                       GPU_SHADER_ATTR_TYPE_MAT4,
                                       instanced);
            active_shader_->ssbo_vertex_fetch_bind_attribute(ssbo_attr);
            desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] =
                ssbo_attr;
            desc.vertex_descriptor.num_ssbo_attributes++;
          }
          else {
            /* Handle Mat4 attributes. */
            if (a->comp_len == 16) {
              /* Debug safety checks. */
              BLI_assert_msg(mtl_attr.matrix_element_count == 4,
                             "mat4 type expected but there are fewer components");
              BLI_assert_msg(mtl_attr.size == 16, "Expecting subtype 'vec4' with 16 bytes");
              BLI_assert_msg(
                  mtl_attr.format == MTLVertexFormatFloat4,
                  "Per-attribute vertex format MUST be float4 for an input type of 'mat4'");

              /* We have found the 'ROOT' attribute. A mat4 contains 4 consecutive float4
               * attribute locations we must map to. */
              for (int i = 0; i < a->comp_len / 4; i++) {
                desc.vertex_descriptor.attributes[mtl_attr.location + i].format =
                    MTLVertexFormatFloat4;
                /* Data is consecutive in the buffer for the whole matrix; each float4 shifts
                 * the offset by 16 bytes. */
                desc.vertex_descriptor.attributes[mtl_attr.location + i].offset =
                    attribute_offset + i * 16;
                /* All source data for a matrix is in the same singular buffer. */
                desc.vertex_descriptor.attributes[mtl_attr.location + i].buffer_index =
                    buffer_index;

                /* Update the total attribute count. */
                desc.vertex_descriptor.num_attributes = max_ii(
                    mtl_attr.location + i + 1, desc.vertex_descriptor.num_attributes);
                MTL_LOG_INFO("-- Sub-Attrib Location: %d, offset: %d, buffer index: %d\n",
                             mtl_attr.location + i,
                             attribute_offset + i * 16,
                             buffer_index);
              }
              MTL_LOG_INFO(
                  "Float4x4 attribute type added for '%s' at attribute locations: %d to %d\n",
                  name,
                  mtl_attr.location,
                  mtl_attr.location + 3);
            }

            /* Ensure we are not exceeding the attribute limit. */
            BLI_assert(desc.vertex_descriptor.num_attributes <= MTL_MAX_VERTEX_INPUT_ATTRIBUTES);
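            /* Worked example (hypothetical locations): a mat4 attribute whose root location is 4
             * and whose data starts at attribute_offset 0 expands to:
             *   attributes[4] = {Float4, offset 0,  buffer_index}
             *   attributes[5] = {Float4, offset 16, buffer_index}
             *   attributes[6] = {Float4, offset 32, buffer_index}
             *   attributes[7] = {Float4, offset 48, buffer_index}
             * and bumps num_attributes to at least 8. */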
          }
        }
        else {
          /* Handle any required format conversions.
           * NOTE(Metal): If there is a mismatch between the format of an attribute
           * in the shader interface and the specified format in the VertexBuffer VertexFormat,
           * we need to perform a format conversion.
           *
           * The Metal API can perform certain conversions internally during vertex assembly:
           * - Type normalization, e.g. short2 to float2 between 0.0 and 1.0.
           * - Type truncation, e.g. Float4 to Float2.
           * - Type expansion, e.g. Float3 to Float4 (following 0,0,0,1 for assignment to empty
           *   elements).
           *
           * Certain conversions cannot be performed however, and in these cases we need to
           * instruct the shader to generate a specialized version with a conversion routine
           * upon attribute read.
           * - This handles cases such as conversion between types, e.g. integer to float
           *   without normalization.
           *
           * For more information on the supported and unsupported conversions, see:
           * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc
           */
          MTLVertexFormat converted_format;
          bool can_use_internal_conversion = mtl_convert_vertex_format(
              mtl_attr.format,
              (GPUVertCompType)a->comp_type,
              a->comp_len,
              (GPUVertFetchMode)a->fetch_mode,
              &converted_format);
          bool is_floating_point_format = (a->comp_type == GPU_COMP_F32);

          if (can_use_internal_conversion) {
            desc.vertex_descriptor.attributes[mtl_attr.location].format = converted_format;
            desc.vertex_descriptor.attributes[mtl_attr.location].format_conversion_mode =
                is_floating_point_format ? (GPUVertFetchMode)GPU_FETCH_FLOAT :
                                           (GPUVertFetchMode)GPU_FETCH_INT;
            BLI_assert(converted_format != MTLVertexFormatInvalid);
          }
          else {
            /* The internal implicit conversion is not supported.
             * In this case, we need to handle the conversion inside the shader.
             * This is handled using `format_conversion_mode`.
             * `format_conversion_mode` is assigned the Blender-specified fetch mode
             * (GPU_FETCH_*). This then controls how a given attribute is interpreted: the data
             * will be read as specified and then converted appropriately to the correct form.
             *
             * e.g. if `GPU_FETCH_INT_TO_FLOAT` is specified, the specialized read-routine
             * in the shader will read the data as an int and cast it to a floating point
             * representation (rather than reading the source data as float).
             *
             * NOTE: Even if full conversion is not supported, we may still partially perform an
             * implicit conversion where possible, such as vector truncation or expansion. */
            MTLVertexFormat converted_format;
            bool can_convert = mtl_vertex_format_resize(
                mtl_attr.format, a->comp_len, &converted_format);
            desc.vertex_descriptor.attributes[mtl_attr.location].format = can_convert ?
                                                                              converted_format :
                                                                              mtl_attr.format;
            desc.vertex_descriptor.attributes[mtl_attr.location].format_conversion_mode =
                (GPUVertFetchMode)a->fetch_mode;
            BLI_assert(desc.vertex_descriptor.attributes[mtl_attr.location].format !=
                       MTLVertexFormatInvalid);
          }
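          /* Example (illustrative): a uchar4 attribute fetched with a normalizing fetch mode
           * maps onto Metal's internal normalization path (e.g. a normalized uchar4 vertex
           * format), whereas `GPU_FETCH_INT_TO_FLOAT` has no internal Metal equivalent, so the
           * fetch mode is passed through and the generated shader casts the integer data on
           * read instead. */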
          desc.vertex_descriptor.attributes[mtl_attr.location].offset = attribute_offset;
          desc.vertex_descriptor.attributes[mtl_attr.location].buffer_index = buffer_index;
          desc.vertex_descriptor.num_attributes = ((mtl_attr.location + 1) >
                                                   desc.vertex_descriptor.num_attributes) ?
                                                      (mtl_attr.location + 1) :
                                                      desc.vertex_descriptor.num_attributes;

          /* SSBO Vertex Fetch attribute bind. */
          if (active_shader_->get_uses_ssbo_vertex_fetch()) {
            BLI_assert_msg(desc.vertex_descriptor.attributes[mtl_attr.location].format ==
                               mtl_attr.format,
                           "SSBO Vertex Fetch does not support attribute conversion.");

            MTLSSBOAttribute ssbo_attr(
                mtl_attr.index,
                buffer_index,
                attribute_offset,
                buffer_stride,
                MTLShader::ssbo_vertex_type_to_attr_type(
                    desc.vertex_descriptor.attributes[mtl_attr.location].format),
                instanced);

            active_shader_->ssbo_vertex_fetch_bind_attribute(ssbo_attr);
            desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] =
                ssbo_attr;
            desc.vertex_descriptor.num_ssbo_attributes++;
          }

          /* NOTE: We are setting num_attributes to be up to the maximum found index; because of
           * this, it is possible that we may skip over certain attributes if they were not in
           * the source GPUVertFormat. */
          MTL_LOG_INFO(
              " -- Batch Attribute(%d): ORIG Shader Format: %d, ORIG Vert format: %d, Vert "
              "components: %d, Fetch Mode %d --> FINAL FORMAT: %d\n",
              mtl_attr.location,
              (int)mtl_attr.format,
              (int)a->comp_type,
              (int)a->comp_len,
              (int)a->fetch_mode,
              (int)desc.vertex_descriptor.attributes[mtl_attr.location].format);

          MTL_LOG_INFO(
              " -- [Batch] matching %s attribute '%s' (Attribute Index: %d, Buffer index: %d, "
              "offset: %d)\n",
              (instanced) ? "instance" : "vertex",
              name,
              mtl_attr.location,
              buffer_index,
              attribute_offset);
        }
      }
    }
  }
  if (buffer_added) {
    return buffer_index;
  }
  return -1;
}

id<MTLRenderCommandEncoder> MTLBatch::bind(uint v_first, uint v_count, uint i_first, uint i_count)
{
  /* Setup draw call and render pipeline state here. Called by every draw, but setup here so that
   * MTLDrawList only needs to perform setup a single time. */
  BLI_assert(this);

  /* Fetch Metal device. */
  MTLContext *ctx = MTLContext::get();
  if (!ctx) {
    BLI_assert_msg(false, "No context available for rendering.");
    return nil;
  }

  /* Verify shader. */
  active_shader_ = (shader) ? static_cast<MTLShader *>(unwrap(shader)) : nullptr;

  if (active_shader_ == nullptr || !active_shader_->is_valid()) {
    /* Skip drawing if there is no valid Metal shader.
     * This will occur if the path through which the shader is prepared
     * is invalid (e.g. Python without create-info), or the source shader uses a geometry
     * pass. */
    BLI_assert_msg(false, "No valid Metal shader!");
    return nil;
  }

  /* Check if using SSBO Fetch Mode.
   * This is an alternative drawing mode to geometry shaders, wherein vertex buffers
   * are bound as readable (random-access) GPU buffers and certain descriptor properties
   * are passed using shader uniforms. */
  bool uses_ssbo_fetch = active_shader_->get_uses_ssbo_vertex_fetch();

  /* Prepare Vertex Descriptor and extract VertexBuffers to bind. */
  MTLVertBuf *buffers[GPU_BATCH_VBO_MAX_LEN] = {nullptr};
  int num_buffers = 0;

  /* Ensure Index Buffer is ready. */
  MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem));
  if (mtl_elem != nullptr) {
    mtl_elem->upload_data();
  }

  /* Populate vertex descriptor with attribute binding information.
   * The vertex descriptor and buffer layout descriptors describe
   * how vertex data from bound vertex buffers maps to the
   * shader's input.
   * A unique vertex descriptor will result in a new PipelineStateObject
   * being generated for the currently bound shader. */
  prepare_vertex_descriptor_and_bindings(
      buffers, num_buffers, v_first, v_count, i_first, i_count);
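  /* Illustrative example (hypothetical batch): a single vertex buffer providing `pos` (float3)
   * and `nor` (float3) for a shader consuming both could resolve to:
   *   buffer_layouts[0] = {step: PerVertex, step_rate: 1, stride: 24}
   *   attributes[0]     = {Float3, offset: 0,  buffer_index: 0}   ("pos")
   *   attributes[1]     = {Float3, offset: 12, buffer_index: 0}   ("nor")
   * with num_buffers = 1. Any change to this layout yields a new PSO variant. */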
  /* Prepare vertex buffers -- run before the RenderCommandEncoder is created, in case
   * BlitCommandEncoder buffer data operations are required. */
  for (int i = 0; i < num_buffers; i++) {
    MTLVertBuf *buf_at_index = buffers[i];
    if (buf_at_index == nullptr) {
      BLI_assert_msg(
          false,
          "Total buffer count does not match highest buffer index, could be gaps in bindings");
      continue;
    }
    buf_at_index->bind();
  }

  /* Ensure render pass is active and fetch active RenderCommandEncoder. */
  id<MTLRenderCommandEncoder> rec = ctx->ensure_begin_render_pass();

  /* Fetch RenderPassState to enable resource binding for active pass. */
  MTLRenderPassState &rps = ctx->main_command_buffer.get_render_pass_state();

  /* Debug check: ensure the frame-buffer instance is not dirty. */
  BLI_assert(!ctx->main_command_buffer.get_active_framebuffer()->get_dirty());

  /* Bind Shader. */
  this->shader_bind();

  /* GPU debug markers. */
  if (G.debug & G_DEBUG_GPU) {
    [rec pushDebugGroup:[NSString stringWithFormat:@"batch_bind%@(shader: %s)",
                                                   this->elem ? @"(indexed)" : @"",
                                                   active_shader_->get_interface()->get_name()]];
    [rec insertDebugSignpost:[NSString
                                 stringWithFormat:@"batch_bind%@(shader: %s)",
                                                  this->elem ? @"(indexed)" : @"",
                                                  active_shader_->get_interface()->get_name()]];
  }

  /* Ensure the context's render pipeline state is fully set up and ready to execute the draw. */
  MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);
  if (!ctx->ensure_render_pipeline_state(mtl_prim_type)) {
    printf("FAILED TO ENSURE RENDER PIPELINE STATE");
    BLI_assert(false);

    if (G.debug & G_DEBUG_GPU) {
      [rec popDebugGroup];
    }
    return nil;
  }

  /*** Bind vertex buffers and index buffers. ***/

  /* SSBO Vertex Fetch buffer bindings. */
  if (uses_ssbo_fetch) {

    /* SSBO Vertex Fetch -- bind index buffer to the appropriate slot, if used. */
    id<MTLBuffer> idx_buffer = nil;
    GPUPrimType final_prim_type = this->prim_type;

    if (mtl_elem != nullptr) {
      /* Fetch index buffer. This function can situationally return an optimized
       * index buffer of a different primitive type. If this is the case, `final_prim_type`
       * and `v_count` will be updated with the new format.
       * NOTE: For indexed rendering, v_count represents the number of indices. */
      idx_buffer = mtl_elem->get_index_buffer(final_prim_type, v_count);
      BLI_assert(idx_buffer != nil);

      /* Update uniforms for SSBO-vertex-fetch-mode indexed rendering to flag usage. */
      int &uniform_ssbo_index_mode_u16 = active_shader_->uni_ssbo_uses_index_mode_u16;
      BLI_assert(uniform_ssbo_index_mode_u16 != -1);
      int uses_index_mode_u16 = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 1 : 0;
      active_shader_->uniform_int(uniform_ssbo_index_mode_u16, 1, 1, &uses_index_mode_u16);
    }
    else {
      idx_buffer = ctx->get_null_buffer();
    }
    rps.bind_vertex_buffer(idx_buffer, 0, MTL_SSBO_VERTEX_FETCH_IBO_INDEX);

    /* Ensure all attributes are set. */
    active_shader_->ssbo_vertex_fetch_bind_attributes_end(rec);

    /* Bind NULL buffers for unused vertex data slots. */
    id<MTLBuffer> null_buffer = ctx->get_null_buffer();
    BLI_assert(null_buffer != nil);
    for (int i = num_buffers; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
      if (rps.cached_vertex_buffer_bindings[i].metal_buffer == nil) {
        rps.bind_vertex_buffer(null_buffer, 0, i);
      }
    }

    /* Flag whether indexed rendering is used or not. */
    int &uniform_ssbo_use_indexed = active_shader_->uni_ssbo_uses_indexed_rendering;
    BLI_assert(uniform_ssbo_use_indexed != -1);
    int uses_indexed_rendering = (mtl_elem != nullptr) ? 1 : 0;
    active_shader_->uniform_int(uniform_ssbo_use_indexed, 1, 1, &uses_indexed_rendering);

    /* Set SSBO-fetch-mode status uniforms. */
    BLI_assert(active_shader_->uni_ssbo_input_prim_type_loc != -1);
    BLI_assert(active_shader_->uni_ssbo_input_vert_count_loc != -1);
    GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_shader_)),
                                  active_shader_->uni_ssbo_input_prim_type_loc,
                                  1,
                                  1,
                                  (const int *)(&final_prim_type));
    GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_shader_)),
                                  active_shader_->uni_ssbo_input_vert_count_loc,
                                  1,
                                  1,
                                  (const int *)(&v_count));
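    /* Illustrative example (hypothetical values): an indexed GPU_PRIM_TRIS batch with a U16
     * index buffer and v_count = 30 would set:
     *   uses_index_mode_u16 = 1, uses_indexed_rendering = 1,
     *   input_prim_type = GPU_PRIM_TRIS, input_vert_count = 30 (index count for indexed draws).
     * The SSBO-fetch vertex shader uses these values to manually fetch and assemble vertices. */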
  }

  /* Bind vertex buffers. */
  for (int i = 0; i < num_buffers; i++) {
    MTLVertBuf *buf_at_index = buffers[i];
    if (buf_at_index == nullptr) {
      BLI_assert_msg(
          false,
          "Total buffer count does not match highest buffer index, could be gaps in bindings");
      continue;
    }
    /* Buffer handle. */
    buf_at_index->flag_used();

    /* Fetch buffer from MTLVertexBuffer and bind. */
    id<MTLBuffer> mtl_buffer = buf_at_index->get_metal_buffer();
    BLI_assert(mtl_buffer != nil);
    rps.bind_vertex_buffer(mtl_buffer, 0, i);
  }

  if (G.debug & G_DEBUG_GPU) {
    [rec popDebugGroup];
  }

  /* Return the RenderCommandEncoder used for setup. */
  return rec;
}

void MTLBatch::unbind() {}

void MTLBatch::prepare_vertex_descriptor_and_bindings(
    MTLVertBuf **buffers, int &num_buffers, int v_first, int v_count, int i_first, int i_count)
{
  /* Here we populate the MTLContext vertex descriptor and resolve which buffers need to be
   * bound. */
  MTLStateManager *state_manager = static_cast<MTLStateManager *>(
      MTLContext::get()->state_manager);
  MTLRenderPipelineStateDescriptor &desc = state_manager->get_pipeline_descriptor();
  const MTLShaderInterface *interface = active_shader_->get_interface();
  uint16_t attr_mask = interface->get_enabled_attribute_mask();

  /* Reset vertex descriptor to default state. */
  desc.reset_vertex_descriptor();

  /* Fetch vertex and instance buffers. */
  Span<MTLVertBuf *> mtl_verts(reinterpret_cast<MTLVertBuf **>(this->verts),
                               GPU_BATCH_VBO_MAX_LEN);
  Span<MTLVertBuf *> mtl_inst(reinterpret_cast<MTLVertBuf **>(this->inst),
                              GPU_BATCH_INST_VBO_MAX_LEN);

  /* SSBO vertex fetch also passes vertex descriptor information into the shader. */
  if (active_shader_->get_uses_ssbo_vertex_fetch()) {
    active_shader_->ssbo_vertex_fetch_bind_attributes_begin();
  }

  /* Resolve Metal vertex buffer bindings. */
  /* Vertex Descriptors
   * ------------------
   * Vertex descriptors are required to generate a pipeline state, based on the current batch's
   * buffer bindings. These bindings are a unique matching, depending on what input attributes a
   * batch has in its buffers, and those which are supported by the shader interface.
   * We iterate through the buffers and resolve which attributes satisfy the requirements of the
   * currently bound shader. We cache this data, for a given Batch<->ShaderInterface pairing, in
   * a VAO cache to avoid the need to recalculate it. */
  bool buffer_is_instanced[GPU_BATCH_VBO_MAX_LEN] = {false};

  VertexDescriptorShaderInterfacePair *descriptor = this->vao_cache.find(interface);
  if (descriptor) {
    desc.vertex_descriptor = descriptor->vertex_descriptor;
    attr_mask = descriptor->attr_mask;
    num_buffers = descriptor->num_buffers;

    for (int bid = 0; bid < GPU_BATCH_VBO_MAX_LEN; ++bid) {
      if (descriptor->bufferIds[bid].used) {
        if (descriptor->bufferIds[bid].is_instance) {
          buffers[bid] = mtl_inst[descriptor->bufferIds[bid].id];
          buffer_is_instanced[bid] = true;
        }
        else {
          buffers[bid] = mtl_verts[descriptor->bufferIds[bid].id];
          buffer_is_instanced[bid] = false;
        }
      }
    }

    /* Use cached SSBO attribute binding data. */
    if (active_shader_->get_uses_ssbo_vertex_fetch()) {
      BLI_assert(desc.vertex_descriptor.uses_ssbo_vertex_fetch);
      for (int attr_id = 0; attr_id < desc.vertex_descriptor.num_ssbo_attributes; attr_id++) {
        active_shader_->ssbo_vertex_fetch_bind_attribute(
            desc.vertex_descriptor.ssbo_attributes[attr_id]);
      }
    }
  }
  else {
    VertexDescriptorShaderInterfacePair pair{};
    pair.interface = interface;

    for (int i = 0; i < GPU_BATCH_VBO_MAX_LEN; ++i) {
      pair.bufferIds[i].id = -1;
      pair.bufferIds[i].is_instance = 0;
      pair.bufferIds[i].used = 0;
    }
    /* NOTE: Attribute extraction order from buffers is the reverse of OpenGL's, as we flag once
     * an attribute is found, rather than pre-setting the mask. */

    /* Extract instance attributes (these take highest priority). */
    for (int v = 0; v < GPU_BATCH_INST_VBO_MAX_LEN; v++) {
      if (mtl_inst[v]) {
        MTL_LOG_INFO(" -- [Batch] Checking bindings for bound instance buffer %p\n", mtl_inst[v]);
        int buffer_ind = this->prepare_vertex_binding(
            mtl_inst[v], desc, interface, attr_mask, true);
        if (buffer_ind >= 0) {
          buffers[buffer_ind] = mtl_inst[v];
          buffer_is_instanced[buffer_ind] = true;

          pair.bufferIds[buffer_ind].id = v;
          pair.bufferIds[buffer_ind].used = 1;
          pair.bufferIds[buffer_ind].is_instance = 1;
          num_buffers = ((buffer_ind + 1) > num_buffers) ? (buffer_ind + 1) : num_buffers;
        }
      }
    }
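    /* Example (illustrative): if both an instance buffer and a vertex buffer provide an
     * attribute named "color", the instance buffer wins -- its pass runs first, clears the
     * corresponding bit in `attr_mask`, and the later vertex-buffer pass skips the
     * already-bound slot. */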
    /* Extract vertex attributes (the first-bound vertex buffer takes priority). */
    for (int v = 0; v < GPU_BATCH_VBO_MAX_LEN; v++) {
      if (mtl_verts[v] != nullptr) {
        MTL_LOG_INFO(" -- [Batch] Checking bindings for bound vertex buffer %p\n", mtl_verts[v]);
        int buffer_ind = this->prepare_vertex_binding(
            mtl_verts[v], desc, interface, attr_mask, false);
        if (buffer_ind >= 0) {
          buffers[buffer_ind] = mtl_verts[v];
          buffer_is_instanced[buffer_ind] = false;

          pair.bufferIds[buffer_ind].id = v;
          pair.bufferIds[buffer_ind].used = 1;
          pair.bufferIds[buffer_ind].is_instance = 0;
          num_buffers = ((buffer_ind + 1) > num_buffers) ? (buffer_ind + 1) : num_buffers;
        }
      }
    }

    /* Add to VertexDescriptor cache. */
    desc.vertex_descriptor.uses_ssbo_vertex_fetch = active_shader_->get_uses_ssbo_vertex_fetch();
    pair.attr_mask = attr_mask;
    pair.vertex_descriptor = desc.vertex_descriptor;
    pair.num_buffers = num_buffers;
    if (!this->vao_cache.insert(pair)) {
      printf(
          "[Performance Warning] VAO cache is full (size: %d), vertex descriptor will not be "
          "cached\n",
          GPU_VAO_STATIC_LEN);
    }
  }

/* DEBUG: Verify whether our attribute bindings have been fully provided as expected. */
#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
  if (attr_mask != 0) {
    for (uint16_t mask = 1, a = 0; a < 16; a++, mask <<= 1) {
      if (attr_mask & mask) {
        /* Fallback for setting default attributes, for missed slots. Attributes flagged with
         * 'MTLVertexFormatInvalid' in the vertex descriptor are bound to a NULL buffer during
         * PSO creation. */
        MTL_LOG_WARNING("MTLBatch: Missing expected attribute '%s' at index '%d' for shader: %s\n",
                        this->active_shader->interface->attributes[a].name,
                        a,
                        interface->name);
        /* Ensure any assigned attribute has not been given an invalid format. This should not
         * occur and may be the result of an unsupported attribute type conversion. */
        BLI_assert(desc.attributes[a].format == MTLVertexFormatInvalid);
      }
    }
  }
#endif
}

void MTLBatch::draw_advanced(int v_first, int v_count, int i_first, int i_count)
{
#if TRUST_NO_ONE
  BLI_assert(v_count > 0 && i_count > 0);
#endif

  /* Setup RenderPipelineState for batch. */
  MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
  id<MTLRenderCommandEncoder> rec = this->bind(v_first, v_count, i_first, i_count);
  if (rec == nil) {
    return;
  }

  /* Fetch IndexBuffer and resolve primitive type. */
  MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem));
  MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);

  /* Render using SSBO Vertex Fetch. */
  if (active_shader_->get_uses_ssbo_vertex_fetch()) {

    /* Submit draw call with modified vertex count, which reflects vertices per primitive
     * defined in the USE_SSBO_VERTEX_FETCH pragma. */
    int num_input_primitives = gpu_get_prim_count_from_type(v_count, this->prim_type);
    int output_num_verts = num_input_primitives *
                           active_shader_->get_ssbo_vertex_fetch_output_num_verts();
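    /* Worked example (hypothetical shader): for a GPU_PRIM_LINES batch with v_count = 10,
     * num_input_primitives = 5. If the shader's USE_SSBO_VERTEX_FETCH pragma declares 6 output
     * vertices per input primitive (e.g. expanding each line to two triangles), then
     * output_num_verts = 30. */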
    BLI_assert_msg(
        mtl_vertex_count_fits_primitive_type(
            output_num_verts, active_shader_->get_ssbo_vertex_fetch_output_prim_type()),
        "Output vertex count is not compatible with the requested output vertex primitive type");
    [rec drawPrimitives:active_shader_->get_ssbo_vertex_fetch_output_prim_type()
            vertexStart:0
            vertexCount:output_num_verts
          instanceCount:i_count
           baseInstance:i_first];
    ctx->main_command_buffer.register_draw_counters(output_num_verts * i_count);
  }
  /* Perform regular draw. */
  else if (mtl_elem == nullptr) {

    /* Primitive type topology emulation. */
    if (mtl_needs_topology_emulation(this->prim_type)) {

      /* Generate index buffer for primitive types requiring emulation. */
      GPUPrimType emulated_prim_type = this->prim_type;
      uint32_t emulated_v_count = v_count;
      id<MTLBuffer> generated_index_buffer = this->get_emulated_topology_buffer(
          emulated_prim_type, emulated_v_count);
      BLI_assert(generated_index_buffer != nil);

      MTLPrimitiveType emulated_mtl_prim_type = gpu_prim_type_to_metal(emulated_prim_type);

      /* Temp: Disable culling for emulated primitive types.
       * TODO(Metal): Support face winding in topology buffer. */
      [rec setCullMode:MTLCullModeNone];

      if (generated_index_buffer != nil) {
        BLI_assert(emulated_mtl_prim_type == MTLPrimitiveTypeTriangle ||
                   emulated_mtl_prim_type == MTLPrimitiveTypeLine);
        if (emulated_mtl_prim_type == MTLPrimitiveTypeTriangle) {
          BLI_assert(emulated_v_count % 3 == 0);
        }
        if (emulated_mtl_prim_type == MTLPrimitiveTypeLine) {
          BLI_assert(emulated_v_count % 2 == 0);
        }

        /* Set depth stencil state (requires knowledge of primitive type). */
        ctx->ensure_depth_stencil_state(emulated_mtl_prim_type);

        [rec drawIndexedPrimitives:emulated_mtl_prim_type
                        indexCount:emulated_v_count
                         indexType:MTLIndexTypeUInt32
                       indexBuffer:generated_index_buffer
                 indexBufferOffset:0
                     instanceCount:i_count
                        baseVertex:v_first
                      baseInstance:i_first];
      }
      else {
        printf("[Note] Cannot draw batch -- emulated topology mode: %u not yet supported\n",
               this->prim_type);
      }
    }
    else {
      /* Set depth stencil state (requires knowledge of primitive type). */
      ctx->ensure_depth_stencil_state(mtl_prim_type);

      /* Issue draw call. */
      [rec drawPrimitives:mtl_prim_type
              vertexStart:v_first
              vertexCount:v_count
            instanceCount:i_count
             baseInstance:i_first];
    }
    ctx->main_command_buffer.register_draw_counters(v_count * i_count);
  }
  /* Perform indexed draw. */
  else {
    MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_);
    uint32_t base_index = mtl_elem->index_base_;
    uint32_t index_size = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 2 : 4;
    uint32_t v_first_ofs = ((v_first + mtl_elem->index_start_) * index_size);
    BLI_assert_msg((v_first_ofs % index_size) == 0,
                   "Index offset is not 2/4-byte aligned as per the Metal spec");

    /* Fetch index buffer. May return an index buffer of a differing format,
     * if index buffer optimization is used. In these cases, final_prim_type and
     * index_count get updated with the new properties. */
    GPUPrimType final_prim_type = this->prim_type;
    uint index_count = v_count;

    id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count);
    mtl_prim_type = gpu_prim_type_to_metal(final_prim_type);
    BLI_assert(index_buffer != nil);

    if (index_buffer != nil) {

      /* Set depth stencil state (requires knowledge of primitive type). */
      ctx->ensure_depth_stencil_state(mtl_prim_type);

      /* Issue draw call. */
      [rec drawIndexedPrimitives:mtl_prim_type
                      indexCount:index_count
                       indexType:index_type
                     indexBuffer:index_buffer
               indexBufferOffset:v_first_ofs
                   instanceCount:i_count
                      baseVertex:base_index
                    baseInstance:i_first];
      ctx->main_command_buffer.register_draw_counters(index_count * i_count);
    }
    else {
      BLI_assert_msg(false, "Index buffer does not have a backing Metal buffer");
    }
  }

  /* End of draw. */
  this->unbind();
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Topology emulation and optimization
 * \{ */

id<MTLBuffer> MTLBatch::get_emulated_topology_buffer(GPUPrimType &in_out_prim_type,
                                                     uint32_t &in_out_v_count)
{
  BLI_assert(in_out_v_count > 0);

  /* Determine emulated primitive types. */
  GPUPrimType input_prim_type = in_out_prim_type;
  uint32_t v_count = in_out_v_count;
  GPUPrimType output_prim_type;
  switch (input_prim_type) {
    case GPU_PRIM_POINTS:
    case GPU_PRIM_LINES:
    case GPU_PRIM_TRIS:
      BLI_assert_msg(false, "Optimal primitive types should not reach here.");
      return nil;
    case GPU_PRIM_LINES_ADJ:
    case GPU_PRIM_TRIS_ADJ:
      BLI_assert_msg(false, "Adjacency primitive types should not reach here.");
      return nil;
    case GPU_PRIM_LINE_STRIP:
    case GPU_PRIM_LINE_LOOP:
    case GPU_PRIM_LINE_STRIP_ADJ:
      output_prim_type = GPU_PRIM_LINES;
      break;
    case GPU_PRIM_TRI_STRIP:
    case GPU_PRIM_TRI_FAN:
      output_prim_type = GPU_PRIM_TRIS;
      break;
    default:
      BLI_assert_msg(false, "Invalid primitive type.");
      return nil;
  }
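  /* Worked example: a GPU_PRIM_LINE_LOOP input with v_count = 4 emulates GPU_PRIM_LINES using
   * index pairs (0,1) (1,2) (2,3) (3,0), while a GPU_PRIM_TRI_FAN input with v_count = 5
   * emulates GPU_PRIM_TRIS using index triples (0,1,2) (0,2,3) (0,3,4). The index buffer
   * generated below encodes exactly these patterns. */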
  /* Check if topology buffer exists and is valid. */
  if (this->emulated_topology_buffer_ != nullptr &&
      (emulated_topology_type_ != input_prim_type || topology_buffer_input_v_count_ != v_count))
  {
    /* Release existing topology buffer. */
    emulated_topology_buffer_->free();
    emulated_topology_buffer_ = nullptr;
  }

  /* Generate new topology index buffer. */
  if (this->emulated_topology_buffer_ == nullptr) {
    /* Calculate IB length. */
    uint32_t output_prim_count = 0;
    switch (input_prim_type) {
      case GPU_PRIM_LINE_STRIP:
      case GPU_PRIM_LINE_STRIP_ADJ:
        output_prim_count = v_count - 1;
        break;
      case GPU_PRIM_LINE_LOOP:
        output_prim_count = v_count;
        break;
      case GPU_PRIM_TRI_STRIP:
      case GPU_PRIM_TRI_FAN:
        output_prim_count = v_count - 2;
        break;
      default:
        BLI_assert_msg(false, "Cannot generate optimized topology buffer for other types.");
        break;
    }
    uint32_t output_IB_elems = output_prim_count * ((output_prim_type == GPU_PRIM_TRIS) ? 3 : 2);

    /* Allocate buffer. */
    uint32_t buffer_bytes = output_IB_elems * 4;
    BLI_assert(buffer_bytes > 0);
    this->emulated_topology_buffer_ = MTLContext::get_global_memory_manager().allocate(
        buffer_bytes, true);

    /* Populate. */
    uint32_t *data = (uint32_t *)this->emulated_topology_buffer_->get_host_ptr();
    BLI_assert(data != nullptr);

    /* TODO(Metal): Support inverse winding modes. */
    bool winding_clockwise = false;
    UNUSED_VARS(winding_clockwise);

    switch (input_prim_type) {
      /* Line Loop. */
      case GPU_PRIM_LINE_LOOP: {
        int line = 0;
        for (line = 0; line < output_prim_count - 1; line++) {
          data[line * 2 + 0] = line + 0;
          data[line * 2 + 1] = line + 1;
        }
        /* Closing line. */
        data[line * 2 + 0] = line + 0;
        data[line * 2 + 1] = 0;
      } break;

      /* Triangle Fan. */
      case GPU_PRIM_TRI_FAN: {
        for (int triangle = 0; triangle < output_prim_count; triangle++) {
          data[triangle * 3 + 0] = 0; /* Always zero; the fan pivots around the first vertex. */
          data[triangle * 3 + 1] = triangle + 1;
          data[triangle * 3 + 2] = triangle + 2;
        }
      } break;

      default:
        BLI_assert_msg(false, "Other primitive types do not require emulation.");
        return nil;
    }

    /* Flush. */
    this->emulated_topology_buffer_->flush();

    /* Assign members relating to the current cached IB. */
    topology_buffer_input_v_count_ = v_count;
    topology_buffer_output_v_count_ = output_IB_elems;
    emulated_topology_type_ = input_prim_type;
  }

  /* Return. */
  in_out_v_count = topology_buffer_output_v_count_;
  in_out_prim_type = output_prim_type;
  return (emulated_topology_buffer_) ? emulated_topology_buffer_->get_metal_buffer() : nil;
}

/** \} */

}  // namespace blender::gpu