git.blender.org/blender.git
Diffstat (limited to 'source/blender/gpu/metal')
-rw-r--r--  source/blender/gpu/metal/mtl_backend.hh              |    2
-rw-r--r--  source/blender/gpu/metal/mtl_backend.mm              |   18
-rw-r--r--  source/blender/gpu/metal/mtl_batch.hh                |  135
-rw-r--r--  source/blender/gpu/metal/mtl_batch.mm                |  998
-rw-r--r--  source/blender/gpu/metal/mtl_command_buffer.mm       |  174
-rw-r--r--  source/blender/gpu/metal/mtl_common.hh               |    4
-rw-r--r--  source/blender/gpu/metal/mtl_context.hh              |  136
-rw-r--r--  source/blender/gpu/metal/mtl_context.mm              | 1192
-rw-r--r--  source/blender/gpu/metal/mtl_drawlist.hh             |   58
-rw-r--r--  source/blender/gpu/metal/mtl_drawlist.mm             |  284
-rw-r--r--  source/blender/gpu/metal/mtl_immediate.hh            |   40
-rw-r--r--  source/blender/gpu/metal/mtl_immediate.mm            |  401
-rw-r--r--  source/blender/gpu/metal/mtl_index_buffer.mm         |    2
-rw-r--r--  source/blender/gpu/metal/mtl_memory.hh               |    6
-rw-r--r--  source/blender/gpu/metal/mtl_pso_descriptor_state.hh |   51
-rw-r--r--  source/blender/gpu/metal/mtl_shader.hh               |    2
-rw-r--r--  source/blender/gpu/metal/mtl_shader.mm               |   26
-rw-r--r--  source/blender/gpu/metal/mtl_shader_generator.hh     |    2
-rw-r--r--  source/blender/gpu/metal/mtl_shader_generator.mm     |    4
-rw-r--r--  source/blender/gpu/metal/mtl_shader_interface.mm     |   10
-rw-r--r--  source/blender/gpu/metal/mtl_texture.hh              |   41
-rw-r--r--  source/blender/gpu/metal/mtl_texture.mm              |  129
-rw-r--r--  source/blender/gpu/metal/mtl_texture_util.mm         |   32
-rw-r--r--  source/blender/gpu/metal/mtl_vertex_buffer.hh        |   75
-rw-r--r--  source/blender/gpu/metal/mtl_vertex_buffer.mm        |  368
25 files changed, 4011 insertions(+), 179 deletions(-)
diff --git a/source/blender/gpu/metal/mtl_backend.hh b/source/blender/gpu/metal/mtl_backend.hh
index 214a5d738a9..082fab24ba4 100644
--- a/source/blender/gpu/metal/mtl_backend.hh
+++ b/source/blender/gpu/metal/mtl_backend.hh
@@ -63,7 +63,7 @@ class MTLBackend : public GPUBackend {
/* MTL Allocators need to be implemented in separate .mm files, due to allocation of Objective-C
* objects. */
- Context *context_alloc(void *ghost_window) override;
+ Context *context_alloc(void *ghost_window, void *ghost_context) override;
Batch *batch_alloc() override;
DrawList *drawlist_alloc(int list_length) override;
FrameBuffer *framebuffer_alloc(const char *name) override;
diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm
index 3cd7794f6c9..240951c1ebd 100644
--- a/source/blender/gpu/metal/mtl_backend.mm
+++ b/source/blender/gpu/metal/mtl_backend.mm
@@ -8,12 +8,16 @@
#include "gpu_backend.hh"
#include "mtl_backend.hh"
+#include "mtl_batch.hh"
#include "mtl_context.hh"
+#include "mtl_drawlist.hh"
#include "mtl_framebuffer.hh"
+#include "mtl_immediate.hh"
#include "mtl_index_buffer.hh"
#include "mtl_query.hh"
#include "mtl_shader.hh"
#include "mtl_uniform_buffer.hh"
+#include "mtl_vertex_buffer.hh"
#include "gpu_capabilities_private.hh"
#include "gpu_platform_private.hh"
@@ -36,21 +40,19 @@ void MTLBackend::samplers_update(){
/* Placeholder -- Handled in MTLContext. */
};
-Context *MTLBackend::context_alloc(void *ghost_window)
+Context *MTLBackend::context_alloc(void *ghost_window, void *ghost_context)
{
- return new MTLContext(ghost_window);
+ return new MTLContext(ghost_window, ghost_context);
};
Batch *MTLBackend::batch_alloc()
{
- /* TODO(Metal): Implement MTLBatch. */
- return nullptr;
+ return new MTLBatch();
};
DrawList *MTLBackend::drawlist_alloc(int list_length)
{
- /* TODO(Metal): Implement MTLDrawList. */
- return nullptr;
+ return new MTLDrawList(list_length);
};
FrameBuffer *MTLBackend::framebuffer_alloc(const char *name)
@@ -94,8 +96,7 @@ StorageBuf *MTLBackend::storagebuf_alloc(int size, GPUUsageType usage, const cha
VertBuf *MTLBackend::vertbuf_alloc()
{
- /* TODO(Metal): Implement MTLVertBuf. */
- return nullptr;
+ return new MTLVertBuf();
}
void MTLBackend::render_begin()
@@ -417,6 +418,7 @@ void MTLBackend::capabilities_init(MTLContext *ctx)
GCaps.depth_blitting_workaround = false;
GCaps.use_main_context_workaround = false;
GCaps.broken_amd_driver = false;
+ GCaps.clear_viewport_workaround = true;
/* Metal related workarounds. */
/* Minimum per-vertex stride is 4 bytes in Metal.
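[Note] The mtl_backend.mm hunks above replace the TODO placeholders so that batch_alloc(), drawlist_alloc() and vertbuf_alloc() now return concrete Metal objects. For readers unfamiliar with the backend abstraction, the following standalone C++ sketch (simplified, with hypothetical *Sketch types; not the actual Blender classes from gpu_backend.hh) illustrates the abstract-factory pattern these overrides implement.

#include <cstdio>
#include <memory>

/* Minimal sketch of the GPUBackend factory pattern, assuming simplified
 * stand-in types. The real classes live in gpu_backend.hh / mtl_backend.mm. */
struct Batch { virtual ~Batch() = default; };
struct VertBuf { virtual ~VertBuf() = default; };

struct GPUBackendSketch {
  virtual ~GPUBackendSketch() = default;
  virtual Batch *batch_alloc() = 0;
  virtual VertBuf *vertbuf_alloc() = 0;
};

struct MTLBatchSketch : Batch {};
struct MTLVertBufSketch : VertBuf {};

/* Before this commit the Metal overrides returned nullptr; now they return
 * real objects, so generic GPU code can stay backend-agnostic. */
struct MTLBackendSketch : GPUBackendSketch {
  Batch *batch_alloc() override { return new MTLBatchSketch(); }
  VertBuf *vertbuf_alloc() override { return new MTLVertBufSketch(); }
};

int main()
{
  std::unique_ptr<GPUBackendSketch> backend = std::make_unique<MTLBackendSketch>();
  std::unique_ptr<Batch> batch(backend->batch_alloc());
  std::unique_ptr<VertBuf> vbo(backend->vertbuf_alloc());
  std::printf("allocated batch=%p vbo=%p\n", (void *)batch.get(), (void *)vbo.get());
  return 0;
}
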
diff --git a/source/blender/gpu/metal/mtl_batch.hh b/source/blender/gpu/metal/mtl_batch.hh
new file mode 100644
index 00000000000..9e179e662b5
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_batch.hh
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * GPU geometry batch
+ * Contains VAOs + VBOs + Shader representing a drawable entity.
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+#include "gpu_batch_private.hh"
+#include "mtl_index_buffer.hh"
+#include "mtl_primitive.hh"
+#include "mtl_shader.hh"
+#include "mtl_vertex_buffer.hh"
+
+namespace blender::gpu {
+
+class MTLContext;
+class MTLShaderInterface;
+
+#define GPU_VAO_STATIC_LEN 64
+
+struct VertexBufferID {
+ uint32_t id : 16;
+ uint32_t is_instance : 15;
+ uint32_t used : 1;
+};
+
+class MTLBatch : public Batch {
+
+ /* Vertex Bind-state Caching for a given shader interface used with the Batch. */
+ struct VertexDescriptorShaderInterfacePair {
+ MTLVertexDescriptor vertex_descriptor{};
+ const ShaderInterface *interface = nullptr;
+ uint16_t attr_mask{};
+ int num_buffers{};
+ VertexBufferID bufferIds[GPU_BATCH_VBO_MAX_LEN] = {};
+ /* Cache life index compares a cache entry with the active MTLBatch state.
+ * This is initially set to the cache life index of MTLBatch. If the batch has been modified,
+ * this index is incremented to cheaply invalidate existing cache entries. */
+ uint32_t cache_life_index = 0;
+ };
+
+ class MTLVertexDescriptorCache {
+
+ private:
+ MTLBatch *batch_;
+
+ VertexDescriptorShaderInterfacePair cache_[GPU_VAO_STATIC_LEN] = {};
+ MTLContext *cache_context_ = nullptr;
+ uint32_t cache_life_index_ = 0;
+
+ public:
+ MTLVertexDescriptorCache(MTLBatch *batch) : batch_(batch){};
+ VertexDescriptorShaderInterfacePair *find(const ShaderInterface *interface);
+ bool insert(VertexDescriptorShaderInterfacePair &data);
+
+ private:
+ void vertex_descriptor_cache_init(MTLContext *ctx);
+ void vertex_descriptor_cache_clear();
+ void vertex_descriptor_cache_ensure();
+ };
+
+ private:
+ MTLShader *active_shader_ = nullptr;
+ bool shader_in_use_ = false;
+ MTLVertexDescriptorCache vao_cache = {this};
+
+ /* Topology emulation. */
+ gpu::MTLBuffer *emulated_topology_buffer_ = nullptr;
+ GPUPrimType emulated_topology_type_;
+ uint32_t topology_buffer_input_v_count_ = 0;
+ uint32_t topology_buffer_output_v_count_ = 0;
+
+ public:
+ MTLBatch(){};
+ ~MTLBatch(){};
+
+ void draw(int v_first, int v_count, int i_first, int i_count) override;
+ void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override
+ {
+ /* TODO(Metal): Support indirect draw commands. */
+ }
+ void multi_draw_indirect(GPUStorageBuf *indirect_buf,
+ int count,
+ intptr_t offset,
+ intptr_t stride) override
+ {
+ /* TODO(Metal): Support indirect draw commands. */
+ }
+
+ /* Returns an initialized RenderCommandEncoder for drawing if all is good.
+ * Otherwise, nil. */
+ id<MTLRenderCommandEncoder> bind(uint v_first, uint v_count, uint i_first, uint i_count);
+ void unbind();
+
+ /* Convenience getters. */
+ MTLIndexBuf *elem_() const
+ {
+ return static_cast<MTLIndexBuf *>(unwrap(elem));
+ }
+ MTLVertBuf *verts_(const int index) const
+ {
+ return static_cast<MTLVertBuf *>(unwrap(verts[index]));
+ }
+ MTLVertBuf *inst_(const int index) const
+ {
+ return static_cast<MTLVertBuf *>(unwrap(inst[index]));
+ }
+ MTLShader *active_shader_get() const
+ {
+ return active_shader_;
+ }
+
+ private:
+ void shader_bind();
+ void draw_advanced(int v_first, int v_count, int i_first, int i_count);
+ int prepare_vertex_binding(MTLVertBuf *verts,
+ MTLRenderPipelineStateDescriptor &desc,
+ const MTLShaderInterface *interface,
+ uint16_t &attr_mask,
+ bool instanced);
+
+ id<MTLBuffer> get_emulated_toplogy_buffer(GPUPrimType &in_out_prim_type, uint32_t &v_count);
+
+ void prepare_vertex_descriptor_and_bindings(
+ MTLVertBuf **buffers, int &num_buffers, int v_first, int v_count, int i_first, int i_count);
+
+ MEM_CXX_CLASS_ALLOC_FUNCS("MTLBatch");
+};
+
+} // namespace blender::gpu
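[Note] Two details in mtl_batch.hh are easy to miss: VertexBufferID packs a 16-bit buffer id, an is_instance flag and a used flag into a single 32-bit word, and the VAO cache invalidates entries cheaply by bumping cache_life_index instead of clearing the array. The following standalone C++ sketch of both ideas is illustrative only and is not the Blender implementation.

#include <cassert>
#include <cstdint>

/* Sketch of the VertexBufferID packing used above: 16 + 15 + 1 bits
 * share one 32-bit word. */
struct VertexBufferIDSketch {
  uint32_t id : 16;
  uint32_t is_instance : 15;
  uint32_t used : 1;
};
static_assert(sizeof(VertexBufferIDSketch) == 4, "packs into one 32-bit word");

/* Sketch of generation-based cache invalidation: entries remember the life
 * index they were created under; bumping the index invalidates them all
 * without touching the array. */
struct CacheEntry {
  const void *key;
  uint32_t life;
};

struct GenerationCache {
  CacheEntry entries[8] = {};
  uint32_t life = 0;

  void invalidate_all() { life++; }

  CacheEntry *find(const void *key)
  {
    for (CacheEntry &e : entries) {
      if (e.key == key && e.life == life) {
        return &e;
      }
    }
    return nullptr;
  }

  bool insert(const void *key)
  {
    for (CacheEntry &e : entries) {
      if (e.key == nullptr || e.life != life) {
        e.key = key;
        e.life = life;
        return true;
      }
    }
    return false; /* Cache full, matching the "performance warning" path. */
  }
};

int main()
{
  GenerationCache cache;
  int dummy_interface = 0;
  cache.insert(&dummy_interface);
  assert(cache.find(&dummy_interface) != nullptr);
  cache.invalidate_all(); /* Analogous to handling GPU_BATCH_DIRTY. */
  assert(cache.find(&dummy_interface) == nullptr);
  return 0;
}
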
diff --git a/source/blender/gpu/metal/mtl_batch.mm b/source/blender/gpu/metal/mtl_batch.mm
new file mode 100644
index 00000000000..988fb9b793b
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_batch.mm
@@ -0,0 +1,998 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Metal implementation of GPUBatch.
+ */
+
+#include "BLI_assert.h"
+#include "BLI_span.hh"
+
+#include "BKE_global.h"
+
+#include "GPU_common.h"
+#include "gpu_batch_private.hh"
+#include "gpu_shader_private.hh"
+
+#include "mtl_batch.hh"
+#include "mtl_context.hh"
+#include "mtl_debug.hh"
+#include "mtl_index_buffer.hh"
+#include "mtl_shader.hh"
+#include "mtl_vertex_buffer.hh"
+
+#include <string>
+
+namespace blender::gpu {
+
+/* -------------------------------------------------------------------- */
+/** \name Creation & Deletion
+ * \{ */
+void MTLBatch::draw(int v_first, int v_count, int i_first, int i_count)
+{
+ if (this->flag & GPU_BATCH_INVALID) {
+ this->shader_in_use_ = false;
+ }
+ this->draw_advanced(v_first, v_count, i_first, i_count);
+}
+
+void MTLBatch::shader_bind()
+{
+ if (active_shader_ && active_shader_->is_valid()) {
+ active_shader_->bind();
+ shader_in_use_ = true;
+ }
+}
+
+void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_init(MTLContext *ctx)
+{
+ BLI_assert(ctx != nullptr);
+ this->vertex_descriptor_cache_clear();
+ cache_context_ = ctx;
+}
+
+void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_clear()
+{
+ cache_life_index_++;
+ cache_context_ = nullptr;
+}
+
+void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_ensure()
+{
+ if (this->cache_context_ != nullptr) {
+
+ /* Invalidate vertex descriptor bindings cache if batch has changed. */
+ if (batch_->flag & GPU_BATCH_DIRTY) {
+ batch_->flag &= ~GPU_BATCH_DIRTY;
+ this->vertex_descriptor_cache_clear();
+ }
+ }
+
+ /* Initialize cache if not ready. */
+ if (cache_context_ == nullptr) {
+ this->vertex_descriptor_cache_init(MTLContext::get());
+ }
+}
+
+MTLBatch::VertexDescriptorShaderInterfacePair *MTLBatch::MTLVertexDescriptorCache::find(
+ const ShaderInterface *interface)
+{
+ this->vertex_descriptor_cache_ensure();
+ for (int i = 0; i < GPU_VAO_STATIC_LEN; ++i) {
+ if (cache_[i].interface == interface && cache_[i].cache_life_index == cache_life_index_) {
+ return &cache_[i];
+ }
+ }
+ return nullptr;
+}
+
+bool MTLBatch::MTLVertexDescriptorCache::insert(
+ MTLBatch::VertexDescriptorShaderInterfacePair &data)
+{
+ vertex_descriptor_cache_ensure();
+ for (int i = 0; i < GPU_VAO_STATIC_LEN; ++i) {
+ if (cache_[i].interface == nullptr || cache_[i].cache_life_index != cache_life_index_) {
+ cache_[i] = data;
+ cache_[i].cache_life_index = cache_life_index_;
+ return true;
+ }
+ }
+ return false;
+}
+
+int MTLBatch::prepare_vertex_binding(MTLVertBuf *verts,
+ MTLRenderPipelineStateDescriptor &desc,
+ const MTLShaderInterface *interface,
+ uint16_t &attr_mask,
+ bool instanced)
+{
+
+ const GPUVertFormat *format = &verts->format;
+ /* Whether the current vertex buffer has been added to the buffer layout descriptor. */
+ bool buffer_added = false;
+ /* Per-vertex stride of current vertex buffer. */
+ int buffer_stride = format->stride;
+ /* Buffer binding index of the vertex buffer once added to the buffer layout descriptor. */
+ int buffer_index = -1;
+ int attribute_offset = 0;
+
+ if (!active_shader_->get_uses_ssbo_vertex_fetch()) {
+ BLI_assert(
+ buffer_stride >= 4 &&
+ "In Metal, Vertex buffer stride should be 4. SSBO Vertex fetch is not affected by this");
+ }
+
+ /* Iterate over GPUVertBuf vertex format and find attributes matching those in the active
+ * shader's interface. */
+ for (uint32_t a_idx = 0; a_idx < format->attr_len; a_idx++) {
+ const GPUVertAttr *a = &format->attrs[a_idx];
+
+ if (format->deinterleaved) {
+ attribute_offset += ((a_idx == 0) ? 0 : format->attrs[a_idx - 1].size) * verts->vertex_len;
+ buffer_stride = a->size;
+ }
+ else {
+ attribute_offset = a->offset;
+ }
+
+ /* Find attribute with the matching name. Attributes may have multiple compatible
+ * name aliases. */
+ for (uint32_t n_idx = 0; n_idx < a->name_len; n_idx++) {
+ const char *name = GPU_vertformat_attr_name_get(format, a, n_idx);
+ const ShaderInput *input = interface->attr_get(name);
+
+ if (input == nullptr || input->location == -1) {
+ /* The provided vertex/instance buffers may contain data for attributes that this
+ * particular shader does not need. Binding information is only required for the
+ * attributes that are present in the shader interface. */
+ MTL_LOG_WARNING(
+ "MTLBatch: Could not find attribute with name '%s' (defined in active vertex format) "
+ "in the shader interface for shader '%s'\n",
+ name,
+ interface->get_name());
+ continue;
+ }
+
+ /* Fetch metal attribute information. */
+ const MTLShaderInputAttribute &mtl_attr = interface->get_attribute(input->location);
+ BLI_assert(mtl_attr.location >= 0);
+ /* Verify that the attribute location from the shader interface
+ * matches the attribute location returned. */
+ BLI_assert(mtl_attr.location == input->location);
+
+ /* Check if attribute is already present in the given slot. */
+ if ((~attr_mask) & (1 << mtl_attr.location)) {
+ MTL_LOG_INFO(
+ " -- [Batch] Skipping attribute with input location %d (As one is already bound)\n",
+ mtl_attr.location);
+ }
+ else {
+
+ /* Update attribute used-slot mask. */
+ attr_mask &= ~(1 << mtl_attr.location);
+
+ /* Add buffer layout entry in descriptor if it has not yet been added
+ * for current vertex buffer. */
+ if (!buffer_added) {
+ buffer_index = desc.vertex_descriptor.num_vert_buffers;
+ desc.vertex_descriptor.buffer_layouts[buffer_index].step_function =
+ (instanced) ? MTLVertexStepFunctionPerInstance : MTLVertexStepFunctionPerVertex;
+ desc.vertex_descriptor.buffer_layouts[buffer_index].step_rate = 1;
+ desc.vertex_descriptor.buffer_layouts[buffer_index].stride = buffer_stride;
+ desc.vertex_descriptor.num_vert_buffers++;
+ buffer_added = true;
+
+ MTL_LOG_INFO(" -- [Batch] Adding source %s buffer (Index: %d, Stride: %d)\n",
+ (instanced) ? "instance" : "vertex",
+ buffer_index,
+ buffer_stride);
+ }
+ else {
+ /* Ensure stride is correct for de-interleaved attributes. */
+ desc.vertex_descriptor.buffer_layouts[buffer_index].stride = buffer_stride;
+ }
+
+ /* Handle Matrix/Array vertex attribute types.
+ * Metal does not natively support these as attribute types, so we handle these cases
+ * by stacking together compatible types (e.g. 4xVec4 for Mat4) and combining
+ * the data in the shader.
+ * The generated Metal shader will contain a generated input binding, which reads
+ * in individual attributes and merges them into the desired type after vertex
+ * assembly. e.g. a Mat4 (Float4x4) will generate 4 Float4 attributes. */
+ if (a->comp_len == 16 || a->comp_len == 12 || a->comp_len == 8) {
+ BLI_assert_msg(
+ a->comp_len == 16,
+ "only mat4 attributes currently supported -- Not ready to handle other long "
+ "component length attributes yet");
+
+ /* SSBO Vertex Fetch Attribute safety checks. */
+ if (active_shader_->get_uses_ssbo_vertex_fetch()) {
+ /* When using SSBO vertex fetch, we do not need to expose split attributes,
+ * A matrix can be read directly as a whole block of contiguous data. */
+ MTLSSBOAttribute ssbo_attr(mtl_attr.index,
+ buffer_index,
+ attribute_offset,
+ buffer_stride,
+ GPU_SHADER_ATTR_TYPE_MAT4,
+ instanced);
+ active_shader_->ssbo_vertex_fetch_bind_attribute(ssbo_attr);
+ desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] =
+ ssbo_attr;
+ desc.vertex_descriptor.num_ssbo_attributes++;
+ }
+ else {
+
+ /* Handle Mat4 attributes. */
+ if (a->comp_len == 16) {
+ /* Debug safety checks. */
+ BLI_assert_msg(mtl_attr.matrix_element_count == 4,
+ "mat4 type expected but there are fewer components");
+ BLI_assert_msg(mtl_attr.size == 16, "Expecting subtype 'vec4' with 16 bytes");
+ BLI_assert_msg(
+ mtl_attr.format == MTLVertexFormatFloat4,
+ "Per-attribute vertex format MUST be float4 for an input type of 'mat4'");
+
+ /* We have found the 'ROOT' attribute. A mat4 contains 4 consecutive float4 attribute
+ * locations we must map to. */
+ for (int i = 0; i < a->comp_len / 4; i++) {
+ desc.vertex_descriptor.attributes[mtl_attr.location + i].format =
+ MTLVertexFormatFloat4;
+ /* Data is consecutive in the buffer for the whole matrix, each float4 will shift
+ * the offset by 16 bytes. */
+ desc.vertex_descriptor.attributes[mtl_attr.location + i].offset =
+ attribute_offset + i * 16;
+ /* All source data for a matrix is in the same singular buffer. */
+ desc.vertex_descriptor.attributes[mtl_attr.location + i].buffer_index =
+ buffer_index;
+
+ /* Update total attribute count. */
+ desc.vertex_descriptor.num_attributes = max_ii(
+ mtl_attr.location + i + 1, desc.vertex_descriptor.num_attributes);
+ MTL_LOG_INFO("-- Sub-Attrib Location: %d, offset: %d, buffer index: %d\n",
+ mtl_attr.location + i,
+ attribute_offset + i * 16,
+ buffer_index);
+ }
+ MTL_LOG_INFO(
+ "Float4x4 attribute type added for '%s' at attribute locations: %d to %d\n",
+ name,
+ mtl_attr.location,
+ mtl_attr.location + 3);
+ }
+
+ /* Ensure we are not exceeding the attribute limit. */
+ BLI_assert(desc.vertex_descriptor.num_attributes <= MTL_MAX_VERTEX_INPUT_ATTRIBUTES);
+ }
+ }
+ else {
+
+ /* Handle Any required format conversions.
+ * NOTE(Metal): If there is a mis-match between the format of an attribute
+ * in the shader interface, and the specified format in the VertexBuffer VertexFormat,
+ * we need to perform a format conversion.
+ *
+ * The Metal API can perform certain conversions internally during vertex assembly:
+ * - Type normalization, e.g. short2 to float2 normalized to the 0.0 to 1.0 range.
+ * - Type truncation, e.g. Float4 to Float2.
+ * - Type expansion, e.g. Float3 to Float4 (filling 0,0,0,1 into the empty
+ * elements).
+ *
+ * Certain conversions cannot be performed, however, and in these cases we need to
+ * instruct the shader to generate a specialized version with a conversion routine upon
+ * attribute read.
+ * - This handles cases such as conversion between types e.g. Integer to float without
+ * normalization.
+ *
+ * For more information on the supported and unsupported conversions, see:
+ * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc
+ */
+ MTLVertexFormat converted_format;
+ bool can_use_internal_conversion = mtl_convert_vertex_format(
+ mtl_attr.format,
+ (GPUVertCompType)a->comp_type,
+ a->comp_len,
+ (GPUVertFetchMode)a->fetch_mode,
+ &converted_format);
+ bool is_floating_point_format = (a->comp_type == GPU_COMP_F32);
+
+ if (can_use_internal_conversion) {
+ desc.vertex_descriptor.attributes[mtl_attr.location].format = converted_format;
+ desc.vertex_descriptor.attributes[mtl_attr.location].format_conversion_mode =
+ is_floating_point_format ? (GPUVertFetchMode)GPU_FETCH_FLOAT :
+ (GPUVertFetchMode)GPU_FETCH_INT;
+ BLI_assert(converted_format != MTLVertexFormatInvalid);
+ }
+ else {
+ /* The internal implicit conversion is not supported.
+ * In this case, we need to handle conversion inside the shader.
+ * This is handled using `format_conversion_mode`.
+ * `format_conversion_mode` is assigned the blender-specified fetch mode (GPU_FETCH_*).
+ * This then controls how a given attribute is interpreted. The data will be read
+ * as specified and then converted appropriately to the correct form.
+ *
+ * e.g. if `GPU_FETCH_INT_TO_FLOAT` is specified, the specialized read-routine
+ * in the shader will read the data as an int, and cast this to floating point
+ * representation. (Rather than reading the source data as float).
+ *
+ * NOTE: Even if full conversion is not supported, we may still partially perform an
+ * implicit conversion where possible, such as vector truncation or expansion. */
+ MTLVertexFormat converted_format;
+ bool can_convert = mtl_vertex_format_resize(
+ mtl_attr.format, a->comp_len, &converted_format);
+ desc.vertex_descriptor.attributes[mtl_attr.location].format = can_convert ?
+ converted_format :
+ mtl_attr.format;
+ desc.vertex_descriptor.attributes[mtl_attr.location].format_conversion_mode =
+ (GPUVertFetchMode)a->fetch_mode;
+ BLI_assert(desc.vertex_descriptor.attributes[mtl_attr.location].format !=
+ MTLVertexFormatInvalid);
+ }
+ desc.vertex_descriptor.attributes[mtl_attr.location].offset = attribute_offset;
+ desc.vertex_descriptor.attributes[mtl_attr.location].buffer_index = buffer_index;
+ desc.vertex_descriptor.num_attributes = ((mtl_attr.location + 1) >
+ desc.vertex_descriptor.num_attributes) ?
+ (mtl_attr.location + 1) :
+ desc.vertex_descriptor.num_attributes;
+
+ /* SSBO Vertex Fetch attribute bind. */
+ if (active_shader_->get_uses_ssbo_vertex_fetch()) {
+ BLI_assert_msg(desc.vertex_descriptor.attributes[mtl_attr.location].format ==
+ mtl_attr.format,
+ "SSBO Vertex Fetch does not support attribute conversion.");
+
+ MTLSSBOAttribute ssbo_attr(
+ mtl_attr.index,
+ buffer_index,
+ attribute_offset,
+ buffer_stride,
+ MTLShader::ssbo_vertex_type_to_attr_type(
+ desc.vertex_descriptor.attributes[mtl_attr.location].format),
+ instanced);
+
+ active_shader_->ssbo_vertex_fetch_bind_attribute(ssbo_attr);
+ desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] =
+ ssbo_attr;
+ desc.vertex_descriptor.num_ssbo_attributes++;
+ }
+
+ /* NOTE: We set num_attributes up to the maximum attribute index found. Because of
+ * this, certain attributes may be skipped if they were not present in the
+ * source GPUVertFormat. */
+ MTL_LOG_INFO(
+ " -- Batch Attribute(%d): ORIG Shader Format: %d, ORIG Vert format: %d, Vert "
+ "components: %d, Fetch Mode %d --> FINAL FORMAT: %d\n",
+ mtl_attr.location,
+ (int)mtl_attr.format,
+ (int)a->comp_type,
+ (int)a->comp_len,
+ (int)a->fetch_mode,
+ (int)desc.vertex_descriptor.attributes[mtl_attr.location].format);
+
+ MTL_LOG_INFO(
+ " -- [Batch] matching %s attribute '%s' (Attribute Index: %d, Buffer index: %d, "
+ "offset: %d)\n",
+ (instanced) ? "instance" : "vertex",
+ name,
+ mtl_attr.location,
+ buffer_index,
+ attribute_offset);
+ }
+ }
+ }
+ }
+ if (buffer_added) {
+ return buffer_index;
+ }
+ return -1;
+}
+
+id<MTLRenderCommandEncoder> MTLBatch::bind(uint v_first, uint v_count, uint i_first, uint i_count)
+{
+ /* Set up the draw call and render pipeline state here. This is called for every draw, but the
+ * setup lives here so that MTLDrawList only needs to perform it a single time. */
+ BLI_assert(this);
+
+ /* Fetch Metal device. */
+ MTLContext *ctx = MTLContext::get();
+ if (!ctx) {
+ BLI_assert_msg(false, "No context available for rendering.");
+ return nil;
+ }
+
+ /* Verify Shader. */
+ active_shader_ = (shader) ? static_cast<MTLShader *>(unwrap(shader)) : nullptr;
+
+ if (active_shader_ == nullptr || !active_shader_->is_valid()) {
+ /* Skip drawing if there is no valid Metal shader.
+ * This will occur if the path through which the shader is prepared
+ * is invalid (e.g. Python without create-info), or the source shader uses a geometry pass. */
+ BLI_assert_msg(false, "No valid Metal shader!");
+ return nil;
+ }
+
+ /* Check if using SSBO Fetch Mode.
+ * This is an alternative drawing mode to geometry shaders, wherein vertex buffers
+ * are bound as readable (random-access) GPU buffers and certain descriptor properties
+ * are passed using Shader uniforms. */
+ bool uses_ssbo_fetch = active_shader_->get_uses_ssbo_vertex_fetch();
+
+ /* Prepare Vertex Descriptor and extract VertexBuffers to bind. */
+ MTLVertBuf *buffers[GPU_BATCH_VBO_MAX_LEN] = {nullptr};
+ int num_buffers = 0;
+
+ /* Ensure Index Buffer is ready. */
+ MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem));
+ if (mtl_elem != NULL) {
+ mtl_elem->upload_data();
+ }
+
+ /* Populate vertex descriptor with attribute binding information.
+ * The vertex descriptor and buffer layout descriptors describe
+ * how vertex data from bound vertex buffers maps to the
+ * shader's input.
+ * A unique vertex descriptor will result in a new PipelineStateObject
+ * being generated for the currently bound shader. */
+ prepare_vertex_descriptor_and_bindings(buffers, num_buffers, v_first, v_count, i_first, i_count);
+
+ /* Prepare Vertex Buffers - Run before RenderCommandEncoder in case BlitCommandEncoder buffer
+ * data operations are required. */
+ for (int i = 0; i < num_buffers; i++) {
+ MTLVertBuf *buf_at_index = buffers[i];
+ if (buf_at_index == NULL) {
+ BLI_assert_msg(
+ false,
+ "Total buffer count does not match highest buffer index, could be gaps in bindings");
+ continue;
+ }
+
+ MTLVertBuf *mtlvbo = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(buf_at_index));
+ mtlvbo->bind();
+ }
+
+ /* Ensure render pass is active and fetch active RenderCommandEncoder. */
+ id<MTLRenderCommandEncoder> rec = ctx->ensure_begin_render_pass();
+
+ /* Fetch RenderPassState to enable resource binding for active pass. */
+ MTLRenderPassState &rps = ctx->main_command_buffer.get_render_pass_state();
+
+ /* Debug Check: Ensure Frame-buffer instance is not dirty. */
+ BLI_assert(!ctx->main_command_buffer.get_active_framebuffer()->get_dirty());
+
+ /* Bind Shader. */
+ this->shader_bind();
+
+ /* GPU debug markers. */
+ if (G.debug & G_DEBUG_GPU) {
+ [rec pushDebugGroup:[NSString stringWithFormat:@"batch_bind%@(shader: %s)",
+ this->elem ? @"(indexed)" : @"",
+ active_shader_->get_interface()->get_name()]];
+ [rec insertDebugSignpost:[NSString
+ stringWithFormat:@"batch_bind%@(shader: %s)",
+ this->elem ? @"(indexed)" : @"",
+ active_shader_->get_interface()->get_name()]];
+ }
+
+ /* Ensure Context Render Pipeline State is fully setup and ready to execute the draw. */
+ MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);
+ if (!ctx->ensure_render_pipeline_state(mtl_prim_type)) {
+ printf("FAILED TO ENSURE RENDER PIPELINE STATE");
+ BLI_assert(false);
+
+ if (G.debug & G_DEBUG_GPU) {
+ [rec popDebugGroup];
+ }
+ return nil;
+ }
+
+ /* Bind Vertex Buffers and Index Buffers. */
+
+ /* SSBO Vertex Fetch Buffer bindings. */
+ if (uses_ssbo_fetch) {
+
+ /* SSBO Vertex Fetch - Bind Index Buffer to appropriate slot -- if used. */
+ id<MTLBuffer> idx_buffer = nil;
+ GPUPrimType final_prim_type = this->prim_type;
+
+ if (mtl_elem != nullptr) {
+
+ /* Fetch index buffer. This function can situationally return an optimized
+ * index buffer of a different primitive type. If this is the case, `final_prim_type`
+ * and `v_count` will be updated with the new format.
+ * NOTE: For indexed rendering, v_count represents the number of indices. */
+ idx_buffer = mtl_elem->get_index_buffer(final_prim_type, v_count);
+ BLI_assert(idx_buffer != nil);
+
+ /* Update uniforms for SSBO-vertex-fetch-mode indexed rendering to flag usage. */
+ int &uniform_ssbo_index_mode_u16 = active_shader_->uni_ssbo_uses_index_mode_u16;
+ BLI_assert(uniform_ssbo_index_mode_u16 != -1);
+ int uses_index_mode_u16 = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 1 : 0;
+ active_shader_->uniform_int(uniform_ssbo_index_mode_u16, 1, 1, &uses_index_mode_u16);
+ }
+ else {
+ idx_buffer = ctx->get_null_buffer();
+ }
+ rps.bind_vertex_buffer(idx_buffer, 0, MTL_SSBO_VERTEX_FETCH_IBO_INDEX);
+
+ /* Ensure all attributes are set */
+ active_shader_->ssbo_vertex_fetch_bind_attributes_end(rec);
+
+ /* Bind NULL Buffers for unused vertex data slots. */
+ id<MTLBuffer> null_buffer = ctx->get_null_buffer();
+ BLI_assert(null_buffer != nil);
+ for (int i = num_buffers; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
+ if (rps.cached_vertex_buffer_bindings[i].metal_buffer == nil) {
+ rps.bind_vertex_buffer(null_buffer, 0, i);
+ }
+ }
+
+ /* Flag whether Indexed rendering is used or not. */
+ int &uniform_ssbo_use_indexed = active_shader_->uni_ssbo_uses_indexed_rendering;
+ BLI_assert(uniform_ssbo_use_indexed != -1);
+ int uses_indexed_rendering = (mtl_elem != NULL) ? 1 : 0;
+ active_shader_->uniform_int(uniform_ssbo_use_indexed, 1, 1, &uses_indexed_rendering);
+
+ /* Set SSBO-fetch-mode status uniforms. */
+ BLI_assert(active_shader_->uni_ssbo_input_prim_type_loc != -1);
+ BLI_assert(active_shader_->uni_ssbo_input_vert_count_loc != -1);
+ GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_shader_)),
+ active_shader_->uni_ssbo_input_prim_type_loc,
+ 1,
+ 1,
+ (const int *)(&final_prim_type));
+ GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_shader_)),
+ active_shader_->uni_ssbo_input_vert_count_loc,
+ 1,
+ 1,
+ (const int *)(&v_count));
+ }
+
+ /* Bind Vertex Buffers. */
+ for (int i = 0; i < num_buffers; i++) {
+ MTLVertBuf *buf_at_index = buffers[i];
+ if (buf_at_index == NULL) {
+ BLI_assert_msg(
+ false,
+ "Total buffer count does not match highest buffer index, could be gaps in bindings");
+ continue;
+ }
+ /* Buffer handle. */
+ MTLVertBuf *mtlvbo = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(buf_at_index));
+ mtlvbo->flag_used();
+
+ /* Fetch buffer from MTLVertexBuffer and bind. */
+ id<MTLBuffer> mtl_buffer = mtlvbo->get_metal_buffer();
+
+ BLI_assert(mtl_buffer != nil);
+ rps.bind_vertex_buffer(mtl_buffer, 0, i);
+ }
+
+ if (G.debug & G_DEBUG_GPU) {
+ [rec popDebugGroup];
+ }
+
+ /* Return Render Command Encoder used with setup. */
+ return rec;
+}
+
+void MTLBatch::unbind()
+{
+}
+
+void MTLBatch::prepare_vertex_descriptor_and_bindings(
+ MTLVertBuf **buffers, int &num_buffers, int v_first, int v_count, int i_first, int i_count)
+{
+
+ /* Here we populate the MTLContext vertex descriptor and resolve which buffers need to be bound.
+ */
+ MTLStateManager *state_manager = static_cast<MTLStateManager *>(
+ MTLContext::get()->state_manager);
+ MTLRenderPipelineStateDescriptor &desc = state_manager->get_pipeline_descriptor();
+ const MTLShaderInterface *interface = active_shader_->get_interface();
+ uint16_t attr_mask = interface->get_enabled_attribute_mask();
+
+ /* Reset vertex descriptor to default state. */
+ desc.reset_vertex_descriptor();
+
+ /* Fetch Vertex and Instance Buffers. */
+ Span<MTLVertBuf *> mtl_verts(reinterpret_cast<MTLVertBuf **>(this->verts),
+ GPU_BATCH_VBO_MAX_LEN);
+ Span<MTLVertBuf *> mtl_inst(reinterpret_cast<MTLVertBuf **>(this->inst),
+ GPU_BATCH_INST_VBO_MAX_LEN);
+
+ /* SSBO Vertex fetch also passes vertex descriptor information into the shader. */
+ if (active_shader_->get_uses_ssbo_vertex_fetch()) {
+ active_shader_->ssbo_vertex_fetch_bind_attributes_begin();
+ }
+
+ /* Resolve Metal vertex buffer bindings. */
+ /* Vertex Descriptors
+ * ------------------
+ * Vertex Descriptors are required to generate a pipeline state, based on the current Batch's
+ * buffer bindings. These bindings are a unique matching, depending on what input attributes a
+ * batch has in its buffers, and those which are supported by the shader interface.
+
+ * We iterate through the buffers and resolve which attributes satisfy the requirements of the
+ * currently bound shader. We cache this data for a given Batch<->ShaderInterface pairing in a
+ * VAO cache to avoid the need to recalculate it. */
+ bool buffer_is_instanced[GPU_BATCH_VBO_MAX_LEN] = {false};
+
+ VertexDescriptorShaderInterfacePair *descriptor = this->vao_cache.find(interface);
+ if (descriptor) {
+ desc.vertex_descriptor = descriptor->vertex_descriptor;
+ attr_mask = descriptor->attr_mask;
+ num_buffers = descriptor->num_buffers;
+
+ for (int bid = 0; bid < GPU_BATCH_VBO_MAX_LEN; ++bid) {
+ if (descriptor->bufferIds[bid].used) {
+ if (descriptor->bufferIds[bid].is_instance) {
+ buffers[bid] = mtl_inst[descriptor->bufferIds[bid].id];
+ buffer_is_instanced[bid] = true;
+ }
+ else {
+ buffers[bid] = mtl_verts[descriptor->bufferIds[bid].id];
+ buffer_is_instanced[bid] = false;
+ }
+ }
+ }
+
+ /* Use cached ssbo attribute binding data. */
+ if (active_shader_->get_uses_ssbo_vertex_fetch()) {
+ BLI_assert(desc.vertex_descriptor.uses_ssbo_vertex_fetch);
+ for (int attr_id = 0; attr_id < desc.vertex_descriptor.num_ssbo_attributes; attr_id++) {
+ active_shader_->ssbo_vertex_fetch_bind_attribute(
+ desc.vertex_descriptor.ssbo_attributes[attr_id]);
+ }
+ }
+ }
+ else {
+ VertexDescriptorShaderInterfacePair pair{};
+ pair.interface = interface;
+
+ for (int i = 0; i < GPU_BATCH_VBO_MAX_LEN; ++i) {
+ pair.bufferIds[i].id = -1;
+ pair.bufferIds[i].is_instance = 0;
+ pair.bufferIds[i].used = 0;
+ }
+ /* NOTE: Attribute extraction order from the buffer is the reverse of the OpenGL back-end, as we
+ * flag an attribute once it is found, rather than pre-setting the mask. */
+ /* Extract Instance attributes (These take highest priority). */
+ for (int v = 0; v < GPU_BATCH_INST_VBO_MAX_LEN; v++) {
+ if (mtl_inst[v]) {
+ MTL_LOG_INFO(" -- [Batch] Checking bindings for bound instance buffer %p\n", mtl_inst[v]);
+ int buffer_ind = this->prepare_vertex_binding(
+ mtl_inst[v], desc, interface, attr_mask, true);
+ if (buffer_ind >= 0) {
+ buffers[buffer_ind] = mtl_inst[v];
+ buffer_is_instanced[buffer_ind] = true;
+
+ pair.bufferIds[buffer_ind].id = v;
+ pair.bufferIds[buffer_ind].used = 1;
+ pair.bufferIds[buffer_ind].is_instance = 1;
+ num_buffers = ((buffer_ind + 1) > num_buffers) ? (buffer_ind + 1) : num_buffers;
+ }
+ }
+ }
+
+ /* Extract Vertex attributes (First-bound vertex buffer takes priority). */
+ for (int v = 0; v < GPU_BATCH_VBO_MAX_LEN; v++) {
+ if (mtl_verts[v] != NULL) {
+ MTL_LOG_INFO(" -- [Batch] Checking bindings for bound vertex buffer %p\n", mtl_verts[v]);
+ int buffer_ind = this->prepare_vertex_binding(
+ mtl_verts[v], desc, interface, attr_mask, false);
+ if (buffer_ind >= 0) {
+ buffers[buffer_ind] = mtl_verts[v];
+ buffer_is_instanced[buffer_ind] = false;
+
+ pair.bufferIds[buffer_ind].id = v;
+ pair.bufferIds[buffer_ind].used = 1;
+ pair.bufferIds[buffer_ind].is_instance = 0;
+ num_buffers = ((buffer_ind + 1) > num_buffers) ? (buffer_ind + 1) : num_buffers;
+ }
+ }
+ }
+
+ /* Add to VertexDescriptor cache */
+ desc.vertex_descriptor.uses_ssbo_vertex_fetch = active_shader_->get_uses_ssbo_vertex_fetch();
+ pair.attr_mask = attr_mask;
+ pair.vertex_descriptor = desc.vertex_descriptor;
+ pair.num_buffers = num_buffers;
+ if (!this->vao_cache.insert(pair)) {
+ printf(
+ "[Performance Warning] cache is full (Size: %d), vertex descriptor will not be cached\n",
+ GPU_VAO_STATIC_LEN);
+ }
+ }
+
+/* DEBUG: verify if our attribute bindings have been fully provided as expected. */
+#if MTL_DEBUG_SHADER_ATTRIBUTES == 1
+ if (attr_mask != 0) {
+ for (uint16_t mask = 1, a = 0; a < 16; a++, mask <<= 1) {
+ if (attr_mask & mask) {
+ /* Fallback for setting default attributes, for missed slots. Attributes flagged with
+ * 'MTLVertexFormatInvalid' in the vertex descriptor are bound to a NULL buffer during PSO
+ * creation. */
+ MTL_LOG_WARNING("MTLBatch: Missing expected attribute '%s' at index '%d' for shader: %s\n",
+ this->active_shader->interface->attributes[a].name,
+ a,
+ interface->name);
+ /* Ensure any assigned attribute has not been given an invalid format. This should not
+ * occur and may be the result of an unsupported attribute type conversion. */
+ BLI_assert(desc.attributes[a].format == MTLVertexFormatInvalid);
+ }
+ }
+ }
+#endif
+}
+
+void MTLBatch::draw_advanced(int v_first, int v_count, int i_first, int i_count)
+{
+
+#if TRUST_NO_ONE
+ BLI_assert(v_count > 0 && i_count > 0);
+#endif
+
+ /* Setup RenderPipelineState for batch. */
+ MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+ id<MTLRenderCommandEncoder> rec = this->bind(v_first, v_count, i_first, i_count);
+ if (rec == nil) {
+ return;
+ }
+
+ /* Fetch IndexBuffer and resolve primitive type. */
+ MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(reinterpret_cast<IndexBuf *>(this->elem));
+ MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);
+
+ /* Render using SSBO Vertex Fetch. */
+ if (active_shader_->get_uses_ssbo_vertex_fetch()) {
+
+ /* Submit draw call with modified vertex count, which reflects vertices per primitive defined
+ * in the USE_SSBO_VERTEX_FETCH pragma. */
+ int num_input_primitives = gpu_get_prim_count_from_type(v_count, this->prim_type);
+ int output_num_verts = num_input_primitives *
+ active_shader_->get_ssbo_vertex_fetch_output_num_verts();
+ BLI_assert_msg(
+ mtl_vertex_count_fits_primitive_type(
+ output_num_verts, active_shader_->get_ssbo_vertex_fetch_output_prim_type()),
+ "Output Vertex count is not compatible with the requested output vertex primitive type");
+ [rec drawPrimitives:active_shader_->get_ssbo_vertex_fetch_output_prim_type()
+ vertexStart:0
+ vertexCount:output_num_verts
+ instanceCount:i_count
+ baseInstance:i_first];
+ ctx->main_command_buffer.register_draw_counters(output_num_verts * i_count);
+ }
+ /* Perform regular draw. */
+ else if (mtl_elem == NULL) {
+
+ /* Primitive type topology emulation. */
+ if (mtl_needs_topology_emulation(this->prim_type)) {
+
+ /* Generate index buffer for primitive types requiring emulation. */
+ GPUPrimType emulated_prim_type = this->prim_type;
+ uint32_t emulated_v_count = v_count;
+ id<MTLBuffer> generated_index_buffer = this->get_emulated_toplogy_buffer(emulated_prim_type,
+ emulated_v_count);
+ BLI_assert(generated_index_buffer != nil);
+
+ MTLPrimitiveType emulated_mtl_prim_type = gpu_prim_type_to_metal(emulated_prim_type);
+
+ /* Temp: Disable culling for emulated primitive types.
+ * TODO(Metal): Support face winding in topology buffer. */
+ [rec setCullMode:MTLCullModeNone];
+
+ if (generated_index_buffer != nil) {
+ BLI_assert(emulated_mtl_prim_type == MTLPrimitiveTypeTriangle ||
+ emulated_mtl_prim_type == MTLPrimitiveTypeLine);
+ if (emulated_mtl_prim_type == MTLPrimitiveTypeTriangle) {
+ BLI_assert(emulated_v_count % 3 == 0);
+ }
+ if (emulated_mtl_prim_type == MTLPrimitiveTypeLine) {
+ BLI_assert(emulated_v_count % 2 == 0);
+ }
+
+ /* Set depth stencil state (requires knowledge of primitive type). */
+ ctx->ensure_depth_stencil_state(emulated_mtl_prim_type);
+
+ [rec drawIndexedPrimitives:emulated_mtl_prim_type
+ indexCount:emulated_v_count
+ indexType:MTLIndexTypeUInt32
+ indexBuffer:generated_index_buffer
+ indexBufferOffset:0
+ instanceCount:i_count
+ baseVertex:v_first
+ baseInstance:i_first];
+ }
+ else {
+ printf("[Note] Cannot draw batch -- Emulated Topology mode: %u not yet supported\n",
+ this->prim_type);
+ }
+ }
+ else {
+ /* Set depth stencil state (requires knowledge of primitive type). */
+ ctx->ensure_depth_stencil_state(mtl_prim_type);
+
+ /* Issue draw call. */
+ [rec drawPrimitives:mtl_prim_type
+ vertexStart:v_first
+ vertexCount:v_count
+ instanceCount:i_count
+ baseInstance:i_first];
+ }
+ ctx->main_command_buffer.register_draw_counters(v_count * i_count);
+ }
+ /* Perform indexed draw. */
+ else {
+
+ MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_);
+ uint32_t base_index = mtl_elem->index_base_;
+ uint32_t index_size = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 2 : 4;
+ uint32_t v_first_ofs = ((v_first + mtl_elem->index_start_) * index_size);
+ BLI_assert_msg((v_first_ofs % index_size) == 0,
+ "Index offset is not 2/4-byte aligned as per METAL spec");
+
+ /* Fetch index buffer. May return an index buffer of a differing format,
+ * if index buffer optimization is used. In these cases, final_prim_type and
+ * index_count get updated with the new properties. */
+ GPUPrimType final_prim_type = this->prim_type;
+ uint index_count = v_count;
+
+ id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count);
+ mtl_prim_type = gpu_prim_type_to_metal(final_prim_type);
+ BLI_assert(index_buffer != nil);
+
+ if (index_buffer != nil) {
+
+ /* Set depth stencil state (requires knowledge of primitive type). */
+ ctx->ensure_depth_stencil_state(mtl_prim_type);
+
+ /* Issue draw call. */
+ [rec drawIndexedPrimitives:mtl_prim_type
+ indexCount:index_count
+ indexType:index_type
+ indexBuffer:index_buffer
+ indexBufferOffset:v_first_ofs
+ instanceCount:i_count
+ baseVertex:base_index
+ baseInstance:i_first];
+ ctx->main_command_buffer.register_draw_counters(index_count * i_count);
+ }
+ else {
+ BLI_assert_msg(false, "Index buffer does not have backing Metal buffer");
+ }
+ }
+
+ /* End of draw. */
+ this->unbind();
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Topology emulation and optimization
+ * \{ */
+
+id<MTLBuffer> MTLBatch::get_emulated_toplogy_buffer(GPUPrimType &in_out_prim_type,
+ uint32_t &in_out_v_count)
+{
+
+ BLI_assert(in_out_v_count > 0);
+ /* Determine emulated primitive types. */
+ GPUPrimType input_prim_type = in_out_prim_type;
+ uint32_t v_count = in_out_v_count;
+ GPUPrimType output_prim_type;
+ switch (input_prim_type) {
+ case GPU_PRIM_POINTS:
+ case GPU_PRIM_LINES:
+ case GPU_PRIM_TRIS:
+ BLI_assert_msg(false, "Optimal primitive types should not reach here.");
+ return nil;
+ break;
+ case GPU_PRIM_LINES_ADJ:
+ case GPU_PRIM_TRIS_ADJ:
+ BLI_assert_msg(false, "Adjacency primitive types should not reach here.");
+ return nil;
+ break;
+ case GPU_PRIM_LINE_STRIP:
+ case GPU_PRIM_LINE_LOOP:
+ case GPU_PRIM_LINE_STRIP_ADJ:
+ output_prim_type = GPU_PRIM_LINES;
+ break;
+ case GPU_PRIM_TRI_STRIP:
+ case GPU_PRIM_TRI_FAN:
+ output_prim_type = GPU_PRIM_TRIS;
+ break;
+ default:
+ BLI_assert_msg(false, "Invalid primitive type.");
+ return nil;
+ }
+
+ /* Check if topology buffer exists and is valid. */
+ if (this->emulated_topology_buffer_ != nullptr &&
+ (emulated_topology_type_ != input_prim_type || topology_buffer_input_v_count_ != v_count)) {
+
+ /* Release existing topology buffer. */
+ emulated_topology_buffer_->free();
+ emulated_topology_buffer_ = nullptr;
+ }
+
+ /* Generate new topology index buffer. */
+ if (this->emulated_topology_buffer_ == nullptr) {
+ /* Calculate IB len. */
+ uint32_t output_prim_count = 0;
+ switch (input_prim_type) {
+ case GPU_PRIM_LINE_STRIP:
+ case GPU_PRIM_LINE_STRIP_ADJ:
+ output_prim_count = v_count - 1;
+ break;
+ case GPU_PRIM_LINE_LOOP:
+ output_prim_count = v_count;
+ break;
+ case GPU_PRIM_TRI_STRIP:
+ case GPU_PRIM_TRI_FAN:
+ output_prim_count = v_count - 2;
+ break;
+ default:
+ BLI_assert_msg(false, "Cannot generate optimized topology buffer for other types.");
+ break;
+ }
+ uint32_t output_IB_elems = output_prim_count * ((output_prim_type == GPU_PRIM_TRIS) ? 3 : 2);
+
+ /* Allocate buffer. */
+ uint32_t buffer_bytes = output_IB_elems * 4;
+ BLI_assert(buffer_bytes > 0);
+ this->emulated_topology_buffer_ = MTLContext::get_global_memory_manager().allocate(
+ buffer_bytes, true);
+
+ /* Populate. */
+ uint32_t *data = (uint32_t *)this->emulated_topology_buffer_->get_host_ptr();
+ BLI_assert(data != nullptr);
+
+ /* TODO(Metal): Support inverse winding modes. */
+ bool winding_clockwise = false;
+ UNUSED_VARS(winding_clockwise);
+
+ switch (input_prim_type) {
+ /* Line Loop. */
+ case GPU_PRIM_LINE_LOOP: {
+ int line = 0;
+ for (line = 0; line < output_prim_count - 1; line++) {
+ data[line * 2 + 0] = line + 0;
+ data[line * 2 + 1] = line + 1;
+ }
+ /* Closing line. */
+ data[line * 2 + 0] = line + 0;
+ data[line * 2 + 1] = 0;
+ } break;
+
+ /* Triangle Fan. */
+ case GPU_PRIM_TRI_FAN: {
+ for (int triangle = 0; triangle < output_prim_count; triangle++) {
+ data[triangle * 3 + 0] = 0; /* Always 0 */
+ data[triangle * 3 + 1] = triangle + 1;
+ data[triangle * 3 + 2] = triangle + 2;
+ }
+ } break;
+
+ default:
+ BLI_assert_msg(false, "Other primitive types do not require emulation.");
+ return nil;
+ }
+
+ /* Flush. */
+ this->emulated_topology_buffer_->flush();
+ /* Assign members relating to current cached IB. */
+ topology_buffer_input_v_count_ = v_count;
+ topology_buffer_output_v_count_ = output_IB_elems;
+ emulated_topology_type_ = input_prim_type;
+ }
+
+ /* Return. */
+ in_out_v_count = topology_buffer_output_v_count_;
+ in_out_prim_type = output_prim_type;
+ return (emulated_topology_buffer_) ? emulated_topology_buffer_->get_metal_buffer() : nil;
+}
+
+/** \} */
+
+} // blender::gpu
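[Note] get_emulated_toplogy_buffer() above builds a 32-bit index buffer that re-expresses TRI_FAN / LINE_LOOP style primitives as plain triangle or line lists, since Metal has no fan or loop topologies. Below is a minimal host-side C++ sketch of the same index expansion, illustrative only, using std::vector rather than a Metal buffer.

#include <cstdint>
#include <cstdio>
#include <vector>

/* Expand a triangle fan over `v_count` vertices into a plain triangle list.
 * Mirrors the TRI_FAN branch above: triangle t = {0, t + 1, t + 2}. */
static std::vector<uint32_t> tri_fan_to_tris(uint32_t v_count)
{
  std::vector<uint32_t> indices;
  if (v_count < 3) {
    return indices;
  }
  const uint32_t prim_count = v_count - 2;
  indices.reserve(prim_count * 3);
  for (uint32_t t = 0; t < prim_count; t++) {
    indices.push_back(0); /* Fan hub is always vertex 0. */
    indices.push_back(t + 1);
    indices.push_back(t + 2);
  }
  return indices;
}

/* Expand a line loop over `v_count` vertices into a line list,
 * including the closing segment back to vertex 0. */
static std::vector<uint32_t> line_loop_to_lines(uint32_t v_count)
{
  std::vector<uint32_t> indices;
  if (v_count < 2) {
    return indices;
  }
  indices.reserve(v_count * 2);
  for (uint32_t l = 0; l + 1 < v_count; l++) {
    indices.push_back(l);
    indices.push_back(l + 1);
  }
  indices.push_back(v_count - 1);
  indices.push_back(0); /* Closing line. */
  return indices;
}

int main()
{
  for (uint32_t i : tri_fan_to_tris(5)) {
    std::printf("%u ", i); /* 0 1 2 0 2 3 0 3 4 */
  }
  std::printf("\n");
  for (uint32_t i : line_loop_to_lines(4)) {
    std::printf("%u ", i); /* 0 1 1 2 2 3 3 0 */
  }
  std::printf("\n");
  return 0;
}
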
diff --git a/source/blender/gpu/metal/mtl_command_buffer.mm b/source/blender/gpu/metal/mtl_command_buffer.mm
index 0e13e8d4690..a9cabbb111f 100644
--- a/source/blender/gpu/metal/mtl_command_buffer.mm
+++ b/source/blender/gpu/metal/mtl_command_buffer.mm
@@ -54,6 +54,7 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin()
MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init];
desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
desc.retainedReferences = YES;
+ BLI_assert(context_.queue != nil);
active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc];
}
else {
@@ -498,7 +499,7 @@ bool MTLCommandBufferManager::insert_memory_barrier(eGPUBarrier barrier_bits,
/* Rendering. */
case MTL_RENDER_COMMAND_ENCODER: {
/* Currently flagging both stages -- can use bits above to filter on stage type --
- * though full barrier is safe for now*/
+ * though full barrier is safe for now. */
MTLRenderStages before_stage_flags = 0;
MTLRenderStages after_stage_flags = 0;
if (before_stages & GPU_BARRIER_STAGE_VERTEX &&
@@ -611,40 +612,187 @@ void MTLRenderPassState::bind_vertex_sampler(MTLSamplerBinding &sampler_binding,
bool use_argument_buffer_for_samplers,
uint slot)
{
- /* TODO(Metal): Implement RenderCommandEncoder vertex sampler binding utility. This will be
- * implemented alongside MTLShader. */
+ /* Range check. */
+ const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface();
+ BLI_assert(slot >= 0);
+ BLI_assert(slot <= shader_interface->get_max_texture_index());
+ BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS);
+ UNUSED_VARS_NDEBUG(shader_interface);
+
+ /* If sampler state has not changed for the given slot, we do not need to fetch. */
+ if (this->cached_vertex_sampler_state_bindings[slot].sampler_state == nil ||
+ !(this->cached_vertex_sampler_state_bindings[slot].binding_state == sampler_binding.state) ||
+ use_argument_buffer_for_samplers) {
+
+ id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ?
+ ctx.get_default_sampler_state() :
+ ctx.get_sampler_from_state(sampler_binding.state);
+ if (!use_argument_buffer_for_samplers) {
+ /* Update binding and cached state. */
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ BLI_assert(rec != nil);
+ [rec setVertexSamplerState:sampler_state atIndex:slot];
+ this->cached_vertex_sampler_state_bindings[slot].binding_state = sampler_binding.state;
+ this->cached_vertex_sampler_state_bindings[slot].sampler_state = sampler_state;
+ }
+
+ /* Flag last binding type. */
+ this->cached_vertex_sampler_state_bindings[slot].is_arg_buffer_binding =
+ use_argument_buffer_for_samplers;
+
+ /* Always assign to argument buffer samplers binding array - Efficiently ensures the value in
+ * the samplers array is always up to date. */
+ ctx.samplers_.mtl_sampler[slot] = sampler_state;
+ ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state;
+ }
}
void MTLRenderPassState::bind_fragment_sampler(MTLSamplerBinding &sampler_binding,
bool use_argument_buffer_for_samplers,
uint slot)
{
- /* TODO(Metal): Implement RenderCommandEncoder fragment sampler binding utility. This will be
- * implemented alongside MTLShader. */
+ /* Range check. */
+ const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface();
+ BLI_assert(slot >= 0);
+ BLI_assert(slot <= shader_interface->get_max_texture_index());
+ BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS);
+ UNUSED_VARS_NDEBUG(shader_interface);
+
+ /* If the sampler state has not changed for the given slot, we do not need to fetch. */
+ if (this->cached_fragment_sampler_state_bindings[slot].sampler_state == nil ||
+ !(this->cached_fragment_sampler_state_bindings[slot].binding_state ==
+ sampler_binding.state) ||
+ use_argument_buffer_for_samplers) {
+
+ id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ?
+ ctx.get_default_sampler_state() :
+ ctx.get_sampler_from_state(sampler_binding.state);
+ if (!use_argument_buffer_for_samplers) {
+ /* Update binding and cached state. */
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ BLI_assert(rec != nil);
+ [rec setFragmentSamplerState:sampler_state atIndex:slot];
+ this->cached_fragment_sampler_state_bindings[slot].binding_state = sampler_binding.state;
+ this->cached_fragment_sampler_state_bindings[slot].sampler_state = sampler_state;
+ }
+
+ /* Flag last binding type. */
+ this->cached_fragment_sampler_state_bindings[slot].is_arg_buffer_binding =
+ use_argument_buffer_for_samplers;
+
+ /* Always assign to argument buffer samplers binding array - Efficiently ensures the value in
+ * the samplers array is always up to date. */
+ ctx.samplers_.mtl_sampler[slot] = sampler_state;
+ ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state;
+ }
}
void MTLRenderPassState::bind_vertex_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index)
{
- /* TODO(Metal): Implement RenderCommandEncoder vertex buffer binding utility. This will be
- * implemented alongside the full MTLMemoryManager. */
+ BLI_assert(index >= 0);
+ BLI_assert(buffer_offset >= 0);
+ BLI_assert(buffer != nil);
+
+ BufferBindingCached &current_vert_ubo_binding = this->cached_vertex_buffer_bindings[index];
+ if (current_vert_ubo_binding.offset != buffer_offset ||
+ current_vert_ubo_binding.metal_buffer != buffer || current_vert_ubo_binding.is_bytes) {
+
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ BLI_assert(rec != nil);
+
+ if (current_vert_ubo_binding.metal_buffer == buffer) {
+ /* If buffer is the same, but offset has changed. */
+ [rec setVertexBufferOffset:buffer_offset atIndex:index];
+ }
+ else {
+ /* Bind Vertex Buffer. */
+ [rec setVertexBuffer:buffer offset:buffer_offset atIndex:index];
+ }
+
+ /* Update Bind-state cache. */
+ this->cached_vertex_buffer_bindings[index].is_bytes = false;
+ this->cached_vertex_buffer_bindings[index].metal_buffer = buffer;
+ this->cached_vertex_buffer_bindings[index].offset = buffer_offset;
+ }
}
void MTLRenderPassState::bind_fragment_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index)
{
- /* TODO(Metal): Implement RenderCommandEncoder fragment buffer binding utility. This will be
- * implemented alongside the full MTLMemoryManager. */
+ BLI_assert(index >= 0);
+ BLI_assert(buffer_offset >= 0);
+ BLI_assert(buffer != nil);
+
+ BufferBindingCached &current_frag_ubo_binding = this->cached_fragment_buffer_bindings[index];
+ if (current_frag_ubo_binding.offset != buffer_offset ||
+ current_frag_ubo_binding.metal_buffer != buffer || current_frag_ubo_binding.is_bytes) {
+
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ BLI_assert(rec != nil);
+
+ if (current_frag_ubo_binding.metal_buffer == buffer) {
+ /* If buffer is the same, but offset has changed. */
+ [rec setFragmentBufferOffset:buffer_offset atIndex:index];
+ }
+ else {
+ /* Bind Fragment Buffer */
+ [rec setFragmentBuffer:buffer offset:buffer_offset atIndex:index];
+ }
+
+ /* Update Bind-state cache */
+ this->cached_fragment_buffer_bindings[index].is_bytes = false;
+ this->cached_fragment_buffer_bindings[index].metal_buffer = buffer;
+ this->cached_fragment_buffer_bindings[index].offset = buffer_offset;
+ }
}
void MTLRenderPassState::bind_vertex_bytes(void *bytes, uint length, uint index)
{
- /* TODO(Metal): Implement RenderCommandEncoder vertex bytes binding utility. This will be
- * implemented alongside the full MTLMemoryManager. */
+ /* Bytes always updated as source data may have changed. */
+ BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ BLI_assert(length > 0);
+ BLI_assert(bytes != nullptr);
+
+ if (length < MTL_MAX_SET_BYTES_SIZE) {
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ [rec setVertexBytes:bytes length:length atIndex:index];
+ }
+ else {
+ /* We have run over the setBytes limit, bind buffer instead. */
+ MTLTemporaryBuffer range =
+ ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256);
+ memcpy(range.data, bytes, length);
+ this->bind_vertex_buffer(range.metal_buffer, range.buffer_offset, index);
+ }
+
+ /* Update Bind-state cache */
+ this->cached_vertex_buffer_bindings[index].is_bytes = true;
+ this->cached_vertex_buffer_bindings[index].metal_buffer = nil;
+ this->cached_vertex_buffer_bindings[index].offset = -1;
}
void MTLRenderPassState::bind_fragment_bytes(void *bytes, uint length, uint index)
{
- /* TODO(Metal): Implement RenderCommandEncoder fragment bytes binding utility. This will be
- * implemented alongside the full MTLMemoryManager. */
+ /* Bytes always updated as source data may have changed. */
+ BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ BLI_assert(length > 0);
+ BLI_assert(bytes != nullptr);
+
+ if (length < MTL_MAX_SET_BYTES_SIZE) {
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ [rec setFragmentBytes:bytes length:length atIndex:index];
+ }
+ else {
+ /* We have run over the setBytes limit, bind buffer instead. */
+ MTLTemporaryBuffer range =
+ ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256);
+ memcpy(range.data, bytes, length);
+ this->bind_fragment_buffer(range.metal_buffer, range.buffer_offset, index);
+ }
+
+ /* Update Bind-state cache. */
+ this->cached_fragment_buffer_bindings[index].is_bytes = true;
+ this->cached_fragment_buffer_bindings[index].metal_buffer = nil;
+ this->cached_fragment_buffer_bindings[index].offset = -1;
}
/** \} */
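[Note] bind_vertex_bytes() / bind_fragment_bytes() above encode the common Metal pattern of pushing small uniform payloads inline via setBytes (capped at MTL_MAX_SET_BYTES_SIZE, 4 KiB) and falling back to a scratch-buffer allocation for anything larger. The following backend-agnostic C++ sketch shows that dispatch; push_inline() and bind_buffer() are hypothetical callbacks standing in for the encoder calls, and ScratchArena is a toy stand-in for the scratch-buffer manager.

#include <cstddef>
#include <cstdio>
#include <cstring>
#include <vector>

/* Mirrors MTL_MAX_SET_BYTES_SIZE from mtl_common.hh. */
constexpr size_t kMaxInlineBytes = 4096;

/* Toy linear allocator whose offsets are aligned to 256 bytes,
 * as in the scratch-buffer path above. */
struct ScratchArena {
  std::vector<unsigned char> storage = std::vector<unsigned char>(1 << 20);
  size_t offset = 0;

  size_t allocate_aligned(const void *src, size_t len, size_t align = 256)
  {
    offset = (offset + align - 1) & ~(align - 1);
    const size_t out = offset;
    std::memcpy(storage.data() + out, src, len);
    offset += len;
    return out;
  }
};

/* Hypothetical binding callbacks; in the real code these correspond to
 * [rec setVertexBytes:...] and bind_vertex_buffer(). */
static void push_inline(const void * /*bytes*/, size_t len, unsigned index)
{
  std::printf("inline bind: %zu bytes at index %u\n", len, index);
}
static void bind_buffer(size_t buffer_offset, unsigned index)
{
  std::printf("scratch bind: offset %zu at index %u\n", buffer_offset, index);
}

static void bind_bytes(ScratchArena &arena, const void *bytes, size_t len, unsigned index)
{
  if (len < kMaxInlineBytes) {
    push_inline(bytes, len, index); /* Fast path: data travels with the commands. */
  }
  else {
    /* Over the inline limit: copy into the scratch arena and bind as a buffer. */
    const size_t ofs = arena.allocate_aligned(bytes, len);
    bind_buffer(ofs, index);
  }
}

int main()
{
  ScratchArena arena;
  std::vector<float> small(16, 1.0f), large(4096, 2.0f);
  bind_bytes(arena, small.data(), small.size() * sizeof(float), 0);
  bind_bytes(arena, large.data(), large.size() * sizeof(float), 1);
  return 0;
}
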
diff --git a/source/blender/gpu/metal/mtl_common.hh b/source/blender/gpu/metal/mtl_common.hh
index b6f9c0050a9..5c322efa3f9 100644
--- a/source/blender/gpu/metal/mtl_common.hh
+++ b/source/blender/gpu/metal/mtl_common.hh
@@ -3,7 +3,9 @@
#ifndef __MTL_COMMON
#define __MTL_COMMON
-// -- Renderer Options --
+/** -- Renderer Options -- */
+/* Number of frames over which rolling averages are taken. */
+#define MTL_FRAME_AVERAGE_COUNT 5
#define MTL_MAX_DRAWABLES 3
#define MTL_MAX_SET_BYTES_SIZE 4096
#define MTL_FORCE_WAIT_IDLE 0
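[Note] MTL_FRAME_AVERAGE_COUNT feeds the swap-chain latency tracking declared later in mtl_context.hh (frame_latency / latency_resolve_average). Below is a plain C++ sketch of a rolling average over the last N frame latencies, assuming a simple ring buffer; it is not the exact Blender bookkeeping.

#include <cstdint>
#include <cstdio>

/* Number of frames over which the rolling average is taken,
 * matching MTL_FRAME_AVERAGE_COUNT above. */
constexpr int kFrameAverageCount = 5;

struct LatencyTracker {
  int64_t samples_us[kFrameAverageCount] = {};
  int cursor = 0;
  int filled = 0;

  /* Record one frame's latency and return the current average. */
  int64_t resolve_average(int64_t frame_latency_us)
  {
    samples_us[cursor] = frame_latency_us;
    cursor = (cursor + 1) % kFrameAverageCount;
    if (filled < kFrameAverageCount) {
      filled++;
    }
    int64_t sum = 0;
    for (int i = 0; i < filled; i++) {
      sum += samples_us[i];
    }
    return sum / filled;
  }
};

int main()
{
  LatencyTracker tracker;
  const int64_t frames_us[] = {16000, 17000, 33000, 16500, 16200, 16100};
  for (int64_t f : frames_us) {
    std::printf("latency %lld us -> avg %lld us\n",
                (long long)f,
                (long long)tracker.resolve_average(f));
  }
  return 0;
}
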
diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh
index e996193e722..6229afcef79 100644
--- a/source/blender/gpu/metal/mtl_context.hh
+++ b/source/blender/gpu/metal/mtl_context.hh
@@ -12,6 +12,10 @@
#include "GPU_common_types.h"
#include "GPU_context.h"
+#include "intern/GHOST_Context.h"
+#include "intern/GHOST_ContextCGL.h"
+#include "intern/GHOST_Window.h"
+
#include "mtl_backend.hh"
#include "mtl_capabilities.hh"
#include "mtl_common.hh"
@@ -248,7 +252,7 @@ struct MTLContextTextureUtils {
/* Depth texture updates are not directly supported with Blit operations, similarly, we cannot
* use a compute shader to write to depth, so we must instead render to a depth target.
* These processes use vertex/fragment shaders to render texture data from an intermediate
- * source, in order to prime the depth buffer*/
+ * source, in order to prime the depth buffer. */
blender::Map<DepthTextureUpdateRoutineSpecialisation, GPUShader *> depth_2d_update_shaders;
GPUShader *fullscreen_blit_shader = nullptr;
@@ -348,7 +352,7 @@ struct MTLSamplerArray {
{
uint32_t hash = this->num_samplers;
for (int i = 0; i < this->num_samplers; i++) {
- hash ^= (uint32_t)this->mtl_sampler_flags[i] << (i % 3);
+ hash ^= uint32_t(this->mtl_sampler_flags[i]) << (i % 3);
}
return hash;
}
@@ -570,12 +574,44 @@ class MTLCommandBufferManager {
class MTLContext : public Context {
friend class MTLBackend;
+ friend class MTLRenderPassState;
+
+ public:
+ /* Swap-chain and latency management. */
+ static std::atomic<int> max_drawables_in_flight;
+ static std::atomic<int64_t> avg_drawable_latency_us;
+ static int64_t frame_latency[MTL_FRAME_AVERAGE_COUNT];
+
+ public:
+ /* Shaders and Pipeline state. */
+ MTLContextGlobalShaderPipelineState pipeline_state;
+
+ /* Metal API Resource Handles. */
+ id<MTLCommandQueue> queue = nil;
+ id<MTLDevice> device = nil;
+
+#ifndef NDEBUG
+ /* Label for Context debug name assignment. */
+ NSString *label = nil;
+#endif
+
+ /* Memory Management. */
+ MTLScratchBufferManager memory_manager;
+ static MTLBufferPool global_memory_manager;
+
+ /* CommandBuffer managers. */
+ MTLCommandBufferManager main_command_buffer;
private:
- /* Null buffers for empty/uninitialized bindings.
- * Null attribute buffer follows default attribute format of OpenGL Back-end. */
- id<MTLBuffer> null_buffer_; /* All zero's. */
- id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */
+ /* Parent Context. */
+ GHOST_ContextCGL *ghost_context_;
+
+ /* Render Passes and Frame-buffers. */
+ id<MTLTexture> default_fbo_mtltexture_ = nil;
+ gpu::MTLTexture *default_fbo_gputexture_ = nullptr;
+
+ /* Depth-stencil state cache. */
+ blender::Map<MTLContextDepthStencilState, id<MTLDepthStencilState>> depth_stencil_state_cache;
/* Compute and specialization caches. */
MTLContextTextureUtils texture_utils_;
@@ -601,23 +637,20 @@ class MTLContext : public Context {
gpu::MTLBuffer *visibility_buffer_ = nullptr;
bool visibility_is_dirty_ = false;
- public:
- /* Shaders and Pipeline state. */
- MTLContextGlobalShaderPipelineState pipeline_state;
-
- /* Metal API Resource Handles. */
- id<MTLCommandQueue> queue = nil;
- id<MTLDevice> device = nil;
-
- /* Memory Management */
- MTLScratchBufferManager memory_manager;
- static MTLBufferPool global_memory_manager;
+ /* Null buffers for empty/uninitialized bindings.
+ * Null attribute buffer follows default attribute format of OpenGL Backend. */
+ id<MTLBuffer> null_buffer_; /* All zero's. */
+ id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */
- /* CommandBuffer managers. */
- MTLCommandBufferManager main_command_buffer;
+ /** Dummy Resources */
+  /* Maximum of 32 texture types, though most combinations are invalid. */
+ gpu::MTLTexture *dummy_textures_[GPU_TEXTURE_BUFFER] = {nullptr};
+ GPUVertFormat dummy_vertformat_;
+ GPUVertBuf *dummy_verts_ = nullptr;
+ public:
/* GPUContext interface. */
- MTLContext(void *ghost_window);
+ MTLContext(void *ghost_window, void *ghost_context);
~MTLContext();
static void check_error(const char *info);
@@ -673,6 +706,35 @@ class MTLContext : public Context {
void pipeline_state_init();
MTLShader *get_active_shader();
+ /* These functions ensure that the current RenderCommandEncoder has
+ * the correct global state assigned. This should be called prior
+ * to every draw call, to ensure that all state is applied and up
+ * to date. We handle:
+ *
+ * - Buffer bindings (Vertex buffers, Uniforms, UBOs, transform feedback)
+ * - Texture bindings
+ * - Sampler bindings (+ argument buffer bindings)
+ * - Dynamic Render pipeline state (on encoder)
+ * - Baking Pipeline State Objects (PSOs) for current shader, based
+ * on final pipeline state.
+ *
+ * `ensure_render_pipeline_state` will return false if the state is
+ * invalid and cannot be applied. This should cancel a draw call. */
+ bool ensure_render_pipeline_state(MTLPrimitiveType prim_type);
+ bool ensure_uniform_buffer_bindings(
+ id<MTLRenderCommandEncoder> rec,
+ const MTLShaderInterface *shader_interface,
+ const MTLRenderPipelineStateInstance *pipeline_state_instance);
+ void ensure_texture_bindings(id<MTLRenderCommandEncoder> rec,
+ MTLShaderInterface *shader_interface,
+ const MTLRenderPipelineStateInstance *pipeline_state_instance);
+ void ensure_depth_stencil_state(MTLPrimitiveType prim_type);
+
+ id<MTLBuffer> get_null_buffer();
+ id<MTLBuffer> get_null_attribute_buffer();
+ gpu::MTLTexture *get_dummy_texture(eGPUTextureType type);
+ void free_dummy_resources();
+
/* State assignment. */
void set_viewport(int origin_x, int origin_y, int width, int height);
void set_scissor(int scissor_x, int scissor_y, int scissor_width, int scissor_height);
@@ -720,9 +782,37 @@ class MTLContext : public Context {
{
return MTLContext::global_memory_manager;
}
- /* Uniform Buffer Bindings to command encoders. */
- id<MTLBuffer> get_null_buffer();
- id<MTLBuffer> get_null_attribute_buffer();
+
+ /* Swap-chain and latency management. */
+ static void latency_resolve_average(int64_t frame_latency_us)
+ {
+ int64_t avg = 0;
+ int64_t frame_c = 0;
+ for (int i = MTL_FRAME_AVERAGE_COUNT - 1; i > 0; i--) {
+ MTLContext::frame_latency[i] = MTLContext::frame_latency[i - 1];
+ avg += MTLContext::frame_latency[i];
+ frame_c += (MTLContext::frame_latency[i] > 0) ? 1 : 0;
+ }
+ MTLContext::frame_latency[0] = frame_latency_us;
+ avg += MTLContext::frame_latency[0];
+ if (frame_c > 0) {
+ avg /= frame_c;
+ }
+ else {
+ avg = 0;
+ }
+ MTLContext::avg_drawable_latency_us = avg;
+ }
+
+ private:
+ void set_ghost_context(GHOST_ContextHandle ghostCtxHandle);
+ void set_ghost_window(GHOST_WindowHandle ghostWinHandle);
};
+/* GHOST Context callback and present. */
+void present(MTLRenderPassDescriptor *blit_descriptor,
+ id<MTLRenderPipelineState> blit_pso,
+ id<MTLTexture> swapchain_texture,
+ id<CAMetalDrawable> drawable);
+
} // namespace blender::gpu
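
latency_resolve_average above keeps a fixed window of the last MTL_FRAME_AVERAGE_COUNT frame latencies: each call shifts the history down one slot, stores the new sample at slot 0, and averages the non-zero entries so that empty warm-up slots do not drag the mean down. A standalone sketch of the same idea in plain C++ (hypothetical names; it also counts the newest sample in the divisor, which is a slight simplification of the code above):

#include <array>
#include <cstddef>
#include <cstdint>

/* Rolling average over the last N samples, ignoring empty (zero) slots. */
template<std::size_t N>
int64_t rolling_latency_us(std::array<int64_t, N> &history, int64_t sample_us)
{
  /* Shift history down one slot and insert the newest sample at the front. */
  for (std::size_t i = N - 1; i > 0; i--) {
    history[i] = history[i - 1];
  }
  history[0] = sample_us;

  /* Average only the slots that already hold a measurement. */
  int64_t sum = 0;
  int64_t count = 0;
  for (int64_t value : history) {
    sum += value;
    count += (value > 0) ? 1 : 0;
  }
  return (count > 0) ? (sum / count) : 0;
}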
diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm
index a66645e5fb5..50576379f0d 100644
--- a/source/blender/gpu/metal/mtl_context.mm
+++ b/source/blender/gpu/metal/mtl_context.mm
@@ -5,13 +5,29 @@
*/
#include "mtl_context.hh"
#include "mtl_debug.hh"
+#include "mtl_framebuffer.hh"
+#include "mtl_immediate.hh"
+#include "mtl_memory.hh"
+#include "mtl_primitive.hh"
#include "mtl_shader.hh"
#include "mtl_shader_interface.hh"
#include "mtl_state.hh"
+#include "mtl_uniform_buffer.hh"
#include "DNA_userdef_types.h"
#include "GPU_capabilities.h"
+#include "GPU_matrix.h"
+#include "GPU_shader.h"
+#include "GPU_texture.h"
+#include "GPU_uniform_buffer.h"
+#include "GPU_vertex_buffer.h"
+#include "intern/gpu_matrix_private.h"
+
+#include "PIL_time.h"
+
+#include <fstream>
+#include <string>
using namespace blender;
using namespace blender::gpu;
@@ -21,21 +37,118 @@ namespace blender::gpu {
/* Global memory manager. */
MTLBufferPool MTLContext::global_memory_manager;
+/* Swap-chain and latency management. */
+std::atomic<int> MTLContext::max_drawables_in_flight = 0;
+std::atomic<int64_t> MTLContext::avg_drawable_latency_us = 0;
+int64_t MTLContext::frame_latency[MTL_FRAME_AVERAGE_COUNT] = {0};
+
+/* -------------------------------------------------------------------- */
+/** \name GHOST Context interaction.
+ * \{ */
+
+void MTLContext::set_ghost_context(GHOST_ContextHandle ghostCtxHandle)
+{
+ GHOST_Context *ghost_ctx = reinterpret_cast<GHOST_Context *>(ghostCtxHandle);
+ BLI_assert(ghost_ctx != nullptr);
+
+ /* Release old MTLTexture handle */
+ if (default_fbo_mtltexture_) {
+ [default_fbo_mtltexture_ release];
+ default_fbo_mtltexture_ = nil;
+ }
+
+ /* Release Framebuffer attachments */
+ MTLFrameBuffer *mtl_front_left = static_cast<MTLFrameBuffer *>(this->front_left);
+ MTLFrameBuffer *mtl_back_left = static_cast<MTLFrameBuffer *>(this->back_left);
+ mtl_front_left->remove_all_attachments();
+ mtl_back_left->remove_all_attachments();
+
+ GHOST_ContextCGL *ghost_cgl_ctx = dynamic_cast<GHOST_ContextCGL *>(ghost_ctx);
+ if (ghost_cgl_ctx != NULL) {
+ default_fbo_mtltexture_ = ghost_cgl_ctx->metalOverlayTexture();
+
+ MTL_LOG_INFO(
+ "Binding GHOST context CGL %p to GPU context %p. (Device: %p, queue: %p, texture: %p)\n",
+ ghost_cgl_ctx,
+ this,
+ this->device,
+ this->queue,
+ default_fbo_gputexture_);
+
+ /* Check if the GHOST Context provides a default framebuffer: */
+ if (default_fbo_mtltexture_) {
+
+ /* Release old GPUTexture handle */
+ if (default_fbo_gputexture_) {
+ GPU_texture_free(wrap(static_cast<Texture *>(default_fbo_gputexture_)));
+ default_fbo_gputexture_ = nullptr;
+ }
+
+ /* Retain handle */
+ [default_fbo_mtltexture_ retain];
+
+ /*** Create front and back-buffers ***/
+ /* Create gpu::MTLTexture objects */
+ default_fbo_gputexture_ = new gpu::MTLTexture(
+ "MTL_BACKBUFFER", GPU_RGBA16F, GPU_TEXTURE_2D, default_fbo_mtltexture_);
+
+ /* Update frame-buffers with new texture attachments. */
+ mtl_front_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+ mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+#ifndef NDEBUG
+ this->label = default_fbo_mtltexture_.label;
+#endif
+ }
+ else {
+
+ /* Add default texture for cases where no other framebuffer is bound */
+ if (!default_fbo_gputexture_) {
+ default_fbo_gputexture_ = static_cast<gpu::MTLTexture *>(
+ unwrap(GPU_texture_create_2d(__func__, 16, 16, 1, GPU_RGBA16F, nullptr)));
+ }
+ mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+
+ MTL_LOG_INFO(
+ "-- Bound context %p for GPU context: %p is offscreen and does not have a default "
+ "framebuffer\n",
+ ghost_cgl_ctx,
+ this);
+#ifndef NDEBUG
+ this->label = @"Offscreen Metal Context";
+#endif
+ }
+ }
+ else {
+ MTL_LOG_INFO(
+ "[ERROR] Failed to bind GHOST context to MTLContext -- GHOST_ContextCGL is null "
+ "(GhostContext: %p, GhostContext_CGL: %p)\n",
+ ghost_ctx,
+ ghost_cgl_ctx);
+ BLI_assert(false);
+ }
+}
+
+void MTLContext::set_ghost_window(GHOST_WindowHandle ghostWinHandle)
+{
+ GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghostWinHandle);
+ this->set_ghost_context((GHOST_ContextHandle)(ghostWin ? ghostWin->getContext() : NULL));
+}
+
+/** \} */
+
/* -------------------------------------------------------------------- */
/** \name MTLContext
* \{ */
/* Placeholder functions */
-MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command_buffer(*this)
+MTLContext::MTLContext(void *ghost_window, void *ghost_context)
+ : memory_manager(*this), main_command_buffer(*this)
{
/* Init debug. */
debug::mtl_debug_init();
- /* Device creation.
- * TODO(Metal): This is a temporary initialization path to enable testing of features
- * and shader compilation tests. Future functionality should fetch the existing device
- * from GHOST_ContextCGL.mm. Plumbing to be updated in future. */
- this->device = MTLCreateSystemDefaultDevice();
+ /* Initialize Render-pass and Frame-buffer State. */
+ this->back_left = nullptr;
/* Initialize command buffer state. */
this->main_command_buffer.prepare();
@@ -47,10 +160,35 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command
is_inside_frame_ = false;
current_frame_index_ = 0;
- /* Prepare null data buffer */
+ /* Prepare null data buffer. */
null_buffer_ = nil;
null_attribute_buffer_ = nil;
+ /* Zero-initialize MTL textures. */
+ default_fbo_mtltexture_ = nil;
+ default_fbo_gputexture_ = nullptr;
+
+ /** Fetch GHOSTContext and fetch Metal device/queue. */
+ ghost_window_ = ghost_window;
+ if (ghost_window_ && ghost_context == NULL) {
+ /* NOTE(Metal): Fetch ghost_context from ghost_window if it is not provided.
+     * Whether the context is windowed or not, we need access to the GhostContext
+     * for presentation and for device/queue access. */
+ GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghost_window_);
+ ghost_context = (ghostWin ? ghostWin->getContext() : NULL);
+ }
+ BLI_assert(ghost_context);
+ this->ghost_context_ = static_cast<GHOST_ContextCGL *>(ghost_context);
+ this->queue = (id<MTLCommandQueue>)this->ghost_context_->metalCommandQueue();
+ this->device = (id<MTLDevice>)this->ghost_context_->metalDevice();
+ BLI_assert(this->queue);
+ BLI_assert(this->device);
+ [this->queue retain];
+ [this->device retain];
+
+ /* Register present callback. */
+ this->ghost_context_->metalRegisterPresentCallback(&present);
+
/* Create FrameBuffer handles. */
MTLFrameBuffer *mtl_front_left = new MTLFrameBuffer(this, "front_left");
MTLFrameBuffer *mtl_back_left = new MTLFrameBuffer(this, "back_left");
@@ -66,6 +204,7 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command
/* Initialize Metal modules. */
this->memory_manager.init();
this->state_manager = new MTLStateManager(this);
+ this->imm = new MTLImmediate(this);
/* Ensure global memory manager is initialized. */
MTLContext::global_memory_manager.init(this->device);
@@ -99,9 +238,29 @@ MTLContext::~MTLContext()
this->end_frame();
}
}
+
+ /* Release Memory Manager */
+ this->get_scratchbuffer_manager().free();
+
/* Release update/blit shaders. */
this->get_texture_utils().cleanup();
+ /* Detach resource references */
+ GPU_texture_unbind_all();
+
+ /* Unbind UBOs */
+ for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
+ if (this->pipeline_state.ubo_bindings[i].bound &&
+ this->pipeline_state.ubo_bindings[i].ubo != nullptr) {
+ GPUUniformBuf *ubo = wrap(
+ static_cast<UniformBuf *>(this->pipeline_state.ubo_bindings[i].ubo));
+ GPU_uniformbuf_unbind(ubo);
+ }
+ }
+
+ /* Release Dummy resources */
+ this->free_dummy_resources();
+
/* Release Sampler States. */
for (int i = 0; i < GPU_SAMPLER_MAX; i++) {
if (sampler_state_cache_[i] != nil) {
@@ -109,12 +268,28 @@ MTLContext::~MTLContext()
sampler_state_cache_[i] = nil;
}
}
+
+ /* Empty cached sampler argument buffers. */
+ for (auto entry : cached_sampler_buffers_.values()) {
+ entry->free();
+ }
+ cached_sampler_buffers_.clear();
+
+ /* Free null buffers. */
if (null_buffer_) {
[null_buffer_ release];
}
if (null_attribute_buffer_) {
[null_attribute_buffer_ release];
}
+
+ /* Free Metal objects. */
+ if (this->queue) {
+ [this->queue release];
+ }
+ if (this->device) {
+ [this->device release];
+ }
}
void MTLContext::begin_frame()
@@ -146,20 +321,49 @@ void MTLContext::check_error(const char *info)
void MTLContext::activate()
{
- /* TODO(Metal): Implement. */
+ /* Make sure no other context is already bound to this thread. */
+ BLI_assert(is_active_ == false);
+ is_active_ = true;
+ thread_ = pthread_self();
+
+ /* Re-apply ghost window/context for resizing */
+ if (ghost_window_) {
+ this->set_ghost_window((GHOST_WindowHandle)ghost_window_);
+ }
+ else if (ghost_context_) {
+ this->set_ghost_context((GHOST_ContextHandle)ghost_context_);
+ }
+
+ /* Reset UBO bind state. */
+ for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
+ if (this->pipeline_state.ubo_bindings[i].bound &&
+ this->pipeline_state.ubo_bindings[i].ubo != nullptr) {
+ this->pipeline_state.ubo_bindings[i].bound = false;
+ this->pipeline_state.ubo_bindings[i].ubo = nullptr;
+ }
+ }
+
+ /* Ensure imm active. */
+ immActivate();
}
+
void MTLContext::deactivate()
{
- /* TODO(Metal): Implement. */
+ BLI_assert(this->is_active_on_thread());
+ /* Flush context on deactivate. */
+ this->flush();
+ is_active_ = false;
+ immDeactivate();
}
void MTLContext::flush()
{
- /* TODO(Metal): Implement. */
+ this->main_command_buffer.submit(false);
}
+
void MTLContext::finish()
{
- /* TODO(Metal): Implement. */
+ this->main_command_buffer.submit(true);
}
void MTLContext::memory_statistics_get(int *total_mem, int *free_mem)
@@ -200,9 +404,8 @@ id<MTLRenderCommandEncoder> MTLContext::ensure_begin_render_pass()
/* Ensure command buffer workload submissions are optimal --
* Though do not split a batch mid-IMM recording. */
- /* TODO(Metal): Add IMM Check once MTLImmediate has been implemented. */
- if (this->main_command_buffer.do_break_submission()/*&&
- !((MTLImmediate *)(this->imm))->imm_is_recording()*/) {
+ if (this->main_command_buffer.do_break_submission() &&
+ !((MTLImmediate *)(this->imm))->imm_is_recording()) {
this->flush();
}
@@ -293,6 +496,72 @@ id<MTLBuffer> MTLContext::get_null_attribute_buffer()
return null_attribute_buffer_;
}
+gpu::MTLTexture *MTLContext::get_dummy_texture(eGPUTextureType type)
+{
+ /* Decrement 1 from texture type as they start from 1 and go to 32 (inclusive). Remap to 0..31 */
+ gpu::MTLTexture *dummy_tex = dummy_textures_[type - 1];
+ if (dummy_tex != nullptr) {
+ return dummy_tex;
+ }
+ else {
+ GPUTexture *tex = nullptr;
+ switch (type) {
+ case GPU_TEXTURE_1D:
+ tex = GPU_texture_create_1d("Dummy 1D", 128, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_1D_ARRAY:
+ tex = GPU_texture_create_1d_array("Dummy 1DArray", 128, 1, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_2D:
+ tex = GPU_texture_create_2d("Dummy 2D", 128, 128, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_2D_ARRAY:
+ tex = GPU_texture_create_2d_array("Dummy 2DArray", 128, 128, 1, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_3D:
+ tex = GPU_texture_create_3d(
+ "Dummy 3D", 128, 128, 1, 1, GPU_RGBA8, GPU_DATA_UBYTE, nullptr);
+ break;
+ case GPU_TEXTURE_CUBE:
+ tex = GPU_texture_create_cube("Dummy Cube", 128, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_CUBE_ARRAY:
+ tex = GPU_texture_create_cube_array("Dummy CubeArray", 128, 1, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_BUFFER:
+ if (!dummy_verts_) {
+ GPU_vertformat_clear(&dummy_vertformat_);
+ GPU_vertformat_attr_add(&dummy_vertformat_, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+ dummy_verts_ = GPU_vertbuf_create_with_format_ex(&dummy_vertformat_, GPU_USAGE_STATIC);
+ GPU_vertbuf_data_alloc(dummy_verts_, 64);
+ }
+ tex = GPU_texture_create_from_vertbuf("Dummy TextureBuffer", dummy_verts_);
+ break;
+ default:
+ BLI_assert_msg(false, "Unrecognised texture type");
+ return nullptr;
+ }
+ gpu::MTLTexture *metal_tex = static_cast<gpu::MTLTexture *>(reinterpret_cast<Texture *>(tex));
+ dummy_textures_[type - 1] = metal_tex;
+ return metal_tex;
+ }
+ return nullptr;
+}
+
+void MTLContext::free_dummy_resources()
+{
+ for (int tex = 0; tex < GPU_TEXTURE_BUFFER; tex++) {
+ if (dummy_textures_[tex]) {
+ GPU_texture_free(
+ reinterpret_cast<GPUTexture *>(static_cast<Texture *>(dummy_textures_[tex])));
+ dummy_textures_[tex] = nullptr;
+ }
+ }
+ if (dummy_verts_) {
+ GPU_vertbuf_discard(dummy_verts_);
+ }
+}
+
/** \} */
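
get_dummy_texture above is a lazy per-type cache: texture type values run from 1 to 32, so type - 1 remaps them into the 32-entry array, and each placeholder is created on first request and reused until free_dummy_resources runs. The same lazy-cache pattern reduced to a small standalone sketch (plain C++, hypothetical Resource type and factory):

/* Sketch: create a placeholder resource on first use, reuse it afterwards. */
struct Resource {
  int type;
};

static Resource *create_dummy_resource(int one_based_type)
{
  /* Hypothetical factory standing in for the GPU_texture_create_* calls above. */
  return new Resource{one_based_type};
}

struct DummyCache {
  static constexpr int slot_count = 32;
  Resource *slots[slot_count] = {nullptr};

  Resource *get(int one_based_type)
  {
    const int slot = one_based_type - 1; /* Type values start at 1: remap to 0..31. */
    if (slots[slot] == nullptr) {
      slots[slot] = create_dummy_resource(one_based_type);
    }
    return slots[slot];
  }
};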
/* -------------------------------------------------------------------- */
@@ -439,6 +708,757 @@ void MTLContext::set_scissor_enabled(bool scissor_enabled)
/** \} */
/* -------------------------------------------------------------------- */
+/** \name Command Encoder and pipeline state
+ * These utilities ensure that all of the globally bound resources and state have been
+ * correctly encoded within the current RenderCommandEncoder. This involves managing
+ * buffer bindings, texture bindings, depth stencil state and dynamic pipeline state.
+ *
+ * We will also trigger compilation of new PSOs where the input state has changed
+ * and a new permutation is required.
+ * All of this setup is required in order to perform a valid draw call.
+ * \{ */
+
+bool MTLContext::ensure_render_pipeline_state(MTLPrimitiveType mtl_prim_type)
+{
+ BLI_assert(this->pipeline_state.initialised);
+
+ /* Check if an active shader is bound. */
+ if (!this->pipeline_state.active_shader) {
+ MTL_LOG_WARNING("No Metal shader for bound GL shader\n");
+ return false;
+ }
+
+ /* Also ensure active shader is valid. */
+ if (!this->pipeline_state.active_shader->is_valid()) {
+    MTL_LOG_WARNING(
+        "Bound active shader is not valid (Missing/invalid implementation for Metal).\n");
+ return false;
+ }
+
+ /* Apply global state. */
+ this->state_manager->apply_state();
+
+ /* Main command buffer tracks the current state of the render pass, based on bound
+ * MTLFrameBuffer. */
+ MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+ /* Debug Check: Ensure Framebuffer instance is not dirty. */
+ BLI_assert(!this->main_command_buffer.get_active_framebuffer()->get_dirty());
+
+ /* Fetch shader interface. */
+ MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
+ if (shader_interface == nullptr) {
+ MTL_LOG_WARNING("Bound active shader does not have a valid shader interface!\n", );
+ return false;
+ }
+
+ /* Fetch shader and bake valid PipelineStateObject (PSO) based on current
+ * shader and state combination. This PSO represents the final GPU-executable
+ * permutation of the shader. */
+ MTLRenderPipelineStateInstance *pipeline_state_instance =
+ this->pipeline_state.active_shader->bake_current_pipeline_state(
+ this, mtl_prim_type_to_topology_class(mtl_prim_type));
+ if (!pipeline_state_instance) {
+ MTL_LOG_ERROR("Failed to bake Metal pipeline state for shader: %s\n",
+ shader_interface->get_name());
+ return false;
+ }
+
+ bool result = false;
+ if (pipeline_state_instance->pso) {
+
+ /* Fetch render command encoder. A render pass should already be active.
+ * This will be NULL if invalid. */
+ id<MTLRenderCommandEncoder> rec =
+ this->main_command_buffer.get_active_render_command_encoder();
+ BLI_assert(rec);
+ if (rec == nil) {
+ MTL_LOG_ERROR("ensure_render_pipeline_state called while render pass is not active.\n");
+ return false;
+ }
+
+ /* Bind Render Pipeline State. */
+ BLI_assert(pipeline_state_instance->pso);
+ if (rps.bound_pso != pipeline_state_instance->pso) {
+ [rec setRenderPipelineState:pipeline_state_instance->pso];
+ rps.bound_pso = pipeline_state_instance->pso;
+ }
+
+ /** Ensure resource bindings. */
+ /* Texture Bindings. */
+ /* We will iterate through all texture bindings on the context and determine if any of the
+ * active slots match those in our shader interface. If so, textures will be bound. */
+ if (shader_interface->get_total_textures() > 0) {
+ this->ensure_texture_bindings(rec, shader_interface, pipeline_state_instance);
+ }
+
+ /* Transform feedback buffer binding. */
+    /* TODO(Metal): Include this code once MTLVertBuf is merged. We bind the vertex buffer to which
+ * transform feedback data will be written. */
+ // GPUVertBuf *tf_vbo =
+ // this->pipeline_state.active_shader->get_transform_feedback_active_buffer();
+ // if (tf_vbo != nullptr && pipeline_state_instance->transform_feedback_buffer_index >= 0) {
+
+ // /* Ensure primitive type is either GPU_LINES, GPU_TRIANGLES or GPU_POINT */
+ // BLI_assert(mtl_prim_type == MTLPrimitiveTypeLine ||
+ // mtl_prim_type == MTLPrimitiveTypeTriangle ||
+ // mtl_prim_type == MTLPrimitiveTypePoint);
+
+ // /* Fetch active transform feedback buffer from vertbuf */
+ // MTLVertBuf *tf_vbo_mtl = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(tf_vbo));
+ // int tf_buffer_offset = 0;
+ // id<MTLBuffer> tf_buffer_mtl = tf_vbo_mtl->get_metal_buffer(&tf_buffer_offset);
+
+ // if (tf_buffer_mtl != nil && tf_buffer_offset >= 0) {
+ // [rec setVertexBuffer:tf_buffer_mtl
+ // offset:tf_buffer_offset
+ // atIndex:pipeline_state_instance->transform_feedback_buffer_index];
+ // printf("Successfully bound VBO: %p for transform feedback (MTL Buffer: %p)\n",
+ // tf_vbo_mtl,
+ // tf_buffer_mtl);
+ // }
+ // }
+
+ /* Matrix Bindings. */
+ /* This is now called upon shader bind. We may need to re-evaluate this though,
+ * as was done here to ensure uniform changes between draws were tracked.
+ * NOTE(Metal): We may be able to remove this. */
+ GPU_matrix_bind(reinterpret_cast<struct GPUShader *>(
+ static_cast<Shader *>(this->pipeline_state.active_shader)));
+
+ /* Bind Uniforms */
+ this->ensure_uniform_buffer_bindings(rec, shader_interface, pipeline_state_instance);
+
+ /* Bind Null attribute buffer, if needed. */
+ if (pipeline_state_instance->null_attribute_buffer_index >= 0) {
+ if (G.debug & G_DEBUG_GPU) {
+ MTL_LOG_INFO("Binding null attribute buffer at index: %d\n",
+ pipeline_state_instance->null_attribute_buffer_index);
+ }
+ rps.bind_vertex_buffer(this->get_null_attribute_buffer(),
+ 0,
+ pipeline_state_instance->null_attribute_buffer_index);
+ }
+
+ /** Dynamic Per-draw Render State on RenderCommandEncoder. */
+ /* State: Viewport. */
+ if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_VIEWPORT_FLAG) {
+ MTLViewport viewport;
+ viewport.originX = (double)this->pipeline_state.viewport_offset_x;
+ viewport.originY = (double)this->pipeline_state.viewport_offset_y;
+ viewport.width = (double)this->pipeline_state.viewport_width;
+ viewport.height = (double)this->pipeline_state.viewport_height;
+ viewport.znear = this->pipeline_state.depth_stencil_state.depth_range_near;
+ viewport.zfar = this->pipeline_state.depth_stencil_state.depth_range_far;
+ [rec setViewport:viewport];
+
+ this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+ ~MTL_PIPELINE_STATE_VIEWPORT_FLAG);
+ }
+
+ /* State: Scissor. */
+ if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_SCISSOR_FLAG) {
+
+ /* Get FrameBuffer associated with active RenderCommandEncoder. */
+ MTLFrameBuffer *render_fb = this->main_command_buffer.get_active_framebuffer();
+
+ MTLScissorRect scissor;
+ if (this->pipeline_state.scissor_enabled) {
+ scissor.x = this->pipeline_state.scissor_x;
+ scissor.y = this->pipeline_state.scissor_y;
+ scissor.width = this->pipeline_state.scissor_width;
+ scissor.height = this->pipeline_state.scissor_height;
+
+ /* Some scissor assignments exceed the bounds of the viewport due to implicitly added
+ * padding to the width/height - Clamp width/height. */
+ BLI_assert(scissor.x >= 0 && scissor.x < render_fb->get_width());
+ BLI_assert(scissor.y >= 0 && scissor.y < render_fb->get_height());
+ scissor.width = min_ii(scissor.width, render_fb->get_width() - scissor.x);
+ scissor.height = min_ii(scissor.height, render_fb->get_height() - scissor.y);
+ BLI_assert(scissor.width > 0 && (scissor.x + scissor.width <= render_fb->get_width()));
+ BLI_assert(scissor.height > 0 && (scissor.height <= render_fb->get_height()));
+ }
+ else {
+ /* Scissor is disabled, reset to default size as scissor state may have been previously
+ * assigned on this encoder. */
+ scissor.x = 0;
+ scissor.y = 0;
+ scissor.width = render_fb->get_width();
+ scissor.height = render_fb->get_height();
+ }
+
+ /* Scissor state can still be flagged as changed if it is toggled on and off, without
+ * parameters changing between draws. */
+ if (memcmp(&scissor, &rps.last_scissor_rect, sizeof(MTLScissorRect))) {
+ [rec setScissorRect:scissor];
+ rps.last_scissor_rect = scissor;
+ }
+ this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+ ~MTL_PIPELINE_STATE_SCISSOR_FLAG);
+ }
+
+ /* State: Face winding. */
+ if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_FRONT_FACING_FLAG) {
+ /* We need to invert the face winding in Metal, to account for the inverted-Y coordinate
+ * system. */
+ MTLWinding winding = (this->pipeline_state.front_face == GPU_CLOCKWISE) ?
+ MTLWindingClockwise :
+ MTLWindingCounterClockwise;
+ [rec setFrontFacingWinding:winding];
+ this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+ ~MTL_PIPELINE_STATE_FRONT_FACING_FLAG);
+ }
+
+ /* State: cull-mode. */
+ if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_CULLMODE_FLAG) {
+
+ MTLCullMode mode = MTLCullModeNone;
+ if (this->pipeline_state.culling_enabled) {
+ switch (this->pipeline_state.cull_mode) {
+ case GPU_CULL_NONE:
+ mode = MTLCullModeNone;
+ break;
+ case GPU_CULL_FRONT:
+ mode = MTLCullModeFront;
+ break;
+ case GPU_CULL_BACK:
+ mode = MTLCullModeBack;
+ break;
+ default:
+ BLI_assert_unreachable();
+ break;
+ }
+ }
+ [rec setCullMode:mode];
+ this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+ ~MTL_PIPELINE_STATE_CULLMODE_FLAG);
+ }
+
+ /* Pipeline state is now good. */
+ result = true;
+ }
+ return result;
+}
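
The section comment above implies a fixed per-draw ordering: bake and encode the pipeline and resource state, encode the depth-stencil state, and only then emit the draw, bailing out if the state is invalid. A hedged sketch of that calling pattern from a hypothetical draw routine (the real draw paths in this patch, e.g. MTLDrawList::submit further below, go through MTLBatch::bind, so treat this purely as an illustration of the ordering):

/* Sketch of the per-draw ordering implied by the comments above. ctx, prim_type and
 * vertex_count are assumed to come from the caller; error handling is a simple early-out. */
static void draw_sketch(MTLContext *ctx, MTLPrimitiveType prim_type, uint32_t vertex_count)
{
  /* Bakes/binds the PSO, uniform buffers, textures, viewport, scissor, winding and cull mode. */
  if (!ctx->ensure_render_pipeline_state(prim_type)) {
    return; /* Invalid state: cancel the draw call. */
  }

  /* Depth-stencil state is encoded separately; it also selects the per-primitive depth bias. */
  ctx->ensure_depth_stencil_state(prim_type);

  id<MTLRenderCommandEncoder> rec =
      ctx->main_command_buffer.get_active_render_command_encoder();
  [rec drawPrimitives:prim_type vertexStart:0 vertexCount:vertex_count];
}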
+
+/* Bind uniform buffers to an active render command encoder using the rendering state of the
+ * current context (active shader, bound UBOs). */
+bool MTLContext::ensure_uniform_buffer_bindings(
+ id<MTLRenderCommandEncoder> rec,
+ const MTLShaderInterface *shader_interface,
+ const MTLRenderPipelineStateInstance *pipeline_state_instance)
+{
+ /* Fetch Render Pass state. */
+ MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  /* Shader-owned push constant block for uniforms. */
+ bool active_shader_changed = (rps.last_bound_shader_state.shader_ !=
+ this->pipeline_state.active_shader ||
+ rps.last_bound_shader_state.shader_ == nullptr ||
+ rps.last_bound_shader_state.pso_index_ !=
+ pipeline_state_instance->shader_pso_index);
+
+ const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
+ if (push_constant_block.size > 0) {
+
+    /* Fetch uniform buffer base binding index from pipeline_state_instance - The buffer index
+ * will be offset by the number of bound VBOs. */
+ uint32_t block_size = push_constant_block.size;
+ uint32_t buffer_index = pipeline_state_instance->base_uniform_buffer_index +
+ push_constant_block.buffer_index;
+
+ /* Only need to rebind block if push constants have been modified -- or if no data is bound for
+ * the current RenderCommandEncoder. */
+ if (this->pipeline_state.active_shader->get_push_constant_is_dirty() ||
+ active_shader_changed || !rps.cached_vertex_buffer_bindings[buffer_index].is_bytes ||
+ !rps.cached_fragment_buffer_bindings[buffer_index].is_bytes || true) {
+
+ /* Bind push constant data. */
+ BLI_assert(this->pipeline_state.active_shader->get_push_constant_data() != nullptr);
+ rps.bind_vertex_bytes(
+ this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index);
+ rps.bind_fragment_bytes(
+ this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index);
+
+ /* Only need to rebind block if it has been modified. */
+ this->pipeline_state.active_shader->push_constant_bindstate_mark_dirty(false);
+ }
+ }
+ rps.last_bound_shader_state.set(this->pipeline_state.active_shader,
+ pipeline_state_instance->shader_pso_index);
+
+ /* Bind Global GPUUniformBuffers */
+ /* Iterate through expected UBOs in the shader interface, and check if the globally bound ones
+ * match. This is used to support the gpu_uniformbuffer module, where the uniform data is global,
+ * and not owned by the shader instance. */
+ for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) {
+ const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index);
+
+ if (ubo.buffer_index >= 0) {
+
+ /* Uniform Buffer index offset by 1 as the first shader buffer binding slot is reserved for
+ * the uniform PushConstantBlock. */
+ const uint32_t buffer_index = ubo.buffer_index + 1;
+ int ubo_offset = 0;
+ id<MTLBuffer> ubo_buffer = nil;
+ int ubo_size = 0;
+
+ bool bind_dummy_buffer = false;
+ if (this->pipeline_state.ubo_bindings[ubo_index].bound) {
+
+ /* Fetch UBO global-binding properties from slot. */
+ ubo_offset = 0;
+ ubo_buffer = this->pipeline_state.ubo_bindings[ubo_index].ubo->get_metal_buffer(
+ &ubo_offset);
+ ubo_size = this->pipeline_state.ubo_bindings[ubo_index].ubo->get_size();
+
+ /* Use dummy zero buffer if no buffer assigned -- this is an optimization to avoid
+ * allocating zero buffers. */
+ if (ubo_buffer == nil) {
+ bind_dummy_buffer = true;
+ }
+ else {
+ BLI_assert(ubo_buffer != nil);
+ BLI_assert(ubo_size > 0);
+
+ if (pipeline_state_instance->reflection_data_available) {
+ /* NOTE: While the vertex and fragment stages have different UBOs, the indices in each
+ * case will be the same for the same UBO.
+ * We also determine expected size and then ensure buffer of the correct size
+ * exists in one of the vertex/fragment shader binding tables. This path is used
+ * to verify that the size of the bound UBO matches what is expected in the shader. */
+ uint32_t expected_size =
+ (buffer_index <
+ pipeline_state_instance->buffer_bindings_reflection_data_vert.size()) ?
+ pipeline_state_instance->buffer_bindings_reflection_data_vert[buffer_index]
+ .size :
+ 0;
+ if (expected_size == 0) {
+ expected_size =
+ (buffer_index <
+ pipeline_state_instance->buffer_bindings_reflection_data_frag.size()) ?
+ pipeline_state_instance->buffer_bindings_reflection_data_frag[buffer_index]
+ .size :
+ 0;
+ }
+ BLI_assert_msg(
+ expected_size > 0,
+ "Shader interface expects UBO, but shader reflection data reports that it "
+ "is not present");
+
+ /* If ubo size is smaller than the size expected by the shader, we need to bind the
+ * dummy buffer, which will be big enough, to avoid an OOB error. */
+ if (ubo_size < expected_size) {
+ MTL_LOG_INFO(
+ "[Error][UBO] UBO (UBO Name: %s) bound at index: %d with size %d (Expected size "
+ "%d) (Shader Name: %s) is too small -- binding NULL buffer. This is likely an "
+ "over-binding, which is not used, but we need this to avoid validation "
+ "issues\n",
+ shader_interface->get_name_at_offset(ubo.name_offset),
+ buffer_index,
+ ubo_size,
+ expected_size,
+ shader_interface->get_name());
+ bind_dummy_buffer = true;
+ }
+ }
+ }
+ }
+ else {
+ MTL_LOG_INFO(
+ "[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- but "
+ "nothing was bound -- binding dummy buffer\n",
+ shader_interface->get_name(),
+ shader_interface->get_name_at_offset(ubo.name_offset),
+ buffer_index);
+ bind_dummy_buffer = true;
+ }
+
+ if (bind_dummy_buffer) {
+ /* Perform Dummy binding. */
+ ubo_offset = 0;
+ ubo_buffer = this->get_null_buffer();
+ ubo_size = [ubo_buffer length];
+ }
+
+ if (ubo_buffer != nil) {
+
+ uint32_t buffer_bind_index = pipeline_state_instance->base_uniform_buffer_index +
+ buffer_index;
+
+ /* Bind Vertex UBO. */
+ if (bool(ubo.stage_mask & ShaderStage::VERTEX)) {
+ BLI_assert(buffer_bind_index >= 0 &&
+ buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ rps.bind_vertex_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
+ }
+
+ /* Bind Fragment UBOs. */
+ if (bool(ubo.stage_mask & ShaderStage::FRAGMENT)) {
+ BLI_assert(buffer_bind_index >= 0 &&
+ buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ rps.bind_fragment_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
+ }
+ }
+ else {
+ MTL_LOG_WARNING(
+ "[UBO] Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer "
+ "is NULL!\n",
+ shader_interface->get_name(),
+ shader_interface->get_name_at_offset(ubo.name_offset),
+ buffer_index);
+ }
+ }
+ }
+ return true;
+}
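
The index arithmetic is the main subtlety above: Metal buffer slots below base_uniform_buffer_index are consumed by the PSO's vertex/index buffer bindings, the push-constant block takes the slot reported by its own buffer_index relative to that base, and each shader UBO is shifted by one to leave room for it. A tiny sketch of the mapping, with a worked example using hypothetical numbers:

/* Sketch: resolve final Metal buffer bind indices, following the offsets used above. */
static uint32_t push_constant_bind_index(uint32_t base_uniform_buffer_index,
                                         uint32_t push_constant_buffer_index)
{
  return base_uniform_buffer_index + push_constant_buffer_index;
}

static uint32_t ubo_bind_index(uint32_t base_uniform_buffer_index, uint32_t ubo_buffer_index)
{
  /* +1 because the first shader buffer slot is reserved for the push-constant block. */
  return base_uniform_buffer_index + ubo_buffer_index + 1;
}

/* Example (hypothetical numbers): with a base index of 4 and push constants in block 0,
 * push constants bind at Metal buffer slot 4, UBO 0 at slot 5 and UBO 1 at slot 6. */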
+
+/* Ensure texture bindings are correct and up to date for current draw call. */
+void MTLContext::ensure_texture_bindings(
+ id<MTLRenderCommandEncoder> rec,
+ MTLShaderInterface *shader_interface,
+ const MTLRenderPipelineStateInstance *pipeline_state_instance)
+{
+ BLI_assert(shader_interface != nil);
+ BLI_assert(rec != nil);
+
+ /* Fetch Render Pass state. */
+ MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+ @autoreleasepool {
+ int vertex_arg_buffer_bind_index = -1;
+ int fragment_arg_buffer_bind_index = -1;
+
+ /* Argument buffers are used for samplers, when the limit of 16 is exceeded. */
+ bool use_argument_buffer_for_samplers = shader_interface->get_use_argument_buffer_for_samplers(
+ &vertex_arg_buffer_bind_index, &fragment_arg_buffer_bind_index);
+
+ /* Loop through expected textures in shader interface and resolve bindings with currently
+     * bound textures. */
+ for (const uint t : IndexRange(shader_interface->get_max_texture_index() + 1)) {
+ /* Ensure the bound texture is compatible with the shader interface. If the
+ * shader does not expect a texture to be bound for the current slot, we skip
+ * binding.
+ * NOTE: Global texture bindings may be left over from prior draw calls. */
+ const MTLShaderTexture &shader_texture_info = shader_interface->get_texture(t);
+ if (!shader_texture_info.used) {
+ /* Skip unused binding points if explicit indices are specified. */
+ continue;
+ }
+
+ int slot = shader_texture_info.slot_index;
+ if (slot >= 0 && slot < GPU_max_textures()) {
+ bool bind_dummy_texture = true;
+ if (this->pipeline_state.texture_bindings[slot].used) {
+ gpu::MTLTexture *bound_texture =
+ this->pipeline_state.texture_bindings[slot].texture_resource;
+ MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot];
+ BLI_assert(bound_texture);
+ BLI_assert(bound_sampler.used);
+
+ if (shader_texture_info.type == bound_texture->type_) {
+ /* Bind texture and sampler if the bound texture matches the type expected by the
+ * shader. */
+ id<MTLTexture> tex = bound_texture->get_metal_handle();
+
+ if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) {
+ rps.bind_vertex_texture(tex, slot);
+ rps.bind_vertex_sampler(bound_sampler, use_argument_buffer_for_samplers, slot);
+ }
+
+ if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) {
+ rps.bind_fragment_texture(tex, slot);
+ rps.bind_fragment_sampler(bound_sampler, use_argument_buffer_for_samplers, slot);
+ }
+
+ /* Texture state resolved, no need to bind dummy texture */
+ bind_dummy_texture = false;
+ }
+ else {
+ /* Texture type for bound texture (e.g. Texture2DArray) does not match what was
+ * expected in the shader interface. This is a problem and we will need to bind
+ * a dummy texture to ensure correct API usage. */
+ MTL_LOG_WARNING(
+ "(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong "
+ "texture target type. (Expecting type %d, actual type %d) (binding "
+ "name:'%s')(texture name:'%s')\n",
+ shader_interface->get_name(),
+ bound_texture,
+ slot,
+ shader_texture_info.type,
+ bound_texture->type_,
+ shader_interface->get_name_at_offset(shader_texture_info.name_offset),
+ bound_texture->get_name());
+ }
+ }
+ else {
+ MTL_LOG_WARNING(
+ "Shader '%s' expected texture to be bound to slot %d -- No texture was "
+ "bound. (name:'%s')\n",
+ shader_interface->get_name(),
+ slot,
+ shader_interface->get_name_at_offset(shader_texture_info.name_offset));
+ }
+
+ /* Bind Dummy texture -- will temporarily resolve validation issues while incorrect formats
+ * are provided -- as certain configurations may not need any binding. These issues should
+ * be fixed in the high-level, if problems crop up. */
+ if (bind_dummy_texture) {
+ if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) {
+ rps.bind_vertex_texture(
+ get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot);
+
+ /* Bind default sampler state. */
+ MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
+ rps.bind_vertex_sampler(default_binding, use_argument_buffer_for_samplers, slot);
+ }
+ if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) {
+ rps.bind_fragment_texture(
+ get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot);
+
+ /* Bind default sampler state. */
+ MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
+ rps.bind_fragment_sampler(default_binding, use_argument_buffer_for_samplers, slot);
+ }
+ }
+ }
+ else {
+ MTL_LOG_WARNING(
+ "Shader %p expected texture to be bound to slot %d -- Slot exceeds the "
+ "hardware/API limit of '%d'. (name:'%s')\n",
+ this->pipeline_state.active_shader,
+ slot,
+ GPU_max_textures(),
+ shader_interface->get_name_at_offset(shader_texture_info.name_offset));
+ }
+ }
+
+ /* Construct and Bind argument buffer.
+ * NOTE(Metal): Samplers use an argument buffer when the limit of 16 samplers is exceeded. */
+ if (use_argument_buffer_for_samplers) {
+#ifndef NDEBUG
+ /* Debug check to validate each expected texture in the shader interface has a valid
+ * sampler object bound to the context. We will need all of these to be valid
+ * when constructing the sampler argument buffer. */
+ for (const uint i : IndexRange(shader_interface->get_max_texture_index() + 1)) {
+ const MTLShaderTexture &texture = shader_interface->get_texture(i);
+ if (texture.used) {
+ BLI_assert(this->samplers_.mtl_sampler[i] != nil);
+ }
+ }
+#endif
+
+ /* Check to ensure the buffer binding index for the argument buffer has been assigned.
+ * This PSO property will be set if we expect to use argument buffers, and the shader
+ * uses any amount of textures. */
+ BLI_assert(vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0);
+ if (vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0) {
+ /* Offset binding index to be relative to the start of static uniform buffer binding slots.
+ * The first N slots, prior to `pipeline_state_instance->base_uniform_buffer_index` are
+ * used by vertex and index buffer bindings, and the number of buffers present will vary
+ * between PSOs. */
+ int arg_buffer_idx = (pipeline_state_instance->base_uniform_buffer_index +
+ vertex_arg_buffer_bind_index);
+ assert(arg_buffer_idx < 32);
+ id<MTLArgumentEncoder> argument_encoder = shader_interface->find_argument_encoder(
+ arg_buffer_idx);
+ if (argument_encoder == nil) {
+ argument_encoder = [pipeline_state_instance->vert
+ newArgumentEncoderWithBufferIndex:arg_buffer_idx];
+ shader_interface->insert_argument_encoder(arg_buffer_idx, argument_encoder);
+ }
+
+ /* Generate or Fetch argument buffer sampler configuration.
+ * NOTE(Metal): we need to base sampler counts off of the maximal texture
+ * index. This is not the most optimal, but in practice, not a use-case
+ * when argument buffers are required.
+ * This is because with explicit texture indices, the binding indices
+ * should match across draws, to allow the high-level to optimize bind-points. */
+ gpu::MTLBuffer *encoder_buffer = nullptr;
+ this->samplers_.num_samplers = shader_interface->get_max_texture_index() + 1;
+
+ gpu::MTLBuffer **cached_smp_buffer_search = this->cached_sampler_buffers_.lookup_ptr(
+ this->samplers_);
+ if (cached_smp_buffer_search != nullptr) {
+ encoder_buffer = *cached_smp_buffer_search;
+ }
+ else {
+ /* Populate argument buffer with current global sampler bindings. */
+ int size = [argument_encoder encodedLength];
+ int alignment = max_uu([argument_encoder alignment], 256);
+ int size_align_delta = (size % alignment);
+ int aligned_alloc_size = ((alignment > 1) && (size_align_delta > 0)) ?
+ size + (alignment - (size % alignment)) :
+ size;
+
+ /* Allocate buffer to store encoded sampler arguments. */
+ encoder_buffer = MTLContext::get_global_memory_manager().allocate(aligned_alloc_size,
+ true);
+ BLI_assert(encoder_buffer);
+ BLI_assert(encoder_buffer->get_metal_buffer());
+ [argument_encoder setArgumentBuffer:encoder_buffer->get_metal_buffer() offset:0];
+ [argument_encoder
+ setSamplerStates:this->samplers_.mtl_sampler
+ withRange:NSMakeRange(0, shader_interface->get_max_texture_index() + 1)];
+ encoder_buffer->flush();
+
+ /* Insert into cache. */
+ this->cached_sampler_buffers_.add_new(this->samplers_, encoder_buffer);
+ }
+
+ BLI_assert(encoder_buffer != nullptr);
+ int vert_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
+ vertex_arg_buffer_bind_index);
+ rps.bind_vertex_buffer(encoder_buffer->get_metal_buffer(), 0, vert_buffer_index);
+
+      /* Fragment shader shares its argument buffer binding with the vertex shader, so no need to
+ * re-encode. We can use the same argument buffer. */
+ if (fragment_arg_buffer_bind_index >= 0) {
+ BLI_assert(fragment_arg_buffer_bind_index);
+ int frag_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
+ fragment_arg_buffer_bind_index);
+ rps.bind_fragment_buffer(encoder_buffer->get_metal_buffer(), 0, frag_buffer_index);
+ }
+ }
+ }
+ }
+}
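
The sampler argument-buffer path above sizes its backing allocation by rounding the encoder's encodedLength up to the larger of the encoder alignment and 256 bytes. The same rounding as a one-function sketch, plus a worked example with hypothetical numbers:

#include <algorithm>
#include <cstdint>

/* Round size up to a multiple of the effective alignment (at least 256 bytes),
 * matching the aligned_alloc_size computation above. */
static uint32_t aligned_argument_buffer_size(uint32_t size, uint32_t encoder_alignment)
{
  const uint32_t alignment = std::max<uint32_t>(encoder_alignment, 256u);
  const uint32_t remainder = size % alignment;
  return (remainder > 0) ? size + (alignment - remainder) : size;
}

/* Example (hypothetical): an encodedLength of 72 with a 16-byte encoder alignment
 * is padded to a 256-byte allocation. */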
+
+/* Encode latest depth-stencil state. */
+void MTLContext::ensure_depth_stencil_state(MTLPrimitiveType prim_type)
+{
+ /* Check if we need to update state. */
+ if (!(this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG)) {
+ return;
+ }
+
+ /* Fetch render command encoder. */
+ id<MTLRenderCommandEncoder> rec = this->main_command_buffer.get_active_render_command_encoder();
+ BLI_assert(rec);
+
+ /* Fetch Render Pass state. */
+ MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+ /** Prepare Depth-stencil state based on current global pipeline state. */
+ MTLFrameBuffer *fb = this->get_current_framebuffer();
+ bool hasDepthTarget = fb->has_depth_attachment();
+ bool hasStencilTarget = fb->has_stencil_attachment();
+
+ if (hasDepthTarget || hasStencilTarget) {
+ /* Update FrameBuffer State. */
+ this->pipeline_state.depth_stencil_state.has_depth_target = hasDepthTarget;
+ this->pipeline_state.depth_stencil_state.has_stencil_target = hasStencilTarget;
+
+ /* Check if current MTLContextDepthStencilState maps to an existing state object in
+ * the Depth-stencil state cache. */
+ id<MTLDepthStencilState> ds_state = nil;
+ id<MTLDepthStencilState> *depth_stencil_state_lookup =
+ this->depth_stencil_state_cache.lookup_ptr(this->pipeline_state.depth_stencil_state);
+
+ /* If not, populate DepthStencil state descriptor. */
+ if (depth_stencil_state_lookup == nullptr) {
+
+ MTLDepthStencilDescriptor *ds_state_desc = [[[MTLDepthStencilDescriptor alloc] init]
+ autorelease];
+
+ if (hasDepthTarget) {
+ ds_state_desc.depthWriteEnabled =
+ this->pipeline_state.depth_stencil_state.depth_write_enable;
+ ds_state_desc.depthCompareFunction =
+ this->pipeline_state.depth_stencil_state.depth_test_enabled ?
+ this->pipeline_state.depth_stencil_state.depth_function :
+ MTLCompareFunctionAlways;
+ }
+
+ if (hasStencilTarget) {
+ ds_state_desc.backFaceStencil.readMask =
+ this->pipeline_state.depth_stencil_state.stencil_read_mask;
+ ds_state_desc.backFaceStencil.writeMask =
+ this->pipeline_state.depth_stencil_state.stencil_write_mask;
+ ds_state_desc.backFaceStencil.stencilFailureOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_back_stencil_fail;
+ ds_state_desc.backFaceStencil.depthFailureOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_back_depth_fail;
+ ds_state_desc.backFaceStencil.depthStencilPassOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_back_depthstencil_pass;
+ ds_state_desc.backFaceStencil.stencilCompareFunction =
+ (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
+ this->pipeline_state.depth_stencil_state.stencil_func :
+ MTLCompareFunctionAlways;
+
+ ds_state_desc.frontFaceStencil.readMask =
+ this->pipeline_state.depth_stencil_state.stencil_read_mask;
+ ds_state_desc.frontFaceStencil.writeMask =
+ this->pipeline_state.depth_stencil_state.stencil_write_mask;
+ ds_state_desc.frontFaceStencil.stencilFailureOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_front_stencil_fail;
+ ds_state_desc.frontFaceStencil.depthFailureOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_front_depth_fail;
+ ds_state_desc.frontFaceStencil.depthStencilPassOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_front_depthstencil_pass;
+ ds_state_desc.frontFaceStencil.stencilCompareFunction =
+ (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
+ this->pipeline_state.depth_stencil_state.stencil_func :
+ MTLCompareFunctionAlways;
+ }
+
+ /* Bake new DS state. */
+ ds_state = [this->device newDepthStencilStateWithDescriptor:ds_state_desc];
+
+ /* Store state in cache. */
+ BLI_assert(ds_state != nil);
+ this->depth_stencil_state_cache.add_new(this->pipeline_state.depth_stencil_state, ds_state);
+ }
+ else {
+ ds_state = *depth_stencil_state_lookup;
+ BLI_assert(ds_state != nil);
+ }
+
+ /* Bind Depth Stencil State to render command encoder. */
+ BLI_assert(ds_state != nil);
+ if (ds_state != nil) {
+ if (rps.bound_ds_state != ds_state) {
+ [rec setDepthStencilState:ds_state];
+ rps.bound_ds_state = ds_state;
+ }
+ }
+
+ /* Apply dynamic depth-stencil state on encoder. */
+ if (hasStencilTarget) {
+ uint32_t stencil_ref_value =
+ (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
+ this->pipeline_state.depth_stencil_state.stencil_ref :
+ 0;
+ if (stencil_ref_value != rps.last_used_stencil_ref_value) {
+ [rec setStencilReferenceValue:stencil_ref_value];
+ rps.last_used_stencil_ref_value = stencil_ref_value;
+ }
+ }
+
+ if (hasDepthTarget) {
+ bool doBias = false;
+ switch (prim_type) {
+ case MTLPrimitiveTypeTriangle:
+ case MTLPrimitiveTypeTriangleStrip:
+ doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_tris;
+ break;
+ case MTLPrimitiveTypeLine:
+ case MTLPrimitiveTypeLineStrip:
+ doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_lines;
+ break;
+ case MTLPrimitiveTypePoint:
+ doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_points;
+ break;
+ }
+ [rec setDepthBias:(doBias) ? this->pipeline_state.depth_stencil_state.depth_bias : 0
+ slopeScale:(doBias) ? this->pipeline_state.depth_stencil_state.depth_slope_scale : 0
+ clamp:0];
+ }
+ }
+}
+
+/** \} */
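
ensure_depth_stencil_state avoids re-creating MTLDepthStencilState objects by hashing the whole MTLContextDepthStencilState into a map and baking a new Metal object only on a cache miss. The same lookup-or-bake pattern, reduced to its core (desc_from_state is a hypothetical helper standing in for the descriptor-filling code above; error handling omitted):

/* Hypothetical helper: fills an autoreleased descriptor from the cached state key. */
MTLDepthStencilDescriptor *desc_from_state(const MTLContextDepthStencilState &key);

/* Sketch: cache-or-bake pattern used for depth-stencil state objects. */
static id<MTLDepthStencilState> cached_depth_stencil_state(
    MTLContext *ctx,
    blender::Map<MTLContextDepthStencilState, id<MTLDepthStencilState>> &cache,
    const MTLContextDepthStencilState &key)
{
  if (id<MTLDepthStencilState> *found = cache.lookup_ptr(key)) {
    return *found; /* Hit: reuse the previously baked state object. */
  }
  id<MTLDepthStencilState> baked =
      [ctx->device newDepthStencilStateWithDescriptor:desc_from_state(key)];
  cache.add_new(key, baked);
  return baked;
}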
+
+/* -------------------------------------------------------------------- */
/** \name Visibility buffer control for MTLQueryPool.
* \{ */
@@ -605,4 +1625,148 @@ id<MTLSamplerState> MTLContext::get_default_sampler_state()
/** \} */
+/* -------------------------------------------------------------------- */
+/** \name Swap-chain management and Metal presentation.
+ * \{ */
+
+void present(MTLRenderPassDescriptor *blit_descriptor,
+ id<MTLRenderPipelineState> blit_pso,
+ id<MTLTexture> swapchain_texture,
+ id<CAMetalDrawable> drawable)
+{
+
+ MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
+ BLI_assert(ctx);
+
+ /* Flush any outstanding work. */
+ ctx->flush();
+
+ /* Always pace CPU to maximum of 3 drawables in flight.
+ * nextDrawable may have more in flight if backing swapchain
+   * textures are re-allocated, such as during resize events.
+ *
+ * Determine frames in flight based on current latency. If
+ * we are in a high-latency situation, limit frames in flight
+ * to increase app responsiveness and keep GPU execution under control.
+ * If latency improves, increase frames in flight to improve overall
+ * performance. */
+ int perf_max_drawables = MTL_MAX_DRAWABLES;
+ if (MTLContext::avg_drawable_latency_us > 185000) {
+ perf_max_drawables = 1;
+ }
+ else if (MTLContext::avg_drawable_latency_us > 85000) {
+ perf_max_drawables = 2;
+ }
+
+ while (MTLContext::max_drawables_in_flight > min_ii(perf_max_drawables, MTL_MAX_DRAWABLES)) {
+ PIL_sleep_ms(2);
+ }
+
+ /* Present is submitted in its own CMD Buffer to ensure drawable reference released as early as
+ * possible. This command buffer is separate as it does not utilize the global state
+ * for rendering as the main context does. */
+ id<MTLCommandBuffer> cmdbuf = [ctx->queue commandBuffer];
+ MTLCommandBufferManager::num_active_cmd_bufs++;
+
+ if (MTLCommandBufferManager::sync_event != nil) {
+ /* Ensure command buffer ordering. */
+ [cmdbuf encodeWaitForEvent:MTLCommandBufferManager::sync_event
+ value:MTLCommandBufferManager::event_signal_val];
+ }
+
+ /* Do Present Call and final Blit to MTLDrawable. */
+ id<MTLRenderCommandEncoder> enc = [cmdbuf renderCommandEncoderWithDescriptor:blit_descriptor];
+ [enc setRenderPipelineState:blit_pso];
+ [enc setFragmentTexture:swapchain_texture atIndex:0];
+ [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3];
+ [enc endEncoding];
+
+ /* Present drawable. */
+ BLI_assert(drawable);
+ [cmdbuf presentDrawable:drawable];
+
+ /* Ensure freed buffers have usage tracked against active CommandBuffer submissions. */
+ MTLSafeFreeList *cmd_free_buffer_list =
+ MTLContext::get_global_memory_manager().get_current_safe_list();
+ BLI_assert(cmd_free_buffer_list);
+
+ id<MTLCommandBuffer> cmd_buffer_ref = cmdbuf;
+ [cmd_buffer_ref retain];
+
+ /* Increment drawables in flight limiter. */
+ MTLContext::max_drawables_in_flight++;
+ std::chrono::time_point submission_time = std::chrono::high_resolution_clock::now();
+
+ /* Increment free pool reference and decrement upon command buffer completion. */
+ cmd_free_buffer_list->increment_reference();
+ [cmdbuf addCompletedHandler:^(id<MTLCommandBuffer> cb) {
+ /* Flag freed buffers associated with this CMD buffer as ready to be freed. */
+ cmd_free_buffer_list->decrement_reference();
+ [cmd_buffer_ref release];
+
+ /* Decrement count */
+ MTLCommandBufferManager::num_active_cmd_bufs--;
+ MTL_LOG_INFO("[Metal] Active command buffers: %d\n",
+ MTLCommandBufferManager::num_active_cmd_bufs);
+
+ /* Drawable count and latency management. */
+ MTLContext::max_drawables_in_flight--;
+ std::chrono::time_point completion_time = std::chrono::high_resolution_clock::now();
+ int64_t microseconds_per_frame = std::chrono::duration_cast<std::chrono::microseconds>(
+ completion_time - submission_time)
+ .count();
+ MTLContext::latency_resolve_average(microseconds_per_frame);
+
+ MTL_LOG_INFO("Frame Latency: %f ms (Rolling avg: %f ms Drawables: %d)\n",
+ ((float)microseconds_per_frame) / 1000.0f,
+ ((float)MTLContext::avg_drawable_latency_us) / 1000.0f,
+ perf_max_drawables);
+ }];
+
+ if (MTLCommandBufferManager::sync_event == nil) {
+ MTLCommandBufferManager::sync_event = [ctx->device newEvent];
+ BLI_assert(MTLCommandBufferManager::sync_event);
+ [MTLCommandBufferManager::sync_event retain];
+ }
+ BLI_assert(MTLCommandBufferManager::sync_event != nil);
+
+ MTLCommandBufferManager::event_signal_val++;
+ [cmdbuf encodeSignalEvent:MTLCommandBufferManager::sync_event
+ value:MTLCommandBufferManager::event_signal_val];
+
+ [cmdbuf commit];
+
+ /* When debugging, fetch advanced command buffer errors. */
+ if (G.debug & G_DEBUG_GPU) {
+ [cmdbuf waitUntilCompleted];
+ NSError *error = [cmdbuf error];
+ if (error != nil) {
+ NSLog(@"%@", error);
+ BLI_assert(false);
+
+ @autoreleasepool {
+ const char *stringAsChar = [[NSString stringWithFormat:@"%@", error] UTF8String];
+
+ std::ofstream outfile;
+ outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
+ outfile << stringAsChar;
+ outfile.close();
+ }
+ }
+ else {
+ @autoreleasepool {
+ NSString *str = @"Command buffer completed successfully!\n";
+ const char *stringAsChar = [str UTF8String];
+
+ std::ofstream outfile;
+ outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
+ outfile << stringAsChar;
+ outfile.close();
+ }
+ }
+ }
+}
+
+/** \} */
+
} // blender::gpu
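
The pacing logic in present() above maps the rolling drawable latency onto a permitted number of drawables in flight (the full swap-chain depth normally, two above roughly 85 ms, one above roughly 185 ms), then sleeps in 2 ms steps until the in-flight count drops below that cap. A condensed sketch of just the pacing decision, using the thresholds from the code above:

/* Sketch: latency-based cap on drawables in flight, as used by present().
 * Assumes MTL_MAX_DRAWABLES from mtl_common.hh and a latency value in microseconds. */
static int drawables_allowed_for_latency(int64_t avg_latency_us)
{
  if (avg_latency_us > 185000) {
    return 1; /* Very high latency: serialize presentation to recover responsiveness. */
  }
  if (avg_latency_us > 85000) {
    return 2; /* Moderate latency: keep only a little overlap. */
  }
  return MTL_MAX_DRAWABLES; /* Healthy latency: allow the full swap-chain depth. */
}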
diff --git a/source/blender/gpu/metal/mtl_drawlist.hh b/source/blender/gpu/metal/mtl_drawlist.hh
new file mode 100644
index 00000000000..47055f3d7f4
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_drawlist.hh
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Implementation of Multi Draw Indirect for the Metal backend.
+ * Metal has no host-side Multi Draw Indirect, so draws are buffered and emitted individually.
+ */
+
+#pragma once
+
+#include "BLI_sys_types.h"
+#include "GPU_batch.h"
+#include "MEM_guardedalloc.h"
+#include "gpu_drawlist_private.hh"
+
+#include "mtl_batch.hh"
+#include "mtl_context.hh"
+
+namespace blender::gpu {
+
+/**
+ * Implementation of Multi Draw Indirect for the Metal backend.
+ **/
+class MTLDrawList : public DrawList {
+
+ private:
+  /** Batch for which we are recording commands. */
+  MTLBatch *batch_;
+  /** Mapped memory bounds. */
+  void *data_;
+  /** Length of the mapped buffer (in bytes). */
+  size_t data_size_;
+  /** Current offset inside the mapped buffer (in bytes). */
+  size_t command_offset_;
+  /** Current number of commands recorded inside the mapped buffer. */
+  uint32_t command_len_;
+  /** Is UINT_MAX if not drawing indexed geometry. Cached to avoid dereferencing the batch. */
+  uint32_t base_index_;
+  /** Cached to avoid dereferencing the batch. */
+  uint32_t v_first_, v_count_;
+  /** Length of the whole buffer (in bytes). */
+  uint32_t buffer_size_;
+
+ public:
+ MTLDrawList(int length);
+ ~MTLDrawList();
+
+ void append(GPUBatch *batch, int i_first, int i_count) override;
+ void submit() override;
+
+ private:
+ void init();
+
+ MEM_CXX_CLASS_ALLOC_FUNCS("MTLDrawList");
+};
+
+} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_drawlist.mm b/source/blender/gpu/metal/mtl_drawlist.mm
new file mode 100644
index 00000000000..99194d2b72c
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_drawlist.mm
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Implementation of Multi Draw Indirect for the Metal backend.
+ * Metal has no host-side Multi Draw Indirect, so draws are buffered and emitted individually.
+ */
+
+#include "BLI_assert.h"
+
+#include "GPU_batch.h"
+#include "mtl_common.hh"
+#include "mtl_drawlist.hh"
+#include "mtl_primitive.hh"
+
+using namespace blender::gpu;
+
+namespace blender::gpu {
+
+/* Indirect draw call structure for reference. */
+/* MTLDrawPrimitivesIndirectArguments --
+ * https://developer.apple.com/documentation/metal/mtldrawprimitivesindirectarguments?language=objc
+ */
+/* struct MTLDrawPrimitivesIndirectArguments {
+ * uint32_t vertexCount;
+ * uint32_t instanceCount;
+ * uint32_t vertexStart;
+ * uint32_t baseInstance;
+};*/
+
+/* MTLDrawIndexedPrimitivesIndirectArguments --
+ * https://developer.apple.com/documentation/metal/mtldrawindexedprimitivesindirectarguments?language=objc
+ */
+/* struct MTLDrawIndexedPrimitivesIndirectArguments {
+ * uint32_t indexCount;
+ * uint32_t instanceCount;
+ * uint32_t indexStart;
+ * uint32_t baseVertex;
+ * uint32_t baseInstance;
+};*/
+
+#define MDI_ENABLED (buffer_size_ != 0)
+#define MDI_DISABLED (buffer_size_ == 0)
+#define MDI_INDEXED (base_index_ != UINT_MAX)
+
+MTLDrawList::MTLDrawList(int length)
+{
+ BLI_assert(length > 0);
+ batch_ = nullptr;
+ command_len_ = 0;
+ base_index_ = 0;
+ command_offset_ = 0;
+ data_size_ = 0;
+ buffer_size_ = sizeof(MTLDrawIndexedPrimitivesIndirectArguments) * length;
+ data_ = (void *)MEM_mallocN(buffer_size_, __func__);
+}
+
+MTLDrawList::~MTLDrawList()
+{
+ if (data_) {
+ MEM_freeN(data_);
+ data_ = nullptr;
+ }
+}
+
+void MTLDrawList::init()
+{
+ MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+ BLI_assert(ctx);
+ BLI_assert(MDI_ENABLED);
+ BLI_assert(data_ == nullptr);
+ UNUSED_VARS_NDEBUG(ctx);
+
+ batch_ = nullptr;
+ command_len_ = 0;
+ BLI_assert(data_);
+
+ command_offset_ = 0;
+}
+
+void MTLDrawList::append(GPUBatch *gpu_batch, int i_first, int i_count)
+{
+ /* Fallback when MultiDrawIndirect is not supported/enabled. */
+ MTLShader *shader = static_cast<MTLShader *>(unwrap(gpu_batch->shader));
+ bool requires_ssbo = (shader->get_uses_ssbo_vertex_fetch());
+ bool requires_emulation = mtl_needs_topology_emulation(gpu_batch->prim_type);
+ if (MDI_DISABLED || requires_ssbo || requires_emulation) {
+ GPU_batch_draw_advanced(gpu_batch, 0, 0, i_first, i_count);
+ return;
+ }
+
+ if (data_ == nullptr) {
+ this->init();
+ }
+ BLI_assert(data_);
+
+ MTLBatch *mtl_batch = static_cast<MTLBatch *>(gpu_batch);
+ BLI_assert(mtl_batch);
+ if (mtl_batch != batch_) {
+ /* Submit existing calls. */
+ this->submit();
+
+ /* Begin new batch. */
+ batch_ = mtl_batch;
+
+ /* Cached for faster access. */
+ MTLIndexBuf *el = batch_->elem_();
+ base_index_ = el ? el->index_base_ : UINT_MAX;
+ v_first_ = el ? el->index_start_ : 0;
+ v_count_ = el ? el->index_len_ : batch_->verts_(0)->vertex_len;
+ }
+
+ if (v_count_ == 0) {
+ /* Nothing to draw. */
+ return;
+ }
+
+ if (MDI_INDEXED) {
+ MTLDrawIndexedPrimitivesIndirectArguments *cmd =
+ reinterpret_cast<MTLDrawIndexedPrimitivesIndirectArguments *>((char *)data_ +
+ command_offset_);
+ cmd->indexStart = v_first_;
+ cmd->indexCount = v_count_;
+ cmd->instanceCount = i_count;
+ cmd->baseVertex = base_index_;
+ cmd->baseInstance = i_first;
+ }
+ else {
+ MTLDrawPrimitivesIndirectArguments *cmd =
+ reinterpret_cast<MTLDrawPrimitivesIndirectArguments *>((char *)data_ + command_offset_);
+ cmd->vertexStart = v_first_;
+ cmd->vertexCount = v_count_;
+ cmd->instanceCount = i_count;
+ cmd->baseInstance = i_first;
+ }
+
+ size_t command_size = MDI_INDEXED ? sizeof(MTLDrawIndexedPrimitivesIndirectArguments) :
+ sizeof(MTLDrawPrimitivesIndirectArguments);
+
+ command_offset_ += command_size;
+ command_len_++;
+
+ /* Check if we can fit at least one other command. */
+ if (command_offset_ + command_size > buffer_size_) {
+ this->submit();
+ }
+
+ return;
+}
+
+void MTLDrawList::submit()
+{
+ /* Metal does not support MDI from the host side, but we still benefit from only executing the
+ * batch bind a single time, rather than per-draw.
+ * NOTE(Metal): Consider using #MTLIndirectCommandBuffer to achieve similar behavior. */
+ if (command_len_ == 0) {
+ return;
+ }
+
+ /* Something's wrong if we get here without MDI support. */
+ BLI_assert(MDI_ENABLED);
+ BLI_assert(data_);
+
+ /* Host-side MDI is currently unsupported on Metal. */
+ bool can_use_MDI = false;
+
+ /* Verify context. */
+ MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+ BLI_assert(ctx);
+
+ /* Execute indirect draw calls. */
+ MTLShader *shader = static_cast<MTLShader *>(unwrap(batch_->shader));
+ bool SSBO_MODE = (shader->get_uses_ssbo_vertex_fetch());
+ if (SSBO_MODE) {
+ can_use_MDI = false;
+ BLI_assert(false);
+ return;
+ }
+
+ /* Heuristic to determine whether using indirect drawing is more efficient. */
+ size_t command_size = MDI_INDEXED ? sizeof(MTLDrawIndexedPrimitivesIndirectArguments) :
+ sizeof(MTLDrawPrimitivesIndirectArguments);
+ const bool is_finishing_a_buffer = (command_offset_ + command_size > buffer_size_);
+ can_use_MDI = can_use_MDI && (is_finishing_a_buffer || command_len_ > 2);
+
+ /* Bind Batch to setup render pipeline state. */
+ id<MTLRenderCommandEncoder> rec = batch_->bind(0, 0, 0, 0);
+ if (!rec) {
+ BLI_assert_msg(false, "A RenderCommandEncoder should always be available!\n");
+ return;
+ }
+
+ /* Common properties. */
+ MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(batch_->prim_type);
+
+ /* Execute multi-draw indirect. */
+ if (can_use_MDI && false) {
+ /* Metal doesn't support host-side MDI -- singular indirect draw calls are
+ * supported, but multi-draw is not.
+ * TODO(Metal): Consider using #IndirectCommandBuffers to provide similar
+ * behavior. */
+ }
+ else {
+
+ /* Execute draws manually. */
+ if (MDI_INDEXED) {
+ MTLDrawIndexedPrimitivesIndirectArguments *cmd =
+ (MTLDrawIndexedPrimitivesIndirectArguments *)data_;
+ MTLIndexBuf *mtl_elem = static_cast<MTLIndexBuf *>(
+ reinterpret_cast<IndexBuf *>(batch_->elem));
+ BLI_assert(mtl_elem);
+ MTLIndexType index_type = MTLIndexBuf::gpu_index_type_to_metal(mtl_elem->index_type_);
+ uint32_t index_size = (mtl_elem->index_type_ == GPU_INDEX_U16) ? 2 : 4;
+ uint32_t v_first_ofs = (mtl_elem->index_start_ * index_size);
+ uint32_t index_count = cmd->indexCount;
+
+ /* Fetch index buffer. May return an index buffer of a differing format,
+ * if index buffer optimization is used. In these cases, mtl_prim_type and
+ * index_count get updated with the new properties. */
+ GPUPrimType final_prim_type = batch_->prim_type;
+ id<MTLBuffer> index_buffer = mtl_elem->get_index_buffer(final_prim_type, index_count);
+ BLI_assert(index_buffer != nil);
+
+ /* Final primitive type. */
+ mtl_prim_type = gpu_prim_type_to_metal(final_prim_type);
+
+ if (index_buffer != nil) {
+
+ /* Set depth stencil state (requires knowledge of primitive type). */
+ ctx->ensure_depth_stencil_state(mtl_prim_type);
+
+ for (int i = 0; i < command_len_; i++, cmd++) {
+ [rec drawIndexedPrimitives:mtl_prim_type
+ indexCount:index_count
+ indexType:index_type
+ indexBuffer:index_buffer
+ indexBufferOffset:v_first_ofs
+ instanceCount:cmd->instanceCount
+ baseVertex:cmd->baseVertex
+ baseInstance:cmd->baseInstance];
+ ctx->main_command_buffer.register_draw_counters(cmd->indexCount * cmd->instanceCount);
+ }
+ }
+ else {
+ BLI_assert_msg(false, "Index buffer does not have backing Metal buffer");
+ }
+ }
+ else {
+ MTLDrawPrimitivesIndirectArguments *cmd = (MTLDrawPrimitivesIndirectArguments *)data_;
+
+ /* Verify if topology emulation is required. */
+ if (mtl_needs_topology_emulation(batch_->prim_type)) {
+ BLI_assert_msg(false, "Topology emulation cases should use the fallback path.");
+ }
+ else {
+
+ /* Set depth stencil state (requires knowledge of primitive type). */
+ ctx->ensure_depth_stencil_state(mtl_prim_type);
+
+ for (int i = 0; i < command_len_; i++, cmd++) {
+ [rec drawPrimitives:mtl_prim_type
+ vertexStart:cmd->vertexStart
+ vertexCount:cmd->vertexCount
+ instanceCount:cmd->instanceCount
+ baseInstance:cmd->baseInstance];
+ ctx->main_command_buffer.register_draw_counters(cmd->vertexCount * cmd->instanceCount);
+ }
+ }
+ }
+ }
+
+ /* Unbind batch. */
+ batch_->unbind();
+
+ /* Reset command offsets. */
+ command_len_ = 0;
+ command_offset_ = 0;
+
+ /* Avoid keeping reference to the batch. */
+ batch_ = nullptr;
+}
+
+} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_immediate.hh b/source/blender/gpu/metal/mtl_immediate.hh
new file mode 100644
index 00000000000..8d852282ac8
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_immediate.hh
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Mimics old style opengl immediate mode drawing.
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+#include "gpu_immediate_private.hh"
+
+#include <Cocoa/Cocoa.h>
+#include <Metal/Metal.h>
+#include <QuartzCore/QuartzCore.h>
+
+namespace blender::gpu {
+
+class MTLImmediate : public Immediate {
+ private:
+ MTLContext *context_ = nullptr;
+ MTLTemporaryBuffer current_allocation_;
+ MTLPrimitiveTopologyClass metal_primitive_mode_;
+ MTLPrimitiveType metal_primitive_type_;
+ bool has_begun_ = false;
+
+ public:
+ MTLImmediate(MTLContext *ctx);
+ ~MTLImmediate();
+
+ uchar *begin() override;
+ void end() override;
+ bool imm_is_recording()
+ {
+ return has_begun_;
+ }
+};
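+
+/* Example of the immediate-mode drawing API (GPU_immediate.h) that this class
+ * backs on Metal; roughly, `immBegin` maps to MTLImmediate::begin() and
+ * `immEnd` to MTLImmediate::end(). Illustrative sketch only, not part of this
+ * patch:
+ *
+ *   GPUVertFormat *format = immVertexFormat();
+ *   uint pos = GPU_vertformat_attr_add(format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
+ *   immBindBuiltinProgram(GPU_SHADER_3D_UNIFORM_COLOR);
+ *   immUniformColor4f(1.0f, 1.0f, 1.0f, 1.0f);
+ *   immBegin(GPU_PRIM_TRIS, 3);
+ *   immVertex3f(pos, 0.0f, 0.0f, 0.0f);
+ *   immVertex3f(pos, 1.0f, 0.0f, 0.0f);
+ *   immVertex3f(pos, 0.0f, 1.0f, 0.0f);
+ *   immEnd();
+ *   immUnbindProgram();
+ */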
+
+} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_immediate.mm b/source/blender/gpu/metal/mtl_immediate.mm
new file mode 100644
index 00000000000..ee48bdd6ee1
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_immediate.mm
@@ -0,0 +1,401 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Mimics old style opengl immediate mode drawing.
+ */
+
+#include "BKE_global.h"
+
+#include "GPU_vertex_format.h"
+#include "gpu_context_private.hh"
+#include "gpu_shader_private.hh"
+#include "gpu_vertex_format_private.h"
+
+#include "mtl_context.hh"
+#include "mtl_debug.hh"
+#include "mtl_immediate.hh"
+#include "mtl_primitive.hh"
+#include "mtl_shader.hh"
+
+namespace blender::gpu {
+
+MTLImmediate::MTLImmediate(MTLContext *ctx)
+{
+ context_ = ctx;
+}
+
+MTLImmediate::~MTLImmediate()
+{
+}
+
+uchar *MTLImmediate::begin()
+{
+ BLI_assert(!has_begun_);
+
+ /* Determine primitive type. */
+ metal_primitive_type_ = gpu_prim_type_to_metal(this->prim_type);
+ metal_primitive_mode_ = mtl_prim_type_to_topology_class(metal_primitive_type_);
+ has_begun_ = true;
+
+ /* Allocate a range of data and return host-accessible pointer. */
+ const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_len);
+ current_allocation_ = context_->get_scratchbuffer_manager()
+ .scratch_buffer_allocate_range_aligned(bytes_needed, 256);
+ [current_allocation_.metal_buffer retain];
+ return reinterpret_cast<uchar *>(current_allocation_.data);
+}
+
+void MTLImmediate::end()
+{
+ /* Ensure we're between an `imm::begin` / `imm::end` pair. */
+ BLI_assert(has_begun_);
+ BLI_assert(prim_type != GPU_PRIM_NONE);
+
+ /* Verify context is valid, vertex data is written and a valid shader is bound. */
+ if (context_ && this->vertex_idx > 0 && this->shader) {
+
+ MTLShader *active_mtl_shader = static_cast<MTLShader *>(unwrap(shader));
+
+ /* Skip draw if Metal shader is not valid. */
+ if (active_mtl_shader == nullptr || !active_mtl_shader->is_valid() ||
+ active_mtl_shader->get_interface() == nullptr) {
+
+ const char *ptr = (active_mtl_shader) ? active_mtl_shader->name_get() : nullptr;
+ MTL_LOG_WARNING(
+ "MTLImmediate::end -- cannot perform draw as active shader is NULL or invalid (likely "
+ "unimplemented) (shader %p '%s')\n",
+ active_mtl_shader,
+ ptr);
+ return;
+ }
+
+ /* Ensure we are inside a render pass and fetch active RenderCommandEncoder. */
+ id<MTLRenderCommandEncoder> rec = context_->ensure_begin_render_pass();
+ BLI_assert(rec != nil);
+
+ /* Fetch active render pipeline state. */
+ MTLRenderPassState &rps = context_->main_command_buffer.get_render_pass_state();
+
+ /* Bind Shader. */
+ GPU_shader_bind(this->shader);
+
+ /* Debug markers for frame-capture and detailed error messages. */
+ if (G.debug & G_DEBUG_GPU) {
+ [rec pushDebugGroup:[NSString
+ stringWithFormat:@"immEnd(verts: %d, shader: %s)",
+ this->vertex_idx,
+ active_mtl_shader->get_interface()->get_name()]];
+ [rec insertDebugSignpost:[NSString stringWithFormat:@"immEnd(verts: %d, shader: %s)",
+ this->vertex_idx,
+ active_mtl_shader->get_interface()
+ ->get_name()]];
+ }
+
+ /* Populate pipeline state vertex descriptor. */
+ MTLStateManager *state_manager = static_cast<MTLStateManager *>(
+ MTLContext::get()->state_manager);
+ MTLRenderPipelineStateDescriptor &desc = state_manager->get_pipeline_descriptor();
+ const MTLShaderInterface *interface = active_mtl_shader->get_interface();
+
+ /* Reset vertex descriptor to default state. */
+ desc.reset_vertex_descriptor();
+
+ desc.vertex_descriptor.num_attributes = interface->get_total_attributes();
+ desc.vertex_descriptor.num_vert_buffers = 1;
+
+ for (int i = 0; i < desc.vertex_descriptor.num_attributes; i++) {
+ desc.vertex_descriptor.attributes[i].format = MTLVertexFormatInvalid;
+ }
+ desc.vertex_descriptor.uses_ssbo_vertex_fetch =
+ active_mtl_shader->get_uses_ssbo_vertex_fetch();
+ desc.vertex_descriptor.num_ssbo_attributes = 0;
+
+ /* SSBO Vertex Fetch -- Verify Attributes. */
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+ active_mtl_shader->ssbo_vertex_fetch_bind_attributes_begin();
+
+ /* Disable Indexed rendering in SSBO vertex fetch. */
+ int uniform_ssbo_use_indexed = active_mtl_shader->uni_ssbo_uses_indexed_rendering;
+ BLI_assert_msg(uniform_ssbo_use_indexed != -1,
+ "Expected valid uniform location for ssbo_uses_indexed_rendering.");
+ int uses_indexed_rendering = 0;
+ active_mtl_shader->uniform_int(uniform_ssbo_use_indexed, 1, 1, &uses_indexed_rendering);
+ }
+
+ /* Populate Vertex descriptor and verify attributes.
+ * TODO(Metal): Cache this vertex state based on Vertex format and shaders. */
+ for (int i = 0; i < interface->get_total_attributes(); i++) {
+
+ /* NOTE: Attribute in VERTEX FORMAT does not necessarily share the same array index as
+ * attributes in shader interface. */
+ GPUVertAttr *attr = nullptr;
+ const MTLShaderInputAttribute &mtl_shader_attribute = interface->get_attribute(i);
+
+ /* Scan through vertex_format attributes until one with a name matching the shader interface
+ * is found. */
+ for (uint32_t a_idx = 0; a_idx < this->vertex_format.attr_len && attr == nullptr; a_idx++) {
+ GPUVertAttr *check_attribute = &this->vertex_format.attrs[a_idx];
+
+ /* Attributes can have multiple name aliases associated with them. */
+ for (uint32_t n_idx = 0; n_idx < check_attribute->name_len; n_idx++) {
+ const char *name = GPU_vertformat_attr_name_get(
+ &this->vertex_format, check_attribute, n_idx);
+
+ if (strcmp(name, interface->get_name_at_offset(mtl_shader_attribute.name_offset)) == 0) {
+ attr = check_attribute;
+ break;
+ }
+ }
+ }
+
+ BLI_assert_msg(attr != nullptr,
+ "Could not find expected attribute in immediate mode vertex format.");
+ if (attr == nullptr) {
+ MTL_LOG_ERROR(
+ "MTLImmediate::end Could not find matching attribute '%s' from Shader Interface in "
+ "Vertex Format! - TODO: Bind Dummy attribute\n",
+ interface->get_name_at_offset(mtl_shader_attribute.name_offset));
+ return;
+ }
+
+ /* Determine whether implicit type conversion between input vertex format
+ * and shader interface vertex format is supported. */
+ MTLVertexFormat convertedFormat;
+ bool can_use_implicit_conversion = mtl_convert_vertex_format(
+ mtl_shader_attribute.format,
+ (GPUVertCompType)attr->comp_type,
+ attr->comp_len,
+ (GPUVertFetchMode)attr->fetch_mode,
+ &convertedFormat);
+
+ if (can_use_implicit_conversion) {
+ /* Metal API can implicitly convert some formats during vertex assembly:
+ * - Converting from a normalized short2 format to float2
+ * - Type truncation e.g. Float4 to Float2.
+ * - Type expansion from Float3 to Float4.
+ * - Note: extra components are filled with the corresponding components of (0,0,0,1).
+ * (See
+ * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format)
+ */
+ bool is_floating_point_format = (attr->comp_type == GPU_COMP_F32);
+ desc.vertex_descriptor.attributes[i].format = convertedFormat;
+ desc.vertex_descriptor.attributes[i].format_conversion_mode =
+ (is_floating_point_format) ? (GPUVertFetchMode)GPU_FETCH_FLOAT :
+ (GPUVertFetchMode)GPU_FETCH_INT;
+ BLI_assert(convertedFormat != MTLVertexFormatInvalid);
+ }
+ else {
+ /* Some conversions are NOT valid, e.g. Int4 to Float4
+ * - In this case, we need to implement a conversion routine inside the shader.
+ * - This is handled using the format_conversion_mode flag
+ * - This flag is passed into the PSO as a function specialization,
+ * and will generate an appropriate conversion function when reading the vertex attribute
+ * value into local shader storage.
+ * (If no explicit conversion is needed, the function specializes to a pass-through). */
+ MTLVertexFormat converted_format;
+ bool can_convert = mtl_vertex_format_resize(
+ mtl_shader_attribute.format, attr->comp_len, &converted_format);
+ desc.vertex_descriptor.attributes[i].format = (can_convert) ? converted_format :
+ mtl_shader_attribute.format;
+ desc.vertex_descriptor.attributes[i].format_conversion_mode = (GPUVertFetchMode)
+ attr->fetch_mode;
+ BLI_assert(desc.vertex_descriptor.attributes[i].format != MTLVertexFormatInvalid);
+ }
+ /* Use the attribute offset from the vertex format, as this will be correct. */
+ desc.vertex_descriptor.attributes[i].offset = attr->offset;
+ desc.vertex_descriptor.attributes[i].buffer_index = mtl_shader_attribute.buffer_index;
+
+ /* SSBO Vertex Fetch Attribute bind. */
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+ BLI_assert_msg(mtl_shader_attribute.buffer_index == 0,
+ "All attributes should be in buffer index zero");
+ MTLSSBOAttribute ssbo_attr(
+ mtl_shader_attribute.index,
+ mtl_shader_attribute.buffer_index,
+ attr->offset,
+ this->vertex_format.stride,
+ MTLShader::ssbo_vertex_type_to_attr_type(desc.vertex_descriptor.attributes[i].format),
+ false);
+ desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] =
+ ssbo_attr;
+ desc.vertex_descriptor.num_ssbo_attributes++;
+ active_mtl_shader->ssbo_vertex_fetch_bind_attribute(ssbo_attr);
+ }
+ }
+
+ /* Buffer bindings for singular vertex buffer. */
+ desc.vertex_descriptor.buffer_layouts[0].step_function = MTLVertexStepFunctionPerVertex;
+ desc.vertex_descriptor.buffer_layouts[0].step_rate = 1;
+ desc.vertex_descriptor.buffer_layouts[0].stride = this->vertex_format.stride;
+ BLI_assert(this->vertex_format.stride > 0);
+
+ /* SSBO Vertex Fetch -- Verify Attributes. */
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+ active_mtl_shader->ssbo_vertex_fetch_bind_attributes_end(rec);
+
+ /* Set Status uniforms. */
+ BLI_assert_msg(active_mtl_shader->uni_ssbo_input_prim_type_loc != -1,
+ "ssbo_input_prim_type uniform location invalid!");
+ BLI_assert_msg(active_mtl_shader->uni_ssbo_input_vert_count_loc != -1,
+ "ssbo_input_vert_count uniform location invalid!");
+ GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)),
+ active_mtl_shader->uni_ssbo_input_prim_type_loc,
+ 1,
+ 1,
+ (const int *)(&this->prim_type));
+ GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)),
+ active_mtl_shader->uni_ssbo_input_vert_count_loc,
+ 1,
+ 1,
+ (const int *)(&this->vertex_idx));
+ }
+
+ MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);
+ if (context_->ensure_render_pipeline_state(mtl_prim_type)) {
+
+ /* Issue draw call. */
+ BLI_assert(this->vertex_idx > 0);
+
+ /* The Metal API does not support triangle fans, so we emulate them by
+ * generating an index buffer that re-maps the input vertices to a
+ * triangle list.
+ *
+ * NOTE(Metal): Consider caching generated triangle fan index buffers.
+ * For immediate mode, generating these is currently very cheap, as we use
+ * fast scratch buffer allocations. Though we may benefit from caching of
+ * frequently used buffer sizes. */
+ if (mtl_needs_topology_emulation(this->prim_type)) {
+
+ /* Debug safety check for SSBO FETCH MODE. */
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+ BLI_assert(false && "Topology emulation not supported with SSBO Vertex Fetch mode");
+ }
+
+ /* Emulate Tri-fan. */
+ if (this->prim_type == GPU_PRIM_TRI_FAN) {
+ /* Prepare Triangle-Fan emulation index buffer on CPU based on number of input
+ * vertices. */
+ uint32_t base_vert_count = this->vertex_idx;
+ uint32_t num_triangles = max_ii(base_vert_count - 2, 0);
+ uint32_t fan_index_count = num_triangles * 3;
+ BLI_assert(num_triangles > 0);
+
+ uint32_t alloc_size = sizeof(uint32_t) * fan_index_count;
+ uint32_t *index_buffer = nullptr;
+
+ MTLTemporaryBuffer allocation =
+ context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(
+ alloc_size, 128);
+ index_buffer = (uint32_t *)allocation.data;
+
+ int a = 0;
+ for (int i = 0; i < num_triangles; i++) {
+ index_buffer[a++] = 0;
+ index_buffer[a++] = i + 1;
+ index_buffer[a++] = i + 2;
+ }
+
+ @autoreleasepool {
+
+ id<MTLBuffer> index_buffer_mtl = nil;
+ uint32_t index_buffer_offset = 0;
+
+ /* Region of scratch buffer used for topology emulation element data.
+ * NOTE(Metal): We do not need to manually flush as the entire scratch
+ * buffer for current command buffer is flushed upon submission. */
+ index_buffer_mtl = allocation.metal_buffer;
+ index_buffer_offset = allocation.buffer_offset;
+
+ /* Set depth stencil state (requires knowledge of primitive type). */
+ context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle);
+
+ /* Bind Vertex Buffer. */
+ rps.bind_vertex_buffer(
+ current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0);
+
+ /* Draw. */
+ [rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle
+ indexCount:fan_index_count
+ indexType:MTLIndexTypeUInt32
+ indexBuffer:index_buffer_mtl
+ indexBufferOffset:index_buffer_offset];
+ }
+ }
+ else {
+ /* TODO(Metal): Topology emulation for line loop.
+ * NOTE(Metal): This path is currently not hit anywhere, as line loops are
+ * converted at a higher level for efficiency in such cases. */
+ BLI_assert_msg(false, "LineLoop requires emulation support in immediate mode.");
+ }
+ }
+ else {
+ MTLPrimitiveType primitive_type = metal_primitive_type_;
+ int vertex_count = this->vertex_idx;
+
+ /* Bind Vertex Buffer. */
+ rps.bind_vertex_buffer(
+ current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0);
+
+ /* Set depth stencil state (requires knowledge of primitive type). */
+ context_->ensure_depth_stencil_state(primitive_type);
+
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+
+ /* Bind Null Buffers for empty/missing bind slots. */
+ id<MTLBuffer> null_buffer = context_->get_null_buffer();
+ BLI_assert(null_buffer != nil);
+ for (int i = 1; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
+
+ /* We only need to ensure a buffer is bound to the context; its contents do
+ * not matter as it will not be used. */
+ if (rps.cached_vertex_buffer_bindings[i].metal_buffer == nil) {
+ rps.bind_vertex_buffer(null_buffer, 0, i);
+ }
+ }
+
+ /* SSBO vertex fetch - Nullify elements buffer. */
+ if (rps.cached_vertex_buffer_bindings[MTL_SSBO_VERTEX_FETCH_IBO_INDEX].metal_buffer ==
+ nil) {
+ rps.bind_vertex_buffer(null_buffer, 0, MTL_SSBO_VERTEX_FETCH_IBO_INDEX);
+ }
+
+ /* Submit draw call with modified vertex count, which reflects vertices per primitive
+ * defined in the USE_SSBO_VERTEX_FETCH `pragma`. */
+ int num_input_primitives = gpu_get_prim_count_from_type(vertex_count, this->prim_type);
+ int output_num_verts = num_input_primitives *
+ active_mtl_shader->get_ssbo_vertex_fetch_output_num_verts();
+#ifndef NDEBUG
+ BLI_assert(
+ mtl_vertex_count_fits_primitive_type(
+ output_num_verts, active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type()) &&
+ "Output Vertex count is not compatible with the requested output vertex primitive "
+ "type");
+#endif
+ [rec drawPrimitives:active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type()
+ vertexStart:0
+ vertexCount:output_num_verts];
+ context_->main_command_buffer.register_draw_counters(output_num_verts);
+ }
+ else {
+ /* Regular draw. */
+ [rec drawPrimitives:primitive_type vertexStart:0 vertexCount:vertex_count];
+ context_->main_command_buffer.register_draw_counters(vertex_count);
+ }
+ }
+ }
+ if (G.debug & G_DEBUG_GPU) {
+ [rec popDebugGroup];
+ }
+ }
+
+ /* Reset allocation after draw submission. */
+ has_begun_ = false;
+ if (current_allocation_.metal_buffer) {
+ [current_allocation_.metal_buffer release];
+ current_allocation_.metal_buffer = nil;
+ }
+}
+
+} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_index_buffer.mm b/source/blender/gpu/metal/mtl_index_buffer.mm
index 2195ab7538d..9712dce7b40 100644
--- a/source/blender/gpu/metal/mtl_index_buffer.mm
+++ b/source/blender/gpu/metal/mtl_index_buffer.mm
@@ -138,7 +138,7 @@ void MTLIndexBuf::update_sub(uint32_t start, uint32_t len, const void *data)
BLI_assert(ibo_ != nullptr);
/* Otherwise, we will inject a data update, using staged data, into the command stream.
- * Stage update contents in temporary buffer*/
+ * Stage update contents in temporary buffer. */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
MTLTemporaryBuffer range = ctx->get_scratchbuffer_manager().scratch_buffer_allocate_range(len);
diff --git a/source/blender/gpu/metal/mtl_memory.hh b/source/blender/gpu/metal/mtl_memory.hh
index df80df6543f..bd354376b12 100644
--- a/source/blender/gpu/metal/mtl_memory.hh
+++ b/source/blender/gpu/metal/mtl_memory.hh
@@ -340,13 +340,13 @@ class MTLBufferPool {
private:
/* Memory statistics. */
- long long int total_allocation_bytes_ = 0;
+ int64_t total_allocation_bytes_ = 0;
#if MTL_DEBUG_MEMORY_STATISTICS == 1
/* Debug statistics. */
std::atomic<int> per_frame_allocation_count_;
- std::atomic<long long int> allocations_in_pool_;
- std::atomic<long long int> buffers_in_pool_;
+ std::atomic<int64_t> allocations_in_pool_;
+ std::atomic<int64_t> buffers_in_pool_;
#endif
/* Metal resources. */
diff --git a/source/blender/gpu/metal/mtl_pso_descriptor_state.hh b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh
index 1906350679a..04ceb5bdf03 100644
--- a/source/blender/gpu/metal/mtl_pso_descriptor_state.hh
+++ b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh
@@ -28,8 +28,8 @@ struct MTLVertexAttributeDescriptorPSO {
uint64_t hash() const
{
- return (uint64_t)((uint64_t)this->format ^ (this->offset << 4) ^ (this->buffer_index << 8) ^
- (this->format_conversion_mode << 12));
+ return uint64_t((uint64_t(this->format) ^ (this->offset << 4) ^ (this->buffer_index << 8) ^
+ (this->format_conversion_mode << 12)));
}
};
@@ -46,8 +46,7 @@ struct MTLVertexBufferLayoutDescriptorPSO {
uint64_t hash() const
{
- return (uint64_t)((uint64_t)this->step_function ^ (this->step_rate << 4) ^
- (this->stride << 8));
+ return uint64_t(uint64_t(this->step_function) ^ (this->step_rate << 4) ^ (this->stride << 8));
}
};
@@ -217,34 +216,46 @@ struct MTLRenderPipelineStateDescriptor {
* has collisions. */
uint64_t hash = this->vertex_descriptor.hash();
- hash ^= (uint64_t)this->num_color_attachments << 16; /* up to 6 (3 bits). */
- hash ^= (uint64_t)this->depth_attachment_format << 18; /* up to 555 (9 bits). */
- hash ^= (uint64_t)this->stencil_attachment_format << 20; /* up to 555 (9 bits). */
- hash ^= (uint64_t)(*(
- (uint64_t *)&this->vertex_descriptor.prim_topology_class)); /* Up to 3 (2 bits). */
+ hash ^= uint64_t(this->num_color_attachments) << 16; /* up to 6 (3 bits). */
+ hash ^= uint64_t(this->depth_attachment_format) << 18; /* up to 555 (9 bits). */
+ hash ^= uint64_t(this->stencil_attachment_format) << 20; /* up to 555 (9 bits). */
+ hash ^= uint64_t(
+ *((uint64_t *)&this->vertex_descriptor.prim_topology_class)); /* Up to 3 (2 bits). */
/* Only include elements in Hash if they are needed - avoids variable null assignments
* influencing hash. */
if (this->num_color_attachments > 0) {
- hash ^= (uint64_t)this->color_write_mask << 22; /* 4 bit bit-mask. */
- hash ^= (uint64_t)this->alpha_blend_op << 26; /* Up to 4 (3 bits). */
- hash ^= (uint64_t)this->rgb_blend_op << 29; /* Up to 4 (3 bits). */
- hash ^= (uint64_t)this->dest_alpha_blend_factor << 32; /* Up to 18 (5 bits). */
- hash ^= (uint64_t)this->dest_rgb_blend_factor << 37; /* Up to 18 (5 bits). */
- hash ^= (uint64_t)this->src_alpha_blend_factor << 42; /* Up to 18 (5 bits). */
- hash ^= (uint64_t)this->src_rgb_blend_factor << 47; /* Up to 18 (5 bits). */
+ hash ^= uint64_t(this->color_write_mask) << 22; /* 4 bit bit-mask. */
+ hash ^= uint64_t(this->alpha_blend_op) << 26; /* Up to 4 (3 bits). */
+ hash ^= uint64_t(this->rgb_blend_op) << 29; /* Up to 4 (3 bits). */
+ hash ^= uint64_t(this->dest_alpha_blend_factor) << 32; /* Up to 18 (5 bits). */
+ hash ^= uint64_t(this->dest_rgb_blend_factor) << 37; /* Up to 18 (5 bits). */
+ hash ^= uint64_t(this->src_alpha_blend_factor) << 42; /* Up to 18 (5 bits). */
+ hash ^= uint64_t(this->src_rgb_blend_factor) << 47; /* Up to 18 (5 bits). */
}
for (const uint c : IndexRange(GPU_FB_MAX_COLOR_ATTACHMENT)) {
- hash ^= (uint64_t)this->color_attachment_format[c] << (c + 52); // up to 555 (9 bits)
+ hash ^= uint64_t(this->color_attachment_format[c]) << (c + 52); /* Up to 555 (9 bits). */
}
- hash |= (uint64_t)((this->blending_enabled && (this->num_color_attachments > 0)) ? 1 : 0)
- << 62;
- hash ^= (uint64_t)this->point_size;
+ hash |= uint64_t((this->blending_enabled && (this->num_color_attachments > 0)) ? 1 : 0) << 62;
+ hash ^= uint64_t(this->point_size);
return hash;
}
+
+ /* Reset the Vertex Descriptor to default. */
+ void reset_vertex_descriptor()
+ {
+ vertex_descriptor.num_attributes = 0;
+ vertex_descriptor.num_vert_buffers = 0;
+ for (int i = 0; i < GPU_VERT_ATTR_MAX_LEN; i++) {
+ vertex_descriptor.attributes[i].format = MTLVertexFormatInvalid;
+ vertex_descriptor.attributes[i].offset = 0;
+ }
+ vertex_descriptor.uses_ssbo_vertex_fetch = false;
+ vertex_descriptor.num_ssbo_attributes = 0;
+ }
};
} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_shader.hh b/source/blender/gpu/metal/mtl_shader.hh
index 64d9d1cf849..5485b32dd31 100644
--- a/source/blender/gpu/metal/mtl_shader.hh
+++ b/source/blender/gpu/metal/mtl_shader.hh
@@ -261,8 +261,6 @@ class MTLShader : public Shader {
bool get_push_constant_is_dirty();
void push_constant_bindstate_mark_dirty(bool is_dirty);
- void vertformat_from_shader(GPUVertFormat *format) const override;
-
/* DEPRECATED: Kept only because of BGL API. (Returning -1 in METAL). */
int program_handle_get() const override
{
diff --git a/source/blender/gpu/metal/mtl_shader.mm b/source/blender/gpu/metal/mtl_shader.mm
index 23097f312f0..006d3394378 100644
--- a/source/blender/gpu/metal/mtl_shader.mm
+++ b/source/blender/gpu/metal/mtl_shader.mm
@@ -129,6 +129,7 @@ MTLShader::~MTLShader()
if (shd_builder_ != nullptr) {
delete shd_builder_;
+ shd_builder_ = nullptr;
}
}
@@ -209,6 +210,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
/* Release temporary compilation resources. */
delete shd_builder_;
+ shd_builder_ = nullptr;
return false;
}
}
@@ -279,6 +281,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
/* Release temporary compilation resources. */
delete shd_builder_;
+ shd_builder_ = nullptr;
return false;
}
}
@@ -324,6 +327,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
/* Release temporary compilation resources. */
delete shd_builder_;
+ shd_builder_ = nullptr;
return true;
}
@@ -536,27 +540,6 @@ void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty)
push_constant_modified_ = is_dirty;
}
-void MTLShader::vertformat_from_shader(GPUVertFormat *format) const
-{
- GPU_vertformat_clear(format);
-
- const MTLShaderInterface *mtl_interface = static_cast<const MTLShaderInterface *>(interface);
- for (const uint attr_id : IndexRange(mtl_interface->get_total_attributes())) {
- const MTLShaderInputAttribute &attr = mtl_interface->get_attribute(attr_id);
-
- /* Extract type parameters from Metal type. */
- GPUVertCompType comp_type = comp_type_from_vert_format(attr.format);
- uint comp_len = comp_count_from_vert_format(attr.format);
- GPUVertFetchMode fetch_mode = fetchmode_from_vert_format(attr.format);
-
- GPU_vertformat_attr_add(format,
- mtl_interface->get_name_at_offset(attr.name_offset),
- comp_type,
- comp_len,
- fetch_mode);
- }
-}
-
/** \} */
/* -------------------------------------------------------------------- */
@@ -1167,6 +1150,7 @@ void MTLShader::ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_at
MTLShaderInterface *mtl_interface = this->get_interface();
BLI_assert(ssbo_attr.mtl_attribute_index >= 0 &&
ssbo_attr.mtl_attribute_index < mtl_interface->get_total_attributes());
+ UNUSED_VARS_NDEBUG(mtl_interface);
/* Update bind-mask to verify this attribute has been used. */
BLI_assert((ssbo_vertex_attribute_bind_mask_ & (1 << ssbo_attr.mtl_attribute_index)) ==
diff --git a/source/blender/gpu/metal/mtl_shader_generator.hh b/source/blender/gpu/metal/mtl_shader_generator.hh
index 43890ca0170..63e2e6d5924 100644
--- a/source/blender/gpu/metal/mtl_shader_generator.hh
+++ b/source/blender/gpu/metal/mtl_shader_generator.hh
@@ -497,7 +497,7 @@ inline std::string get_stage_class_name(ShaderStage stage)
inline bool is_builtin_type(std::string type)
{
/* Add Types as needed. */
- /* TODO(Metal): Consider replacing this with a switch and constexpr hash and switch.
+ /* TODO(Metal): Consider replacing this with a switch and `constexpr` hash and switch.
* Though most efficient and maintainable approach to be determined. */
static std::map<std::string, eMTLDataType> glsl_builtin_types = {
{"float", MTL_DATATYPE_FLOAT},
diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm
index 977e97dbd82..4a2be0753bb 100644
--- a/source/blender/gpu/metal/mtl_shader_generator.mm
+++ b/source/blender/gpu/metal/mtl_shader_generator.mm
@@ -724,10 +724,6 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
}
if (msl_iface.uses_ssbo_vertex_fetch_mode) {
ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl;
- ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_MAX_VBOS " << MTL_SSBO_VERTEX_FETCH_MAX_VBOS
- << std::endl;
- ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_IBO_INDEX " << MTL_SSBO_VERTEX_FETCH_IBO_INDEX
- << std::endl;
for (const MSLVertexInputAttribute &attr : msl_iface.vertex_input_attributes) {
ss_vertex << "#define SSBO_ATTR_TYPE_" << attr.name << " " << attr.type << std::endl;
}
diff --git a/source/blender/gpu/metal/mtl_shader_interface.mm b/source/blender/gpu/metal/mtl_shader_interface.mm
index 3703d5b5684..97a82345761 100644
--- a/source/blender/gpu/metal/mtl_shader_interface.mm
+++ b/source/blender/gpu/metal/mtl_shader_interface.mm
@@ -117,9 +117,7 @@ uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset,
MTLShaderUniformBlock &uni_block = ubos_[total_uniform_blocks_];
uni_block.name_offset = name_offset;
- /* We offset the buffer binding index by one, as the first slot is reserved for push constant
- * data. */
- uni_block.buffer_index = buffer_index + 1;
+ uni_block.buffer_index = buffer_index;
uni_block.size = size;
uni_block.current_offset = 0;
uni_block.stage_mask = ShaderStage::BOTH;
@@ -297,8 +295,10 @@ void MTLShaderInterface::prepare_common_shader_inputs()
current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ubo.name_offset));
/* Location refers to the index in the ubos_ array. */
current_input->location = ubo_index;
- /* Final binding location refers to the buffer binding index within the shader (Relative to
- * MTL_uniform_buffer_base_index). */
+ /* Binding location refers to the UBO bind slot in
+ * #MTLContextGlobalShaderPipelineState::ubo_bindings. The buffer bind index [[buffer(N)]]
+ * within the shader will apply an offset for bound vertex buffers and the default uniform
+ * PushConstantBlock. */
current_input->binding = shd_ubo.buffer_index;
current_input++;
}
diff --git a/source/blender/gpu/metal/mtl_texture.hh b/source/blender/gpu/metal/mtl_texture.hh
index be6f3a3a02b..28b55306707 100644
--- a/source/blender/gpu/metal/mtl_texture.hh
+++ b/source/blender/gpu/metal/mtl_texture.hh
@@ -108,10 +108,10 @@ struct TextureReadRoutineSpecialisation {
uint64_t hash() const
{
blender::DefaultHash<std::string> string_hasher;
- return (uint64_t)string_hasher(this->input_data_type + this->output_data_type +
- std::to_string((this->component_count_input << 8) +
- this->component_count_output +
- (this->depth_format_mode << 28)));
+ return uint64_t(string_hasher(this->input_data_type + this->output_data_type +
+ std::to_string((this->component_count_input << 8) +
+ this->component_count_output +
+ (this->depth_format_mode << 28))));
}
};
@@ -138,12 +138,12 @@ struct MTLSamplerState {
operator uint() const
{
- return (uint)state;
+ return uint(state);
}
operator uint64_t() const
{
- return (uint64_t)state;
+ return uint64_t(state);
}
};
@@ -200,7 +200,7 @@ class MTLTexture : public Texture {
TEXTURE_VIEW_SWIZZLE_DIRTY = (1 << 0),
TEXTURE_VIEW_MIP_DIRTY = (1 << 1)
};
- id<MTLTexture> mip_swizzle_view_;
+ id<MTLTexture> mip_swizzle_view_ = nil;
char tex_swizzle_mask_[4];
MTLTextureSwizzleChannels mtl_swizzle_mask_;
bool mip_range_dirty_ = false;
@@ -216,7 +216,6 @@ class MTLTexture : public Texture {
/* VBO. */
MTLVertBuf *vert_buffer_;
id<MTLBuffer> vert_buffer_mtl_;
- int vert_buffer_offset_;
/* Core parameters and sub-resources. */
eGPUTextureUsage gpu_image_usage_flags_;
@@ -247,7 +246,7 @@ class MTLTexture : public Texture {
void mip_range_set(int min, int max) override;
void *read(int mip, eGPUDataFormat type) override;
- /* Remove once no longer required -- will just return 0 for now in MTL path*/
+ /* Remove once no longer required -- will just return 0 for now in MTL path. */
uint gl_bindcode_get() const override;
bool texture_is_baked();
@@ -256,6 +255,14 @@ class MTLTexture : public Texture {
return name_;
}
+ id<MTLBuffer> get_vertex_buffer() const
+ {
+ if (resource_mode_ == MTL_TEXTURE_MODE_VBO) {
+ return vert_buffer_mtl_;
+ }
+ return nil;
+ }
+
protected:
bool init_internal() override;
bool init_internal(GPUVertBuf *vbo) override;
@@ -324,8 +331,6 @@ class MTLTexture : public Texture {
int height);
GPUFrameBuffer *get_blit_framebuffer(uint dst_slice, uint dst_mip);
- MEM_CXX_CLASS_ALLOC_FUNCS("gpu::MTLTexture")
-
/* Texture Update function Utilities. */
/* Metal texture updating does not provide the same range of functionality for type conversion
* and format compatibility as are available in OpenGL. To achieve the same level of
@@ -357,9 +362,9 @@ class MTLTexture : public Texture {
*/
struct TextureUpdateParams {
int mip_index;
- int extent[3]; /* Width, Height, Slice on 2D Array tex*/
- int offset[3]; /* Width, Height, Slice on 2D Array tex*/
- uint unpack_row_length; /* Number of pixels between bytes in input data */
+ int extent[3]; /* Width, Height, Slice on 2D Array tex. */
+ int offset[3]; /* Width, Height, Slice on 2D Array tex. */
+ uint unpack_row_length; /* Number of pixels between bytes in input data. */
};
id<MTLComputePipelineState> texture_update_1d_get_kernel(
@@ -383,7 +388,7 @@ class MTLTexture : public Texture {
/* Depth texture updates are not directly supported with Blit operations, similarly, we cannot
* use a compute shader to write to depth, so we must instead render to a depth target.
* These processes use vertex/fragment shaders to render texture data from an intermediate
- * source, in order to prime the depth buffer*/
+ * source, in order to prime the depth buffer. */
GPUShader *depth_2d_update_sh_get(DepthTextureUpdateRoutineSpecialisation specialization);
void update_sub_depth_2d(
@@ -392,8 +397,8 @@ class MTLTexture : public Texture {
/* Texture Read function utilities -- Follows a similar mechanism to the updating routines */
struct TextureReadParams {
int mip_index;
- int extent[3]; /* Width, Height, Slice on 2D Array tex*/
- int offset[3]; /* Width, Height, Slice on 2D Array tex*/
+ int extent[3]; /* Width, Height, Slice on 2D Array tex. */
+ int offset[3]; /* Width, Height, Slice on 2D Array tex. */
};
id<MTLComputePipelineState> texture_read_1d_get_kernel(
@@ -415,6 +420,8 @@ class MTLTexture : public Texture {
/* fullscreen blit utilities. */
GPUShader *fullscreen_blit_sh_get();
+
+ MEM_CXX_CLASS_ALLOC_FUNCS("MTLTexture")
};
/* Utility */
diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm
index 2b7c2333bff..29dcc8d32ee 100644
--- a/source/blender/gpu/metal/mtl_texture.mm
+++ b/source/blender/gpu/metal/mtl_texture.mm
@@ -12,6 +12,7 @@
#include "GPU_batch_presets.h"
#include "GPU_capabilities.h"
#include "GPU_framebuffer.h"
+#include "GPU_immediate.h"
#include "GPU_platform.h"
#include "GPU_state.h"
@@ -20,6 +21,7 @@
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_texture.hh"
+#include "mtl_vertex_buffer.hh"
#include "GHOST_C-api.h"
@@ -50,7 +52,6 @@ void gpu::MTLTexture::mtl_texture_init()
/* VBO. */
vert_buffer_ = nullptr;
vert_buffer_mtl_ = nil;
- vert_buffer_offset_ = -1;
/* Default Swizzle. */
tex_swizzle_mask_[0] = 'r';
@@ -169,26 +170,39 @@ void gpu::MTLTexture::bake_mip_swizzle_view()
id<MTLTexture> gpu::MTLTexture::get_metal_handle()
{
- /* ensure up to date and baked. */
- this->ensure_baked();
-
/* Verify VBO texture shares same buffer. */
if (resource_mode_ == MTL_TEXTURE_MODE_VBO) {
- int r_offset = -1;
+ id<MTLBuffer> buf = vert_buffer_->get_metal_buffer();
+
+ /* Source vertex buffer has been re-generated, require re-initialization. */
+ if (buf != vert_buffer_mtl_) {
+ MTL_LOG_INFO(
+ "MTLTexture '%p' using MTL_TEXTURE_MODE_VBO requires re-generation due to updated "
+ "Vertex-Buffer.\n",
+ this);
+ /* Clear state. */
+ this->reset();
+
+ /* Re-initialize. */
+ this->init_internal(wrap(vert_buffer_));
+
+ /* Update for assertion check below. */
+ buf = vert_buffer_->get_metal_buffer();
+ }
- /* TODO(Metal): Fetch buffer from MTLVertBuf when implemented. */
- id<MTLBuffer> buf = nil; /*vert_buffer_->get_metal_buffer(&r_offset);*/
+ /* Ensure buffer is valid.
+ * Fetch the vertex buffer handle directly, in case it changed above. */
BLI_assert(vert_buffer_mtl_ != nil);
- BLI_assert(buf == vert_buffer_mtl_ && r_offset == vert_buffer_offset_);
-
- UNUSED_VARS(buf);
- UNUSED_VARS_NDEBUG(r_offset);
+ BLI_assert(vert_buffer_->get_metal_buffer() == vert_buffer_mtl_);
}
+ /* Ensure texture is up to date and baked. */
+ this->ensure_baked();
+
if (is_baked_) {
/* For explicit texture views, ensure we always return the texture view. */
if (resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) {
- BLI_assert(mip_swizzle_view_ && "Texture view should always have a valid handle.");
+ BLI_assert_msg(mip_swizzle_view_, "Texture view should always have a valid handle.");
}
if (mip_swizzle_view_ != nil || texture_view_dirty_flags_) {
@@ -208,7 +222,7 @@ id<MTLTexture> gpu::MTLTexture::get_metal_handle_base()
/* For explicit texture views, always return the texture view. */
if (resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) {
- BLI_assert(mip_swizzle_view_ && "Texture view should always have a valid handle.");
+ BLI_assert_msg(mip_swizzle_view_, "Texture view should always have a valid handle.");
if (mip_swizzle_view_ != nil || texture_view_dirty_flags_) {
bake_mip_swizzle_view();
}
@@ -290,7 +304,6 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst,
/* Execute graphics draw call to perform the blit. */
GPUBatch *quad = GPU_batch_preset_quad();
-
GPU_batch_set_shader(quad, shader);
float w = dst->width_get();
@@ -915,7 +928,7 @@ void gpu::MTLTexture::generate_mipmap()
/* Ensure texture is baked. */
this->ensure_baked();
- BLI_assert(is_baked_ && texture_ && "MTLTexture is not valid");
+ BLI_assert_msg(is_baked_ && texture_, "MTLTexture is not valid");
if (mipmaps_ == 1 || mtl_max_mips_ == 1) {
MTL_LOG_WARNING("Call to generate mipmaps on texture with 'mipmaps_=1\n'");
@@ -1231,7 +1244,7 @@ void gpu::MTLTexture::read_internal(int mip,
depth_format_mode = 4;
break;
default:
- BLI_assert(false && "Unhandled depth read format case");
+ BLI_assert_msg(false, "Unhandled depth read format case");
break;
}
}
@@ -1445,11 +1458,84 @@ bool gpu::MTLTexture::init_internal()
bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo)
{
- /* Zero initialize. */
- this->prepare_internal();
+ if (this->format_ == GPU_DEPTH24_STENCIL8) {
+ /* Apple Silicon requires GPU_DEPTH32F_STENCIL8 instead of GPU_DEPTH24_STENCIL8. */
+ this->format_ = GPU_DEPTH32F_STENCIL8;
+ }
+
+ MTLPixelFormat mtl_format = gpu_texture_format_to_metal(this->format_);
+ mtl_max_mips_ = 1;
+ mipmaps_ = 0;
+ this->mip_range_set(0, 0);
+
+ /* Create texture from GPUVertBuf's buffer. */
+ MTLVertBuf *mtl_vbo = static_cast<MTLVertBuf *>(unwrap(vbo));
+ mtl_vbo->bind();
+ mtl_vbo->flag_used();
+
+ /* Get Metal Buffer. */
+ id<MTLBuffer> source_buffer = mtl_vbo->get_metal_buffer();
+ BLI_assert(source_buffer);
+
+ /* Verify size. */
+ if (w_ <= 0) {
+ MTL_LOG_WARNING("Allocating texture buffer of width 0!\n");
+ w_ = 1;
+ }
+
+ /* Verify Texture and vertex buffer alignment. */
+ int bytes_per_pixel = get_mtl_format_bytesize(mtl_format);
+ int bytes_per_row = bytes_per_pixel * w_;
+
+ MTLContext *mtl_ctx = MTLContext::get();
+ uint32_t align_requirement = static_cast<uint32_t>(
+ [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]);
+
+ /* Verify per-vertex size aligns with texture size. */
+ const GPUVertFormat *format = GPU_vertbuf_get_format(vbo);
+ BLI_assert(bytes_per_pixel == format->stride &&
+ "Pixel format stride MUST match the texture format stride -- These being different "
+ "is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex");
+ UNUSED_VARS_NDEBUG(format);
+
+ /* Create texture descriptor. */
+ BLI_assert(type_ == GPU_TEXTURE_BUFFER);
+ texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
+ texture_descriptor_.pixelFormat = mtl_format;
+ texture_descriptor_.textureType = MTLTextureTypeTextureBuffer;
+ texture_descriptor_.width = w_;
+ texture_descriptor_.height = 1;
+ texture_descriptor_.depth = 1;
+ texture_descriptor_.arrayLength = 1;
+ texture_descriptor_.mipmapLevelCount = mtl_max_mips_;
+ texture_descriptor_.usage =
+ MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite |
+ MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. */
+ texture_descriptor_.storageMode = [source_buffer storageMode];
+ texture_descriptor_.sampleCount = 1;
+ texture_descriptor_.cpuCacheMode = [source_buffer cpuCacheMode];
+ texture_descriptor_.hazardTrackingMode = [source_buffer hazardTrackingMode];
+
+ texture_ = [source_buffer
+ newTextureWithDescriptor:texture_descriptor_
+ offset:0
+ bytesPerRow:ceil_to_multiple_u(bytes_per_row, align_requirement)];
+ aligned_w_ = bytes_per_row / bytes_per_pixel;
+
+ BLI_assert(texture_);
+ texture_.label = [NSString stringWithUTF8String:this->get_name()];
+ is_baked_ = true;
+ is_dirty_ = false;
+ resource_mode_ = MTL_TEXTURE_MODE_VBO;
- /* TODO(Metal): Add implementation for GPU Vert buf. */
- return false;
+ /* Track Status. */
+ vert_buffer_ = mtl_vbo;
+ vert_buffer_mtl_ = source_buffer;
+ /* Cleanup. */
+ [texture_descriptor_ release];
+ texture_descriptor_ = nullptr;
+
+ return true;
}
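+
+/* Sketch of how a VBO-backed texture reaches the path above from the
+ * high-level API (GPU_texture.h). Illustrative only; `vbo` is assumed to be a
+ * filled GPUVertBuf whose per-vertex stride matches the texel size:
+ *
+ *   GPUTexture *tex = GPU_texture_create_from_vertbuf("vbo_tex", vbo);
+ *
+ * This wraps the vertex buffer's MTLBuffer as a MTLTextureTypeTextureBuffer
+ * texture (resource_mode_ = MTL_TEXTURE_MODE_VBO) without copying the data. */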
bool gpu::MTLTexture::init_internal(const GPUTexture *src, int mip_offset, int layer_offset)
@@ -1494,7 +1580,6 @@ bool gpu::MTLTexture::texture_is_baked()
/* Prepare texture parameters after initialization, but before baking. */
void gpu::MTLTexture::prepare_internal()
{
-
/* Derive implicit usage flags for Depth/Stencil attachments. */
if (format_flag_ & GPU_FORMAT_DEPTH || format_flag_ & GPU_FORMAT_STENCIL) {
gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_ATTACHMENT;
@@ -1659,7 +1744,7 @@ void gpu::MTLTexture::ensure_baked()
/* Determine Resource Mode. */
resource_mode_ = MTL_TEXTURE_MODE_DEFAULT;
- /* Create texture. */
+ /* Standard texture allocation. */
texture_ = [ctx->device newTextureWithDescriptor:texture_descriptor_];
[texture_descriptor_ release];
diff --git a/source/blender/gpu/metal/mtl_texture_util.mm b/source/blender/gpu/metal/mtl_texture_util.mm
index 928393fb39e..33a62e2e3ef 100644
--- a/source/blender/gpu/metal/mtl_texture_util.mm
+++ b/source/blender/gpu/metal/mtl_texture_util.mm
@@ -22,13 +22,7 @@
/* Utility file for secondary functionality which supports mtl_texture.mm. */
extern char datatoc_compute_texture_update_msl[];
-extern char datatoc_depth_2d_update_vert_glsl[];
-extern char datatoc_depth_2d_update_float_frag_glsl[];
-extern char datatoc_depth_2d_update_int24_frag_glsl[];
-extern char datatoc_depth_2d_update_int32_frag_glsl[];
extern char datatoc_compute_texture_read_msl[];
-extern char datatoc_gpu_shader_fullscreen_blit_vert_glsl[];
-extern char datatoc_gpu_shader_fullscreen_blit_frag_glsl[];
namespace blender::gpu {
@@ -40,7 +34,7 @@ MTLPixelFormat gpu_texture_format_to_metal(eGPUTextureFormat tex_format)
{
switch (tex_format) {
- /* Formats texture & renderbuffer. */
+ /* Formats texture & render-buffer. */
case GPU_RGBA8UI:
return MTLPixelFormatRGBA8Uint;
case GPU_RGBA8I:
@@ -447,42 +441,34 @@ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get(
return *result;
}
- const char *fragment_source = nullptr;
+ const char *depth_2d_info_variant = nullptr;
switch (specialization.data_mode) {
case MTL_DEPTH_UPDATE_MODE_FLOAT:
- fragment_source = datatoc_depth_2d_update_float_frag_glsl;
+ depth_2d_info_variant = "depth_2d_update_float";
break;
case MTL_DEPTH_UPDATE_MODE_INT24:
- fragment_source = datatoc_depth_2d_update_int24_frag_glsl;
+ depth_2d_info_variant = "depth_2d_update_int24";
break;
case MTL_DEPTH_UPDATE_MODE_INT32:
- fragment_source = datatoc_depth_2d_update_int32_frag_glsl;
+ depth_2d_info_variant = "depth_2d_update_int32";
break;
default:
BLI_assert(false && "Invalid format mode\n");
return nullptr;
}
- GPUShader *shader = GPU_shader_create(datatoc_depth_2d_update_vert_glsl,
- fragment_source,
- nullptr,
- nullptr,
- nullptr,
- "depth_2d_update_sh_get");
+ GPUShader *shader = GPU_shader_create_from_info_name(depth_2d_info_variant);
mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialization, shader);
return shader;
}
GPUShader *gpu::MTLTexture::fullscreen_blit_sh_get()
{
-
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
if (mtl_context->get_texture_utils().fullscreen_blit_shader == nullptr) {
- const char *vertex_source = datatoc_gpu_shader_fullscreen_blit_vert_glsl;
- const char *fragment_source = datatoc_gpu_shader_fullscreen_blit_frag_glsl;
- GPUShader *shader = GPU_shader_create(
- vertex_source, fragment_source, nullptr, nullptr, nullptr, "fullscreen_blit");
+ GPUShader *shader = GPU_shader_create_from_info_name("fullscreen_blit");
+
mtl_context->get_texture_utils().fullscreen_blit_shader = shader;
}
return mtl_context->get_texture_utils().fullscreen_blit_shader;
@@ -614,7 +600,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
stringWithUTF8String:datatoc_compute_texture_read_msl];
/* Defensive Debug Checks. */
- long long int depth_scale_factor = 1;
+ int64_t depth_scale_factor = 1;
if (specialization_params.depth_format_mode > 0) {
BLI_assert(specialization_params.component_count_input == 1);
BLI_assert(specialization_params.component_count_output == 1);
diff --git a/source/blender/gpu/metal/mtl_vertex_buffer.hh b/source/blender/gpu/metal/mtl_vertex_buffer.hh
new file mode 100644
index 00000000000..2cc8b0a9636
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_vertex_buffer.hh
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#pragma once
+
+#include <Cocoa/Cocoa.h>
+#include <Metal/Metal.h>
+#include <QuartzCore/QuartzCore.h>
+
+#include "MEM_guardedalloc.h"
+
+#include "GPU_vertex_buffer.h"
+#include "gpu_vertex_buffer_private.hh"
+#include "mtl_context.hh"
+
+namespace blender::gpu {
+
+class MTLVertBuf : public VertBuf {
+ friend class gpu::MTLTexture; /* For buffer texture. */
+ friend class MTLShader; /* For transform feedback. */
+ friend class MTLBatch;
+ friend class MTLContext; /* For transform feedback. */
+
+ private:
+ /** Metal buffer allocation. */
+ gpu::MTLBuffer *vbo_ = nullptr;
+ /** Texture used if the buffer is bound as buffer texture. Init on first use. */
+ struct ::GPUTexture *buffer_texture_ = nullptr;
+ /** Defines whether the buffer handle is wrapped by this MTLVertBuf, i.e. we do not own it and
+ * should not free it. */
+ bool is_wrapper_ = false;
+ /** Requested allocation size for Metal buffer.
+ * Differs from raw buffer size as alignment is not included. */
+ uint64_t alloc_size_ = 0;
+ /** Whether existing allocation has been submitted for use by the GPU. */
+ bool contents_in_flight_ = false;
+
+ /* Fetch Metal buffer and offset into allocation if necessary.
+ * Access limited to friend classes. */
+ id<MTLBuffer> get_metal_buffer()
+ {
+ vbo_->debug_ensure_used();
+ return vbo_->get_metal_buffer();
+ }
+
+ public:
+ MTLVertBuf();
+ ~MTLVertBuf();
+
+ void bind();
+ void flag_used();
+
+ void update_sub(uint start, uint len, const void *data) override;
+
+ const void *read() const override;
+ void *unmap(const void *mapped_data) const override;
+
+ void wrap_handle(uint64_t handle) override;
+
+ protected:
+ void acquire_data() override;
+ void resize_data() override;
+ void release_data() override;
+ void upload_data() override;
+ void duplicate_data(VertBuf *dst) override;
+ void bind_as_ssbo(uint binding) override;
+ void bind_as_texture(uint binding) override;
+
+ MEM_CXX_CLASS_ALLOC_FUNCS("MTLVertBuf");
+};
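+
+/* Example of the high-level vertex-buffer API (GPU_vertex_buffer.h) that this
+ * class implements for Metal. Illustrative sketch only, not part of this patch:
+ *
+ *   GPUVertFormat format = {0};
+ *   uint pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
+ *   GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format);
+ *   GPU_vertbuf_data_alloc(vbo, 3);
+ *   const float co[3] = {0.0f, 0.0f, 0.0f};
+ *   GPU_vertbuf_attr_set(vbo, pos, 0, co);
+ *   GPU_vertbuf_use(vbo);  // MTLVertBuf::bind() uploads the data to a MTLBuffer.
+ */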
+
+} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_vertex_buffer.mm b/source/blender/gpu/metal/mtl_vertex_buffer.mm
new file mode 100644
index 00000000000..1c7201ce5f9
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_vertex_buffer.mm
@@ -0,0 +1,368 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ */
+#include "mtl_vertex_buffer.hh"
+#include "mtl_debug.hh"
+
+namespace blender::gpu {
+
+MTLVertBuf::MTLVertBuf() : VertBuf()
+{
+}
+
+MTLVertBuf::~MTLVertBuf()
+{
+ this->release_data();
+}
+
+void MTLVertBuf::acquire_data()
+{
+ /* Discard previous data, if any. */
+ MEM_SAFE_FREE(data);
+ if (usage_ == GPU_USAGE_DEVICE_ONLY) {
+ data = nullptr;
+ }
+ else {
+ data = (uchar *)MEM_mallocN(sizeof(uchar) * this->size_alloc_get(), __func__);
+ }
+}
+
+void MTLVertBuf::resize_data()
+{
+ if (usage_ == GPU_USAGE_DEVICE_ONLY) {
+ data = nullptr;
+ }
+ else {
+ data = (uchar *)MEM_reallocN(data, sizeof(uchar) * this->size_alloc_get());
+ }
+}
+
+void MTLVertBuf::release_data()
+{
+ if (vbo_ != nullptr) {
+ vbo_->free();
+ vbo_ = nullptr;
+ is_wrapper_ = false;
+ }
+
+ GPU_TEXTURE_FREE_SAFE(buffer_texture_);
+
+ MEM_SAFE_FREE(data);
+}
+
+void MTLVertBuf::duplicate_data(VertBuf *dst_)
+{
+ BLI_assert(MTLContext::get() != NULL);
+ MTLVertBuf *src = this;
+ MTLVertBuf *dst = static_cast<MTLVertBuf *>(dst_);
+
+ /* Ensure buffer has been initialized. */
+ src->bind();
+
+ if (src->vbo_) {
+
+ /* Fetch active context. */
+ MTLContext *ctx = MTLContext::get();
+ BLI_assert(ctx);
+
+ /* Ensure destination does not have an active VBO. */
+ BLI_assert(dst->vbo_ == nullptr);
+
+ /* Allocate VBO for destination vertbuf. */
+ uint length = src->vbo_->get_size();
+ dst->vbo_ = MTLContext::get_global_memory_manager().allocate(
+ length, (dst->get_usage_type() != GPU_USAGE_DEVICE_ONLY));
+ dst->alloc_size_ = length;
+
+ /* Fetch Metal buffer handles. */
+ id<MTLBuffer> src_buffer = src->vbo_->get_metal_buffer();
+ id<MTLBuffer> dest_buffer = dst->vbo_->get_metal_buffer();
+
+ /* Use blit encoder to copy data to duplicate buffer allocation. */
+ id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
+ if (G.debug & G_DEBUG_GPU) {
+ [enc insertDebugSignpost:@"VertexBufferDuplicate"];
+ }
+ [enc copyFromBuffer:src_buffer
+ sourceOffset:0
+ toBuffer:dest_buffer
+ destinationOffset:0
+ size:length];
+
+ /* Flush results back to host buffer, if one exists. */
+ if (dest_buffer.storageMode == MTLStorageModeManaged) {
+ [enc synchronizeResource:dest_buffer];
+ }
+
+ if (G.debug & G_DEBUG_GPU) {
+ [enc insertDebugSignpost:@"VertexBufferDuplicateEnd"];
+ }
+
+ /* Mark as in-use, as contents are updated via GPU command. */
+ src->flag_used();
+ }
+
+ /* Copy raw CPU data. */
+ if (data != nullptr) {
+ dst->data = (uchar *)MEM_dupallocN(src->data);
+ }
+}
+
+void MTLVertBuf::upload_data()
+{
+ this->bind();
+}
+
+void MTLVertBuf::bind()
+{
+ /* Determine allocation size. Set the minimum allocation size to at least the
+ * maximal size of a single attribute, to avoid validation and correctness
+ * errors. */
+ uint64_t required_size_raw = sizeof(uchar) * this->size_used_get();
+ uint64_t required_size = max_ulul(required_size_raw, 128);
+
+ if (required_size_raw == 0) {
+ MTL_LOG_WARNING("Warning: Vertex buffer required_size = 0\n");
+ }
+
+ /* If the vertex buffer has already been allocated, but new data is ready,
+ * or the usage size has changed, we release the existing buffer and
+ * allocate a new buffer to ensure we do not overwrite in-use GPU resources.
+ *
+ * NOTE: We only need to free the existing allocation if contents have been
+ * submitted to the GPU. Otherwise we can simply upload new data to the
+ * existing buffer, if it will fit.
+ *
+ * NOTE: If a buffer is re-sized, but no new data is provided, the previous
+ * contents are copied into the newly allocated buffer. */
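+ /* Example: a vertbuf that grows after its contents were already uploaded
+ * triggers `requires_reallocation` below; the previous MTLBuffer is kept in
+ * `prev_vbo`, blit-copied into the new allocation, and then freed. */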
+ bool requires_reallocation = (vbo_ != nullptr) && (alloc_size_ != required_size);
+ bool new_data_ready = (this->flag & GPU_VERTBUF_DATA_DIRTY) && this->data;
+
+ gpu::MTLBuffer *prev_vbo = nullptr;
+ GPUVertBufStatus prev_flag = this->flag;
+
+ if (vbo_ != nullptr) {
+ if (requires_reallocation || (new_data_ready && contents_in_flight_)) {
+ /* Track previous VBO to copy data from. */
+ prev_vbo = vbo_;
+
+ /* Reset current allocation status. */
+ vbo_ = nullptr;
+ is_wrapper_ = false;
+ alloc_size_ = 0;
+
+ /* Flag as requiring data upload. */
+ if (requires_reallocation) {
+ this->flag &= ~GPU_VERTBUF_DATA_UPLOADED;
+ }
+ }
+ }
+
+ /* Create MTLBuffer of requested size. */
+ if (vbo_ == nullptr) {
+ vbo_ = MTLContext::get_global_memory_manager().allocate(
+ required_size, (this->get_usage_type() != GPU_USAGE_DEVICE_ONLY));
+ vbo_->set_label(@"Vertex Buffer");
+ BLI_assert(vbo_ != nullptr);
+ BLI_assert(vbo_->get_metal_buffer() != nil);
+
+ is_wrapper_ = false;
+ alloc_size_ = required_size;
+ contents_in_flight_ = false;
+ }
+
+ /* Upload new data, if provided. */
+ if (new_data_ready) {
+
+ /* Only upload data if usage size is greater than zero.
+ * Do not upload data for device-only buffers. */
+ if (required_size_raw > 0 && usage_ != GPU_USAGE_DEVICE_ONLY) {
+
+ /* Debug: Verify allocation is large enough. */
+ BLI_assert(vbo_->get_size() >= required_size_raw);
+
+ /* Fetch mapped buffer host ptr and upload data. */
+ void *dst_data = vbo_->get_host_ptr();
+ memcpy((uint8_t *)dst_data, this->data, required_size_raw);
+ vbo_->flush_range(0, required_size_raw);
+ }
+
+ /* If static usage, free host-side data. */
+ if (usage_ == GPU_USAGE_STATIC) {
+ MEM_SAFE_FREE(data);
+ }
+
+ /* Flag data as having been uploaded. */
+ this->flag &= ~GPU_VERTBUF_DATA_DIRTY;
+ this->flag |= GPU_VERTBUF_DATA_UPLOADED;
+ }
+ else if (requires_reallocation) {
+
+ /* If the buffer has been re-sized, copy the existing contents over,
+ * provided the host data was previously uploaded. */
+ BLI_assert(prev_vbo != nullptr);
+
+ if (prev_flag & GPU_VERTBUF_DATA_UPLOADED) {
+
+ /* Fetch active context. */
+ MTLContext *ctx = MTLContext::get();
+ BLI_assert(ctx);
+
+ id<MTLBuffer> copy_prev_buffer = prev_vbo->get_metal_buffer();
+ id<MTLBuffer> copy_new_buffer = vbo_->get_metal_buffer();
+ BLI_assert(copy_prev_buffer != nil);
+ BLI_assert(copy_new_buffer != nil);
+
+ /* Ensure a blit command encoder is active for buffer copy operation. */
+ id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
+ [enc copyFromBuffer:copy_prev_buffer
+ sourceOffset:0
+ toBuffer:copy_new_buffer
+ destinationOffset:0
+ size:min_ulul([copy_new_buffer length], [copy_prev_buffer length])];
+
+ /* Flush newly copied data back to host-side buffer, if one exists.
+ * Ensures data and cache coherency for managed MTLBuffers. */
+ if (copy_new_buffer.storageMode == MTLStorageModeManaged) {
+ [enc synchronizeResource:copy_new_buffer];
+ }
+
+ /* For VBOs flagged as static, release host data as it will no longer be needed. */
+ if (usage_ == GPU_USAGE_STATIC) {
+ MEM_SAFE_FREE(data);
+ }
+
+ /* Flag data as uploaded. */
+ this->flag |= GPU_VERTBUF_DATA_UPLOADED;
+
+ /* Flag as in-use, as contents have been updated via GPU commands. */
+ this->flag_used();
+ }
+ }
+
+ /* Release previous buffer if re-allocated. */
+ if (prev_vbo != nullptr) {
+ prev_vbo->free();
+ }
+
+ /* Ensure buffer has been created. */
+ BLI_assert(vbo_ != nullptr);
+}
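+
+/* Illustrative usage (sketch only, via the generic GPU vertex-buffer API; the
+ * variable names are placeholders). Allocation and the upload above happen
+ * lazily when the buffer is first bound for drawing:
+ *
+ * GPUVertFormat format = {0};
+ * uint pos = GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 3, GPU_FETCH_FLOAT);
+ * GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format);
+ * GPU_vertbuf_data_alloc(vbo, vertex_len);
+ * GPU_vertbuf_attr_fill(vbo, pos, positions);
+ * (Drawing a batch that references `vbo` triggers `MTLVertBuf::bind()`.)
+ */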
+
+/* Update a sub-range of the buffer. Currently only used by hair. */
+void MTLVertBuf::update_sub(uint start, uint len, const void *data)
+{
+ /* Fetch and verify active context. */
+ MTLContext *ctx = reinterpret_cast<MTLContext *>(unwrap(GPU_context_active_get()));
+ BLI_assert(ctx);
+ BLI_assert(ctx->device);
+
+ /* Ensure vertbuf has been created. */
+ this->bind();
+ BLI_assert(start + len <= alloc_size_);
+
+ /* Create temporary scratch buffer allocation for sub-range of data. */
+ MTLTemporaryBuffer scratch_allocation =
+ ctx->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(len, 256);
+ memcpy(scratch_allocation.data, data, len);
+ [scratch_allocation.metal_buffer
+ didModifyRange:NSMakeRange(scratch_allocation.buffer_offset, len)];
+ id<MTLBuffer> data_buffer = scratch_allocation.metal_buffer;
+ uint data_buffer_offset = scratch_allocation.buffer_offset;
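+ /* NOTE: The data is first written to a CPU-visible scratch allocation;
+ * `didModifyRange` marks the written range so Metal picks up the CPU-side
+ * modification (as required for managed storage), and the blit below copies
+ * it into the destination buffer on the GPU timeline, keeping the update
+ * ordered with previously encoded work. */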
+
+ BLI_assert(vbo_ != nullptr && data != nullptr);
+ BLI_assert((start + len) <= vbo_->get_size());
+
+ /* Fetch destination buffer. */
+ id<MTLBuffer> dst_buffer = vbo_->get_metal_buffer();
+
+ /* Ensure blit command encoder for copying data. */
+ id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
+ [enc copyFromBuffer:data_buffer
+ sourceOffset:data_buffer_offset
+ toBuffer:dst_buffer
+ destinationOffset:start
+ size:len];
+
+ /* Flush modified buffer back to host buffer, if one exists. */
+ if (dst_buffer.storageMode == MTLStorageModeManaged) {
+ [enc synchronizeResource:dst_buffer];
+ }
+}
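+
+/* Illustrative call (sketch; assumes the generic `GPU_vertbuf_update_sub()` entry
+ * point dispatches to the implementation above):
+ *
+ * GPU_vertbuf_update_sub(vbo, byte_offset, byte_len, cpu_data);
+ */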
+
+void MTLVertBuf::bind_as_ssbo(uint binding)
+{
+ /* TODO(Metal): Support binding of buffers as SSBOs.
+ * Pending overall compute support for Metal backend. */
+ MTL_LOG_WARNING("MTLVertBuf::bind_as_ssbo not yet implemented!\n");
+ this->flag_used();
+}
+
+void MTLVertBuf::bind_as_texture(uint binding)
+{
+ /* Ensure allocations are ready, and data uploaded. */
+ this->bind();
+ BLI_assert(vbo_ != nullptr);
+
+ /* If vertex buffer updated, release existing texture and re-create. */
+ id<MTLBuffer> buf = this->get_metal_buffer();
+ if (buffer_texture_ != nullptr) {
+ gpu::MTLTexture *mtl_buffer_tex = static_cast<gpu::MTLTexture *>(
+ unwrap(this->buffer_texture_));
+ id<MTLBuffer> tex_buf = mtl_buffer_tex->get_vertex_buffer();
+ if (tex_buf != buf) {
+ GPU_TEXTURE_FREE_SAFE(buffer_texture_);
+ buffer_texture_ = nullptr;
+ }
+ }
+
+ /* Create texture from vertex buffer. */
+ if (buffer_texture_ == nullptr) {
+ buffer_texture_ = GPU_texture_create_from_vertbuf("vertbuf_as_texture", wrap(this));
+ }
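+ /* NOTE: The buffer-backed texture is cached on this vertex buffer; it is only
+ * released and re-created (above) when the underlying Metal buffer has changed,
+ * e.g. after a re-allocation in `bind()`. */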
+
+ /* Verify successful creation and bind. */
+ BLI_assert(buffer_texture_ != nullptr);
+ GPU_texture_bind(buffer_texture_, binding);
+}
+
+const void *MTLVertBuf::read() const
+{
+ BLI_assert(vbo_ != nullptr);
+ BLI_assert(usage_ != GPU_USAGE_DEVICE_ONLY);
+ void *return_ptr = vbo_->get_host_ptr();
+ BLI_assert(return_ptr != nullptr);
+
+ return return_ptr;
+}
+
+void *MTLVertBuf::unmap(const void *mapped_data) const
+{
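+ /* Copy the mapped contents into a fresh heap allocation, so the returned
+ * pointer stays valid independently of the GPU buffer and is owned by the
+ * caller. */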
+ void *result = MEM_mallocN(alloc_size_, __func__);
+ memcpy(result, mapped_data, alloc_size_);
+ return result;
+}
+
+void MTLVertBuf::wrap_handle(uint64_t handle)
+{
+ BLI_assert(vbo_ == nullptr);
+
+ /* Cast the external handle to a Metal buffer and wrap it. */
+ BLI_assert(handle != 0);
+ id<MTLBuffer> buffer = reinterpret_cast<id<MTLBuffer>>((void *)handle);
+
+ is_wrapper_ = true;
+ vbo_ = new gpu::MTLBuffer(buffer);
+
+ /* We assume the data is already on the device, so no need to allocate or send it. */
+ flag = GPU_VERTBUF_DATA_UPLOADED;
+}
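+
+/* Illustrative usage (hypothetical sketch, non-ARC Objective-C++; `external_buffer`
+ * is an `id<MTLBuffer>` created and owned by the caller):
+ *
+ * vert_buf->wrap_handle((uint64_t)external_buffer);
+ */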
+
+void MTLVertBuf::flag_used()
+{
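+ /* The current contents may be referenced by GPU commands that have been encoded
+ * but not yet completed; `bind()` checks this to re-allocate rather than
+ * overwrite a live buffer when new data is uploaded. */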
+ contents_in_flight_ = true;
+}
+
+} // namespace blender::gpu