Diffstat (limited to 'source/blender/gpu/metal/mtl_memory.hh'):
 source/blender/gpu/metal/mtl_memory.hh | 482 ++++++++++++++++++++++++++++++++
 1 file changed, 482 insertions(+), 0 deletions(-)
diff --git a/source/blender/gpu/metal/mtl_memory.hh b/source/blender/gpu/metal/mtl_memory.hh
new file mode 100644
index 00000000000..df80df6543f
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_memory.hh
@@ -0,0 +1,482 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#include <atomic>
+#include <functional>
+#include <map>
+#include <mutex>
+#include <set>
+#include <unordered_map>
+
+#include "mtl_common.hh"
+
+#include <Cocoa/Cocoa.h>
+#include <Metal/Metal.h>
+#include <QuartzCore/QuartzCore.h>
+
+@class CAMetalLayer;
+@class MTLCommandQueue;
+@class MTLRenderPipelineState;
+
+/* Metal Memory Manager Overview. */
+/*
+ * The Metal Backend Memory manager is designed to provide an interface
+ * for all other MTL_* modules where memory allocation is required.
+ *
+ * Different allocation strategies and data-structures are used depending
+ * on how the data is used by the backend. These aim to optimally handle
+ * system memory and abstract away any complexity from the MTL_* modules
+ * themselves.
+ *
+ * There are two primary allocation modes which can be used:
+ *
+ * ** MTLScratchBufferManager **
+ *
+ * Each MTLContext owns a ScratchBufferManager which is implemented
+ * as a pool of circular buffers, designed to handle temporary
+ * memory allocations which occur on a per-frame basis. The scratch
+ * buffers allow flushing of host memory to the GPU to be batched.
+ *
+ * Each frame, the next scratch buffer is reset, then later flushed upon
+ * command buffer submission.
+ *
+ * NOTE: This is allocated per-context, as allocations are tied to the context's
+ * own workload submissions.
+ *
+ * Examples of scratch buffer usage are:
+ * - Immediate-mode temporary vertex buffers.
+ * - Shader uniform data updates.
+ * - Staging of data for resource copies, or for data reads/writes.
+ *
+ * Usage:
+ *
+ * MTLContext::get_scratchbuffer_manager() - to fetch active manager.
+ *
+ * MTLTemporaryBuffer scratch_buffer_allocate_range(size)
+ * MTLTemporaryBuffer scratch_buffer_allocate_range_aligned(size, align)
+ *
+ * ---------------------------------------------------------------------------------
+ * ** MTLBufferPool **
+ *
+ * For static and longer-lasting memory allocations, such as those for UBOs,
+ * vertex buffers, index buffers, etc., we want an optimal abstraction for
+ * fetching an MTLBuffer of the desired size and resource options.
+ *
+ * Memory allocations can be expensive, so the MTLBufferPool provides
+ * functionality to track usage of these buffers; once a buffer
+ * is no longer in use, it is returned to the buffer pool for re-use
+ * by another backend resource.
+ *
+ * The MTLBufferPool provides functionality for safe tracking of resources,
+ * as buffers freed on the host side must have their usage by the GPU tracked,
+ * to ensure they are not prematurely re-used before they have finished being
+ * used by the GPU.
+ *
+ * NOTE: The MTLBufferPool is a global construct which can be fetched from anywhere.
+ *
+ * Usage:
+ * MTLContext::get_global_memory_manager() - static routine to fetch the global memory manager.
+ *
+ * gpu::MTLBuffer *allocate(size, is_cpu_visible)
+ * gpu::MTLBuffer *allocate_aligned(size, alignment, is_cpu_visible)
+ * gpu::MTLBuffer *allocate_with_data(size, is_cpu_visible, data_ptr)
+ * gpu::MTLBuffer *allocate_aligned_with_data(size, alignment, is_cpu_visible, data_ptr)
+ */
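+
+/* Illustrative usage sketch for both allocation modes. This is a hedged example rather
+ * than code from the backend itself: `ctx`, `bytes` and `src` are placeholder names, and
+ * the exact return types of the accessor routines are assumed.
+ *
+ *   Per-frame scratch allocation:
+ *     MTLScratchBufferManager &scratch = ctx->get_scratchbuffer_manager();
+ *     MTLTemporaryBuffer tmp = scratch.scratch_buffer_allocate_range_aligned(bytes, 256);
+ *     memcpy(tmp.data, src, bytes);
+ *     // Bind tmp.metal_buffer at tmp.buffer_offset; contents are flushed on submission.
+ *
+ *   Pooled, longer-lived allocation:
+ *     gpu::MTLBuffer *buf =
+ *         MTLContext::get_global_memory_manager()->allocate_with_data(bytes, true, src);
+ *     // ... use the buffer for GPU work ...
+ *     buf->free();  // Returned to the pool once pending GPU work completes.
+ */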
+
+/* Debug memory statistics: controlled by a compile-time macro rather than a runtime
+ * guard, for performance reasons. */
+#define MTL_DEBUG_MEMORY_STATISTICS 0
+
+/* Allows a scratch buffer to temporarily grow beyond its maximum size, which allows submission
+ * of one-time-use data packets that would otherwise be too large to fit. */
+#define MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION 1
+
+namespace blender::gpu {
+
+/* Forward Declarations. */
+class MTLContext;
+class MTLCommandBufferManager;
+class MTLUniformBuf;
+
+/* -------------------------------------------------------------------- */
+/** \name Memory Management.
+ * \{ */
+
+/* MTLBuffer allocation wrapper. */
+class MTLBuffer {
+
+ private:
+ /* Metal resource. */
+ id<MTLBuffer> metal_buffer_;
+
+ /* Host-visible mapped-memory pointer. Behavior depends on buffer type:
+ * - Shared buffers: pointer is the base address of the #MTLBuffer, whose data
+ * is directly accessible by both the CPU and GPU on
+ * Unified Memory Architectures (UMA).
+ * - Managed buffer: Host-side mapped buffer region for CPU (Host) access. Managed buffers
+ * must be manually flushed to transfer data to GPU-resident buffer.
+ * - Private buffer: Host access is invalid, `data` will be nullptr. */
+ void *data_;
+
+ /* Whether buffer is allocated from an external source. */
+ bool is_external_ = false;
+
+ /* Allocation info. */
+ MTLResourceOptions options_;
+ id<MTLDevice> device_;
+ uint64_t alignment_;
+ uint64_t size_;
+
+ /* Size actually used, which may be smaller than the allocated size. */
+ uint64_t usage_size_;
+
+ /* Lifetime info - whether the current buffer is actively in use. A buffer
+ * should be in use after it has been allocated. De-allocating the buffer and
+ * returning it to the free buffer pool will set in_use to false. Using a buffer
+ * while it is not in use is invalid and will result in an error. */
+ std::atomic<bool> in_use_;
+
+ public:
+ MTLBuffer(id<MTLDevice> device, uint64_t size, MTLResourceOptions options, uint alignment = 1);
+ MTLBuffer(id<MTLBuffer> external_buffer);
+ ~MTLBuffer();
+
+ /* Fetch information about backing MTLBuffer. */
+ id<MTLBuffer> get_metal_buffer() const;
+ void *get_host_ptr() const;
+ uint64_t get_size_used() const;
+ uint64_t get_size() const;
+
+ /* Flush data to GPU. */
+ void flush();
+ void flush_range(uint64_t offset, uint64_t length);
+ bool requires_flush();
+
+ /* Buffer usage tracking. */
+ void flag_in_use(bool used);
+ bool get_in_use();
+ void set_usage_size(uint64_t size_used);
+
+ /* Debug. */
+ void set_label(NSString *str);
+
+ /* Read properties. */
+ MTLResourceOptions get_resource_options();
+ uint64_t get_alignment();
+
+ /* Resource-local free: For buffers allocated via memory manager,
+ * this will call the context `free_buffer` method to return the buffer to the context memory
+ * pool.
+ *
+ * Otherwise, free will release the associated metal resource.
+ * Note that calling the destructor will also destroy the buffer and its associated Metal
+ * resource. */
+ void free();
+
+ /* Safety check to ensure buffers are not used after free. */
+ void debug_ensure_used();
+};
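+
+/* Hedged example of the intended call pattern for a host-visible MTLBuffer, using only the
+ * methods declared above (`pool`, `src` and `bytes` are placeholders). For managed (non-shared)
+ * buffers, a CPU-side write must be followed by an explicit flush before GPU use:
+ *
+ *   gpu::MTLBuffer *buf = pool.allocate(bytes, true);
+ *   memcpy(buf->get_host_ptr(), src, bytes);
+ *   buf->set_usage_size(bytes);
+ *   if (buf->requires_flush()) {
+ *     buf->flush_range(0, bytes);
+ *   }
+ *   // ... bind buf->get_metal_buffer() for GPU work ...
+ *   buf->free();  // Returns the buffer to the pool, or releases it if externally owned.
+ */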
+
+/* View into part of an MTLBuffer. */
+struct MTLBufferRange {
+ id<MTLBuffer> metal_buffer;
+ void *data;
+ uint64_t buffer_offset;
+ uint64_t size;
+ MTLResourceOptions options;
+
+ void flush();
+ bool requires_flush();
+};
+
+/* Circular scratch buffer allocations should be seen as temporary and only used within the
+ * lifetime of the frame. */
+using MTLTemporaryBuffer = MTLBufferRange;
+
+/* Round-Robin Circular-buffer. */
+class MTLCircularBuffer {
+ friend class MTLScratchBufferManager;
+
+ private:
+ MTLContext &own_context_;
+
+ /* Wrapped MTLBuffer allocation handle. */
+ gpu::MTLBuffer *cbuffer_;
+
+ /* Current offset where next allocation will begin. */
+ uint64_t current_offset_;
+
+ /* Whether the Circular Buffer can grow during re-allocation if
+ * the size is exceeded. */
+ bool can_resize_;
+
+ /* Usage information. */
+ uint64_t used_frame_index_;
+ uint64_t last_flush_base_offset_;
+
+ public:
+ MTLCircularBuffer(MTLContext &ctx, uint64_t initial_size, bool allow_grow);
+ ~MTLCircularBuffer();
+ MTLTemporaryBuffer allocate_range(uint64_t alloc_size);
+ MTLTemporaryBuffer allocate_range_aligned(uint64_t alloc_size, uint alignment);
+ void flush();
+
+ /* Reset pointer back to start of circular buffer. */
+ void reset();
+};
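+
+/* Simplified sketch of the bookkeeping `allocate_range_aligned` implies, given the members
+ * above. This is an assumption for illustration only; the real implementation in mtl_memory.mm
+ * additionally handles buffer growth (`can_resize_`), wrap-around and flush tracking:
+ *
+ *   uint64_t aligned_offset = ((current_offset_ + alignment - 1) / alignment) * alignment;
+ *   MTLTemporaryBuffer range;
+ *   range.metal_buffer = cbuffer_->get_metal_buffer();
+ *   range.data = static_cast<uint8_t *>(cbuffer_->get_host_ptr()) + aligned_offset;
+ *   range.buffer_offset = aligned_offset;
+ *   range.size = alloc_size;
+ *   range.options = cbuffer_->get_resource_options();
+ *   current_offset_ = aligned_offset + alloc_size;
+ *   return range;
+ */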
+
+/* Wrapper struct used by Memory Manager to sort and compare gpu::MTLBuffer resources inside the
+ * memory pools. */
+struct MTLBufferHandle {
+ gpu::MTLBuffer *buffer;
+ uint64_t buffer_size;
+
+ inline MTLBufferHandle(gpu::MTLBuffer *buf)
+ {
+ this->buffer = buf;
+ this->buffer_size = this->buffer->get_size();
+ }
+
+ inline MTLBufferHandle(uint64_t compare_size)
+ {
+ this->buffer = nullptr;
+ this->buffer_size = compare_size;
+ }
+};
+
+struct CompareMTLBuffer {
+ bool operator()(const MTLBufferHandle &lhs, const MTLBufferHandle &rhs) const
+ {
+ return lhs.buffer_size < rhs.buffer_size;
+ }
+};
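+
+/* The size-only MTLBufferHandle constructor exists so a dummy handle can act as a search key in
+ * a size-ordered multiset. A hedged sketch of the lookup this enables (the pool's actual
+ * selection logic also applies the size-threshold factor described further below):
+ *
+ *   std::multiset<MTLBufferHandle, CompareMTLBuffer> pool;
+ *   // ... pool populated with free buffers ...
+ *   auto it = pool.lower_bound(MTLBufferHandle(requested_size));
+ *   // `it` now refers to the smallest cached buffer whose size is >= requested_size.
+ */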
+
+/* An MTLSafeFreeList is a temporary list of gpu::MTLBuffers which have
+ * been freed by the high level backend, but are pending GPU work execution before
+ * the gpu::MTLBuffers can be returned to the Memory manager pools.
+ * This list is implemented as a chunked linked-list.
+ *
+ * Only a single MTLSafeFreeList is active at one time and is associated with current command
+ * buffer submissions. If an MTLBuffer is freed during the lifetime of a command buffer, it could
+ * still possibly be in-use and as such, the MTLSafeFreeList will increment its reference count for
+ * each command buffer submitted while the current pool is active.
+ *
+ * -- Reference count is incremented upon MTLCommandBuffer commit.
+ * -- Reference count is decremented in the MTLCommandBuffer completion callback handler.
+ *
+ * A new MTLSafeFreeList will begin each render step (frame). This pooling of buffers, rather
+ * than individual buffer resource tracking, reduces performance overhead.
+ *
+ * * The reference count starts at 1 to ensure it cannot prematurely reach zero before the
+ * associated command buffers have been submitted. The matching decrement happens when the
+ * next MTLSafeFreeList is created, allowing the existing pool to be released once the
+ * reference count hits zero after the submitted command buffers complete.
+ *
+ * NOTE: the Metal API independently tracks resources used by command buffers for the purpose of
+ * keeping resources alive while in-use by the driver and CPU, however, this differs from the
+ * MTLSafeFreeList mechanism in the Metal backend, which exists for the purpose of allowing
+ * previously allocated MTLBuffer resources to be re-used. This allows us to save on the expensive
+ * cost of memory allocation.
+ */
+class MTLSafeFreeList {
+ friend class MTLBufferPool;
+
+ private:
+ std::atomic<int> reference_count_;
+ std::atomic<bool> in_free_queue_;
+ std::recursive_mutex lock_;
+
+ /* Link to the next MTLSafeFreeList chunk, used when the current chunk is full. */
+ std::atomic<int> has_next_pool_;
+ std::atomic<MTLSafeFreeList *> next_;
+
+ /* Lock-less list. MAX_NUM_BUFFERS_ buffers are stored per chunk, a value chosen
+ * as a balance between performance and memory use. */
+ static const int MAX_NUM_BUFFERS_ = 1024;
+ std::atomic<int> current_list_index_;
+ gpu::MTLBuffer *safe_free_pool_[MAX_NUM_BUFFERS_];
+
+ public:
+ MTLSafeFreeList();
+
+ /* Add buffer to Safe Free List, can be called from secondary threads.
+ * Performs a lockless list insert. */
+ void insert_buffer(gpu::MTLBuffer *buffer);
+
+ /* Increments command buffer reference count. */
+ void increment_reference();
+
+ /* Decrementing the reference count and returning buffers to the pool occur on the
+ * MTLCommandBuffer completion callback thread. */
+ void decrement_reference();
+
+ void flag_in_queue()
+ {
+ in_free_queue_ = true;
+ if (has_next_pool_) {
+ MTLSafeFreeList *next_pool = next_.load();
+ BLI_assert(next_pool != nullptr);
+ next_pool->flag_in_queue();
+ }
+ }
+};
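+
+/* Hedged sketch of the lock-less insert pattern the members above support. This is an
+ * assumed illustration, not the actual body of `insert_buffer` in mtl_memory.mm, which may
+ * differ (e.g. in how the overflow chunk is created under `lock_`):
+ *
+ *   int index = current_list_index_.fetch_add(1);
+ *   if (index < MAX_NUM_BUFFERS_) {
+ *     safe_free_pool_[index] = buffer;
+ *   }
+ *   else {
+ *     // Chunk is full: create the next chunk if needed and insert there.
+ *     std::lock_guard<std::recursive_mutex> lock(lock_);
+ *     if (!has_next_pool_) {
+ *       next_ = new MTLSafeFreeList();
+ *       has_next_pool_ = 1;
+ *     }
+ *     next_.load()->insert_buffer(buffer);
+ *   }
+ */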
+
+/* MTLBuffer pools. */
+/* Allocating Metal buffers is expensive, so we cache all allocated buffers,
+ * and when requesting a new buffer, find one which fits the required dimensions
+ * from an existing pool of buffers.
+ *
+ * When freeing MTLBuffers, we insert them into the current MTLSafeFreeList, which defers
+ * release of the buffer until the associated command buffers have finished executing.
+ * This prevents a buffer from being re-used while it is still in-use by the GPU.
+ *
+ * * Once command buffers complete, the MTLSafeFreeLists associated with that
+ * command buffer submission are added to the `completed_safelist_queue_`.
+ *
+ * * At a set point in time, all MTLSafeFreeLists in `completed_safelist_queue_` have their
+ * MTLBuffers re-inserted into the Memory Manager's pools. */
+class MTLBufferPool {
+
+ private:
+ /* Memory statistics. */
+ long long int total_allocation_bytes_ = 0;
+
+#if MTL_DEBUG_MEMORY_STATISTICS == 1
+ /* Debug statistics. */
+ std::atomic<int> per_frame_allocation_count_;
+ std::atomic<long long int> allocations_in_pool_;
+ std::atomic<long long int> buffers_in_pool_;
+#endif
+
+ /* Metal resources. */
+ bool ensure_initialised_ = false;
+ id<MTLDevice> device_ = nil;
+
+ /* The buffer selection aims to pick a buffer which meets the minimum size requirements.
+ * To do this, we keep an ordered set of all available buffers. If a candidate buffer is larger
+ * than the desired allocation size, we check it against `mtl_buffer_size_threshold_factor_`,
+ * which defines how much larger than the requested allocation the buffer may be.
+ * - A higher value results in greater re-use of previously allocated buffers of similar sizes.
+ * - A lower value may result in more dynamic allocations, but minimizes memory usage for a
+ * given scenario.
+ * The current value of 1.26 is calibrated for optimal performance and memory utilization. */
+ static constexpr float mtl_buffer_size_threshold_factor_ = 1.26;
+
+ /* Buffer pools using MTLResourceOptions as key for allocation type.
+ * Aliased as 'uint64_t' for map type compatibility.
+ * - A size-ordered list (MultiSet) of allocated buffers is kept per MTLResourceOptions
+ * permutation. This allows efficient lookup for buffers of a given requested size.
+ * - MTLBufferHandle wraps a gpu::MTLBuffer pointer to achieve easy size-based sorting
+ * via CompareMTLBuffer. */
+ using MTLBufferPoolOrderedList = std::multiset<MTLBufferHandle, CompareMTLBuffer>;
+ using MTLBufferResourceOptions = uint64_t;
+
+ blender::Map<MTLBufferResourceOptions, MTLBufferPoolOrderedList *> buffer_pools_;
+ blender::Vector<gpu::MTLBuffer *> allocations_;
+
+ /* Maintain a queue of all MTLSafeFreeLists whose associated command buffers have
+ * completed on the GPU and which are ready to have their buffers re-inserted into the
+ * MemoryManager pools.
+ * Access to this queue is made thread-safe through safelist_lock_. */
+ std::mutex safelist_lock_;
+ blender::Vector<MTLSafeFreeList *> completed_safelist_queue_;
+
+ /* Current free list, associated with the active MTLCommandBuffer submission.
+ * NOTE: MTLBuffer::free() can be called from separate threads, due to usage within the
+ * animation system and worker threads, hence the atomic pointer. */
+ std::atomic<MTLSafeFreeList *> current_free_list_;
+
+ public:
+ void init(id<MTLDevice> device);
+ ~MTLBufferPool();
+
+ gpu::MTLBuffer *allocate(uint64_t size, bool cpu_visible);
+ gpu::MTLBuffer *allocate_aligned(uint64_t size, uint alignment, bool cpu_visible);
+ gpu::MTLBuffer *allocate_with_data(uint64_t size, bool cpu_visible, const void *data = nullptr);
+ gpu::MTLBuffer *allocate_aligned_with_data(uint64_t size,
+ uint alignment,
+ bool cpu_visible,
+ const void *data = nullptr);
+ bool free_buffer(gpu::MTLBuffer *buffer);
+
+ /* Flush MTLSafeFreeList buffers, for completed lists in `completed_safelist_queue_`,
+ * back to memory pools. */
+ void update_memory_pools();
+
+ /* Access and control over active MTLSafeFreeList. */
+ MTLSafeFreeList *get_current_safe_list();
+ void begin_new_safe_list();
+
+ /* Add a completed MTLSafeFreeList to completed_safelist_queue_. */
+ void push_completed_safe_list(MTLSafeFreeList *list);
+
+ private:
+ void ensure_buffer_pool(MTLResourceOptions options);
+ void insert_buffer_into_pool(MTLResourceOptions options, gpu::MTLBuffer *buffer);
+ void free();
+};
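+
+/* Hedged sketch of the re-use decision driven by `mtl_buffer_size_threshold_factor_`, for
+ * illustration only (the real `allocate_aligned` also validates alignment and allocates a new
+ * gpu::MTLBuffer when no cached candidate qualifies):
+ *
+ *   MTLBufferPoolOrderedList &pool = ...;  // Pool matching the requested MTLResourceOptions.
+ *   auto it = pool.lower_bound(MTLBufferHandle(size));
+ *   if (it != pool.end() && it->buffer_size <= size * mtl_buffer_size_threshold_factor_) {
+ *     gpu::MTLBuffer *reused = it->buffer;  // Candidate is at most ~26% larger than requested.
+ *     pool.erase(it);
+ *     return reused;
+ *   }
+ *   // Otherwise fall back to a fresh allocation.
+ */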
+
+/* Scratch buffers are circular-buffers used for temporary data within the current frame.
+ * To preserve the integrity of contents when multiple frames are in flight,
+ * we cycle through a collection of scratch buffers which are reset upon next use.
+ *
+ * The properties below control scratch buffer behavior. If a scratch buffer
+ * overflows, the original buffer will be flushed and submitted, with references retained
+ * by its usage within the command buffer, and a new buffer will be created.
+ * - The new buffer will grow in size to account for the increased demand for temporary memory.
+ */
+class MTLScratchBufferManager {
+
+ private:
+ /* Maximum number of scratch buffers to allocate. This should be the maximum number of
+ * simultaneous frames in flight. */
+ static constexpr uint mtl_max_scratch_buffers_ = MTL_NUM_SAFE_FRAMES;
+
+ public:
+ /* Maximum size of single scratch buffer allocation. When re-sizing, this is the maximum size the
+ * newly allocated buffers will grow to. Larger allocations are possible if
+ * `MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION` is enabled, but these will instead allocate new
+ * buffers from the memory pools on the fly. */
+ static constexpr uint mtl_scratch_buffer_max_size_ = 128 * 1024 * 1024;
+
+ /* Initial size of circular scratch buffers prior to growth. */
+ static constexpr uint mtl_scratch_buffer_initial_size_ = 16 * 1024 * 1024;
+
+ private:
+ /* Parent MTLContext. */
+ MTLContext &context_;
+ bool initialised_ = false;
+
+ /* Scratch buffer currently in-use. */
+ uint current_scratch_buffer_ = 0;
+
+ /* Scratch buffer pool. */
+ MTLCircularBuffer *scratch_buffers_[mtl_max_scratch_buffers_];
+
+ public:
+ MTLScratchBufferManager(MTLContext &context) : context_(context) {}
+ ~MTLScratchBufferManager();
+
+ /* Explicit initialization and freeing of resources.
+ * Initialization must occur after device creation. */
+ void init();
+ void free();
+
+ /* Allocation functions for creating temporary allocations from active circular buffer. */
+ MTLTemporaryBuffer scratch_buffer_allocate_range(uint64_t alloc_size);
+ MTLTemporaryBuffer scratch_buffer_allocate_range_aligned(uint64_t alloc_size, uint alignment);
+
+ /* Ensure a new scratch buffer is started if we move onto a new frame.
+ * Called when a new command buffer begins. */
+ void ensure_increment_scratch_buffer();
+
+ /* Flush memory for active scratch buffer to GPU.
+ * This call will perform a partial flush of the buffer starting from
+ * the last offset the data was flushed from, to the current offset. */
+ void flush_active_scratch_buffer();
+};
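+
+/* Hedged sketch of the per-command-buffer flow this interface implies; the actual call sites
+ * live in the MTLContext and command buffer management code:
+ *
+ *   manager.ensure_increment_scratch_buffer();  // New command buffer: advance/reset as needed.
+ *   MTLTemporaryBuffer tmp = manager.scratch_buffer_allocate_range_aligned(bytes, 256);
+ *   memcpy(tmp.data, src, bytes);               // Stage host data into the scratch range.
+ *   // ... encode GPU work referencing tmp.metal_buffer / tmp.buffer_offset ...
+ *   manager.flush_active_scratch_buffer();      // On submission: flush written ranges to GPU.
+ */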
+
+/** \} */
+
+} // namespace blender::gpu