From 9835d5e58ba28ff69abec74c1cb3f3959ff9451a Mon Sep 17 00:00:00 2001
From: Jason Fielder
Date: Tue, 19 Jul 2022 17:11:03 +0200
Subject: Metal: MTLUniformBuffer module implementation

Initial implementation.

Authored by Apple: Michael Parkin-White

Ref T96261

Reviewed By: fclem

Differential Revision: https://developer.blender.org/D15357
---
 source/blender/gpu/metal/mtl_backend.mm        |   4 +-
 source/blender/gpu/metal/mtl_context.hh        |   6 +
 source/blender/gpu/metal/mtl_memory.hh         |  18 +--
 source/blender/gpu/metal/mtl_memory.mm         |  48 +++++---
 source/blender/gpu/metal/mtl_uniform_buffer.hh |  48 ++++++++
 source/blender/gpu/metal/mtl_uniform_buffer.mm | 160 +++++++++++++++++++++++++
 6 files changed, 258 insertions(+), 26 deletions(-)
 create mode 100644 source/blender/gpu/metal/mtl_uniform_buffer.hh
 create mode 100644 source/blender/gpu/metal/mtl_uniform_buffer.mm

(limited to 'source/blender/gpu/metal')

diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm
index af1fd6a06bb..3328855bbf8 100644
--- a/source/blender/gpu/metal/mtl_backend.mm
+++ b/source/blender/gpu/metal/mtl_backend.mm
@@ -10,6 +10,7 @@
 #include "mtl_backend.hh"
 #include "mtl_context.hh"
 #include "mtl_framebuffer.hh"
+#include "mtl_uniform_buffer.hh"
 #include "mtl_query.hh"
 
 #include "gpu_capabilities_private.hh"
@@ -81,8 +82,7 @@ Texture *MTLBackend::texture_alloc(const char *name)
 
 UniformBuf *MTLBackend::uniformbuf_alloc(int size, const char *name)
 {
-  /* TODO(Metal): Implement MTLUniformBuf. */
-  return nullptr;
+  return new MTLUniformBuf(size, name);
 };
 
 StorageBuf *MTLBackend::storagebuf_alloc(int size, GPUUsageType usage, const char *name)
diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh
index caabd59a1b5..0db87bf5da5 100644
--- a/source/blender/gpu/metal/mtl_context.hh
+++ b/source/blender/gpu/metal/mtl_context.hh
@@ -3,6 +3,7 @@
 /** \file
  * \ingroup gpu
  */
+
 #pragma once
 
 #include "MEM_guardedalloc.h"
@@ -625,6 +626,11 @@ class MTLContext : public Context {
 
   void memory_statistics_get(int *total_mem, int *free_mem) override;
 
+  static MTLContext *get()
+  {
+    return static_cast<MTLContext *>(Context::get());
+  }
+
   void debug_group_begin(const char *name, int index) override;
   void debug_group_end() override;
 
diff --git a/source/blender/gpu/metal/mtl_memory.hh b/source/blender/gpu/metal/mtl_memory.hh
index daa049e78af..dc5417dc11a 100644
--- a/source/blender/gpu/metal/mtl_memory.hh
+++ b/source/blender/gpu/metal/mtl_memory.hh
@@ -78,8 +78,10 @@
  * Usage:
  *  MTLContext::get_global_memory_manager(); - static routine to fetch global memory manager.
  *
- * gpu::MTLBuffer *allocate_buffer(size, is_cpu_visibile, bytes=nullptr)
- * gpu::MTLBuffer *allocate_buffer_aligned(size, alignment, is_cpu_visibile, bytes=nullptr)
+ * gpu::MTLBuffer *allocate(size, is_cpu_visibile)
+ * gpu::MTLBuffer *allocate_aligned(size, alignment, is_cpu_visibile)
+ * gpu::MTLBuffer *allocate_with_data(size, is_cpu_visibile, data_ptr)
+ * gpu::MTLBuffer *allocate_aligned_with_data(size, alignment, is_cpu_visibile, data_ptr)
  */
 
 /* Debug memory statistics: Disabled by Macro rather than guarded for
@@ -389,11 +391,13 @@ class MTLBufferPool {
   void init(id<MTLDevice> device);
   ~MTLBufferPool();
 
-  gpu::MTLBuffer *allocate_buffer(uint64_t size, bool cpu_visible, const void *bytes = nullptr);
-  gpu::MTLBuffer *allocate_buffer_aligned(uint64_t size,
-                                          uint alignment,
-                                          bool cpu_visible,
-                                          const void *bytes = nullptr);
+  gpu::MTLBuffer *allocate(uint64_t size, bool cpu_visible);
+  gpu::MTLBuffer *allocate_aligned(uint64_t size, uint alignment, bool cpu_visible);
+  gpu::MTLBuffer *allocate_with_data(uint64_t size, bool cpu_visible, const void *data = nullptr);
+  gpu::MTLBuffer *allocate_aligned_with_data(uint64_t size,
+                                             uint alignment,
+                                             bool cpu_visible,
+                                             const void *data = nullptr);
   bool free_buffer(gpu::MTLBuffer *buffer);
 
   /* Flush MTLSafeFreeList buffers, for completed lists in `completed_safelist_queue_`,
diff --git a/source/blender/gpu/metal/mtl_memory.mm b/source/blender/gpu/metal/mtl_memory.mm
index e5db32ed1b1..48e27dd2bb6 100644
--- a/source/blender/gpu/metal/mtl_memory.mm
+++ b/source/blender/gpu/metal/mtl_memory.mm
@@ -57,17 +57,23 @@ void MTLBufferPool::free()
   buffer_pools_.clear();
 }
 
-gpu::MTLBuffer *MTLBufferPool::allocate_buffer(uint64_t size, bool cpu_visible, const void *bytes)
+gpu::MTLBuffer *MTLBufferPool::allocate(uint64_t size, bool cpu_visible)
 {
   /* Allocate buffer with default HW-compatible alignment of 256 bytes.
    * See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf for more. */
-  return this->allocate_buffer_aligned(size, 256, cpu_visible, bytes);
+  return this->allocate_aligned(size, 256, cpu_visible);
 }
 
-gpu::MTLBuffer *MTLBufferPool::allocate_buffer_aligned(uint64_t size,
-                                                       uint alignment,
-                                                       bool cpu_visible,
-                                                       const void *bytes)
+gpu::MTLBuffer *MTLBufferPool::allocate_with_data(uint64_t size,
+                                                  bool cpu_visible,
+                                                  const void *data)
+{
+  /* Allocate buffer with default HW-compatible alignemnt of 256 bytes.
+   * See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf for more. */
+  return this->allocate_aligned_with_data(size, 256, cpu_visible, data);
+}
+
+gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size, uint alignment, bool cpu_visible)
 {
   /* Check not required. Main GPU module usage considered thread-safe. */
   // BLI_assert(BLI_thread_is_main());
@@ -153,15 +159,6 @@ gpu::MTLBuffer *MTLBufferPool::allocate_buffer_aligned(uint64_t size,
   /* Flag buffer as actively in-use. */
   new_buffer->flag_in_use(true);
 
-  /* Upload initial data if provided -- Size based on original size param, not aligned size*/
-  if (bytes) {
-    BLI_assert(!(options & MTLResourceStorageModePrivate));
-    BLI_assert(size <= aligned_alloc_size);
-    BLI_assert(size <= [new_buffer->get_metal_buffer() length]);
-    memcpy(new_buffer->get_host_ptr(), bytes, size);
-    new_buffer->flush_range(0, size);
-  }
-
 #if MTL_DEBUG_MEMORY_STATISTICS == 1
   this->per_frame_allocation_count++;
 #endif
@@ -169,6 +166,23 @@
   return new_buffer;
 }
 
+gpu::MTLBuffer *MTLBufferPool::allocate_aligned_with_data(uint64_t size,
+                                                          uint alignment,
+                                                          bool cpu_visible,
+                                                          const void *data)
+{
+  gpu::MTLBuffer *buf = this->allocate_aligned(size, 256, cpu_visible);
+
+  /* Upload initial data. */
+  BLI_assert(data != nullptr);
+  BLI_assert(!(buf->get_resource_options() & MTLResourceStorageModePrivate));
+  BLI_assert(size <= buf->get_size());
+  BLI_assert(size <= [buf->get_metal_buffer() length]);
+  memcpy(buf->get_host_ptr(), data, size);
+  buf->flush_range(0, size);
+  return buf;
+}
+
 bool MTLBufferPool::free_buffer(gpu::MTLBuffer *buffer)
 {
   /* Ensure buffer is flagged as in-use. I.e. has not already been returned to memory pools. */
@@ -356,7 +370,7 @@ void MTLBufferPool::insert_buffer_into_pool(MTLResourceOptions options, gpu::MTL
 
 #if MTL_DEBUG_MEMORY_STATISTICS == 1
   /* Debug statistics. */
-  allocations_in_pool_ += buffer->size;
+  allocations_in_pool_ += buffer->get_size();
   buffers_in_pool_++;
 #endif
 }
@@ -413,7 +427,7 @@ void MTLSafeFreeList::decrement_reference()
 {
   lock_.lock();
   BLI_assert(in_free_queue_ == false);
-  int ref_count = reference_count_--;
+  int ref_count = --reference_count_;
 
   if (ref_count == 0) {
     MTLContext::get_global_memory_manager().push_completed_safe_list(this);
diff --git a/source/blender/gpu/metal/mtl_uniform_buffer.hh b/source/blender/gpu/metal/mtl_uniform_buffer.hh
new file mode 100644
index 00000000000..351194c2ff9
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_uniform_buffer.hh
@@ -0,0 +1,48 @@
+/** \file
+ * \ingroup gpu
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+#include "gpu_uniform_buffer_private.hh"
+
+#include "mtl_context.hh"
+
+namespace blender::gpu {
+
+/**
+ * Implementation of Uniform Buffers using Metal.
+ **/
+class MTLUniformBuf : public UniformBuf {
+ private:
+  /* Allocation Handle. */
+  gpu::MTLBuffer *metal_buffer_ = nullptr;
+
+  /* Whether buffer has contents, if false, no GPU buffer will
+   * have yet been allocated. */
+  bool has_data_ = false;
+
+  /* Bindstate tracking. */
+  int bind_slot_ = -1;
+  MTLContext *bound_ctx_ = nullptr;
+
+ public:
+  MTLUniformBuf(size_t size, const char *name);
+  ~MTLUniformBuf();
+
+  void update(const void *data) override;
+  void bind(int slot) override;
+  void unbind() override;
+
+  id<MTLBuffer> get_metal_buffer(int *r_offset);
+  int get_size();
+  const char *get_name()
+  {
+    return name_;
+  }
+
+  MEM_CXX_CLASS_ALLOC_FUNCS("MTLUniformBuf");
+};
+
+}  // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_uniform_buffer.mm b/source/blender/gpu/metal/mtl_uniform_buffer.mm
new file mode 100644
index 00000000000..e45ae76c921
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_uniform_buffer.mm
@@ -0,0 +1,160 @@
+/** \file
+ * \ingroup gpu
+ */
+
+#include "BKE_global.h"
+
+#include "BLI_string.h"
+
+#include "gpu_backend.hh"
+#include "gpu_context_private.hh"
+
+#include "mtl_backend.hh"
+#include "mtl_context.hh"
+#include "mtl_debug.hh"
+#include "mtl_uniform_buffer.hh"
+
+namespace blender::gpu {
+
+MTLUniformBuf::MTLUniformBuf(size_t size, const char *name) : UniformBuf(size, name)
+{
+}
+
+MTLUniformBuf::~MTLUniformBuf()
+{
+  if (metal_buffer_ != nullptr) {
+    metal_buffer_->free();
+    metal_buffer_ = nullptr;
+  }
+  has_data_ = false;
+
+  /* Ensure UBO is not bound to active CTX.
+   * UBO bindings are reset upon Context-switch so we do not need
+   * to check deactivated context's. */
+  MTLContext *ctx = MTLContext::get();
+  if (ctx) {
+    for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
+      MTLUniformBufferBinding &slot = ctx->pipeline_state.ubo_bindings[i];
+      if (slot.bound && slot.ubo == this) {
+        slot.bound = false;
+        slot.ubo = nullptr;
+      }
+    }
+  }
+}
+
+void MTLUniformBuf::update(const void *data)
+{
+  BLI_assert(this);
+  BLI_assert(size_in_bytes_ > 0);
+
+  /* Free existing allocation.
+   * The previous UBO resource will be tracked by the memory manager,
+   * in case dependent GPU work is still executing. */
+  if (metal_buffer_ != nullptr) {
+    metal_buffer_->free();
+    metal_buffer_ = nullptr;
+  }
+
+  /* Allocate MTL buffer */
+  MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
+  BLI_assert(ctx);
+  BLI_assert(ctx->device);
+  UNUSED_VARS_NDEBUG(ctx);
+
+  if (data != nullptr) {
+    metal_buffer_ = MTLContext::get_global_memory_manager().allocate_with_data(
+        size_in_bytes_, true, data);
+    has_data_ = true;
+
+    metal_buffer_->set_label(@"Uniform Buffer");
+    BLI_assert(metal_buffer_ != nullptr);
+    BLI_assert(metal_buffer_->get_metal_buffer() != nil);
+  }
+  else {
+    /* If data is not yet present, no buffer will be allocated and MTLContext will use an empty
+     * null buffer, containing zeroes, if the UBO is bound. */
+    metal_buffer_ = nullptr;
+    has_data_ = false;
+  }
+}
+
+void MTLUniformBuf::bind(int slot)
+{
+  if (slot < 0) {
+    MTL_LOG_WARNING("Failed to bind UBO %p. uniform location %d invalid.\n", this, slot);
+    return;
+  }
+
+  BLI_assert(slot < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+
+  /* Bind current UBO to active context. */
+  MTLContext *ctx = MTLContext::get();
+  BLI_assert(ctx);
+
+  MTLUniformBufferBinding &ctx_ubo_bind_slot = ctx->pipeline_state.ubo_bindings[slot];
+  ctx_ubo_bind_slot.ubo = this;
+  ctx_ubo_bind_slot.bound = true;
+
+  bind_slot_ = slot;
+  bound_ctx_ = ctx;
+
+  /* Check if we have any deferred data to upload. */
+  if (data_ != nullptr) {
+    this->update(data_);
+    MEM_SAFE_FREE(data_);
+  }
+
+  /* Ensure there is atleast an empty dummy buffer. */
+  if (metal_buffer_ == nullptr) {
+    this->update(nullptr);
+  }
+}
+
+void MTLUniformBuf::unbind()
+{
+  /* Unbind in debug mode to validate missing binds.
+   * Otherwise, only perform a full unbind upon destruction
+   * to ensure no lingering references. */
+#ifndef NDEBUG
+  if (true) {
+#else
+  if (G.debug & G_DEBUG_GPU) {
+#endif
+    if (bound_ctx_ != nullptr && bind_slot_ > -1) {
+      MTLUniformBufferBinding &ctx_ubo_bind_slot =
+          bound_ctx_->pipeline_state.ubo_bindings[bind_slot_];
+      if (ctx_ubo_bind_slot.bound && ctx_ubo_bind_slot.ubo == this) {
+        ctx_ubo_bind_slot.bound = false;
+        ctx_ubo_bind_slot.ubo = nullptr;
+      }
+    }
+  }
+
+  /* Reset bind index. */
+  bind_slot_ = -1;
+  bound_ctx_ = nullptr;
+}
+
+id<MTLBuffer> MTLUniformBuf::get_metal_buffer(int *r_offset)
+{
+  BLI_assert(this);
+  *r_offset = 0;
+  if (metal_buffer_ != nullptr && has_data_) {
+    *r_offset = 0;
+    metal_buffer_->debug_ensure_used();
+    return metal_buffer_->get_metal_buffer();
+  }
+  else {
+    *r_offset = 0;
+    return nil;
+  }
+}
+
+int MTLUniformBuf::get_size()
+{
+  BLI_assert(this);
+  return size_in_bytes_;
+}
+
+}  // blender::gpu
--
cgit v1.2.3