git.blender.org/blender.git

Diffstat (limited to 'source/blender/gpu')
-rw-r--r-- source/blender/gpu/CMakeLists.txt | 4
-rw-r--r-- source/blender/gpu/GPU_context.h | 2
-rw-r--r-- source/blender/gpu/GPU_material.h | 25
-rw-r--r-- source/blender/gpu/intern/gpu_backend.hh | 2
-rw-r--r-- source/blender/gpu/intern/gpu_codegen.cc | 101
-rw-r--r-- source/blender/gpu/intern/gpu_codegen.h | 4
-rw-r--r-- source/blender/gpu/intern/gpu_context.cc | 7
-rw-r--r-- source/blender/gpu/intern/gpu_material.c | 215
-rw-r--r-- source/blender/gpu/intern/gpu_node_graph.c | 19
-rw-r--r-- source/blender/gpu/intern/gpu_node_graph.h | 15
-rw-r--r-- source/blender/gpu/intern/gpu_shader_builder.cc | 2
-rw-r--r-- source/blender/gpu/intern/gpu_shader_interface.cc | 4
-rw-r--r-- source/blender/gpu/metal/mtl_backend.hh | 2
-rw-r--r-- source/blender/gpu/metal/mtl_backend.mm | 15
-rw-r--r-- source/blender/gpu/metal/mtl_batch.hh | 41
-rw-r--r-- source/blender/gpu/metal/mtl_command_buffer.mm | 172
-rw-r--r-- source/blender/gpu/metal/mtl_common.hh | 4
-rw-r--r-- source/blender/gpu/metal/mtl_context.hh | 132
-rw-r--r-- source/blender/gpu/metal/mtl_context.mm | 1191
-rw-r--r-- source/blender/gpu/metal/mtl_drawlist.hh | 34
-rw-r--r-- source/blender/gpu/metal/mtl_immediate.hh | 41
-rw-r--r-- source/blender/gpu/metal/mtl_immediate.mm | 397
-rw-r--r-- source/blender/gpu/metal/mtl_memory.hh | 6
-rw-r--r-- source/blender/gpu/metal/mtl_shader.hh | 2
-rw-r--r-- source/blender/gpu/metal/mtl_shader.mm | 27
-rw-r--r-- source/blender/gpu/metal/mtl_shader_generator.mm | 4
-rw-r--r-- source/blender/gpu/metal/mtl_texture.mm | 89
-rw-r--r-- source/blender/gpu/metal/mtl_texture_util.mm | 30
-rw-r--r-- source/blender/gpu/opengl/gl_backend.hh | 2
-rw-r--r-- source/blender/gpu/tests/gpu_testing.cc | 2
30 files changed, 2372 insertions, 219 deletions
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt
index 18da5169620..0ce4011b2b4 100644
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -192,6 +192,7 @@ set(METAL_SRC
metal/mtl_context.mm
metal/mtl_debug.mm
metal/mtl_framebuffer.mm
+ metal/mtl_immediate.mm
metal/mtl_index_buffer.mm
metal/mtl_memory.mm
metal/mtl_query.mm
@@ -205,11 +206,14 @@ set(METAL_SRC
metal/mtl_vertex_buffer.mm
metal/mtl_backend.hh
+ metal/mtl_batch.hh
metal/mtl_capabilities.hh
metal/mtl_common.hh
metal/mtl_context.hh
metal/mtl_debug.hh
+ metal/mtl_drawlist.hh
metal/mtl_framebuffer.hh
+ metal/mtl_immediate.hh
metal/mtl_index_buffer.hh
metal/mtl_memory.hh
metal/mtl_primitive.hh
diff --git a/source/blender/gpu/GPU_context.h b/source/blender/gpu/GPU_context.h
index a242bb7cc94..b59ea9e55d2 100644
--- a/source/blender/gpu/GPU_context.h
+++ b/source/blender/gpu/GPU_context.h
@@ -26,7 +26,7 @@ eGPUBackendType GPU_backend_get_type(void);
/** Opaque type hiding blender::gpu::Context. */
typedef struct GPUContext GPUContext;
-GPUContext *GPU_context_create(void *ghost_window);
+GPUContext *GPU_context_create(void *ghost_window, void *ghost_context);
/**
* To be called after #GPU_context_active_set(ctx_to_destroy).
*/
diff --git a/source/blender/gpu/GPU_material.h b/source/blender/gpu/GPU_material.h
index 922988bf95a..11500f5af60 100644
--- a/source/blender/gpu/GPU_material.h
+++ b/source/blender/gpu/GPU_material.h
@@ -117,6 +117,15 @@ typedef enum eGPUMaterialStatus {
GPU_MAT_SUCCESS,
} eGPUMaterialStatus;
+/* GPU_MAT_OPTIMIZATION_SKIP for cases where we do not
+ * plan to perform optimization on a given material. */
+typedef enum eGPUMaterialOptimizationStatus {
+ GPU_MAT_OPTIMIZATION_SKIP = 0,
+ GPU_MAT_OPTIMIZATION_READY,
+ GPU_MAT_OPTIMIZATION_QUEUED,
+ GPU_MAT_OPTIMIZATION_SUCCESS,
+} eGPUMaterialOptimizationStatus;
+
typedef enum eGPUDefaultValue {
GPU_DEFAULT_0 = 0,
GPU_DEFAULT_1,
@@ -246,6 +255,15 @@ struct Scene *GPU_material_scene(GPUMaterial *material);
struct GPUPass *GPU_material_get_pass(GPUMaterial *material);
struct GPUShader *GPU_material_get_shader(GPUMaterial *material);
const char *GPU_material_get_name(GPUMaterial *material);
+
+/**
+ * Material Optimization.
+ * \note Compiles optimal version of shader graph, populating mat->optimized_pass.
+ * This operation should always be deferred until existing compilations have completed.
+ * Default un-optimized materials will still exist for interactive material editing performance.
+ */
+void GPU_material_optimize(GPUMaterial *mat);
+
/**
* Return can be NULL if it's a world material.
*/
@@ -256,6 +274,13 @@ struct Material *GPU_material_get_material(GPUMaterial *material);
eGPUMaterialStatus GPU_material_status(GPUMaterial *mat);
void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status);
+/**
+ * Return status for async optimization jobs.
+ */
+eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat);
+void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status);
+bool GPU_material_optimization_ready(GPUMaterial *mat);
+
struct GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material);
/**
* Create dynamic UBO from parameters
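
For context, the sketch below (not part of this patch) shows how a caller could drive the optimization API declared above once a material's default pass has compiled. The polling policy is illustrative only, and material_try_optimize is a hypothetical helper name.

/* Hypothetical caller-side sketch using the API added in GPU_material.h above.
 * Assumes `mat` has already reached GPU_MAT_SUCCESS for its default pass. */
#include "GPU_material.h"

static void material_try_optimize(GPUMaterial *mat)
{
  /* Only materials flagged READY by the complexity heuristic are considered. */
  if (GPU_material_optimization_status(mat) != GPU_MAT_OPTIMIZATION_READY) {
    return;
  }
  /* Respect the time threshold so rapid material edits do not trigger
   * excessive recompilation. */
  if (!GPU_material_optimization_ready(mat)) {
    return;
  }
  /* Compiles the optimized pass; on success the status becomes
   * GPU_MAT_OPTIMIZATION_SUCCESS and GPU_material_get_shader() will return
   * the optimized shader from then on. */
  GPU_material_optimize(mat);
}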
diff --git a/source/blender/gpu/intern/gpu_backend.hh b/source/blender/gpu/intern/gpu_backend.hh
index d2890efee72..2a545c8114e 100644
--- a/source/blender/gpu/intern/gpu_backend.hh
+++ b/source/blender/gpu/intern/gpu_backend.hh
@@ -38,7 +38,7 @@ class GPUBackend {
virtual void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) = 0;
virtual void compute_dispatch_indirect(StorageBuf *indirect_buf) = 0;
- virtual Context *context_alloc(void *ghost_window) = 0;
+ virtual Context *context_alloc(void *ghost_window, void *ghost_context) = 0;
virtual Batch *batch_alloc() = 0;
virtual DrawList *drawlist_alloc(int list_length) = 0;
diff --git a/source/blender/gpu/intern/gpu_codegen.cc b/source/blender/gpu/intern/gpu_codegen.cc
index 2241bcf9f9b..85cfa9749fa 100644
--- a/source/blender/gpu/intern/gpu_codegen.cc
+++ b/source/blender/gpu/intern/gpu_codegen.cc
@@ -95,6 +95,9 @@ struct GPUPass {
uint32_t hash;
/** Did we already tried to compile the attached GPUShader. */
bool compiled;
+ /** Hint that an optimized variant of this pass should be created based on a complexity heuristic
+ * during pass code generation. */
+ bool should_optimize;
};
/* -------------------------------------------------------------------- */
@@ -242,6 +245,11 @@ class GPUCodegen {
ListBase ubo_inputs_ = {nullptr, nullptr};
GPUInput *cryptomatte_input_ = nullptr;
+ /** Cache parameters for the complexity heuristic. */
+ uint nodes_total_ = 0;
+ uint textures_total_ = 0;
+ uint uniforms_total_ = 0;
+
public:
GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_) : mat(*mat_), graph(*graph_)
{
@@ -282,6 +290,14 @@ class GPUCodegen {
return hash_;
}
+ /* Heuristic determined during pass codegen for whether a
+ * more optimal variant of this material should be compiled. */
+ bool should_optimize_heuristic() const
+ {
+ bool do_optimize = (nodes_total_ >= 100 || textures_total_ >= 4 || uniforms_total_ >= 64);
+ return do_optimize;
+ }
+
private:
void set_unique_ids();
@@ -403,6 +419,9 @@ void GPUCodegen::generate_resources()
}
}
+ /* Increment heuristic. */
+ textures_total_ = slot;
+
if (!BLI_listbase_is_empty(&ubo_inputs_)) {
/* NOTE: generate_uniform_buffer() should have sorted the inputs before this. */
ss << "struct NodeTree {\n";
@@ -440,11 +459,16 @@ void GPUCodegen::generate_library()
GPUCodegenCreateInfo &info = *create_info;
void *value;
- GSetIterState pop_state = {};
- while (BLI_gset_pop(graph.used_libraries, &pop_state, &value)) {
+ /* Iterate over libraries. We need to keep this set intact in case
+ * it is required for generating the optimized pass. */
+ GHashIterator *ihash = BLI_ghashIterator_new((GHash *)graph.used_libraries);
+ while (!BLI_ghashIterator_done(ihash)) {
+ value = BLI_ghashIterator_getKey(ihash);
auto deps = gpu_shader_dependency_get_resolved_source((const char *)value);
info.dependencies_generated.extend_non_duplicates(deps);
+ BLI_ghashIterator_step(ihash);
}
+ BLI_ghashIterator_free(ihash);
}
void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
@@ -512,6 +536,9 @@ void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
}
}
eval_ss << ");\n\n";
+
+ /* Increment heuristic. */
+ nodes_total_++;
}
char *GPUCodegen::graph_serialize(eGPUNodeTag tree_tag, GPUNodeLink *output_link)
@@ -575,6 +602,7 @@ void GPUCodegen::generate_uniform_buffer()
if (input->source == GPU_SOURCE_UNIFORM && !input->link) {
/* We handle the UBO uniforms separately. */
BLI_addtail(&ubo_inputs_, BLI_genericNodeN(input));
+ uniforms_total_++;
}
}
}
@@ -602,6 +630,7 @@ void GPUCodegen::generate_graphs()
{
set_unique_ids();
+ /* Serialize graph. */
output.surface = graph_serialize(GPU_NODE_TAG_SURFACE | GPU_NODE_TAG_AOV, graph.outlink_surface);
output.volume = graph_serialize(GPU_NODE_TAG_VOLUME, graph.outlink_volume);
output.displacement = graph_serialize(GPU_NODE_TAG_DISPLACEMENT, graph.outlink_displacement);
@@ -637,10 +666,17 @@ void GPUCodegen::generate_graphs()
GPUPass *GPU_generate_pass(GPUMaterial *material,
GPUNodeGraph *graph,
GPUCodegenCallbackFn finalize_source_cb,
- void *thunk)
+ void *thunk,
+ bool optimize_graph)
{
gpu_node_graph_prune_unused(graph);
+ /* If the optimize_graph flag is set, we are generating an optimized
+ * variant of the GPUMaterial's GPUPass. */
+ if (optimize_graph) {
+ gpu_node_graph_optimize(graph);
+ }
+
/* Extract attributes before compiling so the generated VBOs are ready to accept the future
* shader. */
gpu_node_graph_finalize_uniform_attrs(graph);
@@ -648,23 +684,33 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
GPUCodegen codegen(material, graph);
codegen.generate_graphs();
codegen.generate_cryptomatte();
- codegen.generate_uniform_buffer();
- /* Cache lookup: Reuse shaders already compiled. */
- GPUPass *pass_hash = gpu_pass_cache_lookup(codegen.hash_get());
-
- /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
- * there is no way to have a collision currently. Some advocated to only use a bigger hash. */
- if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
- if (!gpu_pass_is_valid(pass_hash)) {
- /* Shader has already been created but failed to compile. */
- return nullptr;
+ GPUPass *pass_hash = nullptr;
+
+ if (!optimize_graph) {
+ /* The optimized version of the shader should not re-generate a UBO.
+ * The UBO will not be used for this variant. */
+ codegen.generate_uniform_buffer();
+
+ /* Cache lookup: Reuse shaders already compiled.
+ * NOTE: We only perform cache lookup for non-optimized shader
+ * graphs, as baked constant data, among other optimizations, will generate too many
+ * shader source permutations with minimal reusability. */
+ pass_hash = gpu_pass_cache_lookup(codegen.hash_get());
+
+ /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
+ * there is no way to have a collision currently. Some advocated to only use a bigger hash. */
+ if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
+ if (!gpu_pass_is_valid(pass_hash)) {
+ /* Shader has already been created but failed to compile. */
+ return nullptr;
+ }
+ /* No collision, just return the pass. */
+ BLI_spin_lock(&pass_cache_spin);
+ pass_hash->refcount += 1;
+ BLI_spin_unlock(&pass_cache_spin);
+ return pass_hash;
}
- /* No collision, just return the pass. */
- BLI_spin_lock(&pass_cache_spin);
- pass_hash->refcount += 1;
- BLI_spin_unlock(&pass_cache_spin);
- return pass_hash;
}
/* Either the shader is not compiled or there is a hash collision...
@@ -702,14 +748,31 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
pass->create_info = codegen.create_info;
pass->hash = codegen.hash_get();
pass->compiled = false;
+ /* Only flag the pass optimization hint if this is the first generated pass for a material.
+ * Optimized passes cannot be optimized further, even if the complexity heuristic would
+ * still flag them. */
+ pass->should_optimize = (!optimize_graph) && codegen.should_optimize_heuristic();
codegen.create_info = nullptr;
- gpu_pass_cache_insert_after(pass_hash, pass);
+ /* Only insert non-optimized graphs into the cache.
+ * Optimized graphs will continuously be recompiled with new unique source during material
+ * editing, which would quickly fill the cache with shaders offering minimal re-use. */
+ if (!optimize_graph) {
+ gpu_pass_cache_insert_after(pass_hash, pass);
+ }
}
return pass;
}
+bool GPU_pass_should_optimize(GPUPass *pass)
+{
+ /* Returns optimization heuristic prepared during
+ * initial codegen. */
+ return pass->should_optimize;
+}
+
/** \} */
/* -------------------------------------------------------------------- */
diff --git a/source/blender/gpu/intern/gpu_codegen.h b/source/blender/gpu/intern/gpu_codegen.h
index 95a672c0400..aabdf1ac003 100644
--- a/source/blender/gpu/intern/gpu_codegen.h
+++ b/source/blender/gpu/intern/gpu_codegen.h
@@ -25,10 +25,12 @@ typedef struct GPUPass GPUPass;
GPUPass *GPU_generate_pass(GPUMaterial *material,
struct GPUNodeGraph *graph,
GPUCodegenCallbackFn finalize_source_cb,
- void *thunk);
+ void *thunk,
+ bool optimize_graph);
GPUShader *GPU_pass_shader_get(GPUPass *pass);
bool GPU_pass_compile(GPUPass *pass, const char *shname);
void GPU_pass_release(GPUPass *pass);
+bool GPU_pass_should_optimize(GPUPass *pass);
/* Module */
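
For context, a minimal sketch (not part of this patch) of the updated internal code-generation flow using the new optimize_graph parameter and GPU_pass_should_optimize(); material_generate_passes is a hypothetical helper that mirrors the call sites changed in gpu_material.c below.

/* Hypothetical sketch of generating the default and optimized pass variants. */
#include "GPU_material.h"
#include "gpu_codegen.h"

static void material_generate_passes(GPUMaterial *mat,
                                      struct GPUNodeGraph *graph,
                                      GPUCodegenCallbackFn cb,
                                      void *thunk)
{
  /* Default pass: cached and un-optimized, keeps the NodeTree UBO. */
  GPUPass *pass = GPU_generate_pass(mat, graph, cb, thunk, false);
  if (pass == NULL) {
    return; /* Cache hit on a shader that previously failed to compile. */
  }
  /* Optimized variant: only generated when the complexity heuristic,
   * evaluated during the first code generation, flags this pass. */
  if (GPU_pass_should_optimize(pass)) {
    GPUPass *optimized_pass = GPU_generate_pass(mat, graph, cb, thunk, true);
    (void)optimized_pass; /* Stored on the material in the real call sites. */
  }
}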
diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc
index bcc418169b7..92cbbc5b4b0 100644
--- a/source/blender/gpu/intern/gpu_context.cc
+++ b/source/blender/gpu/intern/gpu_context.cc
@@ -94,7 +94,7 @@ Context *Context::get()
/* -------------------------------------------------------------------- */
-GPUContext *GPU_context_create(void *ghost_window)
+GPUContext *GPU_context_create(void *ghost_window, void *ghost_context)
{
{
std::scoped_lock lock(backend_users_mutex);
@@ -105,7 +105,7 @@ GPUContext *GPU_context_create(void *ghost_window)
num_backend_users++;
}
- Context *ctx = GPUBackend::get()->context_alloc(ghost_window);
+ Context *ctx = GPUBackend::get()->context_alloc(ghost_window, ghost_context);
GPU_context_active_set(wrap(ctx));
return wrap(ctx);
@@ -216,6 +216,9 @@ void GPU_render_step()
/** \name Backend selection
* \{ */
+/* NOTE: To enable Metal API, we need to temporarily change this to `GPU_BACKEND_METAL`.
+ * Until a global switch is added, Metal also needs to be enabled in GHOST_ContextCGL:
+ * `m_useMetalForRendering = true`. */
static const eGPUBackendType g_backend_type = GPU_BACKEND_OPENGL;
static GPUBackend *g_backend = nullptr;
diff --git a/source/blender/gpu/intern/gpu_material.c b/source/blender/gpu/intern/gpu_material.c
index 96809db1587..991cb229eda 100644
--- a/source/blender/gpu/intern/gpu_material.c
+++ b/source/blender/gpu/intern/gpu_material.c
@@ -34,6 +34,8 @@
#include "DRW_engine.h"
+#include "PIL_time.h"
+
#include "gpu_codegen.h"
#include "gpu_node_graph.h"
@@ -43,6 +45,17 @@
#define MAX_COLOR_BAND 128
#define MAX_GPU_SKIES 8
+/** Whether the optimized variant of the GPUPass should be created asynchronously.
+ * Usage of this depends on whether there are possible threading challenges of doing so.
+ * Currently, the overhead of GPU_generate_pass is relatively small compared to shader
+ * compilation, though this option exists in case material graph optimization ever causes
+ * a slowdown on the main thread.
+ *
+ * NOTE: The actual shader program for the optimized pass will always be compiled
+ * asynchronously; this flag only controls whether shader node graph source serialization
+ * happens on the compilation worker thread. */
+#define ASYNC_OPTIMIZED_PASS_CREATION 0
+
typedef struct GPUColorBandBuilder {
float pixels[MAX_COLOR_BAND][CM_TABLE + 1][4];
int current_layer;
@@ -57,6 +70,27 @@ struct GPUMaterial {
/* Contains GPUShader and source code for deferred compilation.
* Can be shared between similar material (i.e: sharing same nodetree topology). */
GPUPass *pass;
+ /* Optimized GPUPass, situationally compiled after the initial pass for optimal run-time
+ * performance. This shader variant bakes dynamic uniform data as constants directly into
+ * the shader source, rather than reading them from the UBO. */
+ GPUPass *optimized_pass;
+ /* Optimization status.
+ * We also use this status to determine whether this material should be considered for
+ * optimization. Only sufficiently complex shaders benefit from constant-folding optimizations.
+ * `GPU_MAT_OPTIMIZATION_READY` -> shader should be optimized and is ready for optimization.
+ * `GPU_MAT_OPTIMIZATION_SKIP` -> Shader should not be optimized as it would not benefit
+ * performance to do so, based on the heuristic.
+ */
+ eGPUMaterialOptimizationStatus optimization_status;
+ double creation_time;
+#if ASYNC_OPTIMIZED_PASS_CREATION == 1
+ struct DeferredOptimizePass {
+ GPUCodegenCallbackFn callback;
+ void *thunk;
+ };
+ struct DeferredOptimizePass optimize_pass_info;
+#endif
+
/** UBOs for this material parameters. */
GPUUniformBuf *ubo;
/** Compilation status. Do not use if shader is not GPU_MAT_SUCCESS. */
@@ -209,6 +243,9 @@ void GPU_material_free_single(GPUMaterial *material)
gpu_node_graph_free(&material->graph);
+ if (material->optimized_pass != NULL) {
+ GPU_pass_release(material->optimized_pass);
+ }
if (material->pass != NULL) {
GPU_pass_release(material->pass);
}
@@ -247,12 +284,15 @@ Scene *GPU_material_scene(GPUMaterial *material)
GPUPass *GPU_material_get_pass(GPUMaterial *material)
{
- return material->pass;
+ return (material->optimized_pass) ? material->optimized_pass : material->pass;
}
GPUShader *GPU_material_get_shader(GPUMaterial *material)
{
- return material->pass ? GPU_pass_shader_get(material->pass) : NULL;
+ /* First attempt to select optimized shader. If not available, fetch original. */
+ GPUShader *shader = (material->optimized_pass) ? GPU_pass_shader_get(material->optimized_pass) :
+ NULL;
+ return (shader) ? shader : ((material->pass) ? GPU_pass_shader_get(material->pass) : NULL);
}
const char *GPU_material_get_name(GPUMaterial *material)
@@ -665,6 +705,29 @@ void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status)
mat->status = status;
}
+eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat)
+{
+ return mat->optimization_status;
+}
+
+void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status)
+{
+ mat->optimization_status = status;
+ if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
+ /* Reset creation timer to delay optimization pass. */
+ mat->creation_time = PIL_check_seconds_timer();
+ }
+}
+
+bool GPU_material_optimization_ready(GPUMaterial *mat)
+{
+ /* Timer threshold before optimizations will be queued.
+ * When materials are frequently being modified, optimization
+ * can incur CPU overhead from excessive compilation. */
+ const double optimization_time_threshold_s = 5.0;
+ return ((PIL_check_seconds_timer() - mat->creation_time) >= optimization_time_threshold_s);
+}
+
/* Code generation */
bool GPU_material_has_surface_output(GPUMaterial *mat)
@@ -730,6 +793,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
mat->uuid = shader_uuid;
mat->flag = GPU_MATFLAG_UPDATED;
mat->status = GPU_MAT_CREATED;
+ mat->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
mat->is_volume_shader = is_volume_shader;
mat->graph.used_libraries = BLI_gset_new(
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
@@ -748,7 +812,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
{
/* Create source code and search pass cache for an already compiled version. */
- mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk);
+ mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, false);
if (mat->pass == NULL) {
/* We had a cache hit and the shader has already failed to compile. */
@@ -756,11 +820,44 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
gpu_node_graph_free(&mat->graph);
}
else {
+ /* Determine whether we should generate an optimized variant of the graph.
+ * Heuristic is based on complexity of default material pass and shader node graph. */
+ if (GPU_pass_should_optimize(mat->pass)) {
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
+ }
+
GPUShader *sh = GPU_pass_shader_get(mat->pass);
if (sh != NULL) {
/* We had a cache hit and the shader is already compiled. */
mat->status = GPU_MAT_SUCCESS;
- gpu_node_graph_free_nodes(&mat->graph);
+
+ if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
+ gpu_node_graph_free_nodes(&mat->graph);
+ }
+ }
+
+ /* Generate optimized pass. */
+ if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
+#if ASYNC_OPTIMIZED_PASS_CREATION == 1
+ mat->optimized_pass = NULL;
+ mat->optimize_pass_info.callback = callback;
+ mat->optimize_pass_info.thunk = thunk;
+#else
+ mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, true);
+ if (mat->optimized_pass == NULL) {
+ /* Failed to create optimized pass. */
+ gpu_node_graph_free_nodes(&mat->graph);
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
+ }
+ else {
+ GPUShader *optimized_sh = GPU_pass_shader_get(mat->optimized_pass);
+ if (optimized_sh != NULL) {
+ /* Optimized shader already available. */
+ gpu_node_graph_free_nodes(&mat->graph);
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
+ }
+ }
+#endif
}
}
}
@@ -811,7 +908,11 @@ void GPU_material_compile(GPUMaterial *mat)
GPUShader *sh = GPU_pass_shader_get(mat->pass);
if (sh != NULL) {
mat->status = GPU_MAT_SUCCESS;
- gpu_node_graph_free_nodes(&mat->graph);
+
+ if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
+ /* Only free node graph nodes if not required by secondary optimization pass. */
+ gpu_node_graph_free_nodes(&mat->graph);
+ }
}
else {
mat->status = GPU_MAT_FAILED;
@@ -825,6 +926,71 @@ void GPU_material_compile(GPUMaterial *mat)
}
}
+void GPU_material_optimize(GPUMaterial *mat)
+{
+ /* If shader is flagged for skipping optimization or has already been successfully
+ * optimized, skip. */
+ if (ELEM(mat->optimization_status, GPU_MAT_OPTIMIZATION_SKIP, GPU_MAT_OPTIMIZATION_SUCCESS)) {
+ return;
+ }
+
+ /* If original shader has not been fully compiled, we are not
+ * ready to perform optimization. */
+ if (mat->status != GPU_MAT_SUCCESS) {
+ /* Reset optimization status. */
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
+ return;
+ }
+
+#if ASYNC_OPTIMIZED_PASS_CREATION == 1
+ /* If the optimized pass is not valid, first generate the optimized pass.
+ * NOTE(Threading): Need to verify whether GPU_generate_pass can cause side effects, especially
+ * when used with "thunk". So far this appears to work, and deferring optimized pass creation is
+ * preferable, as these passes do not benefit from caching due to baked constants. However, this
+ * could be a concern in certain cases. */
+ if (!mat->optimized_pass) {
+ mat->optimized_pass = GPU_generate_pass(
+ mat, &mat->graph, mat->optimize_pass_info.callback, mat->optimize_pass_info.thunk, true);
+ BLI_assert(mat->optimized_pass);
+ }
+#else
+ if (!mat->optimized_pass) {
+ /* Optimized pass has not been created, skip future optimization attempts. */
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
+ return;
+ }
+#endif
+
+ bool success;
+ /* NOTE: The shader may have already been compiled here since we are
+ * sharing GPUShader across GPUMaterials. In this case it's a no-op. */
+#ifndef NDEBUG
+ success = GPU_pass_compile(mat->optimized_pass, mat->name);
+#else
+ success = GPU_pass_compile(mat->optimized_pass, __func__);
+#endif
+
+ if (success) {
+ GPUShader *sh = GPU_pass_shader_get(mat->optimized_pass);
+ if (sh != NULL) {
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
+ }
+ else {
+ /* Optimized pass failed to compile. Disable any future optimization attempts. */
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
+ }
+ }
+ else {
+ /* Optimization pass generation failed. Disable future attempts to optimize. */
+ GPU_pass_release(mat->optimized_pass);
+ mat->optimized_pass = NULL;
+ GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
+ }
+
+ /* Release node graph as no longer needed. */
+ gpu_node_graph_free_nodes(&mat->graph);
+}
+
void GPU_materials_free(Main *bmain)
{
LISTBASE_FOREACH (Material *, ma, &bmain->materials) {
@@ -848,6 +1014,8 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
material->graph.used_libraries = BLI_gset_new(
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
material->refcount = 1;
+ material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
+ material->optimized_pass = NULL;
/* Construct the material graph by adding and linking the necessary GPU material nodes. */
construct_function_cb(thunk, material);
@@ -856,7 +1024,9 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
gpu_material_ramp_texture_build(material);
/* Lookup an existing pass in the cache or generate a new one. */
- material->pass = GPU_generate_pass(material, &material->graph, generate_code_function_cb, thunk);
+ material->pass = GPU_generate_pass(
+ material, &material->graph, generate_code_function_cb, thunk, false);
+ material->optimized_pass = NULL;
/* The pass already exists in the pass cache but its shader already failed to compile. */
if (material->pass == NULL) {
@@ -865,11 +1035,42 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
return material;
}
+ /* Generate optimized pass. */
+ if (GPU_pass_should_optimize(material->pass)) {
+
+#if ASYNC_OPTIMIZED_PASS_CREATION == 1
+ material->optimized_pass = NULL;
+ material->optimize_pass_info.callback = generate_code_function_cb;
+ material->optimize_pass_info.thunk = thunk;
+ GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_READY);
+#else
+ material->optimized_pass = GPU_generate_pass(
+ material, &material->graph, generate_code_function_cb, thunk, true);
+
+ if (material->optimized_pass == NULL) {
+ /* Failed to create optimized pass. */
+ gpu_node_graph_free_nodes(&material->graph);
+ GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SKIP);
+ }
+ else {
+ GPUShader *optimized_sh = GPU_pass_shader_get(material->optimized_pass);
+ if (optimized_sh != NULL) {
+ /* Optimized shader already available. */
+ gpu_node_graph_free_nodes(&material->graph);
+ GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SUCCESS);
+ }
+ }
+#endif
+ }
+
/* The pass already exists in the pass cache and its shader is already compiled. */
GPUShader *shader = GPU_pass_shader_get(material->pass);
if (shader != NULL) {
material->status = GPU_MAT_SUCCESS;
- gpu_node_graph_free_nodes(&material->graph);
+ if (material->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
+ /* Only free node graph if not required by secondary optimization pass. */
+ gpu_node_graph_free_nodes(&material->graph);
+ }
return material;
}
diff --git a/source/blender/gpu/intern/gpu_node_graph.c b/source/blender/gpu/intern/gpu_node_graph.c
index a305413905b..3ca2399a547 100644
--- a/source/blender/gpu/intern/gpu_node_graph.c
+++ b/source/blender/gpu/intern/gpu_node_graph.c
@@ -914,3 +914,22 @@ void gpu_node_graph_prune_unused(GPUNodeGraph *graph)
}
}
}
+
+void gpu_node_graph_optimize(GPUNodeGraph *graph)
+{
+ /* Replace all uniform node links with constants. */
+ LISTBASE_FOREACH (GPUNode *, node, &graph->nodes) {
+ LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
+ if (input->link) {
+ if (input->link->link_type == GPU_NODE_LINK_UNIFORM) {
+ input->link->link_type = GPU_NODE_LINK_CONSTANT;
+ }
+ }
+ if (input->source == GPU_SOURCE_UNIFORM) {
+ input->source = (input->type == GPU_CLOSURE) ? GPU_SOURCE_STRUCT : GPU_SOURCE_CONSTANT;
+ }
+ }
+ }
+
+ /* TODO: Consider performing other node graph optimizations here. */
+}
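
For context, a minimal sketch (not part of this patch) of where gpu_node_graph_optimize() sits relative to the other graph passes when the optimized variant is generated, mirroring GPU_generate_pass() with optimize_graph set to true; prepare_optimized_graph is a hypothetical helper name.

/* Hypothetical sketch of the graph pass ordering for the optimized variant. */
#include "gpu_node_graph.h"

static void prepare_optimized_graph(GPUNodeGraph *graph)
{
  /* Drop nodes that do not contribute to the graph outputs. */
  gpu_node_graph_prune_unused(graph);
  /* Rewrite uniform inputs as inline constants so the shader compiler can
   * constant-fold them; the NodeTree UBO is not generated for this variant. */
  gpu_node_graph_optimize(graph);
  /* Uniform-attribute finalization still runs afterwards. */
  gpu_node_graph_finalize_uniform_attrs(graph);
}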
diff --git a/source/blender/gpu/intern/gpu_node_graph.h b/source/blender/gpu/intern/gpu_node_graph.h
index 085620b30e4..75ca05ffaea 100644
--- a/source/blender/gpu/intern/gpu_node_graph.h
+++ b/source/blender/gpu/intern/gpu_node_graph.h
@@ -179,6 +179,21 @@ typedef struct GPUNodeGraph {
void gpu_node_graph_prune_unused(GPUNodeGraph *graph);
void gpu_node_graph_finalize_uniform_attrs(GPUNodeGraph *graph);
+
+/**
+ * Optimize node graph for optimized material shader path.
+ * Once the base material has been generated, we can modify the shader
+ * node graph to create one which will produce an optimally performing shader.
+ * This currently involves baking uniform data into constant data to enable
+ * aggressive constant folding by the compiler in order to reduce complexity and
+ * shader core memory pressure.
+ *
+ * NOTE: Graph optimizations will produce a shader which needs to be re-compiled
+ * more frequently; however, the default material pass will always exist to fall
+ * back on.
+ */
+void gpu_node_graph_optimize(GPUNodeGraph *graph);
+
/**
* Free intermediate node graph.
*/
diff --git a/source/blender/gpu/intern/gpu_shader_builder.cc b/source/blender/gpu/intern/gpu_shader_builder.cc
index 9b699c60126..3aa2963ecd0 100644
--- a/source/blender/gpu/intern/gpu_shader_builder.cc
+++ b/source/blender/gpu/intern/gpu_shader_builder.cc
@@ -45,7 +45,7 @@ void ShaderBuilder::init()
ghost_context_ = GHOST_CreateOpenGLContext(ghost_system_, glSettings);
GHOST_ActivateOpenGLContext(ghost_context_);
- gpu_context_ = GPU_context_create(nullptr);
+ gpu_context_ = GPU_context_create(nullptr, ghost_context_);
GPU_init();
}
diff --git a/source/blender/gpu/intern/gpu_shader_interface.cc b/source/blender/gpu/intern/gpu_shader_interface.cc
index 6f43b379d31..d9e5e066fea 100644
--- a/source/blender/gpu/intern/gpu_shader_interface.cc
+++ b/source/blender/gpu/intern/gpu_shader_interface.cc
@@ -22,8 +22,8 @@ ShaderInterface::ShaderInterface() = default;
ShaderInterface::~ShaderInterface()
{
/* Free memory used by name_buffer. */
- MEM_freeN(name_buffer_);
- MEM_freeN(inputs_);
+ MEM_SAFE_FREE(name_buffer_);
+ MEM_SAFE_FREE(inputs_);
}
static void sort_input_list(MutableSpan<ShaderInput> dst)
diff --git a/source/blender/gpu/metal/mtl_backend.hh b/source/blender/gpu/metal/mtl_backend.hh
index 214a5d738a9..082fab24ba4 100644
--- a/source/blender/gpu/metal/mtl_backend.hh
+++ b/source/blender/gpu/metal/mtl_backend.hh
@@ -63,7 +63,7 @@ class MTLBackend : public GPUBackend {
/* MTL Allocators need to be implemented in separate .mm files, due to allocation of Objective-C
* objects. */
- Context *context_alloc(void *ghost_window) override;
+ Context *context_alloc(void *ghost_window, void *ghost_context) override;
Batch *batch_alloc() override;
DrawList *drawlist_alloc(int list_length) override;
FrameBuffer *framebuffer_alloc(const char *name) override;
diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm
index ec9e8ab4d15..2ca1fd3f3d0 100644
--- a/source/blender/gpu/metal/mtl_backend.mm
+++ b/source/blender/gpu/metal/mtl_backend.mm
@@ -8,8 +8,11 @@
#include "gpu_backend.hh"
#include "mtl_backend.hh"
+#include "mtl_batch.hh"
#include "mtl_context.hh"
+#include "mtl_drawlist.hh"
#include "mtl_framebuffer.hh"
+#include "mtl_immediate.hh"
#include "mtl_index_buffer.hh"
#include "mtl_query.hh"
#include "mtl_shader.hh"
@@ -37,21 +40,21 @@ void MTLBackend::samplers_update(){
/* Placeholder -- Handled in MTLContext. */
};
-Context *MTLBackend::context_alloc(void *ghost_window)
+Context *MTLBackend::context_alloc(void *ghost_window, void *ghost_context)
{
- return new MTLContext(ghost_window);
+ return new MTLContext(ghost_window, ghost_context);
};
Batch *MTLBackend::batch_alloc()
{
- /* TODO(Metal): Implement MTLBatch. */
- return nullptr;
+ /* TODO(Metal): Full MTLBatch implementation. */
+ return new MTLBatch();
};
DrawList *MTLBackend::drawlist_alloc(int list_length)
{
- /* TODO(Metal): Implement MTLDrawList. */
- return nullptr;
+ /* TODO(Metal): Full MTLDrawList implementation. */
+ return new MTLDrawList(list_length);
};
FrameBuffer *MTLBackend::framebuffer_alloc(const char *name)
diff --git a/source/blender/gpu/metal/mtl_batch.hh b/source/blender/gpu/metal/mtl_batch.hh
new file mode 100644
index 00000000000..66603dabd15
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_batch.hh
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * GPU geometry batch
+ * Contains VAOs + VBOs + Shader representing a drawable entity.
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+
+#include "gpu_batch_private.hh"
+
+namespace blender {
+namespace gpu {
+
+
+/* Pass-through MTLBatch. TODO(Metal): Implement. */
+class MTLBatch : public Batch {
+ public:
+ void draw(int v_first, int v_count, int i_first, int i_count) override {
+
+ }
+
+ void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override {
+
+ }
+
+ void multi_draw_indirect(GPUStorageBuf *indirect_buf,
+ int count,
+ intptr_t offset,
+ intptr_t stride) override {
+
+ }
+ MEM_CXX_CLASS_ALLOC_FUNCS("MTLBatch");
+};
+
+} // namespace gpu
+} // namespace blender
diff --git a/source/blender/gpu/metal/mtl_command_buffer.mm b/source/blender/gpu/metal/mtl_command_buffer.mm
index d2936e8e91f..a9cabbb111f 100644
--- a/source/blender/gpu/metal/mtl_command_buffer.mm
+++ b/source/blender/gpu/metal/mtl_command_buffer.mm
@@ -54,6 +54,7 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin()
MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init];
desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
desc.retainedReferences = YES;
+ BLI_assert(context_.queue != nil);
active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc];
}
else {
@@ -611,40 +612,187 @@ void MTLRenderPassState::bind_vertex_sampler(MTLSamplerBinding &sampler_binding,
bool use_argument_buffer_for_samplers,
uint slot)
{
- /* TODO(Metal): Implement RenderCommandEncoder vertex sampler binding utility. This will be
- * implemented alongside MTLShader. */
+ /* Range check. */
+ const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface();
+ BLI_assert(slot >= 0);
+ BLI_assert(slot <= shader_interface->get_max_texture_index());
+ BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS);
+ UNUSED_VARS_NDEBUG(shader_interface);
+
+ /* If sampler state has not changed for the given slot, we do not need to fetch. */
+ if (this->cached_vertex_sampler_state_bindings[slot].sampler_state == nil ||
+ !(this->cached_vertex_sampler_state_bindings[slot].binding_state == sampler_binding.state) ||
+ use_argument_buffer_for_samplers) {
+
+ id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ?
+ ctx.get_default_sampler_state() :
+ ctx.get_sampler_from_state(sampler_binding.state);
+ if (!use_argument_buffer_for_samplers) {
+ /* Update binding and cached state. */
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ BLI_assert(rec != nil);
+ [rec setVertexSamplerState:sampler_state atIndex:slot];
+ this->cached_vertex_sampler_state_bindings[slot].binding_state = sampler_binding.state;
+ this->cached_vertex_sampler_state_bindings[slot].sampler_state = sampler_state;
+ }
+
+ /* Flag last binding type. */
+ this->cached_vertex_sampler_state_bindings[slot].is_arg_buffer_binding =
+ use_argument_buffer_for_samplers;
+
+ /* Always assign to argument buffer samplers binding array - Efficiently ensures the value in
+ * the samplers array is always up to date. */
+ ctx.samplers_.mtl_sampler[slot] = sampler_state;
+ ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state;
+ }
}
void MTLRenderPassState::bind_fragment_sampler(MTLSamplerBinding &sampler_binding,
bool use_argument_buffer_for_samplers,
uint slot)
{
- /* TODO(Metal): Implement RenderCommandEncoder fragment sampler binding utility. This will be
- * implemented alongside MTLShader. */
+ /* Range check. */
+ const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface();
+ BLI_assert(slot >= 0);
+ BLI_assert(slot <= shader_interface->get_max_texture_index());
+ BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS);
+ UNUSED_VARS_NDEBUG(shader_interface);
+
+ /* If sampler state has not changed for the given slot, we do not need to fetch. */
+ if (this->cached_fragment_sampler_state_bindings[slot].sampler_state == nil ||
+ !(this->cached_fragment_sampler_state_bindings[slot].binding_state ==
+ sampler_binding.state) ||
+ use_argument_buffer_for_samplers) {
+
+ id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ?
+ ctx.get_default_sampler_state() :
+ ctx.get_sampler_from_state(sampler_binding.state);
+ if (!use_argument_buffer_for_samplers) {
+ /* Update binding and cached state. */
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ BLI_assert(rec != nil);
+ [rec setFragmentSamplerState:sampler_state atIndex:slot];
+ this->cached_fragment_sampler_state_bindings[slot].binding_state = sampler_binding.state;
+ this->cached_fragment_sampler_state_bindings[slot].sampler_state = sampler_state;
+ }
+
+ /* Flag last binding type */
+ this->cached_fragment_sampler_state_bindings[slot].is_arg_buffer_binding =
+ use_argument_buffer_for_samplers;
+
+ /* Always assign to argument buffer samplers binding array - Efficiently ensures the value in
+ * the samplers array is always up to date. */
+ ctx.samplers_.mtl_sampler[slot] = sampler_state;
+ ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state;
+ }
}
void MTLRenderPassState::bind_vertex_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index)
{
- /* TODO(Metal): Implement RenderCommandEncoder vertex buffer binding utility. This will be
- * implemented alongside the full MTLMemoryManager. */
+ BLI_assert(index >= 0);
+ BLI_assert(buffer_offset >= 0);
+ BLI_assert(buffer != nil);
+
+ BufferBindingCached &current_vert_ubo_binding = this->cached_vertex_buffer_bindings[index];
+ if (current_vert_ubo_binding.offset != buffer_offset ||
+ current_vert_ubo_binding.metal_buffer != buffer || current_vert_ubo_binding.is_bytes) {
+
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ BLI_assert(rec != nil);
+
+ if (current_vert_ubo_binding.metal_buffer == buffer) {
+ /* If buffer is the same, but offset has changed. */
+ [rec setVertexBufferOffset:buffer_offset atIndex:index];
+ }
+ else {
+ /* Bind Vertex Buffer. */
+ [rec setVertexBuffer:buffer offset:buffer_offset atIndex:index];
+ }
+
+ /* Update Bind-state cache. */
+ this->cached_vertex_buffer_bindings[index].is_bytes = false;
+ this->cached_vertex_buffer_bindings[index].metal_buffer = buffer;
+ this->cached_vertex_buffer_bindings[index].offset = buffer_offset;
+ }
}
void MTLRenderPassState::bind_fragment_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index)
{
- /* TODO(Metal): Implement RenderCommandEncoder fragment buffer binding utility. This will be
- * implemented alongside the full MTLMemoryManager. */
+ BLI_assert(index >= 0);
+ BLI_assert(buffer_offset >= 0);
+ BLI_assert(buffer != nil);
+
+ BufferBindingCached &current_frag_ubo_binding = this->cached_fragment_buffer_bindings[index];
+ if (current_frag_ubo_binding.offset != buffer_offset ||
+ current_frag_ubo_binding.metal_buffer != buffer || current_frag_ubo_binding.is_bytes) {
+
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ BLI_assert(rec != nil);
+
+ if (current_frag_ubo_binding.metal_buffer == buffer) {
+ /* If buffer is the same, but offset has changed. */
+ [rec setFragmentBufferOffset:buffer_offset atIndex:index];
+ }
+ else {
+ /* Bind Fragment Buffer */
+ [rec setFragmentBuffer:buffer offset:buffer_offset atIndex:index];
+ }
+
+ /* Update Bind-state cache */
+ this->cached_fragment_buffer_bindings[index].is_bytes = false;
+ this->cached_fragment_buffer_bindings[index].metal_buffer = buffer;
+ this->cached_fragment_buffer_bindings[index].offset = buffer_offset;
+ }
}
void MTLRenderPassState::bind_vertex_bytes(void *bytes, uint length, uint index)
{
- /* TODO(Metal): Implement RenderCommandEncoder vertex bytes binding utility. This will be
- * implemented alongside the full MTLMemoryManager. */
+ /* Bytes always updated as source data may have changed. */
+ BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ BLI_assert(length > 0);
+ BLI_assert(bytes != nullptr);
+
+ if (length < MTL_MAX_SET_BYTES_SIZE) {
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ [rec setVertexBytes:bytes length:length atIndex:index];
+ }
+ else {
+ /* We have run over the setBytes limit, bind buffer instead. */
+ MTLTemporaryBuffer range =
+ ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256);
+ memcpy(range.data, bytes, length);
+ this->bind_vertex_buffer(range.metal_buffer, range.buffer_offset, index);
+ }
+
+ /* Update Bind-state cache */
+ this->cached_vertex_buffer_bindings[index].is_bytes = true;
+ this->cached_vertex_buffer_bindings[index].metal_buffer = nil;
+ this->cached_vertex_buffer_bindings[index].offset = -1;
}
void MTLRenderPassState::bind_fragment_bytes(void *bytes, uint length, uint index)
{
- /* TODO(Metal): Implement RenderCommandEncoder fragment bytes binding utility. This will be
- * implemented alongside the full MTLMemoryManager. */
+ /* Bytes always updated as source data may have changed. */
+ BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ BLI_assert(length > 0);
+ BLI_assert(bytes != nullptr);
+
+ if (length < MTL_MAX_SET_BYTES_SIZE) {
+ id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
+ [rec setFragmentBytes:bytes length:length atIndex:index];
+ }
+ else {
+ /* We have run over the setBytes limit, bind buffer instead. */
+ MTLTemporaryBuffer range =
+ ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256);
+ memcpy(range.data, bytes, length);
+ this->bind_fragment_buffer(range.metal_buffer, range.buffer_offset, index);
+ }
+
+ /* Update Bind-state cache. */
+ this->cached_fragment_buffer_bindings[index].is_bytes = true;
+ this->cached_fragment_buffer_bindings[index].metal_buffer = nil;
+ this->cached_fragment_buffer_bindings[index].offset = -1;
}
/** \} */
diff --git a/source/blender/gpu/metal/mtl_common.hh b/source/blender/gpu/metal/mtl_common.hh
index b6f9c0050a9..5c322efa3f9 100644
--- a/source/blender/gpu/metal/mtl_common.hh
+++ b/source/blender/gpu/metal/mtl_common.hh
@@ -3,7 +3,9 @@
#ifndef __MTL_COMMON
#define __MTL_COMMON
-// -- Renderer Options --
+/** -- Renderer Options -- */
+/* Number of frames over which rolling averages are taken. */
+#define MTL_FRAME_AVERAGE_COUNT 5
#define MTL_MAX_DRAWABLES 3
#define MTL_MAX_SET_BYTES_SIZE 4096
#define MTL_FORCE_WAIT_IDLE 0
diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh
index 577438667d6..5991fe2bc3e 100644
--- a/source/blender/gpu/metal/mtl_context.hh
+++ b/source/blender/gpu/metal/mtl_context.hh
@@ -12,6 +12,10 @@
#include "GPU_common_types.h"
#include "GPU_context.h"
+#include "intern/GHOST_Context.h"
+#include "intern/GHOST_ContextCGL.h"
+#include "intern/GHOST_Window.h"
+
#include "mtl_backend.hh"
#include "mtl_capabilities.hh"
#include "mtl_common.hh"
@@ -570,12 +574,44 @@ class MTLCommandBufferManager {
class MTLContext : public Context {
friend class MTLBackend;
+ friend class MTLRenderPassState;
+
+ public:
+ /* Swapchain and latency management. */
+ static std::atomic<int> max_drawables_in_flight;
+ static std::atomic<int64_t> avg_drawable_latency_us;
+ static int64_t frame_latency[MTL_FRAME_AVERAGE_COUNT];
+
+ public:
+ /* Shaders and Pipeline state. */
+ MTLContextGlobalShaderPipelineState pipeline_state;
+
+ /* Metal API Resource Handles. */
+ id<MTLCommandQueue> queue = nil;
+ id<MTLDevice> device = nil;
+
+#ifndef NDEBUG
+ /* Label for context debug name assignment. */
+ NSString *label = nil;
+#endif
+
+ /* Memory Management. */
+ MTLScratchBufferManager memory_manager;
+ static MTLBufferPool global_memory_manager;
+
+ /* CommandBuffer managers. */
+ MTLCommandBufferManager main_command_buffer;
private:
- /* Null buffers for empty/uninitialized bindings.
- * Null attribute buffer follows default attribute format of OpenGL Back-end. */
- id<MTLBuffer> null_buffer_; /* All zero's. */
- id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */
+ /* Parent Context. */
+ GHOST_ContextCGL *ghost_context_;
+
+ /* Render Passes and Framebuffers. */
+ id<MTLTexture> default_fbo_mtltexture_ = nil;
+ gpu::MTLTexture *default_fbo_gputexture_ = nullptr;
+
+ /* Depth-stencil state cache. */
+ blender::Map<MTLContextDepthStencilState, id<MTLDepthStencilState>> depth_stencil_state_cache;
/* Compute and specialization caches. */
MTLContextTextureUtils texture_utils_;
@@ -601,23 +637,20 @@ class MTLContext : public Context {
gpu::MTLBuffer *visibility_buffer_ = nullptr;
bool visibility_is_dirty_ = false;
- public:
- /* Shaders and Pipeline state. */
- MTLContextGlobalShaderPipelineState pipeline_state;
-
- /* Metal API Resource Handles. */
- id<MTLCommandQueue> queue = nil;
- id<MTLDevice> device = nil;
-
- /* Memory Management */
- MTLScratchBufferManager memory_manager;
- static MTLBufferPool global_memory_manager;
+ /* Null buffers for empty/uninitialized bindings.
+ * Null attribute buffer follows the default attribute format of the OpenGL backend. */
+ id<MTLBuffer> null_buffer_; /* All zeros. */
+ id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0, 0.0, 0.0, 1.0). */
- /* CommandBuffer managers. */
- MTLCommandBufferManager main_command_buffer;
+ /** Dummy resources. */
+ /* Maximum of 32 texture types, though most combinations are invalid. */
+ gpu::MTLTexture *dummy_textures_[GPU_TEXTURE_BUFFER] = {nullptr};
+ GPUVertFormat dummy_vertformat_;
+ GPUVertBuf *dummy_verts_ = nullptr;
+ public:
/* GPUContext interface. */
- MTLContext(void *ghost_window);
+ MTLContext(void *ghost_window, void *ghost_context);
~MTLContext();
static void check_error(const char *info);
@@ -673,6 +706,35 @@ class MTLContext : public Context {
void pipeline_state_init();
MTLShader *get_active_shader();
+ /* These functions ensure that the current RenderCommandEncoder has
+ * the correct global state assigned. This should be called prior
+ * to every draw call, to ensure that all state is applied and up
+ * to date. We handle:
+ *
+ * - Buffer bindings (Vertex buffers, Uniforms, UBOs, transform feedback)
+ * - Texture bindings
+ * - Sampler bindings (+ argument buffer bindings)
+ * - Dynamic Render pipeline state (on encoder)
+ * - Baking Pipeline State Objects (PSOs) for current shader, based
+ * on final pipeline state.
+ *
+ * `ensure_render_pipeline_state` will return false if the state is
+ * invalid and cannot be applied. This should cancel a draw call. */
+ bool ensure_render_pipeline_state(MTLPrimitiveType prim_type);
+ bool ensure_uniform_buffer_bindings(
+ id<MTLRenderCommandEncoder> rec,
+ const MTLShaderInterface *shader_interface,
+ const MTLRenderPipelineStateInstance *pipeline_state_instance);
+ void ensure_texture_bindings(id<MTLRenderCommandEncoder> rec,
+ MTLShaderInterface *shader_interface,
+ const MTLRenderPipelineStateInstance *pipeline_state_instance);
+ void ensure_depth_stencil_state(MTLPrimitiveType prim_type);
+
+ id<MTLBuffer> get_null_buffer();
+ id<MTLBuffer> get_null_attribute_buffer();
+ gpu::MTLTexture *get_dummy_texture(eGPUTextureType type);
+ void free_dummy_resources();
+
/* State assignment. */
void set_viewport(int origin_x, int origin_y, int width, int height);
void set_scissor(int scissor_x, int scissor_y, int scissor_width, int scissor_height);
@@ -720,9 +782,37 @@ class MTLContext : public Context {
{
return MTLContext::global_memory_manager;
}
- /* Uniform Buffer Bindings to command encoders. */
- id<MTLBuffer> get_null_buffer();
- id<MTLBuffer> get_null_attribute_buffer();
+
+ /* Swapchain and latency management. */
+ static void latency_resolve_average(int64_t frame_latency_us)
+ {
+ int64_t avg = 0;
+ int64_t frame_c = 0;
+ for (int i = MTL_FRAME_AVERAGE_COUNT - 1; i > 0; i--) {
+ MTLContext::frame_latency[i] = MTLContext::frame_latency[i - 1];
+ avg += MTLContext::frame_latency[i];
+ frame_c += (MTLContext::frame_latency[i] > 0) ? 1 : 0;
+ }
+ MTLContext::frame_latency[0] = frame_latency_us;
+ avg += MTLContext::frame_latency[0];
+ if (frame_c > 0) {
+ avg /= frame_c;
+ }
+ else {
+ avg = 0;
+ }
+ MTLContext::avg_drawable_latency_us = avg;
+ }
+
+ private:
+ void set_ghost_context(GHOST_ContextHandle ghostCtxHandle);
+ void set_ghost_window(GHOST_WindowHandle ghostWinHandle);
};
+/* GHOST Context callback and present. */
+void present(MTLRenderPassDescriptor *blit_descriptor,
+ id<MTLRenderPipelineState> blit_pso,
+ id<MTLTexture> swapchain_texture,
+ id<CAMetalDrawable> drawable);
+
} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm
index 1302cf0dabd..a89339d0d14 100644
--- a/source/blender/gpu/metal/mtl_context.mm
+++ b/source/blender/gpu/metal/mtl_context.mm
@@ -5,13 +5,29 @@
*/
#include "mtl_context.hh"
#include "mtl_debug.hh"
+#include "mtl_framebuffer.hh"
+#include "mtl_immediate.hh"
+#include "mtl_memory.hh"
+#include "mtl_primitive.hh"
#include "mtl_shader.hh"
#include "mtl_shader_interface.hh"
#include "mtl_state.hh"
+#include "mtl_uniform_buffer.hh"
#include "DNA_userdef_types.h"
#include "GPU_capabilities.h"
+#include "GPU_matrix.h"
+#include "GPU_shader.h"
+#include "GPU_texture.h"
+#include "GPU_uniform_buffer.h"
+#include "GPU_vertex_buffer.h"
+#include "intern/gpu_matrix_private.h"
+
+#include "PIL_time.h"
+
+#include <fstream>
+#include <string>
using namespace blender;
using namespace blender::gpu;
@@ -21,21 +37,118 @@ namespace blender::gpu {
/* Global memory manager. */
MTLBufferPool MTLContext::global_memory_manager;
+/* Swapchain and latency management. */
+std::atomic<int> MTLContext::max_drawables_in_flight = 0;
+std::atomic<int64_t> MTLContext::avg_drawable_latency_us = 0;
+int64_t MTLContext::frame_latency[MTL_FRAME_AVERAGE_COUNT] = {0};
+
+/* -------------------------------------------------------------------- */
+/** \name GHOST Context interaction.
+ * \{ */
+
+void MTLContext::set_ghost_context(GHOST_ContextHandle ghostCtxHandle)
+{
+ GHOST_Context *ghost_ctx = reinterpret_cast<GHOST_Context *>(ghostCtxHandle);
+ BLI_assert(ghost_ctx != nullptr);
+
+ /* Release old MTLTexture handle */
+ if (default_fbo_mtltexture_) {
+ [default_fbo_mtltexture_ release];
+ default_fbo_mtltexture_ = nil;
+ }
+
+ /* Release Framebuffer attachments */
+ MTLFrameBuffer *mtl_front_left = static_cast<MTLFrameBuffer *>(this->front_left);
+ MTLFrameBuffer *mtl_back_left = static_cast<MTLFrameBuffer *>(this->back_left);
+ mtl_front_left->remove_all_attachments();
+ mtl_back_left->remove_all_attachments();
+
+ GHOST_ContextCGL *ghost_cgl_ctx = dynamic_cast<GHOST_ContextCGL *>(ghost_ctx);
+ if (ghost_cgl_ctx != NULL) {
+ default_fbo_mtltexture_ = ghost_cgl_ctx->metalOverlayTexture();
+
+ MTL_LOG_INFO(
+ "Binding GHOST context CGL %p to GPU context %p. (Device: %p, queue: %p, texture: %p)\n",
+ ghost_cgl_ctx,
+ this,
+ this->device,
+ this->queue,
+ default_fbo_gputexture_);
+
+ /* Check if the GHOST Context provides a default framebuffer: */
+ if (default_fbo_mtltexture_) {
+
+ /* Release old GPUTexture handle */
+ if (default_fbo_gputexture_) {
+ GPU_texture_free(wrap(static_cast<Texture *>(default_fbo_gputexture_)));
+ default_fbo_gputexture_ = nullptr;
+ }
+
+ /* Retain handle */
+ [default_fbo_mtltexture_ retain];
+
+ /*** Create front and back-buffers ***/
+ /* Create gpu::MTLTexture objects */
+ default_fbo_gputexture_ = new gpu::MTLTexture(
+ "MTL_BACKBUFFER", GPU_RGBA16F, GPU_TEXTURE_2D, default_fbo_mtltexture_);
+
+ /* Update framebuffers with new texture attachments */
+ mtl_front_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+ mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+#ifndef NDEBUG
+ this->label = default_fbo_mtltexture_.label;
+#endif
+ }
+ else {
+
+ /* Add default texture for cases where no other framebuffer is bound */
+ if (!default_fbo_gputexture_) {
+ default_fbo_gputexture_ = static_cast<gpu::MTLTexture *>(
+ unwrap(GPU_texture_create_2d(__func__, 16, 16, 1, GPU_RGBA16F, nullptr)));
+ }
+ mtl_back_left->add_color_attachment(default_fbo_gputexture_, 0, 0, 0);
+
+ MTL_LOG_INFO(
+ "-- Bound context %p for GPU context: %p is offscreen and does not have a default "
+ "framebuffer\n",
+ ghost_cgl_ctx,
+ this);
+#ifndef NDEBUG
+ this->label = @"Offscreen Metal Context";
+#endif
+ }
+ }
+ else {
+ MTL_LOG_INFO(
+ "[ERROR] Failed to bind GHOST context to MTLContext -- GHOST_ContextCGL is null "
+ "(GhostContext: %p, GhostContext_CGL: %p)\n",
+ ghost_ctx,
+ ghost_cgl_ctx);
+ BLI_assert(false);
+ }
+}
+
+void MTLContext::set_ghost_window(GHOST_WindowHandle ghostWinHandle)
+{
+ GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghostWinHandle);
+ this->set_ghost_context((GHOST_ContextHandle)(ghostWin ? ghostWin->getContext() : NULL));
+}
+
+/** \} */
+
/* -------------------------------------------------------------------- */
/** \name MTLContext
* \{ */
/* Placeholder functions */
-MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command_buffer(*this)
+MTLContext::MTLContext(void *ghost_window, void *ghost_context)
+ : memory_manager(*this), main_command_buffer(*this)
{
/* Init debug. */
debug::mtl_debug_init();
- /* Device creation.
- * TODO(Metal): This is a temporary initialization path to enable testing of features
- * and shader compilation tests. Future functionality should fetch the existing device
- * from GHOST_ContextCGL.mm. Plumbing to be updated in future. */
- this->device = MTLCreateSystemDefaultDevice();
+ /* Initialize render pass and framebuffer state. */
+ this->back_left = nullptr;
/* Initialize command buffer state. */
this->main_command_buffer.prepare();
@@ -47,10 +160,35 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command
is_inside_frame_ = false;
current_frame_index_ = 0;
- /* Prepare null data buffer */
+ /* Prepare null data buffer. */
null_buffer_ = nil;
null_attribute_buffer_ = nil;
+ /* Zero-initialize MTL textures. */
+ default_fbo_mtltexture_ = nil;
+ default_fbo_gputexture_ = nullptr;
+
+ /** Fetch the GHOST context and the Metal device/queue. */
+ ghost_window_ = ghost_window;
+ if (ghost_window_ && ghost_context == NULL) {
+ /* NOTE(Metal): Fetch ghost_context from ghost_window if it is not provided.
+ * Regardless of whether windowed or not, we need access to the GhostContext
+ * for presentation, and device/queue access. */
+ GHOST_Window *ghostWin = reinterpret_cast<GHOST_Window *>(ghost_window_);
+ ghost_context = (ghostWin ? ghostWin->getContext() : NULL);
+ }
+ BLI_assert(ghost_context);
+ this->ghost_context_ = static_cast<GHOST_ContextCGL *>(ghost_context);
+ this->queue = (id<MTLCommandQueue>)this->ghost_context_->metalCommandQueue();
+ this->device = (id<MTLDevice>)this->ghost_context_->metalDevice();
+ BLI_assert(this->queue);
+ BLI_assert(this->device);
+ [this->queue retain];
+ [this->device retain];
+
+ /* Register present callback. */
+ this->ghost_context_->metalRegisterPresentCallback(&present);
+
/* Create FrameBuffer handles. */
MTLFrameBuffer *mtl_front_left = new MTLFrameBuffer(this, "front_left");
MTLFrameBuffer *mtl_back_left = new MTLFrameBuffer(this, "back_left");
@@ -66,6 +204,7 @@ MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command
/* Initialize Metal modules. */
this->memory_manager.init();
this->state_manager = new MTLStateManager(this);
+ this->imm = new MTLImmediate(this);
/* Ensure global memory manager is initialized. */
MTLContext::global_memory_manager.init(this->device);
@@ -99,9 +238,29 @@ MTLContext::~MTLContext()
this->end_frame();
}
}
+
+ /* Release Memory Manager */
+ this->get_scratchbuffer_manager().free();
+
/* Release update/blit shaders. */
this->get_texture_utils().cleanup();
+ /* Detach resource references */
+ GPU_texture_unbind_all();
+
+ /* Unbind UBOs */
+ for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
+ if (this->pipeline_state.ubo_bindings[i].bound &&
+ this->pipeline_state.ubo_bindings[i].ubo != nullptr) {
+ GPUUniformBuf *ubo = wrap(
+ static_cast<UniformBuf *>(this->pipeline_state.ubo_bindings[i].ubo));
+ GPU_uniformbuf_unbind(ubo);
+ }
+ }
+
+ /* Release Dummy resources */
+ this->free_dummy_resources();
+
/* Release Sampler States. */
for (int i = 0; i < GPU_SAMPLER_MAX; i++) {
if (sampler_state_cache_[i] != nil) {
@@ -109,12 +268,28 @@ MTLContext::~MTLContext()
sampler_state_cache_[i] = nil;
}
}
+
+ /* Empty cached sampler argument buffers. */
+ for (auto entry : cached_sampler_buffers_.values()) {
+ entry->free();
+ }
+ cached_sampler_buffers_.clear();
+
+ /* Free null buffers. */
if (null_buffer_) {
[null_buffer_ release];
}
if (null_attribute_buffer_) {
[null_attribute_buffer_ release];
}
+
+ /* Free Metal objects. */
+ if (this->queue) {
+ [this->queue release];
+ }
+ if (this->device) {
+ [this->device release];
+ }
}
void MTLContext::begin_frame()
@@ -146,20 +321,49 @@ void MTLContext::check_error(const char *info)
void MTLContext::activate()
{
- /* TODO(Metal): Implement. */
+ /* Make sure no other context is already bound to this thread. */
+ BLI_assert(is_active_ == false);
+ is_active_ = true;
+ thread_ = pthread_self();
+
+ /* Re-apply ghost window/context for resizing */
+ if (ghost_window_) {
+ this->set_ghost_window((GHOST_WindowHandle)ghost_window_);
+ }
+ else if (ghost_context_) {
+ this->set_ghost_context((GHOST_ContextHandle)ghost_context_);
+ }
+
+ /* Reset UBO bind state. */
+ for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
+ if (this->pipeline_state.ubo_bindings[i].bound &&
+ this->pipeline_state.ubo_bindings[i].ubo != nullptr) {
+ this->pipeline_state.ubo_bindings[i].bound = false;
+ this->pipeline_state.ubo_bindings[i].ubo = nullptr;
+ }
+ }
+
+ /* Ensure imm active. */
+ immActivate();
}
+
void MTLContext::deactivate()
{
- /* TODO(Metal): Implement. */
+ BLI_assert(this->is_active_on_thread());
+ /* Flush context on deactivate. */
+ this->flush();
+ is_active_ = false;
+ immDeactivate();
}
void MTLContext::flush()
{
- /* TODO(Metal): Implement. */
+ this->main_command_buffer.submit(false);
}
+
void MTLContext::finish()
{
- /* TODO(Metal): Implement. */
+ this->main_command_buffer.submit(true);
}
void MTLContext::memory_statistics_get(int *total_mem, int *free_mem)
@@ -200,10 +404,8 @@ id<MTLRenderCommandEncoder> MTLContext::ensure_begin_render_pass()
/* Ensure command buffer workload submissions are optimal --
* Though do not split a batch mid-IMM recording. */
- /* TODO(Metal): Add IMM Check once MTLImmediate has been implemented. */
- if (this->main_command_buffer.do_break_submission()
- // && !((MTLImmediate *)(this->imm))->imm_is_recording()
- ) {
+ if (this->main_command_buffer.do_break_submission() &&
+ !((MTLImmediate *)(this->imm))->imm_is_recording()) {
this->flush();
}
@@ -294,6 +496,72 @@ id<MTLBuffer> MTLContext::get_null_attribute_buffer()
return null_attribute_buffer_;
}
+gpu::MTLTexture *MTLContext::get_dummy_texture(eGPUTextureType type)
+{
+  /* Texture types start at 1 and go to 32 (inclusive); decrement by 1 to remap to 0..31. */
+ gpu::MTLTexture *dummy_tex = dummy_textures_[type - 1];
+ if (dummy_tex != nullptr) {
+ return dummy_tex;
+ }
+ else {
+ GPUTexture *tex = nullptr;
+ switch (type) {
+ case GPU_TEXTURE_1D:
+ tex = GPU_texture_create_1d("Dummy 1D", 128, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_1D_ARRAY:
+ tex = GPU_texture_create_1d_array("Dummy 1DArray", 128, 1, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_2D:
+ tex = GPU_texture_create_2d("Dummy 2D", 128, 128, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_2D_ARRAY:
+ tex = GPU_texture_create_2d_array("Dummy 2DArray", 128, 128, 1, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_3D:
+ tex = GPU_texture_create_3d(
+ "Dummy 3D", 128, 128, 1, 1, GPU_RGBA8, GPU_DATA_UBYTE, nullptr);
+ break;
+ case GPU_TEXTURE_CUBE:
+ tex = GPU_texture_create_cube("Dummy Cube", 128, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_CUBE_ARRAY:
+ tex = GPU_texture_create_cube_array("Dummy CubeArray", 128, 1, 1, GPU_RGBA8, nullptr);
+ break;
+ case GPU_TEXTURE_BUFFER:
+ if (!dummy_verts_) {
+ GPU_vertformat_clear(&dummy_vertformat_);
+ GPU_vertformat_attr_add(&dummy_vertformat_, "dummy", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
+ dummy_verts_ = GPU_vertbuf_create_with_format_ex(&dummy_vertformat_, GPU_USAGE_STATIC);
+ GPU_vertbuf_data_alloc(dummy_verts_, 64);
+ }
+ tex = GPU_texture_create_from_vertbuf("Dummy TextureBuffer", dummy_verts_);
+ break;
+ default:
+ BLI_assert_msg(false, "Unrecognised texture type");
+ return nullptr;
+ }
+ gpu::MTLTexture *metal_tex = static_cast<gpu::MTLTexture *>(reinterpret_cast<Texture *>(tex));
+ dummy_textures_[type - 1] = metal_tex;
+ return metal_tex;
+ }
+ return nullptr;
+}
+
+void MTLContext::free_dummy_resources()
+{
+ for (int tex = 0; tex < GPU_TEXTURE_BUFFER; tex++) {
+ if (dummy_textures_[tex]) {
+ GPU_texture_free(
+ reinterpret_cast<GPUTexture *>(static_cast<Texture *>(dummy_textures_[tex])));
+ dummy_textures_[tex] = nullptr;
+ }
+ }
+ if (dummy_verts_) {
+ GPU_vertbuf_discard(dummy_verts_);
+ }
+}
+
/** \} */
/* -------------------------------------------------------------------- */
@@ -440,6 +708,755 @@ void MTLContext::set_scissor_enabled(bool scissor_enabled)
/** \} */
/* -------------------------------------------------------------------- */
+/** \name Command Encoder and pipeline state
+ * These utilities ensure that all of the globally bound resources and state have been
+ * correctly encoded within the current RenderCommandEncoder. This involves managing
+ * buffer bindings, texture bindings, depth stencil state and dynamic pipeline state.
+ *
+ * We will also trigger compilation of new PSOs where the input state has changed
+ * and a new permutation is required.
+ * All of this setup is required in order to perform a valid draw call.
+ * \{ */
+
+bool MTLContext::ensure_render_pipeline_state(MTLPrimitiveType mtl_prim_type)
+{
+ BLI_assert(this->pipeline_state.initialised);
+
+ /* Check if an active shader is bound. */
+ if (!this->pipeline_state.active_shader) {
+ MTL_LOG_WARNING("No Metal shader for bound GL shader\n");
+ return false;
+ }
+
+ /* Also ensure active shader is valid. */
+ if (!this->pipeline_state.active_shader->is_valid()) {
+ MTL_LOG_WARNING(
+ "Bound active shader is not valid (Missing/invalid implementation for Metal).\n", );
+ return false;
+ }
+
+ /* Apply global state. */
+ this->state_manager->apply_state();
+
+ /* Main command buffer tracks the current state of the render pass, based on bound
+ * MTLFrameBuffer. */
+ MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+ /* Debug Check: Ensure Framebuffer instance is not dirty. */
+ BLI_assert(!this->main_command_buffer.get_active_framebuffer()->get_dirty());
+
+ /* Fetch shader interface. */
+ MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
+ if (shader_interface == nullptr) {
+ MTL_LOG_WARNING("Bound active shader does not have a valid shader interface!\n", );
+ return false;
+ }
+
+ /* Fetch shader and bake valid PipelineStateObject (PSO) based on current
+ * shader and state combination. This PSO represents the final GPU-executable
+ * permutation of the shader. */
+ MTLRenderPipelineStateInstance *pipeline_state_instance =
+ this->pipeline_state.active_shader->bake_current_pipeline_state(
+ this, mtl_prim_type_to_topology_class(mtl_prim_type));
+ if (!pipeline_state_instance) {
+ MTL_LOG_ERROR("Failed to bake Metal pipeline state for shader: %s\n",
+ shader_interface->get_name());
+ return false;
+ }
+
+ bool result = false;
+ if (pipeline_state_instance->pso) {
+
+ /* Fetch render command encoder. A render pass should already be active.
+ * This will be NULL if invalid. */
+ id<MTLRenderCommandEncoder> rec =
+ this->main_command_buffer.get_active_render_command_encoder();
+ BLI_assert(rec);
+ if (rec == nil) {
+ MTL_LOG_ERROR("ensure_render_pipeline_state called while render pass is not active.\n");
+ return false;
+ }
+
+ /* Bind Render Pipeline State. */
+ BLI_assert(pipeline_state_instance->pso);
+ if (rps.bound_pso != pipeline_state_instance->pso) {
+ [rec setRenderPipelineState:pipeline_state_instance->pso];
+ rps.bound_pso = pipeline_state_instance->pso;
+ }
+
+ /** Ensure resource bindings. */
+ /* Texture Bindings. */
+ /* We will iterate through all texture bindings on the context and determine if any of the
+ * active slots match those in our shader interface. If so, textures will be bound. */
+ if (shader_interface->get_total_textures() > 0) {
+ this->ensure_texture_bindings(rec, shader_interface, pipeline_state_instance);
+ }
+
+ /* Transform feedback buffer binding. */
+    /* TODO(Metal): Include this code once MTLVertBuf is merged. We bind the vertex buffer to which
+ * transform feedback data will be written. */
+ // GPUVertBuf *tf_vbo =
+ // this->pipeline_state.active_shader->get_transform_feedback_active_buffer();
+ // if (tf_vbo != nullptr && pipeline_state_instance->transform_feedback_buffer_index >= 0) {
+
+ // /* Ensure primitive type is either GPU_LINES, GPU_TRIANGLES or GPU_POINT */
+ // BLI_assert(mtl_prim_type == MTLPrimitiveTypeLine ||
+ // mtl_prim_type == MTLPrimitiveTypeTriangle ||
+ // mtl_prim_type == MTLPrimitiveTypePoint);
+
+ // /* Fetch active transform feedback buffer from vertbuf */
+ // MTLVertBuf *tf_vbo_mtl = static_cast<MTLVertBuf *>(reinterpret_cast<VertBuf *>(tf_vbo));
+ // int tf_buffer_offset = 0;
+ // id<MTLBuffer> tf_buffer_mtl = tf_vbo_mtl->get_metal_buffer(&tf_buffer_offset);
+
+ // if (tf_buffer_mtl != nil && tf_buffer_offset >= 0) {
+ // [rec setVertexBuffer:tf_buffer_mtl
+ // offset:tf_buffer_offset
+ // atIndex:pipeline_state_instance->transform_feedback_buffer_index];
+ // printf("Successfully bound VBO: %p for transform feedback (MTL Buffer: %p)\n",
+ // tf_vbo_mtl,
+ // tf_buffer_mtl);
+ // }
+ // }
+
+ /* Matrix Bindings. */
+ /* This is now called upon shader bind. We may need to re-evaluate this though,
+     * as was done here to ensure uniform changes between draws were tracked.
+ * NOTE(Metal): We may be able to remove this. */
+ GPU_matrix_bind(reinterpret_cast<struct GPUShader *>(
+ static_cast<Shader *>(this->pipeline_state.active_shader)));
+
+ /* Bind Uniforms */
+ this->ensure_uniform_buffer_bindings(rec, shader_interface, pipeline_state_instance);
+
+ /* Bind Null attribute buffer, if needed. */
+ if (pipeline_state_instance->null_attribute_buffer_index >= 0) {
+ if (G.debug & G_DEBUG_GPU) {
+ MTL_LOG_INFO("Binding null attribute buffer at index: %d\n",
+ pipeline_state_instance->null_attribute_buffer_index);
+ }
+ rps.bind_vertex_buffer(this->get_null_attribute_buffer(),
+ 0,
+ pipeline_state_instance->null_attribute_buffer_index);
+ }
+
+ /** Dynamic Per-draw Render State on RenderCommandEncoder. */
+ /* State: Viewport. */
+ if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_VIEWPORT_FLAG) {
+ MTLViewport viewport;
+ viewport.originX = (double)this->pipeline_state.viewport_offset_x;
+ viewport.originY = (double)this->pipeline_state.viewport_offset_y;
+ viewport.width = (double)this->pipeline_state.viewport_width;
+ viewport.height = (double)this->pipeline_state.viewport_height;
+ viewport.znear = this->pipeline_state.depth_stencil_state.depth_range_near;
+ viewport.zfar = this->pipeline_state.depth_stencil_state.depth_range_far;
+ [rec setViewport:viewport];
+
+ this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+ ~MTL_PIPELINE_STATE_VIEWPORT_FLAG);
+ }
+
+ /* State: Scissor. */
+ if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_SCISSOR_FLAG) {
+
+ /* Get FrameBuffer associated with active RenderCommandEncoder. */
+ MTLFrameBuffer *render_fb = this->main_command_buffer.get_active_framebuffer();
+
+ MTLScissorRect scissor;
+ if (this->pipeline_state.scissor_enabled) {
+ scissor.x = this->pipeline_state.scissor_x;
+ scissor.y = this->pipeline_state.scissor_y;
+ scissor.width = this->pipeline_state.scissor_width;
+ scissor.height = this->pipeline_state.scissor_height;
+
+        /* Some scissor assignments exceed the bounds of the viewport due to implicitly added
+         * padding to the width/height -- clamp width/height. */
+ BLI_assert(scissor.x >= 0 && scissor.x < render_fb->get_width());
+ BLI_assert(scissor.y >= 0 && scissor.y < render_fb->get_height());
+ scissor.width = min_ii(scissor.width, render_fb->get_width() - scissor.x);
+ scissor.height = min_ii(scissor.height, render_fb->get_height() - scissor.y);
+ BLI_assert(scissor.width > 0 && (scissor.x + scissor.width <= render_fb->get_width()));
+        BLI_assert(scissor.height > 0 && (scissor.y + scissor.height <= render_fb->get_height()));
+ }
+ else {
+ /* Scissor is disabled, reset to default size as scissor state may have been previously
+ * assigned on this encoder. */
+ scissor.x = 0;
+ scissor.y = 0;
+ scissor.width = render_fb->get_width();
+ scissor.height = render_fb->get_height();
+ }
+
+ /* Scissor state can still be flagged as changed if it is toggled on and off, without
+ * parameters changing between draws. */
+ if (memcmp(&scissor, &rps.last_scissor_rect, sizeof(MTLScissorRect))) {
+ [rec setScissorRect:scissor];
+ rps.last_scissor_rect = scissor;
+ }
+ this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+ ~MTL_PIPELINE_STATE_SCISSOR_FLAG);
+ }
+
+ /* State: Face winding. */
+ if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_FRONT_FACING_FLAG) {
+      /* We need to invert the face winding in Metal, to account for the inverted-Y coordinate
+ * system. */
+ MTLWinding winding = (this->pipeline_state.front_face == GPU_CLOCKWISE) ?
+ MTLWindingClockwise :
+ MTLWindingCounterClockwise;
+ [rec setFrontFacingWinding:winding];
+ this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+ ~MTL_PIPELINE_STATE_FRONT_FACING_FLAG);
+ }
+
+ /* State: cullmode. */
+ if (this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_CULLMODE_FLAG) {
+
+ MTLCullMode mode = MTLCullModeNone;
+ if (this->pipeline_state.culling_enabled) {
+ switch (this->pipeline_state.cull_mode) {
+ case GPU_CULL_NONE:
+ mode = MTLCullModeNone;
+ break;
+ case GPU_CULL_FRONT:
+ mode = MTLCullModeFront;
+ break;
+ case GPU_CULL_BACK:
+ mode = MTLCullModeBack;
+ break;
+ default:
+ BLI_assert_unreachable();
+ break;
+ }
+ }
+ [rec setCullMode:mode];
+ this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags &
+ ~MTL_PIPELINE_STATE_CULLMODE_FLAG);
+ }
+
+ /* Pipeline state is now good. */
+ result = true;
+ }
+ return result;
+}
+
+/* Bind uniform buffers to an active render command encoder using the rendering state of the
+ * current context (active shader, bound UBOs). */
+bool MTLContext::ensure_uniform_buffer_bindings(
+ id<MTLRenderCommandEncoder> rec,
+ const MTLShaderInterface *shader_interface,
+ const MTLRenderPipelineStateInstance *pipeline_state_instance)
+{
+ /* Fetch Render Pass state. */
+ MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+  /* Shader-owned push constant block for uniforms. */
+ bool active_shader_changed = (rps.last_bound_shader_state.shader_ !=
+ this->pipeline_state.active_shader ||
+ rps.last_bound_shader_state.shader_ == nullptr ||
+ rps.last_bound_shader_state.pso_index_ !=
+ pipeline_state_instance->shader_pso_index);
+
+ const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
+ if (push_constant_block.size > 0) {
+
+    /* Fetch uniform buffer base binding index from pipeline_state_instance - The buffer index
+     * will be offset by the number of bound VBOs. */
+ uint32_t block_size = push_constant_block.size;
+ uint32_t buffer_index = pipeline_state_instance->base_uniform_buffer_index +
+ push_constant_block.buffer_index;
+
+ /* Only need to rebind block if push constants have been modified -- or if no data is bound for
+ * the current RenderCommandEncoder. */
+ if (this->pipeline_state.active_shader->get_push_constant_is_dirty() ||
+ active_shader_changed || !rps.cached_vertex_buffer_bindings[buffer_index].is_bytes ||
+ !rps.cached_fragment_buffer_bindings[buffer_index].is_bytes || true) {
+
+ /* Bind push constant data. */
+ BLI_assert(this->pipeline_state.active_shader->get_push_constant_data() != nullptr);
+ rps.bind_vertex_bytes(
+ this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index);
+ rps.bind_fragment_bytes(
+ this->pipeline_state.active_shader->get_push_constant_data(), block_size, buffer_index);
+
+      /* Clear dirty flag now that the push constant block has been rebound. */
+ this->pipeline_state.active_shader->push_constant_bindstate_mark_dirty(false);
+ }
+ }
+ rps.last_bound_shader_state.set(this->pipeline_state.active_shader,
+ pipeline_state_instance->shader_pso_index);
+
+ /* Bind Global GPUUniformBuffers */
+ /* Iterate through expected UBOs in the shader interface, and check if the globally bound ones
+ * match. This is used to support the gpu_uniformbuffer module, where the uniform data is global,
+ * and not owned by the shader instance. */
+ for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) {
+ const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index);
+
+ if (ubo.buffer_index >= 0) {
+
+ const uint32_t buffer_index = ubo.buffer_index;
+ int ubo_offset = 0;
+ id<MTLBuffer> ubo_buffer = nil;
+ int ubo_size = 0;
+
+ bool bind_dummy_buffer = false;
+ if (this->pipeline_state.ubo_bindings[buffer_index].bound) {
+
+ /* Fetch UBO global-binding properties from slot. */
+ ubo_offset = 0;
+ ubo_buffer = this->pipeline_state.ubo_bindings[buffer_index].ubo->get_metal_buffer(
+ &ubo_offset);
+ ubo_size = this->pipeline_state.ubo_bindings[buffer_index].ubo->get_size();
+
+        /* Use the shared null buffer if no Metal buffer is assigned -- this is an optimization
+         * to avoid allocating dedicated zero buffers. */
+ if (ubo_buffer == nil) {
+ bind_dummy_buffer = true;
+ }
+ else {
+ BLI_assert(ubo_buffer != nil);
+ BLI_assert(ubo_size > 0);
+
+ if (pipeline_state_instance->reflection_data_available) {
+ /* NOTE: While the vertex and fragment stages have different UBOs, the indices in each
+ * case will be the same for the same UBO.
+ * We also determine expected size and then ensure buffer of the correct size
+ * exists in one of the vertex/fragment shader binding tables. This path is used
+ * to verify that the size of the bound UBO matches what is expected in the shader. */
+ uint32_t expected_size =
+ (buffer_index <
+ pipeline_state_instance->buffer_bindings_reflection_data_vert.size()) ?
+ pipeline_state_instance->buffer_bindings_reflection_data_vert[buffer_index]
+ .size :
+ 0;
+ if (expected_size == 0) {
+ expected_size =
+ (buffer_index <
+ pipeline_state_instance->buffer_bindings_reflection_data_frag.size()) ?
+ pipeline_state_instance->buffer_bindings_reflection_data_frag[buffer_index]
+ .size :
+ 0;
+ }
+ BLI_assert_msg(
+ expected_size > 0,
+ "Shader interface expects UBO, but shader reflection data reports that it "
+ "is not present");
+
+ /* If ubo size is smaller than the size expected by the shader, we need to bind the
+ * dummy buffer, which will be big enough, to avoid an OOB error. */
+ if (ubo_size < expected_size) {
+ MTL_LOG_INFO(
+ "[Error][UBO] UBO (UBO Name: %s) bound at index: %d with size %d (Expected size "
+ "%d) (Shader Name: %s) is too small -- binding NULL buffer. This is likely an "
+ "over-binding, which is not used, but we need this to avoid validation "
+ "issues\n",
+ shader_interface->get_name_at_offset(ubo.name_offset),
+ buffer_index,
+ ubo_size,
+ expected_size,
+ shader_interface->get_name());
+ bind_dummy_buffer = true;
+ }
+ }
+ }
+ }
+ else {
+ MTL_LOG_INFO(
+ "[Warning][UBO] Shader '%s' expected UBO '%s' to be bound at buffer index: %d -- but "
+ "nothing was bound -- binding dummy buffer\n",
+ shader_interface->get_name(),
+ shader_interface->get_name_at_offset(ubo.name_offset),
+ buffer_index);
+ bind_dummy_buffer = true;
+ }
+
+ if (bind_dummy_buffer) {
+ /* Perform Dummy binding. */
+ ubo_offset = 0;
+ ubo_buffer = this->get_null_buffer();
+ ubo_size = [ubo_buffer length];
+ }
+
+ if (ubo_buffer != nil) {
+
+ uint32_t buffer_bind_index = pipeline_state_instance->base_uniform_buffer_index +
+ buffer_index;
+
+ /* Bind Vertex UBO. */
+ if (bool(ubo.stage_mask & ShaderStage::VERTEX)) {
+ BLI_assert(buffer_bind_index >= 0 &&
+ buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ rps.bind_vertex_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
+ }
+
+ /* Bind Fragment UBOs. */
+ if (bool(ubo.stage_mask & ShaderStage::FRAGMENT)) {
+ BLI_assert(buffer_bind_index >= 0 &&
+ buffer_bind_index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+ rps.bind_fragment_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
+ }
+ }
+ else {
+ MTL_LOG_WARNING(
+ "[UBO] Shader '%s' has UBO '%s' bound at buffer index: %d -- but MTLBuffer "
+ "is NULL!\n",
+ shader_interface->get_name(),
+ shader_interface->get_name_at_offset(ubo.name_offset),
+ buffer_index);
+ }
+ }
+ }
+ return true;
+}
+
+/* Ensure texture bindings are correct and up to date for current draw call. */
+void MTLContext::ensure_texture_bindings(
+ id<MTLRenderCommandEncoder> rec,
+ MTLShaderInterface *shader_interface,
+ const MTLRenderPipelineStateInstance *pipeline_state_instance)
+{
+ BLI_assert(shader_interface != nil);
+ BLI_assert(rec != nil);
+
+ /* Fetch Render Pass state. */
+ MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+ @autoreleasepool {
+ int vertex_arg_buffer_bind_index = -1;
+ int fragment_arg_buffer_bind_index = -1;
+
+ /* Argument buffers are used for samplers, when the limit of 16 is exceeded. */
+ bool use_argument_buffer_for_samplers = shader_interface->get_use_argument_buffer_for_samplers(
+ &vertex_arg_buffer_bind_index, &fragment_arg_buffer_bind_index);
+
+ /* Loop through expected textures in shader interface and resolve bindings with currently
+     * bound textures. */
+ for (const uint t : IndexRange(shader_interface->get_max_texture_index() + 1)) {
+ /* Ensure the bound texture is compatible with the shader interface. If the
+ * shader does not expect a texture to be bound for the current slot, we skip
+ * binding.
+ * NOTE: Global texture bindings may be left over from prior draw calls. */
+ const MTLShaderTexture &shader_texture_info = shader_interface->get_texture(t);
+ if (!shader_texture_info.used) {
+ /* Skip unused binding points if explicit indices are specified. */
+ continue;
+ }
+
+ int slot = shader_texture_info.slot_index;
+ if (slot >= 0 && slot < GPU_max_textures()) {
+ bool bind_dummy_texture = true;
+ if (this->pipeline_state.texture_bindings[slot].used) {
+ gpu::MTLTexture *bound_texture =
+ this->pipeline_state.texture_bindings[slot].texture_resource;
+ MTLSamplerBinding &bound_sampler = this->pipeline_state.sampler_bindings[slot];
+ BLI_assert(bound_texture);
+ BLI_assert(bound_sampler.used);
+
+ if (shader_texture_info.type == bound_texture->type_) {
+ /* Bind texture and sampler if the bound texture matches the type expected by the
+ * shader. */
+ id<MTLTexture> tex = bound_texture->get_metal_handle();
+
+ if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) {
+ rps.bind_vertex_texture(tex, slot);
+ rps.bind_vertex_sampler(bound_sampler, use_argument_buffer_for_samplers, slot);
+ }
+
+ if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) {
+ rps.bind_fragment_texture(tex, slot);
+ rps.bind_fragment_sampler(bound_sampler, use_argument_buffer_for_samplers, slot);
+ }
+
+ /* Texture state resolved, no need to bind dummy texture */
+ bind_dummy_texture = false;
+ }
+ else {
+ /* Texture type for bound texture (e.g. Texture2DArray) does not match what was
+ * expected in the shader interface. This is a problem and we will need to bind
+ * a dummy texture to ensure correct API usage. */
+ MTL_LOG_WARNING(
+ "(Shader '%s') Texture %p bound to slot %d is incompatible -- Wrong "
+ "texture target type. (Expecting type %d, actual type %d) (binding "
+ "name:'%s')(texture name:'%s')\n",
+ shader_interface->get_name(),
+ bound_texture,
+ slot,
+ shader_texture_info.type,
+ bound_texture->type_,
+ shader_interface->get_name_at_offset(shader_texture_info.name_offset),
+ bound_texture->get_name());
+ }
+ }
+ else {
+ MTL_LOG_WARNING(
+ "Shader '%s' expected texture to be bound to slot %d -- No texture was "
+ "bound. (name:'%s')\n",
+ shader_interface->get_name(),
+ slot,
+ shader_interface->get_name_at_offset(shader_texture_info.name_offset));
+ }
+
+ /* Bind Dummy texture -- will temporarily resolve validation issues while incorrect formats
+ * are provided -- as certain configurations may not need any binding. These issues should
+           * be fixed at the high level, if problems crop up. */
+ if (bind_dummy_texture) {
+ if (bool(shader_texture_info.stage_mask & ShaderStage::VERTEX)) {
+ rps.bind_vertex_texture(
+ get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot);
+
+ /* Bind default sampler state. */
+ MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
+ rps.bind_vertex_sampler(default_binding, use_argument_buffer_for_samplers, slot);
+ }
+ if (bool(shader_texture_info.stage_mask & ShaderStage::FRAGMENT)) {
+ rps.bind_fragment_texture(
+ get_dummy_texture(shader_texture_info.type)->get_metal_handle(), slot);
+
+ /* Bind default sampler state. */
+ MTLSamplerBinding default_binding = {true, DEFAULT_SAMPLER_STATE};
+ rps.bind_fragment_sampler(default_binding, use_argument_buffer_for_samplers, slot);
+ }
+ }
+ }
+ else {
+ MTL_LOG_WARNING(
+ "Shader %p expected texture to be bound to slot %d -- Slot exceeds the "
+ "hardware/API limit of '%d'. (name:'%s')\n",
+ this->pipeline_state.active_shader,
+ slot,
+ GPU_max_textures(),
+ shader_interface->get_name_at_offset(shader_texture_info.name_offset));
+ }
+ }
+
+ /* Construct and Bind argument buffer.
+ * NOTE(Metal): Samplers use an argument buffer when the limit of 16 samplers is exceeded. */
+ if (use_argument_buffer_for_samplers) {
+#ifndef NDEBUG
+ /* Debug check to validate each expected texture in the shader interface has a valid
+ * sampler object bound to the context. We will need all of these to be valid
+ * when constructing the sampler argument buffer. */
+ for (const uint i : IndexRange(shader_interface->get_max_texture_index() + 1)) {
+ const MTLShaderTexture &texture = shader_interface->get_texture(i);
+ if (texture.used) {
+ BLI_assert(this->samplers_.mtl_sampler[i] != nil);
+ }
+ }
+#endif
+
+ /* Check to ensure the buffer binding index for the argument buffer has been assigned.
+ * This PSO property will be set if we expect to use argument buffers, and the shader
+ * uses any amount of textures. */
+ BLI_assert(vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0);
+ if (vertex_arg_buffer_bind_index >= 0 || fragment_arg_buffer_bind_index >= 0) {
+ /* Offset binding index to be relative to the start of static uniform buffer binding slots.
+ * The first N slots, prior to `pipeline_state_instance->base_uniform_buffer_index` are
+ * used by vertex and index buffer bindings, and the number of buffers present will vary
+ * between PSOs. */
+ int arg_buffer_idx = (pipeline_state_instance->base_uniform_buffer_index +
+ vertex_arg_buffer_bind_index);
+        BLI_assert(arg_buffer_idx < 32);
+ id<MTLArgumentEncoder> argument_encoder = shader_interface->find_argument_encoder(
+ arg_buffer_idx);
+ if (argument_encoder == nil) {
+ argument_encoder = [pipeline_state_instance->vert
+ newArgumentEncoderWithBufferIndex:arg_buffer_idx];
+ shader_interface->insert_argument_encoder(arg_buffer_idx, argument_encoder);
+ }
+
+ /* Generate or Fetch argument buffer sampler configuration.
+       * NOTE(Metal): we need to base sampler counts off of the maximal texture
+       * index. This is not the most optimal, but in practice it is not a common
+       * use-case when argument buffers are required.
+       * This is because with explicit texture indices, the binding indices
+       * should match across draws, to allow the high level to optimize bind-points. */
+ gpu::MTLBuffer *encoder_buffer = nullptr;
+ this->samplers_.num_samplers = shader_interface->get_max_texture_index() + 1;
+
+ gpu::MTLBuffer **cached_smp_buffer_search = this->cached_sampler_buffers_.lookup_ptr(
+ this->samplers_);
+ if (cached_smp_buffer_search != nullptr) {
+ encoder_buffer = *cached_smp_buffer_search;
+ }
+ else {
+ /* Populate argument buffer with current global sampler bindings. */
+ int size = [argument_encoder encodedLength];
+ int alignment = max_uu([argument_encoder alignment], 256);
+ int size_align_delta = (size % alignment);
+ int aligned_alloc_size = ((alignment > 1) && (size_align_delta > 0)) ?
+ size + (alignment - (size % alignment)) :
+ size;
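+          /* e.g. an encoded length of 72 bytes with 256 byte alignment rounds up to 256. */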
+
+ /* Allocate buffer to store encoded sampler arguments. */
+ encoder_buffer = MTLContext::get_global_memory_manager().allocate(aligned_alloc_size,
+ true);
+ BLI_assert(encoder_buffer);
+ BLI_assert(encoder_buffer->get_metal_buffer());
+ [argument_encoder setArgumentBuffer:encoder_buffer->get_metal_buffer() offset:0];
+ [argument_encoder
+ setSamplerStates:this->samplers_.mtl_sampler
+ withRange:NSMakeRange(0, shader_interface->get_max_texture_index() + 1)];
+ encoder_buffer->flush();
+
+ /* Insert into cache. */
+ this->cached_sampler_buffers_.add_new(this->samplers_, encoder_buffer);
+ }
+
+ BLI_assert(encoder_buffer != nullptr);
+ int vert_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
+ vertex_arg_buffer_bind_index);
+ rps.bind_vertex_buffer(encoder_buffer->get_metal_buffer(), 0, vert_buffer_index);
+
+      /* Fragment shader shares its argument buffer binding with the vertex shader, so no need to
+ * re-encode. We can use the same argument buffer. */
+ if (fragment_arg_buffer_bind_index >= 0) {
+        BLI_assert(fragment_arg_buffer_bind_index >= 0);
+ int frag_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
+ fragment_arg_buffer_bind_index);
+ rps.bind_fragment_buffer(encoder_buffer->get_metal_buffer(), 0, frag_buffer_index);
+ }
+ }
+ }
+ }
+}
+
+/* Encode latest depth-stencil state. */
+void MTLContext::ensure_depth_stencil_state(MTLPrimitiveType prim_type)
+{
+ /* Check if we need to update state. */
+ if (!(this->pipeline_state.dirty_flags & MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG)) {
+ return;
+ }
+
+ /* Fetch render command encoder. */
+ id<MTLRenderCommandEncoder> rec = this->main_command_buffer.get_active_render_command_encoder();
+ BLI_assert(rec);
+
+ /* Fetch Render Pass state. */
+ MTLRenderPassState &rps = this->main_command_buffer.get_render_pass_state();
+
+ /** Prepare Depth-stencil state based on current global pipeline state. */
+ MTLFrameBuffer *fb = this->get_current_framebuffer();
+ bool hasDepthTarget = fb->has_depth_attachment();
+ bool hasStencilTarget = fb->has_stencil_attachment();
+
+ if (hasDepthTarget || hasStencilTarget) {
+ /* Update FrameBuffer State. */
+ this->pipeline_state.depth_stencil_state.has_depth_target = hasDepthTarget;
+ this->pipeline_state.depth_stencil_state.has_stencil_target = hasStencilTarget;
+
+ /* Check if current MTLContextDepthStencilState maps to an existing state object in
+ * the Depth-stencil state cache. */
+ id<MTLDepthStencilState> ds_state = nil;
+ id<MTLDepthStencilState> *depth_stencil_state_lookup =
+ this->depth_stencil_state_cache.lookup_ptr(this->pipeline_state.depth_stencil_state);
+
+ /* If not, populate DepthStencil state descriptor. */
+ if (depth_stencil_state_lookup == nullptr) {
+
+ MTLDepthStencilDescriptor *ds_state_desc = [[[MTLDepthStencilDescriptor alloc] init]
+ autorelease];
+
+ if (hasDepthTarget) {
+ ds_state_desc.depthWriteEnabled =
+ this->pipeline_state.depth_stencil_state.depth_write_enable;
+ ds_state_desc.depthCompareFunction =
+ this->pipeline_state.depth_stencil_state.depth_test_enabled ?
+ this->pipeline_state.depth_stencil_state.depth_function :
+ MTLCompareFunctionAlways;
+ }
+
+ if (hasStencilTarget) {
+ ds_state_desc.backFaceStencil.readMask =
+ this->pipeline_state.depth_stencil_state.stencil_read_mask;
+ ds_state_desc.backFaceStencil.writeMask =
+ this->pipeline_state.depth_stencil_state.stencil_write_mask;
+ ds_state_desc.backFaceStencil.stencilFailureOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_back_stencil_fail;
+ ds_state_desc.backFaceStencil.depthFailureOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_back_depth_fail;
+ ds_state_desc.backFaceStencil.depthStencilPassOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_back_depthstencil_pass;
+ ds_state_desc.backFaceStencil.stencilCompareFunction =
+ (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
+ this->pipeline_state.depth_stencil_state.stencil_func :
+ MTLCompareFunctionAlways;
+
+ ds_state_desc.frontFaceStencil.readMask =
+ this->pipeline_state.depth_stencil_state.stencil_read_mask;
+ ds_state_desc.frontFaceStencil.writeMask =
+ this->pipeline_state.depth_stencil_state.stencil_write_mask;
+ ds_state_desc.frontFaceStencil.stencilFailureOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_front_stencil_fail;
+ ds_state_desc.frontFaceStencil.depthFailureOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_front_depth_fail;
+ ds_state_desc.frontFaceStencil.depthStencilPassOperation =
+ this->pipeline_state.depth_stencil_state.stencil_op_front_depthstencil_pass;
+ ds_state_desc.frontFaceStencil.stencilCompareFunction =
+ (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
+ this->pipeline_state.depth_stencil_state.stencil_func :
+ MTLCompareFunctionAlways;
+ }
+
+ /* Bake new DS state. */
+ ds_state = [this->device newDepthStencilStateWithDescriptor:ds_state_desc];
+
+ /* Store state in cache. */
+ BLI_assert(ds_state != nil);
+ this->depth_stencil_state_cache.add_new(this->pipeline_state.depth_stencil_state, ds_state);
+ }
+ else {
+ ds_state = *depth_stencil_state_lookup;
+ BLI_assert(ds_state != nil);
+ }
+
+ /* Bind Depth Stencil State to render command encoder. */
+ BLI_assert(ds_state != nil);
+ if (ds_state != nil) {
+ if (rps.bound_ds_state != ds_state) {
+ [rec setDepthStencilState:ds_state];
+ rps.bound_ds_state = ds_state;
+ }
+ }
+
+ /* Apply dynamic depth-stencil state on encoder. */
+ if (hasStencilTarget) {
+ uint32_t stencil_ref_value =
+ (this->pipeline_state.depth_stencil_state.stencil_test_enabled) ?
+ this->pipeline_state.depth_stencil_state.stencil_ref :
+ 0;
+ if (stencil_ref_value != rps.last_used_stencil_ref_value) {
+ [rec setStencilReferenceValue:stencil_ref_value];
+ rps.last_used_stencil_ref_value = stencil_ref_value;
+ }
+ }
+
+ if (hasDepthTarget) {
+ bool doBias = false;
+ switch (prim_type) {
+ case MTLPrimitiveTypeTriangle:
+ case MTLPrimitiveTypeTriangleStrip:
+ doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_tris;
+ break;
+ case MTLPrimitiveTypeLine:
+ case MTLPrimitiveTypeLineStrip:
+ doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_lines;
+ break;
+ case MTLPrimitiveTypePoint:
+ doBias = this->pipeline_state.depth_stencil_state.depth_bias_enabled_for_points;
+ break;
+ }
+ [rec setDepthBias:(doBias) ? this->pipeline_state.depth_stencil_state.depth_bias : 0
+ slopeScale:(doBias) ? this->pipeline_state.depth_stencil_state.depth_slope_scale : 0
+ clamp:0];
+ }
+ }
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
/** \name Visibility buffer control for MTLQueryPool.
* \{ */
@@ -606,4 +1623,148 @@ id<MTLSamplerState> MTLContext::get_default_sampler_state()
/** \} */
+/* -------------------------------------------------------------------- */
+/** \name Swapchain management and Metal presentation.
+ * \{ */
+
+void present(MTLRenderPassDescriptor *blit_descriptor,
+ id<MTLRenderPipelineState> blit_pso,
+ id<MTLTexture> swapchain_texture,
+ id<CAMetalDrawable> drawable)
+{
+
+ MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
+ BLI_assert(ctx);
+
+  /* Flush any outstanding work. */
+ ctx->flush();
+
+ /* Always pace CPU to maximum of 3 drawables in flight.
+ * nextDrawable may have more in flight if backing swapchain
+   * textures are re-allocated, such as during resize events.
+ *
+ * Determine frames in flight based on current latency. If
+ * we are in a high-latency situation, limit frames in flight
+ * to increase app responsiveness and keep GPU execution under control.
+ * If latency improves, increase frames in flight to improve overall
+ * performance. */
+ int perf_max_drawables = MTL_MAX_DRAWABLES;
+ if (MTLContext::avg_drawable_latency_us > 185000) {
+ perf_max_drawables = 1;
+ }
+ else if (MTLContext::avg_drawable_latency_us > 85000) {
+ perf_max_drawables = 2;
+ }
+
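+  /* Busy-wait in 2 ms steps; the command buffer completion handler below
+   * decrements the in-flight drawable count. */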
+ while (MTLContext::max_drawables_in_flight > min_ii(perf_max_drawables, MTL_MAX_DRAWABLES)) {
+ PIL_sleep_ms(2);
+ }
+
+  /* Present is submitted in its own command buffer to ensure the drawable reference is released
+   * as early as possible. This command buffer is separate as it does not utilize the global
+   * state for rendering as the main context does. */
+ id<MTLCommandBuffer> cmdbuf = [ctx->queue commandBuffer];
+ MTLCommandBufferManager::num_active_cmd_bufs++;
+
+ if (MTLCommandBufferManager::sync_event != nil) {
+ /* Ensure command buffer ordering. */
+ [cmdbuf encodeWaitForEvent:MTLCommandBufferManager::sync_event
+ value:MTLCommandBufferManager::event_signal_val];
+ }
+
+ /* Do Present Call and final Blit to MTLDrawable. */
+ id<MTLRenderCommandEncoder> enc = [cmdbuf renderCommandEncoderWithDescriptor:blit_descriptor];
+ [enc setRenderPipelineState:blit_pso];
+ [enc setFragmentTexture:swapchain_texture atIndex:0];
+ [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:3];
+ [enc endEncoding];
+
+ /* Present drawable. */
+ BLI_assert(drawable);
+ [cmdbuf presentDrawable:drawable];
+
+ /* Ensure freed buffers have usage tracked against active CommandBuffer submissions. */
+ MTLSafeFreeList *cmd_free_buffer_list =
+ MTLContext::get_global_memory_manager().get_current_safe_list();
+ BLI_assert(cmd_free_buffer_list);
+
+ id<MTLCommandBuffer> cmd_buffer_ref = cmdbuf;
+ [cmd_buffer_ref retain];
+
+ /* Increment drawables in flight limiter. */
+ MTLContext::max_drawables_in_flight++;
+ std::chrono::time_point submission_time = std::chrono::high_resolution_clock::now();
+
+ /* Increment free pool reference and decrement upon command buffer completion. */
+ cmd_free_buffer_list->increment_reference();
+ [cmdbuf addCompletedHandler:^(id<MTLCommandBuffer> cb) {
+ /* Flag freed buffers associated with this CMD buffer as ready to be freed. */
+ cmd_free_buffer_list->decrement_reference();
+ [cmd_buffer_ref release];
+
+ /* Decrement count */
+ MTLCommandBufferManager::num_active_cmd_bufs--;
+ MTL_LOG_INFO("[Metal] Active command buffers: %d\n",
+ MTLCommandBufferManager::num_active_cmd_bufs);
+
+ /* Drawable count and latency management. */
+ MTLContext::max_drawables_in_flight--;
+ std::chrono::time_point completion_time = std::chrono::high_resolution_clock::now();
+ int64_t microseconds_per_frame = std::chrono::duration_cast<std::chrono::microseconds>(
+ completion_time - submission_time)
+ .count();
+ MTLContext::latency_resolve_average(microseconds_per_frame);
+
+ MTL_LOG_INFO("Frame Latency: %f ms (Rolling avg: %f ms Drawables: %d)\n",
+ ((float)microseconds_per_frame) / 1000.0f,
+ ((float)MTLContext::avg_drawable_latency_us) / 1000.0f,
+ perf_max_drawables);
+ }];
+
+ if (MTLCommandBufferManager::sync_event == nil) {
+ MTLCommandBufferManager::sync_event = [ctx->device newEvent];
+ BLI_assert(MTLCommandBufferManager::sync_event);
+ [MTLCommandBufferManager::sync_event retain];
+ }
+ BLI_assert(MTLCommandBufferManager::sync_event != nil);
+
+ MTLCommandBufferManager::event_signal_val++;
+ [cmdbuf encodeSignalEvent:MTLCommandBufferManager::sync_event
+ value:MTLCommandBufferManager::event_signal_val];
+
+ [cmdbuf commit];
+
+ /* When debugging, fetch advanced command buffer errors. */
+ if (G.debug & G_DEBUG_GPU) {
+ [cmdbuf waitUntilCompleted];
+ NSError *error = [cmdbuf error];
+ if (error != nil) {
+ NSLog(@"%@", error);
+ BLI_assert(false);
+
+ @autoreleasepool {
+ const char *stringAsChar = [[NSString stringWithFormat:@"%@", error] UTF8String];
+
+ std::ofstream outfile;
+ outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
+ outfile << stringAsChar;
+ outfile.close();
+ }
+ }
+ else {
+ @autoreleasepool {
+ NSString *str = @"Command buffer completed successfully!\n";
+ const char *stringAsChar = [str UTF8String];
+
+ std::ofstream outfile;
+ outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app);
+ outfile << stringAsChar;
+ outfile.close();
+ }
+ }
+ }
+}
+
+/** \} */
+
} // blender::gpu
diff --git a/source/blender/gpu/metal/mtl_drawlist.hh b/source/blender/gpu/metal/mtl_drawlist.hh
new file mode 100644
index 00000000000..9eb465b26a0
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_drawlist.hh
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Implementation of Multi Draw Indirect for the Metal backend.
+ * Currently a temporary stub (see MTLDrawList below).
+ */
+
+#pragma once
+
+#include "gpu_drawlist_private.hh"
+
+namespace blender {
+namespace gpu {
+
+/**
+ * TODO(Metal): MTLDrawList Implementation. Included as temporary stub.
+ */
+class MTLDrawList : public DrawList {
+ public:
+ MTLDrawList(int length) {}
+ ~MTLDrawList() {}
+
+ void append(GPUBatch *batch, int i_first, int i_count) override {}
+ void submit() override {}
+
+ MEM_CXX_CLASS_ALLOC_FUNCS("MTLDrawList");
+};
+
+} // namespace gpu
+} // namespace blender
diff --git a/source/blender/gpu/metal/mtl_immediate.hh b/source/blender/gpu/metal/mtl_immediate.hh
new file mode 100644
index 00000000000..b743efb397d
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_immediate.hh
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Mimics old style OpenGL immediate mode drawing.
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+#include "gpu_immediate_private.hh"
+
+#include <Cocoa/Cocoa.h>
+#include <Metal/Metal.h>
+#include <QuartzCore/QuartzCore.h>
+
+namespace blender::gpu {
+
+class MTLImmediate : public Immediate {
+ private:
+ MTLContext *context_ = nullptr;
+ MTLTemporaryBuffer current_allocation_;
+ MTLPrimitiveTopologyClass metal_primitive_mode_;
+ MTLPrimitiveType metal_primitive_type_;
+ bool has_begun_ = false;
+
+ public:
+ MTLImmediate(MTLContext *ctx);
+ ~MTLImmediate();
+
+ uchar *begin() override;
+ void end() override;
+ bool imm_is_recording()
+ {
+ return has_begun_;
+ }
+};
+
+} // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_immediate.mm b/source/blender/gpu/metal/mtl_immediate.mm
new file mode 100644
index 00000000000..41632e39092
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_immediate.mm
@@ -0,0 +1,397 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ *
+ * Mimics old style OpenGL immediate mode drawing.
+ */
+
+#include "BKE_global.h"
+
+#include "GPU_vertex_format.h"
+#include "gpu_context_private.hh"
+#include "gpu_shader_private.hh"
+#include "gpu_vertex_format_private.h"
+
+#include "mtl_context.hh"
+#include "mtl_debug.hh"
+#include "mtl_immediate.hh"
+#include "mtl_primitive.hh"
+#include "mtl_shader.hh"
+
+namespace blender::gpu {
+
+MTLImmediate::MTLImmediate(MTLContext *ctx)
+{
+ context_ = ctx;
+}
+
+MTLImmediate::~MTLImmediate()
+{
+}
+
+uchar *MTLImmediate::begin()
+{
+ BLI_assert(!has_begun_);
+
+ /* Determine primitive type. */
+ metal_primitive_type_ = gpu_prim_type_to_metal(this->prim_type);
+ metal_primitive_mode_ = mtl_prim_type_to_topology_class(metal_primitive_type_);
+ has_begun_ = true;
+
+ /* Allocate a range of data and return host-accessible pointer. */
+ const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_len);
+ current_allocation_ = context_->get_scratchbuffer_manager()
+ .scratch_buffer_allocate_range_aligned(bytes_needed, 256);
+ [current_allocation_.metal_buffer retain];
+ return reinterpret_cast<uchar *>(current_allocation_.data);
+}
+
+void MTLImmediate::end()
+{
+  /* Ensure we're between an imm::begin/imm::end pair. */
+ BLI_assert(has_begun_);
+ BLI_assert(prim_type != GPU_PRIM_NONE);
+
+ /* Verify context is valid, vertex data is written and a valid shader is bound. */
+ if (context_ && this->vertex_idx > 0 && this->shader) {
+
+ MTLShader *active_mtl_shader = static_cast<MTLShader *>(unwrap(shader));
+
+ /* Skip draw if Metal shader is not valid. */
+ if (active_mtl_shader == nullptr || !active_mtl_shader->is_valid() ||
+ active_mtl_shader->get_interface() == nullptr) {
+
+ const char *ptr = (active_mtl_shader) ? active_mtl_shader->name_get() : nullptr;
+ MTL_LOG_WARNING(
+ "MTLImmediate::end -- cannot perform draw as active shader is NULL or invalid (likely "
+ "unimplemented) (shader %p '%s')\n",
+ active_mtl_shader,
+ ptr);
+ return;
+ }
+
+ /* Ensure we are inside a render pass and fetch active RenderCommandEncoder. */
+ id<MTLRenderCommandEncoder> rec = context_->ensure_begin_render_pass();
+ BLI_assert(rec != nil);
+
+ /* Fetch active render pipeline state. */
+ MTLRenderPassState &rps = context_->main_command_buffer.get_render_pass_state();
+
+ /* Bind Shader. */
+ GPU_shader_bind(this->shader);
+
+ /* Debug markers for frame-capture and detailed error messages. */
+ if (G.debug & G_DEBUG_GPU) {
+ [rec pushDebugGroup:[NSString
+ stringWithFormat:@"immEnd(verts: %d, shader: %s)",
+ this->vertex_idx,
+ active_mtl_shader->get_interface()->get_name()]];
+ [rec insertDebugSignpost:[NSString stringWithFormat:@"immEnd(verts: %d, shader: %s)",
+ this->vertex_idx,
+ active_mtl_shader->get_interface()
+ ->get_name()]];
+ }
+
+ /* Populate pipeline state vertex descriptor. */
+ MTLStateManager *state_manager = static_cast<MTLStateManager *>(
+ MTLContext::get()->state_manager);
+ MTLRenderPipelineStateDescriptor &desc = state_manager->get_pipeline_descriptor();
+ const MTLShaderInterface *interface = active_mtl_shader->get_interface();
+
+ desc.vertex_descriptor.num_attributes = interface->get_total_attributes();
+ desc.vertex_descriptor.num_vert_buffers = 1;
+
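+    /* Reset attributes; any not matched below remain MTLVertexFormatInvalid. */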
+ for (int i = 0; i < desc.vertex_descriptor.num_attributes; i++) {
+ desc.vertex_descriptor.attributes[i].format = MTLVertexFormatInvalid;
+ }
+ desc.vertex_descriptor.uses_ssbo_vertex_fetch =
+ active_mtl_shader->get_uses_ssbo_vertex_fetch();
+ desc.vertex_descriptor.num_ssbo_attributes = 0;
+
+ /* SSBO Vertex Fetch -- Verify Attributes. */
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+ active_mtl_shader->ssbo_vertex_fetch_bind_attributes_begin();
+
+ /* Disable Indexed rendering in SSBO vertex fetch. */
+ int uniform_ssbo_use_indexed = active_mtl_shader->uni_ssbo_uses_indexed_rendering;
+      BLI_assert_msg(uniform_ssbo_use_indexed != -1,
+                     "Expected valid uniform location for ssbo_uses_indexed_rendering.");
+ int uses_indexed_rendering = 0;
+ active_mtl_shader->uniform_int(uniform_ssbo_use_indexed, 1, 1, &uses_indexed_rendering);
+ }
+
+ /* Populate Vertex descriptor and verify attributes.
+ * TODO(Metal): Cache this vertex state based on Vertex format and shaders. */
+ for (int i = 0; i < interface->get_total_attributes(); i++) {
+
+ /* Note: Attribute in VERTEX FORMAT does not necessarily share the same array index as
+ * attributes in shader interface. */
+ GPUVertAttr *attr = nullptr;
+ const MTLShaderInputAttribute &mtl_shader_attribute = interface->get_attribute(i);
+
+ /* Scan through vertex_format attributes until one with a name matching the shader interface
+ * is found. */
+ for (uint32_t a_idx = 0; a_idx < this->vertex_format.attr_len && attr == nullptr; a_idx++) {
+ GPUVertAttr *check_attribute = &this->vertex_format.attrs[a_idx];
+
+ /* Attributes can have multiple name aliases associated with them. */
+ for (uint32_t n_idx = 0; n_idx < check_attribute->name_len; n_idx++) {
+ const char *name = GPU_vertformat_attr_name_get(
+ &this->vertex_format, check_attribute, n_idx);
+
+ if (strcmp(name, interface->get_name_at_offset(mtl_shader_attribute.name_offset)) == 0) {
+ attr = check_attribute;
+ break;
+ }
+ }
+ }
+
+ BLI_assert_msg(attr != nullptr,
+ "Could not find expected attribute in immediate mode vertex format.");
+ if (attr == nullptr) {
+ MTL_LOG_ERROR(
+ "MTLImmediate::end Could not find matching attribute '%s' from Shader Interface in "
+ "Vertex Format! - TODO: Bind Dummy attribute\n",
+ interface->get_name_at_offset(mtl_shader_attribute.name_offset));
+ return;
+ }
+
+ /* Determine whether implicit type conversion between input vertex format
+ * and shader interface vertex format is supported. */
+ MTLVertexFormat convertedFormat;
+ bool can_use_implicit_conversion = mtl_convert_vertex_format(
+ mtl_shader_attribute.format,
+ (GPUVertCompType)attr->comp_type,
+ attr->comp_len,
+ (GPUVertFetchMode)attr->fetch_mode,
+ &convertedFormat);
+
+ if (can_use_implicit_conversion) {
+ /* Metal API can implicitly convert some formats during vertex assembly:
+ * - Converting from a normalized short2 format to float2
+ * - Type truncation e.g. Float4 to Float2.
+ * - Type expansion from Float3 to Float4.
+ * - Note: extra components are filled with the corresponding components of (0,0,0,1).
+ * (See
+ * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format)
+ */
+ bool is_floating_point_format = (attr->comp_type == GPU_COMP_F32);
+ desc.vertex_descriptor.attributes[i].format = convertedFormat;
+ desc.vertex_descriptor.attributes[i].format_conversion_mode =
+ (is_floating_point_format) ? (GPUVertFetchMode)GPU_FETCH_FLOAT :
+ (GPUVertFetchMode)GPU_FETCH_INT;
+ BLI_assert(convertedFormat != MTLVertexFormatInvalid);
+ }
+ else {
+ /* Some conversions are NOT valid, e.g. Int4 to Float4
+ * - In this case, we need to implement a conversion routine inside the shader.
+ * - This is handled using the format_conversion_mode flag
+       * - This flag is passed into the PSO as a function specialization,
+       * and will generate an appropriate conversion function when reading the vertex attribute
+       * value into local shader storage.
+       * (If no explicit conversion is needed, the function specializes to a pass-through). */
+ MTLVertexFormat converted_format;
+ bool can_convert = mtl_vertex_format_resize(
+ mtl_shader_attribute.format, attr->comp_len, &converted_format);
+ desc.vertex_descriptor.attributes[i].format = (can_convert) ? converted_format :
+ mtl_shader_attribute.format;
+ desc.vertex_descriptor.attributes[i].format_conversion_mode = (GPUVertFetchMode)
+ attr->fetch_mode;
+ BLI_assert(desc.vertex_descriptor.attributes[i].format != MTLVertexFormatInvalid);
+ }
+      /* Use the attribute offset from the vertex format, as attribute order in the format may
+       * differ from the order in the shader interface. */
+ desc.vertex_descriptor.attributes[i].offset = attr->offset;
+ desc.vertex_descriptor.attributes[i].buffer_index = mtl_shader_attribute.buffer_index;
+
+ /* SSBO Vertex Fetch Attribute bind. */
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+ BLI_assert_msg(mtl_shader_attribute.buffer_index == 0,
+ "All attributes should be in buffer index zero");
+ MTLSSBOAttribute ssbo_attr(
+ mtl_shader_attribute.index,
+ mtl_shader_attribute.buffer_index,
+ attr->offset,
+ this->vertex_format.stride,
+ MTLShader::ssbo_vertex_type_to_attr_type(desc.vertex_descriptor.attributes[i].format),
+ false);
+ desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] =
+ ssbo_attr;
+ desc.vertex_descriptor.num_ssbo_attributes++;
+ active_mtl_shader->ssbo_vertex_fetch_bind_attribute(ssbo_attr);
+ }
+ }
+
+ /* Buffer bindings for singular vertex buffer. */
+ desc.vertex_descriptor.buffer_layouts[0].step_function = MTLVertexStepFunctionPerVertex;
+ desc.vertex_descriptor.buffer_layouts[0].step_rate = 1;
+ desc.vertex_descriptor.buffer_layouts[0].stride = this->vertex_format.stride;
+ BLI_assert(this->vertex_format.stride > 0);
+
+    /* SSBO Vertex Fetch -- Finalize attribute bindings and set status uniforms. */
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+ active_mtl_shader->ssbo_vertex_fetch_bind_attributes_end(rec);
+
+ /* Set Status uniforms. */
+ BLI_assert_msg(active_mtl_shader->uni_ssbo_input_prim_type_loc != -1,
+ "ssbo_input_prim_type uniform location invalid!");
+ BLI_assert_msg(active_mtl_shader->uni_ssbo_input_vert_count_loc != -1,
+ "ssbo_input_vert_count uniform location invalid!");
+ GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)),
+ active_mtl_shader->uni_ssbo_input_prim_type_loc,
+ 1,
+ 1,
+ (const int *)(&this->prim_type));
+ GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)),
+ active_mtl_shader->uni_ssbo_input_vert_count_loc,
+ 1,
+ 1,
+ (const int *)(&this->vertex_idx));
+ }
+
+ MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);
+ if (context_->ensure_render_pipeline_state(mtl_prim_type)) {
+
+ /* Issue draw call. */
+ BLI_assert(this->vertex_idx > 0);
+
+ /* Metal API does not support triangle fan, so we can emulate this
+ * input data by generating an index buffer to re-map indices to
+ * a TriangleList.
+ *
+ * NOTE(Metal): Consider caching generated triangle fan index buffers.
+ * For immediate mode, generating these is currently very cheap, as we use
+ * fast scratch buffer allocations. Though we may benefit from caching of
+ * frequently used buffer sizes. */
+ if (mtl_needs_topology_emulation(this->prim_type)) {
+
+ /* Debug safety check for SSBO FETCH MODE. */
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+ BLI_assert(false && "Topology emulation not supported with SSBO Vertex Fetch mode");
+ }
+
+ /* Emulate Tri-fan. */
+ if (this->prim_type == GPU_PRIM_TRI_FAN) {
+ /* Prepare Triangle-Fan emulation index buffer on CPU based on number of input
+ * vertices. */
+ uint32_t base_vert_count = this->vertex_idx;
+ uint32_t num_triangles = max_ii(base_vert_count - 2, 0);
+ uint32_t fan_index_count = num_triangles * 3;
+ BLI_assert(num_triangles > 0);
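+            /* e.g. a 5-vertex fan (v0..v4) expands to triangles
+             * (v0, v1, v2), (v0, v2, v3), (v0, v3, v4). */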
+
+ uint32_t alloc_size = sizeof(uint32_t) * fan_index_count;
+ uint32_t *index_buffer = nullptr;
+
+ MTLTemporaryBuffer allocation =
+ context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(
+ alloc_size, 128);
+ index_buffer = (uint32_t *)allocation.data;
+
+ int a = 0;
+ for (int i = 0; i < num_triangles; i++) {
+ index_buffer[a++] = 0;
+ index_buffer[a++] = i + 1;
+ index_buffer[a++] = i + 2;
+ }
+
+ @autoreleasepool {
+
+ id<MTLBuffer> index_buffer_mtl = nil;
+ uint32_t index_buffer_offset = 0;
+
+ /* Region of scratch buffer used for topology emulation element data.
+ * NOTE(Metal): We do not need to manually flush as the entire scratch
+ * buffer for current command buffer is flushed upon submission. */
+ index_buffer_mtl = allocation.metal_buffer;
+ index_buffer_offset = allocation.buffer_offset;
+
+ /* Set depth stencil state (requires knowledge of primitive type). */
+ context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle);
+
+ /* Bind Vertex Buffer. */
+ rps.bind_vertex_buffer(
+ current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0);
+
+ /* Draw. */
+ [rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle
+ indexCount:fan_index_count
+ indexType:MTLIndexTypeUInt32
+ indexBuffer:index_buffer_mtl
+ indexBufferOffset:index_buffer_offset];
+ }
+ }
+ else {
+            /* TODO(Metal): Topology emulation for line loop.
+             * NOTE(Metal): This path is currently unused, as such cases are handled at the
+             * high level for efficiency. */
+ BLI_assert_msg(false, "LineLoop requires emulation support in immediate mode.");
+ }
+ }
+ else {
+ MTLPrimitiveType primitive_type = metal_primitive_type_;
+ int vertex_count = this->vertex_idx;
+
+ /* Bind Vertex Buffer. */
+ rps.bind_vertex_buffer(
+ current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0);
+
+ /* Set depth stencil state (requires knowledge of primitive type). */
+ context_->ensure_depth_stencil_state(primitive_type);
+
+ if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
+
+ /* Bind Null Buffers for empty/missing bind slots. */
+ id<MTLBuffer> null_buffer = context_->get_null_buffer();
+ BLI_assert(null_buffer != nil);
+ for (int i = 1; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
+
+ /* We only need to ensure a buffer is bound to the context, its contents do not matter
+ * as it will not be used. */
+ if (rps.cached_vertex_buffer_bindings[i].metal_buffer == nil) {
+ rps.bind_vertex_buffer(null_buffer, 0, i);
+ }
+ }
+
+ /* SSBO vertex fetch - Nullify elements buffer. */
+ if (rps.cached_vertex_buffer_bindings[MTL_SSBO_VERTEX_FETCH_IBO_INDEX].metal_buffer ==
+ nil) {
+ rps.bind_vertex_buffer(null_buffer, 0, MTL_SSBO_VERTEX_FETCH_IBO_INDEX);
+ }
+
+ /* Submit draw call with modified vertex count, which reflects vertices per primitive
+ * defined in the USE_SSBO_VERTEX_FETCH pragma. */
+ int num_input_primitives = gpu_get_prim_count_from_type(vertex_count, this->prim_type);
+ int output_num_verts = num_input_primitives *
+ active_mtl_shader->get_ssbo_vertex_fetch_output_num_verts();
+#ifndef NDEBUG
+ BLI_assert(
+ mtl_vertex_count_fits_primitive_type(
+ output_num_verts, active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type()) &&
+ "Output Vertex count is not compatible with the requested output vertex primitive "
+ "type");
+#endif
+ [rec drawPrimitives:active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type()
+ vertexStart:0
+ vertexCount:output_num_verts];
+ context_->main_command_buffer.register_draw_counters(output_num_verts);
+ }
+ else {
+ /* Regular draw. */
+ [rec drawPrimitives:primitive_type vertexStart:0 vertexCount:vertex_count];
+ context_->main_command_buffer.register_draw_counters(vertex_count);
+ }
+ }
+ }
+ if (G.debug & G_DEBUG_GPU) {
+ [rec popDebugGroup];
+ }
+ }
+
+ /* Reset allocation after draw submission. */
+ has_begun_ = false;
+ if (current_allocation_.metal_buffer) {
+ [current_allocation_.metal_buffer release];
+ current_allocation_.metal_buffer = nil;
+ }
+}
+
+} // blender::gpu
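
The triangle-fan path in mtl_immediate.mm above fills a CPU-side index buffer so the fan can be submitted as indexed triangles. A minimal sketch of just that index pattern in plain C++, with a hypothetical build_fan_indices helper standing in for the scratch-buffer allocation used by the backend:

    #include <cstdint>
    #include <vector>

    /* Build the index list that emulates a triangle fan with indexed triangles:
     * triangle i re-uses vertex 0 together with the pair (i + 1, i + 2). */
    static std::vector<uint32_t> build_fan_indices(uint32_t vertex_count)
    {
      std::vector<uint32_t> indices;
      if (vertex_count < 3) {
        return indices; /* Not enough vertices to form a triangle. */
      }
      const uint32_t num_triangles = vertex_count - 2;
      indices.reserve(size_t(num_triangles) * 3);
      for (uint32_t i = 0; i < num_triangles; i++) {
        indices.push_back(0);
        indices.push_back(i + 1);
        indices.push_back(i + 2);
      }
      return indices;
    }

    /* Example: 5 fan vertices produce triangles (0,1,2), (0,2,3), (0,3,4). */

Every emitted triangle shares vertex 0, which is why the hunk above sizes the allocation as fan_index_count = 3 * (base_vert_count - 2).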
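
The SSBO vertex-fetch branch above also re-derives the draw's vertex count: the number of input primitives is computed from the submitted vertex count, then multiplied by the shader's output vertices per primitive. A small sketch of that arithmetic, assuming plain list topologies (the backend uses gpu_get_prim_count_from_type and the shader's reported output count; strips and fans need different formulas):

    #include <cstdint>

    /* Expansion used when the vertex shader pulls its own attributes (SSBO vertex
     * fetch): the GPU draw is issued with one vertex per *output* vertex. */
    static uint32_t ssbo_output_vertex_count(uint32_t input_vertex_count,
                                             uint32_t verts_per_input_prim,
                                             uint32_t output_verts_per_prim)
    {
      const uint32_t num_input_prims =
          (verts_per_input_prim == 0) ? 0 : input_vertex_count / verts_per_input_prim;
      return num_input_prims * output_verts_per_prim;
    }

    /* Example: 300 vertices drawn as triangles (3 per primitive) with a shader
     * configured to emit 6 output vertices per primitive -> 100 * 6 = 600. */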
diff --git a/source/blender/gpu/metal/mtl_memory.hh b/source/blender/gpu/metal/mtl_memory.hh
index df80df6543f..bd354376b12 100644
--- a/source/blender/gpu/metal/mtl_memory.hh
+++ b/source/blender/gpu/metal/mtl_memory.hh
@@ -340,13 +340,13 @@ class MTLBufferPool {
private:
/* Memory statistics. */
- long long int total_allocation_bytes_ = 0;
+ int64_t total_allocation_bytes_ = 0;
#if MTL_DEBUG_MEMORY_STATISTICS == 1
/* Debug statistics. */
std::atomic<int> per_frame_allocation_count_;
- std::atomic<long long int> allocations_in_pool_;
- std::atomic<long long int> buffers_in_pool_;
+ std::atomic<int64_t> allocations_in_pool_;
+ std::atomic<int64_t> buffers_in_pool_;
#endif
/* Metal resources. */
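
The mtl_memory.hh hunk swaps "long long int" for the fixed-width int64_t in the pool's debug statistics. A tiny sketch of that kind of counter, kept as relaxed atomics so bookkeeping can be updated from multiple threads; the names and the relaxed-ordering choice are illustrative, not taken from the backend:

    #include <atomic>
    #include <cstdint>

    struct BufferPoolStats {
      std::atomic<int64_t> allocations_in_pool{0}; /* Bytes currently held by the pool. */
      std::atomic<int64_t> buffers_in_pool{0};     /* Number of pooled buffers. */

      void on_buffer_returned(int64_t size_in_bytes)
      {
        allocations_in_pool.fetch_add(size_in_bytes, std::memory_order_relaxed);
        buffers_in_pool.fetch_add(1, std::memory_order_relaxed);
      }

      void on_buffer_reused(int64_t size_in_bytes)
      {
        allocations_in_pool.fetch_sub(size_in_bytes, std::memory_order_relaxed);
        buffers_in_pool.fetch_sub(1, std::memory_order_relaxed);
      }
    };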
diff --git a/source/blender/gpu/metal/mtl_shader.hh b/source/blender/gpu/metal/mtl_shader.hh
index 64d9d1cf849..5485b32dd31 100644
--- a/source/blender/gpu/metal/mtl_shader.hh
+++ b/source/blender/gpu/metal/mtl_shader.hh
@@ -261,8 +261,6 @@ class MTLShader : public Shader {
bool get_push_constant_is_dirty();
void push_constant_bindstate_mark_dirty(bool is_dirty);
- void vertformat_from_shader(GPUVertFormat *format) const override;
-
/* DEPRECATED: Kept only because of BGL API. (Returning -1 in METAL). */
int program_handle_get() const override
{
diff --git a/source/blender/gpu/metal/mtl_shader.mm b/source/blender/gpu/metal/mtl_shader.mm
index 23097f312f0..3b27b60bca0 100644
--- a/source/blender/gpu/metal/mtl_shader.mm
+++ b/source/blender/gpu/metal/mtl_shader.mm
@@ -129,6 +129,7 @@ MTLShader::~MTLShader()
if (shd_builder_ != nullptr) {
delete shd_builder_;
+ shd_builder_ = nullptr;
}
}
@@ -209,6 +210,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
/* Release temporary compilation resources. */
delete shd_builder_;
+ shd_builder_ = nullptr;
return false;
}
}
@@ -279,6 +281,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
/* Release temporary compilation resources. */
delete shd_builder_;
+ shd_builder_ = nullptr;
return false;
}
}
@@ -324,6 +327,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
/* Release temporary compilation resources. */
delete shd_builder_;
+ shd_builder_ = nullptr;
return true;
}
@@ -535,28 +539,6 @@ void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty)
{
push_constant_modified_ = is_dirty;
}
-
-void MTLShader::vertformat_from_shader(GPUVertFormat *format) const
-{
- GPU_vertformat_clear(format);
-
- const MTLShaderInterface *mtl_interface = static_cast<const MTLShaderInterface *>(interface);
- for (const uint attr_id : IndexRange(mtl_interface->get_total_attributes())) {
- const MTLShaderInputAttribute &attr = mtl_interface->get_attribute(attr_id);
-
- /* Extract type parameters from Metal type. */
- GPUVertCompType comp_type = comp_type_from_vert_format(attr.format);
- uint comp_len = comp_count_from_vert_format(attr.format);
- GPUVertFetchMode fetch_mode = fetchmode_from_vert_format(attr.format);
-
- GPU_vertformat_attr_add(format,
- mtl_interface->get_name_at_offset(attr.name_offset),
- comp_type,
- comp_len,
- fetch_mode);
- }
-}
-
/** \} */
/* -------------------------------------------------------------------- */
@@ -1167,6 +1149,7 @@ void MTLShader::ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_at
MTLShaderInterface *mtl_interface = this->get_interface();
BLI_assert(ssbo_attr.mtl_attribute_index >= 0 &&
ssbo_attr.mtl_attribute_index < mtl_interface->get_total_attributes());
+ UNUSED_VARS_NDEBUG(mtl_interface);
/* Update bind-mask to verify this attribute has been used. */
BLI_assert((ssbo_vertex_attribute_bind_mask_ & (1 << ssbo_attr.mtl_attribute_index)) ==
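
Each finalize() hunk above follows "delete shd_builder_" with "shd_builder_ = nullptr". A small self-contained sketch of why: clearing the pointer makes the destructor's own delete a harmless no-op instead of a double free. The types here are stand-ins, not the real MTLShader classes:

    struct CompilationScratch {
      /* Temporary state that only exists while a shader is being built. */
    };

    struct ShaderStub {
      CompilationScratch *shd_builder_ = nullptr;

      bool finalize(bool compile_ok)
      {
        /* Release temporary compilation resources on both paths. */
        delete shd_builder_;
        shd_builder_ = nullptr; /* Destructor runs later; avoid a double delete. */
        return compile_ok;
      }

      ~ShaderStub()
      {
        delete shd_builder_; /* Safe: deleting a null pointer is a no-op. */
      }
    };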
diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm
index 977e97dbd82..4a2be0753bb 100644
--- a/source/blender/gpu/metal/mtl_shader_generator.mm
+++ b/source/blender/gpu/metal/mtl_shader_generator.mm
@@ -724,10 +724,6 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
}
if (msl_iface.uses_ssbo_vertex_fetch_mode) {
ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl;
- ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_MAX_VBOS " << MTL_SSBO_VERTEX_FETCH_MAX_VBOS
- << std::endl;
- ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_IBO_INDEX " << MTL_SSBO_VERTEX_FETCH_IBO_INDEX
- << std::endl;
for (const MSLVertexInputAttribute &attr : msl_iface.vertex_input_attributes) {
ss_vertex << "#define SSBO_ATTR_TYPE_" << attr.name << " " << attr.type << std::endl;
}
diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm
index 4af46c13751..b4e913e5be6 100644
--- a/source/blender/gpu/metal/mtl_texture.mm
+++ b/source/blender/gpu/metal/mtl_texture.mm
@@ -12,6 +12,7 @@
#include "GPU_batch_presets.h"
#include "GPU_capabilities.h"
#include "GPU_framebuffer.h"
+#include "GPU_immediate.h"
#include "GPU_platform.h"
#include "GPU_state.h"
@@ -303,7 +304,6 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst,
/* Execute graphics draw call to perform the blit. */
GPUBatch *quad = GPU_batch_preset_quad();
-
GPU_batch_set_shader(quad, shader);
float w = dst->width_get();
@@ -337,6 +337,20 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst,
GPU_batch_draw(quad);
+  /* TMP: draw with the immediate-mode API. TODO(Metal): Remove once GPUBatch is supported. */
+ GPUVertFormat *imm_format = immVertexFormat();
+ uint pos = GPU_vertformat_attr_add(imm_format, "pos", GPU_COMP_F32, 2, GPU_FETCH_FLOAT);
+
+ immBindShader(shader);
+ immBegin(GPU_PRIM_TRI_STRIP, 4);
+ immVertex2f(pos, 1, 0);
+ immVertex2f(pos, 0, 0);
+ immVertex2f(pos, 1, 1);
+ immVertex2f(pos, 0, 1);
+ immEnd();
+ immUnbindProgram();
+ /**********************/
+
/* restoring old pipeline state. */
GPU_depth_mask(depth_write_prev);
GPU_stencil_write_mask_set(stencil_mask_prev);
@@ -1463,79 +1477,6 @@ bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo)
BLI_assert_msg(this->format_ != GPU_DEPTH24_STENCIL8,
"Apple silicon does not support GPU_DEPTH24_S8");
- MTLPixelFormat mtl_format = gpu_texture_format_to_metal(this->format_);
- mtl_max_mips_ = 1;
- mipmaps_ = 0;
- this->mip_range_set(0, 0);
-
- /* Create texture from GPUVertBuf's buffer. */
- MTLVertBuf *mtl_vbo = static_cast<MTLVertBuf *>(unwrap(vbo));
- mtl_vbo->bind();
- mtl_vbo->flag_used();
-
- /* Get Metal Buffer. */
- id<MTLBuffer> source_buffer = mtl_vbo->get_metal_buffer();
- BLI_assert(source_buffer);
-
- /* Verify size. */
- if (w_ <= 0) {
- MTL_LOG_WARNING("Allocating texture buffer of width 0!\n");
- w_ = 1;
- }
-
- /* Verify Texture and vertex buffer alignment. */
- int bytes_per_pixel = get_mtl_format_bytesize(mtl_format);
- int bytes_per_row = bytes_per_pixel * w_;
-
- MTLContext *mtl_ctx = MTLContext::get();
- uint align_requirement = static_cast<uint>(
- [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]);
-
- /* Verify per-vertex size aligns with texture size. */
- const GPUVertFormat *format = GPU_vertbuf_get_format(vbo);
- BLI_assert(bytes_per_pixel == format->stride &&
- "Pixel format stride MUST match the texture format stride -- These being different "
- "is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex");
- UNUSED_VARS_NDEBUG(format);
-
- /* Create texture descriptor. */
- BLI_assert(type_ == GPU_TEXTURE_BUFFER);
- texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
- texture_descriptor_.pixelFormat = mtl_format;
- texture_descriptor_.textureType = MTLTextureTypeTextureBuffer;
- texture_descriptor_.width = w_;
- texture_descriptor_.height = 1;
- texture_descriptor_.depth = 1;
- texture_descriptor_.arrayLength = 1;
- texture_descriptor_.mipmapLevelCount = mtl_max_mips_;
- texture_descriptor_.usage =
- MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite |
- MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. */
- texture_descriptor_.storageMode = [source_buffer storageMode];
- texture_descriptor_.sampleCount = 1;
- texture_descriptor_.cpuCacheMode = [source_buffer cpuCacheMode];
- texture_descriptor_.hazardTrackingMode = [source_buffer hazardTrackingMode];
-
- texture_ = [source_buffer
- newTextureWithDescriptor:texture_descriptor_
- offset:0
- bytesPerRow:ceil_to_multiple_u(bytes_per_row, align_requirement)];
- aligned_w_ = bytes_per_row / bytes_per_pixel;
-
- BLI_assert(texture_);
- texture_.label = [NSString stringWithUTF8String:this->get_name()];
- is_baked_ = true;
- is_dirty_ = false;
- resource_mode_ = MTL_TEXTURE_MODE_VBO;
-
- /* Track Status. */
- vert_buffer_ = mtl_vbo;
- vert_buffer_mtl_ = source_buffer;
-
- /* Cleanup. */
- [texture_descriptor_ release];
- texture_descriptor_ = nullptr;
-
return true;
}
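
The removed init_internal(GPUVertBuf *) body creates the buffer-backed texture with bytesPerRow rounded up to the device's minimum linear-texture alignment (queried via minimumLinearTextureAlignmentForPixelFormat). A plain C++ sketch of just that rounding step, with an illustrative alignment value in the example:

    #include <cassert>
    #include <cstdint>

    /* Round value up to the next multiple of alignment (alignment must be > 0). */
    static uint32_t ceil_to_multiple(uint32_t value, uint32_t alignment)
    {
      assert(alignment > 0);
      return ((value + alignment - 1) / alignment) * alignment;
    }

    /* Example: a 4-byte-per-pixel buffer texture of width 33, assuming the device
     * reports a 256-byte row alignment:
     *   bytes_per_row         = 4 * 33 = 132
     *   aligned bytes per row = ceil_to_multiple(132, 256) = 256 */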
diff --git a/source/blender/gpu/metal/mtl_texture_util.mm b/source/blender/gpu/metal/mtl_texture_util.mm
index 928393fb39e..5ed7659f260 100644
--- a/source/blender/gpu/metal/mtl_texture_util.mm
+++ b/source/blender/gpu/metal/mtl_texture_util.mm
@@ -22,13 +22,7 @@
/* Utility file for secondary functionality which supports mtl_texture.mm. */
extern char datatoc_compute_texture_update_msl[];
-extern char datatoc_depth_2d_update_vert_glsl[];
-extern char datatoc_depth_2d_update_float_frag_glsl[];
-extern char datatoc_depth_2d_update_int24_frag_glsl[];
-extern char datatoc_depth_2d_update_int32_frag_glsl[];
extern char datatoc_compute_texture_read_msl[];
-extern char datatoc_gpu_shader_fullscreen_blit_vert_glsl[];
-extern char datatoc_gpu_shader_fullscreen_blit_frag_glsl[];
namespace blender::gpu {
@@ -447,42 +441,34 @@ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get(
return *result;
}
- const char *fragment_source = nullptr;
+ const char *depth_2d_info_variant = nullptr;
switch (specialization.data_mode) {
case MTL_DEPTH_UPDATE_MODE_FLOAT:
- fragment_source = datatoc_depth_2d_update_float_frag_glsl;
+ depth_2d_info_variant = "depth_2d_update_float";
break;
case MTL_DEPTH_UPDATE_MODE_INT24:
- fragment_source = datatoc_depth_2d_update_int24_frag_glsl;
+ depth_2d_info_variant = "depth_2d_update_int24";
break;
case MTL_DEPTH_UPDATE_MODE_INT32:
- fragment_source = datatoc_depth_2d_update_int32_frag_glsl;
+ depth_2d_info_variant = "depth_2d_update_int32";
break;
default:
BLI_assert(false && "Invalid format mode\n");
return nullptr;
}
- GPUShader *shader = GPU_shader_create(datatoc_depth_2d_update_vert_glsl,
- fragment_source,
- nullptr,
- nullptr,
- nullptr,
- "depth_2d_update_sh_get");
+ GPUShader *shader = GPU_shader_create_from_info_name(depth_2d_info_variant);
mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialization, shader);
return shader;
}
GPUShader *gpu::MTLTexture::fullscreen_blit_sh_get()
{
-
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(mtl_context != nullptr);
if (mtl_context->get_texture_utils().fullscreen_blit_shader == nullptr) {
- const char *vertex_source = datatoc_gpu_shader_fullscreen_blit_vert_glsl;
- const char *fragment_source = datatoc_gpu_shader_fullscreen_blit_frag_glsl;
- GPUShader *shader = GPU_shader_create(
- vertex_source, fragment_source, nullptr, nullptr, nullptr, "fullscreen_blit");
+ GPUShader *shader = GPU_shader_create_from_info_name("fullscreen_blit");
+
mtl_context->get_texture_utils().fullscreen_blit_shader = shader;
}
return mtl_context->get_texture_utils().fullscreen_blit_shader;
@@ -614,7 +600,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
stringWithUTF8String:datatoc_compute_texture_read_msl];
/* Defensive Debug Checks. */
- long long int depth_scale_factor = 1;
+ int64_t depth_scale_factor = 1;
if (specialization_params.depth_format_mode > 0) {
BLI_assert(specialization_params.component_count_input == 1);
BLI_assert(specialization_params.component_count_output == 1);
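
depth_2d_update_sh_get() above maps the specialization's data mode to a create-info name and caches the resulting shader so each variant is compiled only once. A sketch of that lookup-or-create pattern in plain C++; the variant strings match the hunk, while ShaderStub and the cache map stand in for GPUShader and the per-context container:

    #include <map>
    #include <string>

    enum class DepthUpdateMode { Float, Int24, Int32 };

    struct ShaderStub {
      std::string info_name; /* Stand-in for a compiled GPUShader. */
    };

    static ShaderStub *depth_2d_update_shader_get(DepthUpdateMode mode,
                                                  std::map<DepthUpdateMode, ShaderStub> &cache)
    {
      if (auto it = cache.find(mode); it != cache.end()) {
        return &it->second; /* Already compiled for this specialization. */
      }
      const char *variant = nullptr;
      switch (mode) {
        case DepthUpdateMode::Float:
          variant = "depth_2d_update_float";
          break;
        case DepthUpdateMode::Int24:
          variant = "depth_2d_update_int24";
          break;
        case DepthUpdateMode::Int32:
          variant = "depth_2d_update_int32";
          break;
      }
      /* The backend builds the shader here with GPU_shader_create_from_info_name(). */
      auto inserted = cache.emplace(mode, ShaderStub{variant});
      return &inserted.first->second;
    }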
diff --git a/source/blender/gpu/opengl/gl_backend.hh b/source/blender/gpu/opengl/gl_backend.hh
index 8646d94e2fd..14fca9f061d 100644
--- a/source/blender/gpu/opengl/gl_backend.hh
+++ b/source/blender/gpu/opengl/gl_backend.hh
@@ -61,7 +61,7 @@ class GLBackend : public GPUBackend {
GLTexture::samplers_update();
};
- Context *context_alloc(void *ghost_window) override
+ Context *context_alloc(void *ghost_window, void *ghost_context) override
{
return new GLContext(ghost_window, shared_orphan_list_);
};
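
context_alloc() now receives the GHOST context handle as well as the window, so a backend that needs the native context (Metal) can consume it while the GL backend keeps ignoring it. A minimal sketch of that widened factory signature, with stand-in types rather than the real GPUBackend/Context classes:

    struct ContextStub {
      void *ghost_window = nullptr;
      void *ghost_context = nullptr; /* Consumed only by backends that need it. */
    };

    struct BackendStub {
      virtual ~BackendStub() = default;
      virtual ContextStub *context_alloc(void *ghost_window, void *ghost_context) = 0;
    };

    struct GLBackendStub : public BackendStub {
      ContextStub *context_alloc(void *ghost_window, void * /*ghost_context*/) override
      {
        /* The GL path only needs the window; the extra handle is ignored. */
        return new ContextStub{ghost_window, nullptr};
      }
    };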
diff --git a/source/blender/gpu/tests/gpu_testing.cc b/source/blender/gpu/tests/gpu_testing.cc
index 224a9afcf59..67e296b11d5 100644
--- a/source/blender/gpu/tests/gpu_testing.cc
+++ b/source/blender/gpu/tests/gpu_testing.cc
@@ -19,7 +19,7 @@ void GPUTest::SetUp()
ghost_system = GHOST_CreateSystem();
ghost_context = GHOST_CreateOpenGLContext(ghost_system, glSettings);
GHOST_ActivateOpenGLContext(ghost_context);
- context = GPU_context_create(nullptr);
+ context = GPU_context_create(nullptr, ghost_context);
GPU_init();
}