Diffstat (limited to 'source/blender/gpu')
48 files changed, 2193 insertions, 199 deletions
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index d8ed74390f4..5e97909a2b8 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -5,7 +5,7 @@ # to more easily highlight code-paths in other libraries that need to be refactored, # bf_gpu is allowed to have opengl regardless of this option. -if(NOT WITH_OPENGL AND NOT WITH_METAL_BACKEND) +if(NOT WITH_OPENGL AND NOT WITH_METAL_BACKEND AND NOT WITH_HEADLESS) add_definitions(-DWITH_OPENGL) endif() @@ -25,6 +25,9 @@ set(INC # For theme color access. ../editors/include + # For *_info.hh includes. + ../draw/engines/eevee_next + # For node muting stuff. ../nodes @@ -191,9 +194,12 @@ set(METAL_SRC metal/mtl_command_buffer.mm metal/mtl_debug.mm metal/mtl_framebuffer.mm + metal/mtl_memory.mm + metal/mtl_query.mm metal/mtl_state.mm metal/mtl_texture.mm metal/mtl_texture_util.mm + metal/mtl_uniform_buffer.mm metal/mtl_backend.hh metal/mtl_capabilities.hh @@ -201,8 +207,11 @@ set(METAL_SRC metal/mtl_context.hh metal/mtl_debug.hh metal/mtl_framebuffer.hh + metal/mtl_memory.hh + metal/mtl_query.hh metal/mtl_state.hh metal/mtl_texture.hh + metal/mtl_uniform_buffer.hh ) # Select Backend source based on availability @@ -445,6 +454,7 @@ list(APPEND INC ${CMAKE_CURRENT_BINARY_DIR}) set(SRC_SHADER_CREATE_INFOS ../draw/engines/basic/shaders/infos/basic_depth_info.hh + ../draw/engines/eevee_next/shaders/infos/eevee_film_info.hh ../draw/engines/eevee_next/shaders/infos/eevee_material_info.hh ../draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh ../draw/engines/gpencil/shaders/infos/gpencil_info.hh @@ -459,6 +469,7 @@ set(SRC_SHADER_CREATE_INFOS ../draw/engines/overlay/shaders/infos/overlay_outline_info.hh ../draw/engines/overlay/shaders/infos/overlay_paint_info.hh ../draw/engines/overlay/shaders/infos/overlay_sculpt_info.hh + ../draw/engines/overlay/shaders/infos/overlay_sculpt_curves_info.hh ../draw/engines/overlay/shaders/infos/overlay_volume_info.hh ../draw/engines/overlay/shaders/infos/overlay_wireframe_info.hh ../draw/engines/select/shaders/infos/select_id_info.hh @@ -554,7 +565,7 @@ endif() -if(WITH_GPU_SHADER_BUILDER) +if(WITH_GPU_BUILDTIME_SHADER_BUILDER) # TODO(@fclem) Fix this mess. if(APPLE) add_executable(shader_builder @@ -563,11 +574,7 @@ if(WITH_GPU_SHADER_BUILDER) ) setup_platform_linker_flags(shader_builder) - - target_link_libraries(shader_builder PUBLIC - bf_blenkernel - buildinfoobj - ) + target_link_libraries(shader_builder PUBLIC buildinfoobj) else() if(WIN32) # We can re-use the manifest from tests.exe here since it's @@ -582,12 +589,14 @@ if(WITH_GPU_SHADER_BUILDER) ${MANIFEST} ) - target_link_libraries(shader_builder PUBLIC - bf_blenkernel - ${PLATFORM_LINKLIBS} - ) endif() - + target_link_libraries(shader_builder PUBLIC + bf_gpu + bf_intern_clog + bf_blenlib + bf_intern_ghost + ${PLATFORM_LINKLIBS} + ) target_include_directories(shader_builder PRIVATE ${INC} ${CMAKE_CURRENT_BINARY_DIR}) set(SRC_BAKED_CREATE_INFOS_FILE ${CMAKE_CURRENT_BINARY_DIR}/shader_baked.hh) diff --git a/source/blender/gpu/GPU_buffers.h b/source/blender/gpu/GPU_buffers.h index 1fe3b363687..89473ac0fe0 100644 --- a/source/blender/gpu/GPU_buffers.h +++ b/source/blender/gpu/GPU_buffers.h @@ -58,7 +58,9 @@ GPU_PBVH_Buffers *GPU_pbvh_mesh_buffers_build(const struct MPoly *mpoly, /** * Threaded: do not call any functions that use OpenGL calls! 
*/ -GPU_PBVH_Buffers *GPU_pbvh_grid_buffers_build(int totgrid, unsigned int **grid_hidden); +GPU_PBVH_Buffers *GPU_pbvh_grid_buffers_build(int totgrid, + unsigned int **grid_hidden, + bool smooth); /** * Threaded: do not call any functions that use OpenGL calls! diff --git a/source/blender/gpu/GPU_common_types.h b/source/blender/gpu/GPU_common_types.h index 5913caf72e3..13535a4fb3b 100644 --- a/source/blender/gpu/GPU_common_types.h +++ b/source/blender/gpu/GPU_common_types.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + /** \file * \ingroup gpu */ diff --git a/source/blender/gpu/GPU_context.h b/source/blender/gpu/GPU_context.h index 1fcd94c48fc..a242bb7cc94 100644 --- a/source/blender/gpu/GPU_context.h +++ b/source/blender/gpu/GPU_context.h @@ -17,10 +17,10 @@ extern "C" { #endif -void GPU_backend_init(eGPUBackendType backend); -void GPU_backend_exit(void); -bool GPU_backend_supported(eGPUBackendType type); - +/* GPU back-ends abstract the differences between different APIs. #GPU_context_create + * automatically initializes the back-end, and #GPU_context_discard frees it when there + * are no more contexts. */ +bool GPU_backend_supported(void); eGPUBackendType GPU_backend_get_type(void); /** Opaque type hiding blender::gpu::Context. */ diff --git a/source/blender/gpu/GPU_shader_shared_utils.h b/source/blender/gpu/GPU_shader_shared_utils.h index 474549d1f42..88bdad2bf76 100644 --- a/source/blender/gpu/GPU_shader_shared_utils.h +++ b/source/blender/gpu/GPU_shader_shared_utils.h @@ -41,6 +41,7 @@ # define floorf floor # define ceilf ceil # define sqrtf sqrt +# define expf exp # define float2 vec2 # define float3 vec3 diff --git a/source/blender/gpu/intern/gpu_backend.hh b/source/blender/gpu/intern/gpu_backend.hh index 6e07e6c3229..d2890efee72 100644 --- a/source/blender/gpu/intern/gpu_backend.hh +++ b/source/blender/gpu/intern/gpu_backend.hh @@ -30,6 +30,7 @@ class VertBuf; class GPUBackend { public: virtual ~GPUBackend() = default; + virtual void delete_resources() = 0; static GPUBackend *get(); diff --git a/source/blender/gpu/intern/gpu_buffers.c b/source/blender/gpu/intern/gpu_buffers.c index a1fe3d79223..14bbd82c282 100644 --- a/source/blender/gpu/intern/gpu_buffers.c +++ b/source/blender/gpu/intern/gpu_buffers.c @@ -881,13 +881,14 @@ void GPU_pbvh_grid_buffers_update(PBVHGPUFormat *vbo_id, buffers->show_overlay = !empty_mask || !default_face_set; } -GPU_PBVH_Buffers *GPU_pbvh_grid_buffers_build(int totgrid, BLI_bitmap **grid_hidden) +GPU_PBVH_Buffers *GPU_pbvh_grid_buffers_build(int totgrid, BLI_bitmap **grid_hidden, bool smooth) { GPU_PBVH_Buffers *buffers; buffers = MEM_callocN(sizeof(GPU_PBVH_Buffers), "GPU_Buffers"); buffers->grid_hidden = grid_hidden; buffers->totgrid = totgrid; + buffers->smooth = smooth; buffers->show_overlay = false; @@ -1181,9 +1182,9 @@ GPU_PBVH_Buffers *GPU_pbvh_bmesh_buffers_build(bool smooth_shading) * Builds a list of attributes from a set of domains and a set of * customdata types. * - * \param active_only Returns only one item, a GPUAttrRef to active_layer - * \param active_layer CustomDataLayer to use for the active layer - * \param active_layer CustomDataLayer to use for the render layer + * \param active_only: Returns only one item, a #GPUAttrRef to active_layer. + * \param active_layer: #CustomDataLayer to use for the active layer. + * \param active_layer: #CustomDataLayer to use for the render layer. 
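Call sites are expected to pass the shading mode through with the new parameter, e.g. (illustrative sketch; `totgrid` and `grid_hidden` come from the PBVH node being rebuilt):

  GPU_PBVH_Buffers *buffers = GPU_pbvh_grid_buffers_build(totgrid, grid_hidden, /*smooth=*/true);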
*/ static int gpu_pbvh_make_attr_offs(eAttrDomainMask domain_mask, eCustomDataMask type_mask, diff --git a/source/blender/gpu/intern/gpu_codegen.cc b/source/blender/gpu/intern/gpu_codegen.cc index 453428cb648..82441c3c89c 100644 --- a/source/blender/gpu/intern/gpu_codegen.cc +++ b/source/blender/gpu/intern/gpu_codegen.cc @@ -302,7 +302,7 @@ void GPUCodegen::generate_attribs() info.vertex_out(iface); /* Input declaration, loading / assignment to interface and geometry shader passthrough. */ - std::stringstream decl_ss, iface_ss, load_ss; + std::stringstream load_ss; int slot = 15; LISTBASE_FOREACH (GPUMaterialAttribute *, attr, &graph.attributes) { diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc index 4a0a9ecc7f6..e29b0d5801d 100644 --- a/source/blender/gpu/intern/gpu_context.cc +++ b/source/blender/gpu/intern/gpu_context.cc @@ -27,6 +27,7 @@ #include "gpu_batch_private.hh" #include "gpu_context_private.hh" #include "gpu_matrix_private.h" +#include "gpu_private.h" #ifdef WITH_OPENGL_BACKEND # include "gl_backend.hh" @@ -43,6 +44,12 @@ using namespace blender::gpu; static thread_local Context *active_ctx = nullptr; +static std::mutex backend_users_mutex; +static int num_backend_users = 0; + +static void gpu_backend_create(); +static void gpu_backend_discard(); + /* -------------------------------------------------------------------- */ /** \name gpu::Context methods * \{ */ @@ -85,9 +92,13 @@ Context *Context::get() GPUContext *GPU_context_create(void *ghost_window) { - if (GPUBackend::get() == nullptr) { - /* TODO: move where it make sense. */ - GPU_backend_init(GPU_BACKEND_OPENGL); + { + std::scoped_lock lock(backend_users_mutex); + if (num_backend_users == 0) { + /* Automatically create backend when first context is created. */ + gpu_backend_create(); + } + num_backend_users++; } Context *ctx = GPUBackend::get()->context_alloc(ghost_window); @@ -101,6 +112,16 @@ void GPU_context_discard(GPUContext *ctx_) Context *ctx = unwrap(ctx_); delete ctx; active_ctx = nullptr; + + { + std::scoped_lock lock(backend_users_mutex); + num_backend_users--; + BLI_assert(num_backend_users >= 0); + if (num_backend_users == 0) { + /* Discard backend when last context is discarded. */ + gpu_backend_discard(); + } + } } void GPU_context_active_set(GPUContext *ctx_) @@ -191,11 +212,12 @@ void GPU_render_step() /** \name Backend selection * \{ */ +static const eGPUBackendType g_backend_type = GPU_BACKEND_OPENGL; static GPUBackend *g_backend = nullptr; -bool GPU_backend_supported(eGPUBackendType type) +bool GPU_backend_supported(void) { - switch (type) { + switch (g_backend_type) { case GPU_BACKEND_OPENGL: #ifdef WITH_OPENGL_BACKEND return true; @@ -214,12 +236,12 @@ bool GPU_backend_supported(eGPUBackendType type) } } -void GPU_backend_init(eGPUBackendType backend_type) +static void gpu_backend_create() { BLI_assert(g_backend == nullptr); - BLI_assert(GPU_backend_supported(backend_type)); + BLI_assert(GPU_backend_supported()); - switch (backend_type) { + switch (g_backend_type) { #ifdef WITH_OPENGL_BACKEND case GPU_BACKEND_OPENGL: g_backend = new GLBackend; @@ -236,10 +258,15 @@ void GPU_backend_init(eGPUBackendType backend_type) } } -void GPU_backend_exit() +void gpu_backend_delete_resources() +{ + BLI_assert(g_backend); + g_backend->delete_resources(); +} + +void gpu_backend_discard() { - /* TODO: assert no resource left. Currently UI textures are still not freed in their context - * correctly. */ + /* TODO: assert no resource left. 
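A minimal sketch of the new lifetime contract replacing GPU_backend_init()/GPU_backend_exit() (illustrative; `ghost_window` is a placeholder for a valid GHOST window handle, or null for an off-screen context):

  if (GPU_backend_supported()) {
    GPUContext *ctx = GPU_context_create(ghost_window); /* First context: back-end is created. */
    GPUContext *offscreen = GPU_context_create(NULL);   /* num_backend_users == 2. */
    GPU_context_discard(offscreen);                     /* num_backend_users == 1: back-end kept. */
    GPU_context_discard(ctx);                           /* Last context: back-end is discarded. */
  }

GPU_exit() additionally calls gpu_backend_delete_resources() while a context is still current (see gpu_init_exit.c below), so heavy GPU resources are released before the final GPU_context_discard() destroys the back-end itself.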
*/ delete g_backend; g_backend = nullptr; } diff --git a/source/blender/gpu/intern/gpu_context_private.hh b/source/blender/gpu/intern/gpu_context_private.hh index 9cdf0075632..f823a92893c 100644 --- a/source/blender/gpu/intern/gpu_context_private.hh +++ b/source/blender/gpu/intern/gpu_context_private.hh @@ -28,11 +28,11 @@ namespace blender::gpu { class Context { public: /** State management */ - Shader *shader = NULL; - FrameBuffer *active_fb = NULL; - GPUMatrixState *matrix_state = NULL; - StateManager *state_manager = NULL; - Immediate *imm = NULL; + Shader *shader = nullptr; + FrameBuffer *active_fb = nullptr; + GPUMatrixState *matrix_state = nullptr; + StateManager *state_manager = nullptr; + Immediate *imm = nullptr; /** * All 4 window frame-buffers. @@ -41,10 +41,10 @@ class Context { * Front frame-buffers contains (in principle, but not always) the last frame color. * Default frame-buffer is back_left. */ - FrameBuffer *back_left = NULL; - FrameBuffer *front_left = NULL; - FrameBuffer *back_right = NULL; - FrameBuffer *front_right = NULL; + FrameBuffer *back_left = nullptr; + FrameBuffer *front_left = nullptr; + FrameBuffer *back_right = nullptr; + FrameBuffer *front_right = nullptr; DebugStack debug_stack; @@ -52,7 +52,7 @@ class Context { /** Thread on which this context is active. */ pthread_t thread_; bool is_active_; - /** Avoid including GHOST headers. Can be NULL for off-screen contexts. */ + /** Avoid including GHOST headers. Can be nullptr for off-screen contexts. */ void *ghost_window_; public: diff --git a/source/blender/gpu/intern/gpu_immediate_private.hh b/source/blender/gpu/intern/gpu_immediate_private.hh index 6c50fa01071..74ebbdc7ae3 100644 --- a/source/blender/gpu/intern/gpu_immediate_private.hh +++ b/source/blender/gpu/intern/gpu_immediate_private.hh @@ -19,7 +19,7 @@ namespace blender::gpu { class Immediate { public: /** Pointer to the mapped buffer data for the current vertex. */ - uchar *vertex_data = NULL; + uchar *vertex_data = nullptr; /** Current vertex index. */ uint vertex_idx = 0; /** Length of the buffer in vertices. */ @@ -32,12 +32,12 @@ class Immediate { /** Current draw call specification. */ GPUPrimType prim_type = GPU_PRIM_NONE; GPUVertFormat vertex_format = {}; - GPUShader *shader = NULL; + GPUShader *shader = nullptr; /** Enforce strict vertex count (disabled when using #immBeginAtMost). */ bool strict_vertex_len = true; /** Batch in construction when using #immBeginBatch. */ - GPUBatch *batch = NULL; + GPUBatch *batch = nullptr; /** Wide Line workaround. */ diff --git a/source/blender/gpu/intern/gpu_immediate_util.c b/source/blender/gpu/intern/gpu_immediate_util.c index a275fd8fc6c..5233ff2dbf6 100644 --- a/source/blender/gpu/intern/gpu_immediate_util.c +++ b/source/blender/gpu/intern/gpu_immediate_util.c @@ -142,7 +142,7 @@ static void imm_draw_circle(GPUPrimType prim_type, int nsegments) { if (prim_type == GPU_PRIM_LINE_LOOP) { - /* Note(Metal/AMD): For small primitives, line list more efficient than line strip.. */ + /* NOTE(Metal/AMD): For small primitives, line list more efficient than line strip.. */ immBegin(GPU_PRIM_LINES, nsegments * 2); immVertex2f(shdr_pos, x + (radius_x * cosf(0.0f)), y + (radius_y * sinf(0.0f))); @@ -333,7 +333,7 @@ static void imm_draw_circle_3D( GPUPrimType prim_type, uint pos, float x, float y, float radius, int nsegments) { if (prim_type == GPU_PRIM_LINE_LOOP) { - /* Note(Metal/AMD): For small primitives, line list more efficient than line strip. 
*/ + /* NOTE(Metal/AMD): For small primitives, line list more efficient than line strip. */ immBegin(GPU_PRIM_LINES, nsegments * 2); const float angle = (float)(2 * M_PI) / (float)nsegments; @@ -386,7 +386,7 @@ void imm_draw_circle_fill_3d(uint pos, float x, float y, float radius, int nsegm void imm_draw_box_wire_2d(uint pos, float x1, float y1, float x2, float y2) { - /* Note(Metal/AMD): For small primitives, line list more efficient than line-strip. */ + /* NOTE(Metal/AMD): For small primitives, line list more efficient than line-strip. */ immBegin(GPU_PRIM_LINES, 8); immVertex2f(pos, x1, y1); immVertex2f(pos, x1, y2); @@ -405,7 +405,7 @@ void imm_draw_box_wire_2d(uint pos, float x1, float y1, float x2, float y2) void imm_draw_box_wire_3d(uint pos, float x1, float y1, float x2, float y2) { /* use this version when GPUVertFormat has a vec3 position */ - /* Note(Metal/AMD): For small primitives, line list more efficient than line-strip. */ + /* NOTE(Metal/AMD): For small primitives, line list more efficient than line-strip. */ immBegin(GPU_PRIM_LINES, 8); immVertex3f(pos, x1, y1, 0.0f); immVertex3f(pos, x1, y2, 0.0f); diff --git a/source/blender/gpu/intern/gpu_init_exit.c b/source/blender/gpu/intern/gpu_init_exit.c index 062614fb5cb..34b355eefaf 100644 --- a/source/blender/gpu/intern/gpu_init_exit.c +++ b/source/blender/gpu/intern/gpu_init_exit.c @@ -55,6 +55,8 @@ void GPU_exit(void) gpu_shader_dependency_exit(); gpu_shader_create_info_exit(); + gpu_backend_delete_resources(); + initialized = false; } diff --git a/source/blender/gpu/intern/gpu_node_graph.c b/source/blender/gpu/intern/gpu_node_graph.c index 3c6a03c56d3..1338c5312c2 100644 --- a/source/blender/gpu/intern/gpu_node_graph.c +++ b/source/blender/gpu/intern/gpu_node_graph.c @@ -378,7 +378,7 @@ static GPUMaterialAttribute *gpu_node_graph_add_attribute(GPUNodeGraph *graph, } /* Add new requested attribute if it's within GPU limits. 
*/ - if (attr == NULL && num_attributes < GPU_MAX_ATTR) { + if (attr == NULL) { attr = MEM_callocN(sizeof(*attr), __func__); attr->type = type; STRNCPY(attr->name, name); diff --git a/source/blender/gpu/intern/gpu_private.h b/source/blender/gpu/intern/gpu_private.h index a8ee5187d98..0e293302086 100644 --- a/source/blender/gpu/intern/gpu_private.h +++ b/source/blender/gpu/intern/gpu_private.h @@ -10,6 +10,10 @@ extern "C" { #endif +/* gpu_backend.cc */ + +void gpu_backend_delete_resources(void); + /* gpu_pbvh.c */ void gpu_pbvh_init(void); diff --git a/source/blender/gpu/intern/gpu_shader_builder.cc b/source/blender/gpu/intern/gpu_shader_builder.cc index fc99b892554..9b699c60126 100644 --- a/source/blender/gpu/intern/gpu_shader_builder.cc +++ b/source/blender/gpu/intern/gpu_shader_builder.cc @@ -51,7 +51,6 @@ void ShaderBuilder::init() void ShaderBuilder::exit() { - GPU_backend_exit(); GPU_exit(); GPU_context_discard(gpu_context_); diff --git a/source/blender/gpu/intern/gpu_shader_builder_stubs.cc b/source/blender/gpu/intern/gpu_shader_builder_stubs.cc index 515f65adb73..d8af2fc584d 100644 --- a/source/blender/gpu/intern/gpu_shader_builder_stubs.cc +++ b/source/blender/gpu/intern/gpu_shader_builder_stubs.cc @@ -12,6 +12,7 @@ #include "IMB_imbuf.h" #include "IMB_imbuf_types.h" +#include "BKE_attribute.h" #include "BKE_customdata.h" #include "BKE_global.h" #include "BKE_material.h" @@ -101,6 +102,38 @@ void UI_GetThemeColorShadeAlpha4ubv(int UNUSED(colorid), /** \} */ /* -------------------------------------------------------------------- */ +/** \name Stubs of BKE_attribute.h + * \{ */ + +void BKE_id_attribute_copy_domains_temp(short UNUSED(id_type), + const struct CustomData *UNUSED(vdata), + const struct CustomData *UNUSED(edata), + const struct CustomData *UNUSED(ldata), + const struct CustomData *UNUSED(pdata), + const struct CustomData *UNUSED(cdata), + struct ID *UNUSED(i_id)) +{ +} + +struct CustomDataLayer *BKE_id_attributes_active_color_get(const struct ID *UNUSED(id)) +{ + return nullptr; +} + +struct CustomDataLayer *BKE_id_attributes_render_color_get(const struct ID *UNUSED(id)) +{ + return nullptr; +} + +eAttrDomain BKE_id_attribute_domain(const struct ID *UNUSED(id), + const struct CustomDataLayer *UNUSED(layer)) +{ + return ATTR_DOMAIN_AUTO; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name Stubs of BKE_paint.h * \{ */ bool paint_is_face_hidden(const struct MLoopTri *UNUSED(lt), @@ -170,6 +203,28 @@ int CustomData_get_offset(const struct CustomData *UNUSED(data), int UNUSED(type return 0; } +int CustomData_get_named_layer_index(const struct CustomData *UNUSED(data), + int UNUSED(type), + const char *UNUSED(name)) +{ + return -1; +} + +int CustomData_get_active_layer_index(const struct CustomData *UNUSED(data), int UNUSED(type)) +{ + return -1; +} + +int CustomData_get_render_layer_index(const struct CustomData *UNUSED(data), int UNUSED(type)) +{ + return -1; +} + +bool CustomData_has_layer(const struct CustomData *UNUSED(data), int UNUSED(type)) +{ + return false; +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -237,5 +292,14 @@ void DRW_deferred_shader_remove(struct GPUMaterial *UNUSED(mat)) BLI_assert_unreachable(); } +void DRW_cdlayer_attr_aliases_add(struct GPUVertFormat *UNUSED(format), + const char *UNUSED(base_name), + const struct CustomData *UNUSED(data), + const struct CustomDataLayer *UNUSED(cl), + bool UNUSED(is_active_render), + bool UNUSED(is_active_layer)) +{ +} + 
/** \} */ } diff --git a/source/blender/gpu/intern/gpu_shader_create_info.cc b/source/blender/gpu/intern/gpu_shader_create_info.cc index 16ce4f7723e..bc0731862cb 100644 --- a/source/blender/gpu/intern/gpu_shader_create_info.cc +++ b/source/blender/gpu/intern/gpu_shader_create_info.cc @@ -333,8 +333,11 @@ bool gpu_shader_create_info_compile_all() int skipped = 0; int total = 0; for (ShaderCreateInfo *info : g_create_infos->values()) { + info->finalize(); if (info->do_static_compilation_) { - if (GPU_compute_shader_support() == false && info->compute_source_ != nullptr) { + if ((GPU_compute_shader_support() == false && info->compute_source_ != nullptr) || + (GPU_shader_image_load_store_support() == false && info->has_resource_image()) || + (GPU_shader_storage_buffer_objects_support() == false && info->has_resource_storage())) { skipped++; continue; } diff --git a/source/blender/gpu/intern/gpu_shader_create_info.hh b/source/blender/gpu/intern/gpu_shader_create_info.hh index 4927ef75a75..8e05412d0ee 100644 --- a/source/blender/gpu/intern/gpu_shader_create_info.hh +++ b/source/blender/gpu/intern/gpu_shader_create_info.hh @@ -872,6 +872,31 @@ struct ShaderCreateInfo { return stream; } + bool has_resource_type(Resource::BindType bind_type) const + { + for (auto &res : batch_resources_) { + if (res.bind_type == bind_type) { + return true; + } + } + for (auto &res : pass_resources_) { + if (res.bind_type == bind_type) { + return true; + } + } + return false; + } + + bool has_resource_image() const + { + return has_resource_type(Resource::BindType::IMAGE); + } + + bool has_resource_storage() const + { + return has_resource_type(Resource::BindType::STORAGE_BUFFER); + } + /** \} */ #undef TEST_EQUAL diff --git a/source/blender/gpu/intern/gpu_shader_interface.hh b/source/blender/gpu/intern/gpu_shader_interface.hh index ac78af38fcc..60344757b43 100644 --- a/source/blender/gpu/intern/gpu_shader_interface.hh +++ b/source/blender/gpu/intern/gpu_shader_interface.hh @@ -39,9 +39,9 @@ class ShaderInterface { /* TODO(fclem): should be protected. */ public: /** Flat array. In this order: Attributes, Ubos, Uniforms. */ - ShaderInput *inputs_ = NULL; + ShaderInput *inputs_ = nullptr; /** Buffer containing all inputs names separated by '\0'. */ - char *name_buffer_ = NULL; + char *name_buffer_ = nullptr; /** Input counts inside input array. */ uint attr_len_ = 0; uint ubo_len_ = 0; @@ -187,7 +187,7 @@ inline const char *ShaderInterface::builtin_uniform_name(GPUUniformBuiltin u) return "srgbTarget"; default: - return NULL; + return nullptr; } } @@ -208,7 +208,7 @@ inline const char *ShaderInterface::builtin_uniform_block_name(GPUUniformBlockBu case GPU_UNIFORM_BLOCK_DRW_INFOS: return "drw_infos"; default: - return NULL; + return nullptr; } } @@ -258,7 +258,7 @@ inline const ShaderInput *ShaderInterface::input_lookup(const ShaderInput *const return inputs + i; /* not found */ } } - return NULL; /* not found */ + return nullptr; /* not found */ } /* This is a bit dangerous since we could have a hash collision. 
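The has_resource_type() helper added above generalizes over bind types, so capability gates in gpu_shader_create_info_compile_all() can be expressed uniformly. A sketch (illustrative; `info` is any ShaderCreateInfo — note that info->finalize() must run first so resources inherited through additional_info() chains are merged and visible to these queries):

  using blender::gpu::shader::ShaderCreateInfo;
  bool needs_image_support = info->has_resource_image();  /* Resource::BindType::IMAGE */
  bool needs_ssbo_support = info->has_resource_storage(); /* Resource::BindType::STORAGE_BUFFER */
  /* Other bind types could be gated the same way, e.g.: */
  bool uses_ubo = info->has_resource_type(ShaderCreateInfo::Resource::BindType::UNIFORM_BUFFER);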
@@ -268,7 +268,7 @@ inline const ShaderInput *ShaderInterface::input_lookup(const ShaderInput *const return inputs + i; } } - return NULL; /* not found */ + return nullptr; /* not found */ } inline const ShaderInput *ShaderInterface::input_lookup(const ShaderInput *const inputs, @@ -281,7 +281,7 @@ inline const ShaderInput *ShaderInterface::input_lookup(const ShaderInput *const return inputs + i; } } - return NULL; /* not found */ + return nullptr; /* not found */ } } // namespace blender::gpu diff --git a/source/blender/gpu/intern/gpu_storage_buffer_private.hh b/source/blender/gpu/intern/gpu_storage_buffer_private.hh index 091e6c2d386..9baec0c2a77 100644 --- a/source/blender/gpu/intern/gpu_storage_buffer_private.hh +++ b/source/blender/gpu/intern/gpu_storage_buffer_private.hh @@ -29,7 +29,7 @@ class StorageBuf { /** Data size in bytes. */ size_t size_in_bytes_; /** Continuous memory block to copy to GPU. This data is owned by the StorageBuf. */ - void *data_ = NULL; + void *data_ = nullptr; /** Debugging name */ char name_[DEBUG_NAME_LEN]; diff --git a/source/blender/gpu/intern/gpu_uniform_buffer_private.hh b/source/blender/gpu/intern/gpu_uniform_buffer_private.hh index 6e3285b6fef..e3d70634ce1 100644 --- a/source/blender/gpu/intern/gpu_uniform_buffer_private.hh +++ b/source/blender/gpu/intern/gpu_uniform_buffer_private.hh @@ -29,7 +29,7 @@ class UniformBuf { /** Data size in bytes. */ size_t size_in_bytes_; /** Continuous memory block to copy to GPU. This data is owned by the UniformBuf. */ - void *data_ = NULL; + void *data_ = nullptr; /** Debugging name */ char name_[DEBUG_NAME_LEN]; diff --git a/source/blender/gpu/intern/gpu_vertex_buffer_private.hh b/source/blender/gpu/intern/gpu_vertex_buffer_private.hh index 7a0b53cf958..a7920bacaec 100644 --- a/source/blender/gpu/intern/gpu_vertex_buffer_private.hh +++ b/source/blender/gpu/intern/gpu_vertex_buffer_private.hh @@ -29,7 +29,7 @@ class VertBuf { /** Status flag. */ GPUVertBufStatus flag = GPU_VERTBUF_INVALID; /** NULL indicates data in VRAM (unmapped) */ - uchar *data = NULL; + uchar *data = nullptr; protected: /** Usage hint for GL optimization. */ diff --git a/source/blender/gpu/metal/mtl_backend.hh b/source/blender/gpu/metal/mtl_backend.hh index 7228a5f7596..3e09408e43e 100644 --- a/source/blender/gpu/metal/mtl_backend.hh +++ b/source/blender/gpu/metal/mtl_backend.hh @@ -40,6 +40,11 @@ class MTLBackend : public GPUBackend { MTLBackend::platform_exit(); } + void delete_resources() + { + /* Delete any resources with context active. */ + } + static bool metal_is_supported(); static MTLBackend *get() { diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm index 81f8f279759..83cf3af0804 100644 --- a/source/blender/gpu/metal/mtl_backend.mm +++ b/source/blender/gpu/metal/mtl_backend.mm @@ -10,6 +10,8 @@ #include "mtl_backend.hh" #include "mtl_context.hh" #include "mtl_framebuffer.hh" +#include "mtl_query.hh" +#include "mtl_uniform_buffer.hh" #include "gpu_capabilities_private.hh" #include "gpu_platform_private.hh" @@ -64,8 +66,7 @@ IndexBuf *MTLBackend::indexbuf_alloc() QueryPool *MTLBackend::querypool_alloc() { - /* TODO(Metal): Implement MTLQueryPool. */ - return nullptr; + return new MTLQueryPool(); }; Shader *MTLBackend::shader_alloc(const char *name) @@ -81,8 +82,7 @@ Texture *MTLBackend::texture_alloc(const char *name) UniformBuf *MTLBackend::uniformbuf_alloc(int size, const char *name) { - /* TODO(Metal): Implement MTLUniformBuf. 
*/ - return nullptr; + return new MTLUniformBuf(size, name); }; StorageBuf *MTLBackend::storagebuf_alloc(int size, GPUUsageType usage, const char *name) @@ -127,7 +127,21 @@ void MTLBackend::render_end() void MTLBackend::render_step() { - /* Placeholder */ + /* NOTE(Metal): Primarily called from main thread, but below data-structures + * and operations are thread-safe, and GPUContext rendering coordination + * is also thread-safe. */ + + /* Flush any MTLSafeFreeLists which have previously been released by any MTLContext. */ + MTLContext::get_global_memory_manager().update_memory_pools(); + + /* End existing MTLSafeFreeList and begin new list -- + * Buffers wont `free` until all associated in-flight command buffers have completed. + * Decrement final reference count for ensuring the previous list is certainly + * released. */ + MTLSafeFreeList *cmd_free_buffer_list = + MTLContext::get_global_memory_manager().get_current_safe_list(); + MTLContext::get_global_memory_manager().begin_new_safe_list(); + cmd_free_buffer_list->decrement_reference(); } bool MTLBackend::is_inside_render_boundary() diff --git a/source/blender/gpu/metal/mtl_command_buffer.mm b/source/blender/gpu/metal/mtl_command_buffer.mm index 434bc664ee6..9a9a2d55103 100644 --- a/source/blender/gpu/metal/mtl_command_buffer.mm +++ b/source/blender/gpu/metal/mtl_command_buffer.mm @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ #include "DNA_userdef_types.h" @@ -19,19 +20,18 @@ namespace blender::gpu { * dependencies not being honored for work submitted between * different GPUContext's. */ id<MTLEvent> MTLCommandBufferManager::sync_event = nil; -unsigned long long MTLCommandBufferManager::event_signal_val = 0; +uint64_t MTLCommandBufferManager::event_signal_val = 0; /* Counter for active command buffers. */ int MTLCommandBufferManager::num_active_cmd_bufs = 0; /* -------------------------------------------------------------------- */ -/** \name MTLCommandBuffer initialisation and render coordination. +/** \name MTLCommandBuffer initialization and render coordination. * \{ */ -void MTLCommandBufferManager::prepare(MTLContext *ctx, bool supports_render) +void MTLCommandBufferManager::prepare(bool supports_render) { - context_ = ctx; - render_pass_state_.prepare(this, ctx); + render_pass_state_.reset_state(); } void MTLCommandBufferManager::register_encoder_counters() @@ -54,10 +54,10 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin() MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init]; desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus; desc.retainedReferences = YES; - active_command_buffer_ = [context_->queue commandBufferWithDescriptor:desc]; + active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc]; } else { - active_command_buffer_ = [context_->queue commandBuffer]; + active_command_buffer_ = [context_.queue commandBuffer]; } [active_command_buffer_ retain]; MTLCommandBufferManager::num_active_cmd_bufs++; @@ -67,6 +67,10 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin() [active_command_buffer_ encodeWaitForEvent:this->sync_event value:this->event_signal_val]; } + /* Ensure we begin new Scratch Buffer if we are on a new frame. */ + MTLScratchBufferManager &mem = context_.memory_manager; + mem.ensure_increment_scratch_buffer(); + /* Reset Command buffer heuristics. 
*/ this->reset_counters(); } @@ -86,12 +90,15 @@ bool MTLCommandBufferManager::submit(bool wait) this->end_active_command_encoder(); BLI_assert(active_command_encoder_type_ == MTL_NO_COMMAND_ENCODER); + /* Flush active ScratchBuffer associated with parent MTLContext. */ + context_.memory_manager.flush_active_scratch_buffer(); + /*** Submit Command Buffer. ***/ /* Strict ordering ensures command buffers are guaranteed to execute after a previous * one has completed. Resolves flickering when command buffers are submitted from * different MTLContext's. */ if (MTLCommandBufferManager::sync_event == nil) { - MTLCommandBufferManager::sync_event = [context_->device newEvent]; + MTLCommandBufferManager::sync_event = [context_.device newEvent]; BLI_assert(MTLCommandBufferManager::sync_event); [MTLCommandBufferManager::sync_event retain]; } @@ -102,14 +109,27 @@ bool MTLCommandBufferManager::submit(bool wait) value:MTLCommandBufferManager::event_signal_val]; /* Command buffer lifetime tracking. */ - /* TODO(Metal): This routine will later be used to track released memory allocations within the - * lifetime of a command buffer such that memory is only released once no longer in use. */ - id<MTLCommandBuffer> cmd_buffer_ref = [active_command_buffer_ retain]; + /* Increment current MTLSafeFreeList reference counter to flag MTLBuffers freed within + * the current command buffer lifetime as used. + * This ensures that in-use resources are not prematurely de-referenced and returned to the + * available buffer pool while they are in-use by the GPU. */ + MTLSafeFreeList *cmd_free_buffer_list = + MTLContext::get_global_memory_manager().get_current_safe_list(); + BLI_assert(cmd_free_buffer_list); + cmd_free_buffer_list->increment_reference(); + + id<MTLCommandBuffer> cmd_buffer_ref = active_command_buffer_; + [cmd_buffer_ref retain]; + [cmd_buffer_ref addCompletedHandler:^(id<MTLCommandBuffer> cb) { + /* Upon command buffer completion, decrement MTLSafeFreeList reference count + * to allow buffers no longer in use by this CommandBuffer to be freed. */ + cmd_free_buffer_list->decrement_reference(); + /* Release command buffer after completion callback handled. */ [cmd_buffer_ref release]; - /* Decrement active cmd buffer count. */ + /* Decrement count. */ MTLCommandBufferManager::num_active_cmd_bufs--; }]; @@ -288,6 +308,12 @@ id<MTLRenderCommandEncoder> MTLCommandBufferManager::ensure_begin_render_command active_pass_descriptor_ = active_frame_buffer_->bake_render_pass_descriptor( is_rebind && (!active_frame_buffer_->get_pending_clear())); + /* Determine if there is a visibility buffer assigned to the context. */ + gpu::MTLBuffer *visibility_buffer = context_.get_visibility_buffer(); + this->active_pass_descriptor_.visibilityResultBuffer = + (visibility_buffer) ? visibility_buffer->get_metal_buffer() : nil; + context_.clear_visibility_dirty(); + /* Ensure we have already cleaned up our previous render command encoder. */ BLI_assert(active_render_command_encoder_ == nil); @@ -516,15 +542,6 @@ bool MTLCommandBufferManager::insert_memory_barrier(eGPUBarrier barrier_bits, /* -------------------------------------------------------------------- */ /** \name Render Pass State for active RenderCommandEncoder * \{ */ - -/* Metal Render Pass State. 
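Condensed, the buffer-lifetime protocol the code above implements (illustrative sketch; `cmd_buffer` stands for the command buffer being committed):

  MTLSafeFreeList *list = MTLContext::get_global_memory_manager().get_current_safe_list();
  list->increment_reference(); /* submit(): buffers freed meanwhile stay held for this buffer. */
  [cmd_buffer addCompletedHandler:^(id<MTLCommandBuffer> cb) {
    list->decrement_reference(); /* GPU finished: drop this command buffer's hold. */
  }];
  /* Once per frame, MTLBackend::render_step() retires the list: */
  MTLContext::get_global_memory_manager().begin_new_safe_list();
  list->decrement_reference(); /* Initial reference: at zero, update_memory_pools()
                                * returns the buffers to the pool. */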
*/ -void MTLRenderPassState::prepare(MTLCommandBufferManager *cmd, MTLContext *mtl_ctx) -{ - this->cmd = cmd; - this->ctx = mtl_ctx; - this->reset_state(); -} - /* Reset binding state when a new RenderCommandEncoder is bound, to ensure * pipeline resources are re-applied to the new Encoder. * NOTE: In Metal, state is only persistent within an MTLCommandEncoder, @@ -539,12 +556,12 @@ void MTLRenderPassState::reset_state() this->last_bound_shader_state.set(nullptr, 0); /* Other states. */ - MTLFrameBuffer *fb = this->cmd->get_active_framebuffer(); + MTLFrameBuffer *fb = this->cmd.get_active_framebuffer(); this->last_used_stencil_ref_value = 0; this->last_scissor_rect = {0, 0, - (unsigned long)((fb != nullptr) ? fb->get_width() : 0), - (unsigned long)((fb != nullptr) ? fb->get_height() : 0)}; + (uint)((fb != nullptr) ? fb->get_width() : 0), + (uint)((fb != nullptr) ? fb->get_height() : 0)}; /* Reset cached resource binding state */ for (int ubo = 0; ubo < MTL_MAX_UNIFORM_BUFFER_BINDINGS; ubo++) { @@ -573,7 +590,7 @@ void MTLRenderPassState::reset_state() void MTLRenderPassState::bind_vertex_texture(id<MTLTexture> tex, uint slot) { if (this->cached_vertex_texture_bindings[slot].metal_texture != tex) { - id<MTLRenderCommandEncoder> rec = this->cmd->get_active_render_command_encoder(); + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); BLI_assert(rec != nil); [rec setVertexTexture:tex atIndex:slot]; this->cached_vertex_texture_bindings[slot].metal_texture = tex; @@ -583,7 +600,7 @@ void MTLRenderPassState::bind_vertex_texture(id<MTLTexture> tex, uint slot) void MTLRenderPassState::bind_fragment_texture(id<MTLTexture> tex, uint slot) { if (this->cached_fragment_texture_bindings[slot].metal_texture != tex) { - id<MTLRenderCommandEncoder> rec = this->cmd->get_active_render_command_encoder(); + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); BLI_assert(rec != nil); [rec setFragmentTexture:tex atIndex:slot]; this->cached_fragment_texture_bindings[slot].metal_texture = tex; diff --git a/source/blender/gpu/metal/mtl_common.hh b/source/blender/gpu/metal/mtl_common.hh index 8dda2c43585..44ba786f90f 100644 --- a/source/blender/gpu/metal/mtl_common.hh +++ b/source/blender/gpu/metal/mtl_common.hh @@ -4,8 +4,13 @@ #define __MTL_COMMON // -- Renderer Options -- +#define MTL_MAX_DRAWABLES 3 #define MTL_MAX_SET_BYTES_SIZE 4096 #define MTL_FORCE_WAIT_IDLE 0 #define MTL_MAX_COMMAND_BUFFERS 64 +/* Number of frames for which we retain in-flight resources such as scratch buffers. + * Set as number of GPU frames in flight, plus an additional value for extra possible CPU frame. */ +#define MTL_NUM_SAFE_FRAMES (MTL_MAX_DRAWABLES + 1) + #endif diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh index 1b2af6a584b..0db87bf5da5 100644 --- a/source/blender/gpu/metal/mtl_context.hh +++ b/source/blender/gpu/metal/mtl_context.hh @@ -3,6 +3,9 @@ /** \file * \ingroup gpu */ + +#pragma once + #include "MEM_guardedalloc.h" #include "gpu_context_private.hh" @@ -12,7 +15,9 @@ #include "mtl_backend.hh" #include "mtl_capabilities.hh" +#include "mtl_common.hh" #include "mtl_framebuffer.hh" +#include "mtl_memory.hh" #include "mtl_texture.hh" #include <Cocoa/Cocoa.h> @@ -30,7 +35,6 @@ class MTLContext; class MTLCommandBufferManager; class MTLShader; class MTLUniformBuf; -class MTLBuffer; /* Structs containing information on current binding state for textures and samplers. 
*/ struct MTLTextureBinding { @@ -56,10 +60,13 @@ struct MTLSamplerBinding { struct MTLRenderPassState { friend class MTLContext; + MTLRenderPassState(MTLContext &context, MTLCommandBufferManager &command_buffer_manager) + : ctx(context), cmd(command_buffer_manager){}; + /* Given a RenderPassState is associated with a live RenderCommandEncoder, * this state sits within the MTLCommandBufferManager. */ - MTLCommandBufferManager *cmd; - MTLContext *ctx; + MTLContext &ctx; + MTLCommandBufferManager &cmd; /* Caching of resource bindings for active MTLRenderCommandEncoder. * In Metal, resource bindings are local to the MTLCommandEncoder, @@ -110,9 +117,6 @@ struct MTLRenderPassState { SamplerStateBindingCached cached_vertex_sampler_state_bindings[MTL_MAX_TEXTURE_SLOTS]; SamplerStateBindingCached cached_fragment_sampler_state_bindings[MTL_MAX_TEXTURE_SLOTS]; - /* Prepare. */ - void prepare(MTLCommandBufferManager *cmd, MTLContext *ctx); - /* Reset RenderCommandEncoder binding state. */ void reset_state(); @@ -446,18 +450,6 @@ struct MTLContextGlobalShaderPipelineState { float line_width = 1.0f; }; -/* Metal Buffer */ -struct MTLTemporaryBufferRange { - id<MTLBuffer> metal_buffer; - void *host_ptr; - unsigned long long buffer_offset; - unsigned long long size; - MTLResourceOptions options; - - void flush(); - bool requires_flush(); -}; - /* Command Buffer Manager - Owned by MTLContext. * The MTLCommandBufferManager represents all work associated with * a command buffer of a given identity. This manager is a fixed-state @@ -477,14 +469,14 @@ class MTLCommandBufferManager { public: /* Event to coordinate sequential execution across all "main" command buffers. */ static id<MTLEvent> sync_event; - static unsigned long long event_signal_val; + static uint64_t event_signal_val; /* Counter for active command buffers. */ static int num_active_cmd_bufs; private: /* Associated Context and properties. */ - MTLContext *context_ = nullptr; + MTLContext &context_; bool supports_render_ = false; /* CommandBuffer tracking. */ @@ -516,7 +508,9 @@ class MTLCommandBufferManager { bool empty_ = true; public: - void prepare(MTLContext *ctx, bool supports_render = true); + MTLCommandBufferManager(MTLContext &context) + : context_(context), render_pass_state_(context, *this){}; + void prepare(bool supports_render = true); /* If wait is true, CPU will stall until GPU work has completed. */ bool submit(bool wait); @@ -582,7 +576,7 @@ class MTLContext : public Context { /* Texture Samplers. */ /* Cache of generated MTLSamplerState objects based on permutations of `eGPUSamplerState`. */ - id<MTLSamplerState> sampler_state_cache_[GPU_SAMPLER_MAX] = {0}; + id<MTLSamplerState> sampler_state_cache_[GPU_SAMPLER_MAX]; id<MTLSamplerState> default_sampler_state_ = nil; /* When texture sampler count exceeds the resource bind limit, an @@ -595,6 +589,11 @@ class MTLContext : public Context { /* Frame. */ bool is_inside_frame_ = false; + uint current_frame_index_; + + /* Visibility buffer for MTLQuery results. */ + gpu::MTLBuffer *visibility_buffer_ = nullptr; + bool visibility_is_dirty_ = false; public: /* Shaders and Pipeline state. */ @@ -604,6 +603,10 @@ class MTLContext : public Context { id<MTLCommandQueue> queue = nil; id<MTLDevice> device = nil; + /* Memory Management */ + MTLScratchBufferManager memory_manager; + static MTLBufferPool global_memory_manager; + /* CommandBuffer managers. 
*/ MTLCommandBufferManager main_command_buffer; @@ -623,8 +626,13 @@ class MTLContext : public Context { void memory_statistics_get(int *total_mem, int *free_mem) override; + static MTLContext *get() + { + return static_cast<MTLContext *>(Context::get()); + } + void debug_group_begin(const char *name, int index) override; - void debug_group_end(void) override; + void debug_group_end() override; /*** MTLContext Utility functions. */ /* @@ -664,6 +672,18 @@ class MTLContext : public Context { void set_scissor(int scissor_x, int scissor_y, int scissor_width, int scissor_height); void set_scissor_enabled(bool scissor_enabled); + /* Visibility buffer control. */ + void set_visibility_buffer(gpu::MTLBuffer *buffer); + gpu::MTLBuffer *get_visibility_buffer() const; + + /* Flag whether the visibility buffer for query results + * has changed. This requires a new RenderPass in order + * to update.*/ + bool is_visibility_dirty() const; + + /* Reset dirty flag state for visibility buffer. */ + void clear_visibility_dirty(); + /* Texture utilities. */ MTLContextTextureUtils &get_texture_utils() { @@ -679,6 +699,21 @@ class MTLContext : public Context { { return is_inside_frame_; } + + uint get_current_frame_index() + { + return current_frame_index_; + } + + MTLScratchBufferManager &get_scratchbuffer_manager() + { + return this->memory_manager; + } + + static MTLBufferPool &get_global_memory_manager() + { + return MTLContext::global_memory_manager; + } }; } // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm index 6ecdb3f48b3..26cfe6632ef 100644 --- a/source/blender/gpu/metal/mtl_context.mm +++ b/source/blender/gpu/metal/mtl_context.mm @@ -16,44 +16,25 @@ using namespace blender::gpu; namespace blender::gpu { -/* -------------------------------------------------------------------- */ -/** \name Memory Management - * \{ */ - -bool MTLTemporaryBufferRange::requires_flush() -{ - /* We do not need to flush shared memory. */ - return this->options & MTLResourceStorageModeManaged; -} - -void MTLTemporaryBufferRange::flush() -{ - if (this->requires_flush()) { - BLI_assert(this->metal_buffer); - BLI_assert((this->buffer_offset + this->size) <= [this->metal_buffer length]); - BLI_assert(this->buffer_offset >= 0); - [this->metal_buffer - didModifyRange:NSMakeRange(this->buffer_offset, this->size - this->buffer_offset)]; - } -} - -/** \} */ +/* Global memory manager. */ +MTLBufferPool MTLContext::global_memory_manager; /* -------------------------------------------------------------------- */ /** \name MTLContext * \{ */ /* Placeholder functions */ -MTLContext::MTLContext(void *ghost_window) +MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command_buffer(*this) { /* Init debug. */ debug::mtl_debug_init(); /* Initialize command buffer state. */ - this->main_command_buffer.prepare(this); + this->main_command_buffer.prepare(); /* Frame management. */ is_inside_frame_ = false; + current_frame_index_ = 0; /* Create FrameBuffer handles. */ MTLFrameBuffer *mtl_front_left = new MTLFrameBuffer(this, "front_left"); @@ -65,9 +46,14 @@ MTLContext::MTLContext(void *ghost_window) * initialization). */ MTLBackend::platform_init(this); MTLBackend::capabilities_init(this); + /* Initialize Metal modules. */ + this->memory_manager.init(); this->state_manager = new MTLStateManager(this); + /* Ensure global memory manager is initialized. */ + MTLContext::global_memory_manager.init(this->device); + /* Initialize texture read/update structures. 
*/ this->get_texture_utils().init(); @@ -93,7 +79,7 @@ MTLContext::~MTLContext() this->finish(); /* End frame. */ - if (is_inside_frame_) { + if (this->get_inside_frame()) { this->end_frame(); } } @@ -112,7 +98,7 @@ MTLContext::~MTLContext() void MTLContext::begin_frame() { BLI_assert(MTLBackend::get()->is_inside_render_boundary()); - if (is_inside_frame_) { + if (this->get_inside_frame()) { return; } @@ -122,7 +108,7 @@ void MTLContext::begin_frame() void MTLContext::end_frame() { - BLI_assert(is_inside_frame_); + BLI_assert(this->get_inside_frame()); /* Ensure pre-present work is committed. */ this->flush(); @@ -136,20 +122,20 @@ void MTLContext::check_error(const char *info) /* TODO(Metal): Implement. */ } -void MTLContext::activate(void) +void MTLContext::activate() { /* TODO(Metal): Implement. */ } -void MTLContext::deactivate(void) +void MTLContext::deactivate() { /* TODO(Metal): Implement. */ } -void MTLContext::flush(void) +void MTLContext::flush() { /* TODO(Metal): Implement. */ } -void MTLContext::finish(void) +void MTLContext::finish() { /* TODO(Metal): Implement. */ } @@ -180,7 +166,7 @@ id<MTLRenderCommandEncoder> MTLContext::ensure_begin_render_pass() BLI_assert(this); /* Ensure the rendering frame has started. */ - if (!is_inside_frame_) { + if (!this->get_inside_frame()) { this->begin_frame(); } @@ -202,7 +188,8 @@ id<MTLRenderCommandEncoder> MTLContext::ensure_begin_render_pass() * framebuffer state has been modified (is_dirty). */ if (!this->main_command_buffer.is_inside_render_pass() || this->active_fb != this->main_command_buffer.get_active_framebuffer() || - this->main_command_buffer.get_active_framebuffer()->get_dirty()) { + this->main_command_buffer.get_active_framebuffer()->get_dirty() || + this->is_visibility_dirty()) { /* Validate bound framebuffer before beginning render pass. */ if (!static_cast<MTLFrameBuffer *>(this->active_fb)->validate_render_pass()) { @@ -386,6 +373,45 @@ void MTLContext::set_scissor_enabled(bool scissor_enabled) /** \} */ /* -------------------------------------------------------------------- */ +/** \name Visibility buffer control for MTLQueryPool. + * \{ */ + +void MTLContext::set_visibility_buffer(gpu::MTLBuffer *buffer) +{ + /* Flag visibility buffer as dirty if the buffer being used for visibility has changed -- + * This is required by the render pass, and we will break the pass if the results destination + * buffer is modified. */ + if (buffer) { + visibility_is_dirty_ = (buffer != visibility_buffer_) || visibility_is_dirty_; + visibility_buffer_ = buffer; + visibility_buffer_->debug_ensure_used(); + } + else { + /* If buffer is null, reset visibility state, mark dirty to break render pass if results are no + * longer needed. 
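The expected flow from a query pool's point of view (illustrative sketch; `results_buf` is a placeholder gpu::MTLBuffer owned by the pool — the real driver of this state is MTLQueryPool in mtl_query.mm):

  MTLContext *ctx = MTLContext::get();
  ctx->set_visibility_buffer(results_buf); /* New buffer: marks dirty, so the next draw
                                            * begins a fresh render pass with
                                            * visibilityResultBuffer bound. */
  /* ... encode occlusion-query draws ... */
  ctx->set_visibility_buffer(nullptr);     /* Results no longer needed: dirty again. */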
*/ + visibility_is_dirty_ = (visibility_buffer_ != nullptr) || visibility_is_dirty_; + visibility_buffer_ = nullptr; + } +} + +gpu::MTLBuffer *MTLContext::get_visibility_buffer() const +{ + return visibility_buffer_; +} + +void MTLContext::clear_visibility_dirty() +{ + visibility_is_dirty_ = false; +} + +bool MTLContext::is_visibility_dirty() const +{ + return visibility_is_dirty_; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name Texture State Management * \{ */ diff --git a/source/blender/gpu/metal/mtl_framebuffer.hh b/source/blender/gpu/metal/mtl_framebuffer.hh index d6fa1850109..d6e9fa76b70 100644 --- a/source/blender/gpu/metal/mtl_framebuffer.hh +++ b/source/blender/gpu/metal/mtl_framebuffer.hh @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + /** \file * \ingroup gpu * diff --git a/source/blender/gpu/metal/mtl_framebuffer.mm b/source/blender/gpu/metal/mtl_framebuffer.mm index 22de255bf63..515dd70e5de 100644 --- a/source/blender/gpu/metal/mtl_framebuffer.mm +++ b/source/blender/gpu/metal/mtl_framebuffer.mm @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + /** \file * \ingroup gpu */ @@ -756,7 +758,7 @@ void MTLFrameBuffer::update_attachments(bool update_viewport) dirty_attachments_ = false; } -void MTLFrameBuffer::apply_state(void) +void MTLFrameBuffer::apply_state() { MTLContext *mtl_ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_ctx); diff --git a/source/blender/gpu/metal/mtl_memory.hh b/source/blender/gpu/metal/mtl_memory.hh new file mode 100644 index 00000000000..dc5417dc11a --- /dev/null +++ b/source/blender/gpu/metal/mtl_memory.hh @@ -0,0 +1,482 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#pragma once + +#include <atomic> +#include <functional> +#include <map> +#include <mutex> +#include <set> +#include <unordered_map> + +#include "mtl_common.hh" + +#include <Cocoa/Cocoa.h> +#include <Metal/Metal.h> +#include <QuartzCore/QuartzCore.h> + +@class CAMetalLayer; +@class MTLCommandQueue; +@class MTLRenderPipelineState; + +/* Metal Memory Manager Overview. */ +/* + * The Metal Backend Memory manager is designed to provide an interface + * for all other MTL_* modules where memory allocation is required. + * + * Different allocation strategies and data-structures are used depending + * on how the data is used by the backend. These aim to optimally handle + * system memory and abstract away any complexity from the MTL_* modules + * themselves. + * + * There are two primary allocation modes which can be used: + * + * ** MTLScratchBufferManager ** + * + * Each MTLContext owns a ScratchBufferManager which is implemented + * as a pool of circular buffers, designed to handle temporary + * memory allocations which occur on a per-frame basis. The scratch + * buffers allow flushing of host memory to the GPU to be batched. + * + * Each frame, the next scratch buffer is reset, then later flushed upon + * command buffer submission. + * + * Note: This is allocated per-context due to allocations being tied + * to workload submissions and context-specific submissions. + * + * Examples of scratch buffer usage are: + * - Immediate-mode temporary vertex buffers. + * - Shader uniform data updates + * - Staging of data for resource copies, or, data reads/writes. + * + * Usage: + * + * MTLContext::get_scratchbuffer_manager() - to fetch active manager. 
+ * + * MTLTemporaryBuffer scratch_buffer_allocate_range(size) + * MTLTemporaryBuffer scratch_buffer_allocate_range_aligned(size, align) + * + * --------------------------------------------------------------------------------- + * ** MTLBufferPool ** + * + * For static and longer-lasting memory allocations, such as those for UBOs, + * Vertex buffers, index buffers, etc; We want an optimal abstraction for + * fetching a MTLBuffer of the desired size and resource options. + * + * Memory allocations can be expensive so the MTLBufferPool provides + * functionality to track usage of these buffers and once a buffer + * is no longer in use, it is returned to the buffer pool for use + * by another backend resource. + * + * The MTLBufferPool provides functionality for safe tracking of resources, + * as buffers freed on the host side must have their usage by the GPU tracked, + * to ensure they are not prematurely re-used before they have finished being + * used by the GPU. + * + * Note: The MTLBufferPool is a global construct which can be fetched from anywhere. + * + * Usage: + * MTLContext::get_global_memory_manager(); - static routine to fetch global memory manager. + * + * gpu::MTLBuffer *allocate(size, is_cpu_visibile) + * gpu::MTLBuffer *allocate_aligned(size, alignment, is_cpu_visibile) + * gpu::MTLBuffer *allocate_with_data(size, is_cpu_visibile, data_ptr) + * gpu::MTLBuffer *allocate_aligned_with_data(size, alignment, is_cpu_visibile, data_ptr) + */ + +/* Debug memory statistics: Disabled by Macro rather than guarded for + * performance considerations. */ +#define MTL_DEBUG_MEMORY_STATISTICS 0 + +/* Allows a scratch buffer to temporarily grow beyond its maximum, which allows submission + * of one-time-use data packets which are too large. */ +#define MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION 1 + +namespace blender::gpu { + +/* Forward Declarations. */ +class MTLContext; +class MTLCommandBufferManager; +class MTLUniformBuf; + +/* -------------------------------------------------------------------- */ +/** \name Memory Management. + * \{ */ + +/* MTLBuffer allocation wrapper. */ +class MTLBuffer { + + private: + /* Metal resource. */ + id<MTLBuffer> metal_buffer_; + + /* Host-visible mapped-memory pointer. Behavior depends on buffer type: + * - Shared buffers: pointer represents base address of #MTLBuffer whose data + * access has shared access by both the CPU and GPU on + * Unified Memory Architectures (UMA). + * - Managed buffer: Host-side mapped buffer region for CPU (Host) access. Managed buffers + * must be manually flushed to transfer data to GPU-resident buffer. + * - Private buffer: Host access is invalid, `data` will be nullptr. */ + void *data_; + + /* Whether buffer is allocated from an external source. */ + bool is_external_ = false; + + /* Allocation info. */ + MTLResourceOptions options_; + id<MTLDevice> device_; + uint64_t alignment_; + uint64_t size_; + + /* Allocated size may be larger than actual size. */ + uint64_t usage_size_; + + /* Lifetime info - whether the current buffer is actively in use. A buffer + * should be in use after it has been allocated. De-allocating the buffer, and + * returning it to the free buffer pool will set in_use to false. Using a buffer + * while it is not in-use should not be allowed and result in an error. 
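A per-frame upload through the scratch manager then looks like this (illustrative sketch; `src`, `len` and the 256-byte alignment are placeholders):

  MTLScratchBufferManager &scratch = MTLContext::get()->get_scratchbuffer_manager();
  MTLTemporaryBuffer range = scratch.scratch_buffer_allocate_range_aligned(len, 256);
  memcpy(range.data, src, len); /* Write through the host-visible pointer. */
  /* Bind range.metal_buffer at range.buffer_offset; managed ranges are flushed in
   * bulk by flush_active_scratch_buffer() when the command buffer is submitted. */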
*/ + std::atomic<bool> in_use_; + + public: + MTLBuffer(id<MTLDevice> device, uint64_t size, MTLResourceOptions options, uint alignment = 1); + MTLBuffer(id<MTLBuffer> external_buffer); + ~MTLBuffer(); + + /* Fetch information about backing MTLBuffer. */ + id<MTLBuffer> get_metal_buffer() const; + void *get_host_ptr() const; + uint64_t get_size_used() const; + uint64_t get_size() const; + + /* Flush data to GPU. */ + void flush(); + void flush_range(uint64_t offset, uint64_t length); + bool requires_flush(); + + /* Buffer usage tracking. */ + void flag_in_use(bool used); + bool get_in_use(); + void set_usage_size(uint64_t size_used); + + /* Debug. */ + void set_label(NSString *str); + + /* Read properties. */ + MTLResourceOptions get_resource_options(); + uint64_t get_alignment(); + + /* Resource-local free: For buffers allocated via memory manager, + * this will call the context `free_buffer` method to return the buffer to the context memory + * pool. + * + * Otherwise, free will release the associated metal resource. + * As a note, calling the destructor will also destroy the buffer and associated metal + * resource. */ + void free(); + + /* Safety check to ensure buffers are not used after free. */ + void debug_ensure_used(); +}; + +/* View into part of an MTLBuffer. */ +struct MTLBufferRange { + id<MTLBuffer> metal_buffer; + void *data; + uint64_t buffer_offset; + uint64_t size; + MTLResourceOptions options; + + void flush(); + bool requires_flush(); +}; + +/* Circular scratch buffer allocations should be seen as temporary and only used within the + * lifetime of the frame. */ +using MTLTemporaryBuffer = MTLBufferRange; + +/* Round-Robin Circular-buffer. */ +class MTLCircularBuffer { + friend class MTLScratchBufferManager; + + private: + MTLContext &own_context_; + + /* Wrapped MTLBuffer allocation handled. */ + gpu::MTLBuffer *cbuffer_; + + /* Current offset where next allocation will begin. */ + uint64_t current_offset_; + + /* Whether the Circular Buffer can grow during re-allocation if + * the size is exceeded. */ + bool can_resize_; + + /* Usage information. */ + uint64_t used_frame_index_; + uint64_t last_flush_base_offset_; + + public: + MTLCircularBuffer(MTLContext &ctx, uint64_t initial_size, bool allow_grow); + ~MTLCircularBuffer(); + MTLTemporaryBuffer allocate_range(uint64_t alloc_size); + MTLTemporaryBuffer allocate_range_aligned(uint64_t alloc_size, uint alignment); + void flush(); + + /* Reset pointer back to start of circular buffer. */ + void reset(); +}; + +/* Wrapper struct used by Memory Manager to sort and compare gpu::MTLBuffer resources inside the + * memory pools. */ +struct MTLBufferHandle { + gpu::MTLBuffer *buffer; + uint64_t buffer_size; + + inline MTLBufferHandle(gpu::MTLBuffer *buf) + { + this->buffer = buf; + this->buffer_size = this->buffer->get_size(); + } + + inline MTLBufferHandle(uint64_t compare_size) + { + this->buffer = nullptr; + this->buffer_size = compare_size; + } +}; + +struct CompareMTLBuffer { + bool operator()(const MTLBufferHandle &lhs, const MTLBufferHandle &rhs) const + { + return lhs.buffer_size < rhs.buffer_size; + } +}; + +/* An MTLSafeFreeList is a temporary list of gpu::MTLBuffers which have + * been freed by the high level backend, but are pending GPU work execution before + * the gpu::MTLBuffers can be returned to the Memory manager pools. + * This list is implemented as a chunked linked-list. + * + * Only a single MTLSafeFreeList is active at one time and is associated with current command + * buffer submissions. 
If an MTLBuffer is freed during the lifetime of a command buffer, it could + * still possibly be in-use and as such, the MTLSafeFreeList will increment its reference count for + * each command buffer submitted while the current pool is active. + * + * -- Reference count is incremented upon MTLCommandBuffer commit. + * -- Reference count is decremented in the MTLCommandBuffer completion callback handler. + * + * A new MTLSafeFreeList will begin each render step (frame). This pooling of buffers, rather than + * individual buffer resource tracking reduces performance overhead. + * + * * The reference count starts at 1 to ensure that the reference count cannot prematurely reach + * zero until any command buffers have been submitted. This additional decrement happens + * when the next MTLSafeFreeList is created, to allow the existing pool to be released once + * the reference count hits zero after submitted command buffers complete. + * + * Note: the Metal API independently tracks resources used by command buffers for the purpose of + * keeping resources alive while in-use by the driver and CPU, however, this differs from the + * MTLSafeFreeList mechanism in the Metal backend, which exists for the purpose of allowing + * previously allocated MTLBuffer resources to be re-used. This allows us to save on the expensive + * cost of memory allocation. + */ +class MTLSafeFreeList { + friend class MTLBufferPool; + + private: + std::atomic<int> reference_count_; + std::atomic<bool> in_free_queue_; + std::recursive_mutex lock_; + + /* Linked list of next MTLSafeFreeList chunk if current chunk is full. */ + std::atomic<int> has_next_pool_; + std::atomic<MTLSafeFreeList *> next_; + + /* Lockless list. MAX_NUM_BUFFERS_ within a chunk based on considerations + * for performance and memory. */ + static const int MAX_NUM_BUFFERS_ = 1024; + std::atomic<int> current_list_index_; + gpu::MTLBuffer *safe_free_pool_[MAX_NUM_BUFFERS_]; + + public: + MTLSafeFreeList(); + + /* Add buffer to Safe Free List, can be called from secondary threads. + * Performs a lockless list insert. */ + void insert_buffer(gpu::MTLBuffer *buffer); + + /* Increments command buffer reference count. */ + void increment_reference(); + + /* Decrement and return of buffers to pool occur on MTLCommandBuffer completion callback thread. + */ + void decrement_reference(); + + void flag_in_queue() + { + in_free_queue_ = true; + if (has_next_pool_) { + MTLSafeFreeList *next_pool = next_.load(); + BLI_assert(next_pool != nullptr); + next_pool->flag_in_queue(); + } + } +}; + +/* MTLBuffer pools. */ +/* Allocating Metal buffers is expensive, so we cache all allocated buffers, + * and when requesting a new buffer, find one which fits the required dimensions + * from an existing pool of buffers. + * + * When freeing MTLBuffers, we insert them into the current MTLSafeFreeList, which defers + * release of the buffer until the associated command buffers have finished executing. + * This prevents a buffer from being re-used while it is still in-use by the GPU. + * + * * Once command buffers complete, MTLSafeFreeList's associated with the current + * command buffer submission are added to the `completed_safelist_queue_`. + * + * * At a set point in time, all MTLSafeFreeList's in `completed_safelist_queue_` have their + * MTLBuffers re-inserted into the Memory Manager's pools. */ +class MTLBufferPool { + + private: + /* Memory statistics. */ + long long int total_allocation_bytes_ = 0; + +#if MTL_DEBUG_MEMORY_STATISTICS == 1 + /* Debug statistics. 
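From the caller's side the deferral is invisible (illustrative sketch; `size` is a placeholder):

  gpu::MTLBuffer *buf = MTLContext::get_global_memory_manager().allocate(size, true);
  /* ... encode GPU work that reads or writes buf ... */
  buf->free(); /* Inserted into the current MTLSafeFreeList; the underlying
                * id<MTLBuffer> only re-enters the pool once every command buffer
                * submitted while that list was active has completed. */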
+
+/* MTLBuffer pools:
+ * Allocating Metal buffers is expensive, so we cache all allocated buffers,
+ * and when requesting a new buffer, find one which fits the required dimensions
+ * from an existing pool of buffers.
+ *
+ * When freeing MTLBuffers, we insert them into the current MTLSafeFreeList, which defers
+ * release of the buffer until the associated command buffers have finished executing.
+ * This prevents a buffer from being re-used while it is still in-use by the GPU.
+ *
+ *  * Once command buffers complete, MTLSafeFreeLists associated with the current
+ *    command buffer submission are added to the `completed_safelist_queue_`.
+ *
+ *  * At a set point in time, all MTLSafeFreeLists in `completed_safelist_queue_` have their
+ *    MTLBuffers re-inserted into the memory manager's pools. */
+class MTLBufferPool {
+
+ private:
+  /* Memory statistics. */
+  long long int total_allocation_bytes_ = 0;
+
+#if MTL_DEBUG_MEMORY_STATISTICS == 1
+  /* Debug statistics. */
+  std::atomic<int> per_frame_allocation_count_;
+  std::atomic<long long int> allocations_in_pool_;
+  std::atomic<long long int> buffers_in_pool_;
+#endif
+
+  /* Metal resources. */
+  bool ensure_initialised_ = false;
+  id<MTLDevice> device_ = nil;
+
+  /* The buffer selection aims to pick a buffer which meets the minimum size requirements.
+   * To do this, we keep an ordered set of all available buffers. If the buffer is larger than the
+   * desired allocation size, we check it against `mtl_buffer_size_threshold_factor_`,
+   * which defines what percentage larger than the original allocation the buffer can be.
+   * - A higher value results in greater re-use of previously allocated buffers of similar sizes.
+   * - A lower value may result in more dynamic allocations, but minimizes memory usage for a
+   *   given scenario.
+   * The current value of 1.26 is calibrated for optimal performance and memory utilization. */
+  static constexpr float mtl_buffer_size_threshold_factor_ = 1.26;
+
+  /* Buffer pools using MTLResourceOptions as key for allocation type.
+   * Aliased as 'uint64_t' for map type compatibility.
+   * - A size-ordered list (multiset) of allocated buffers is kept per MTLResourceOptions
+   *   permutation. This allows efficient lookup for buffers of a given requested size.
+   * - MTLBufferHandle wraps a gpu::MTLBuffer pointer to achieve easy size-based sorting
+   *   via CompareMTLBuffer. */
+  using MTLBufferPoolOrderedList = std::multiset<MTLBufferHandle, CompareMTLBuffer>;
+  using MTLBufferResourceOptions = uint64_t;
+
+  blender::Map<MTLBufferResourceOptions, MTLBufferPoolOrderedList *> buffer_pools_;
+  blender::Vector<gpu::MTLBuffer *> allocations_;
+
+  /* Maintain a queue of all MTLSafeFreeLists that have been released
+   * by the GPU and are ready to have their buffers re-inserted into the
+   * memory manager pools.
+   * Access to this queue is made thread-safe through `safelist_lock_`. */
+  std::mutex safelist_lock_;
+  blender::Vector<MTLSafeFreeList *> completed_safelist_queue_;
+
+  /* Current free list, associated with the active MTLCommandBuffer submission.
+   * MTLBuffer::free() can be called from separate threads, due to usage within the animation
+   * system/worker threads. */
+  std::atomic<MTLSafeFreeList *> current_free_list_;
+
+ public:
+  void init(id<MTLDevice> device);
+  ~MTLBufferPool();
+
+  gpu::MTLBuffer *allocate(uint64_t size, bool cpu_visible);
+  gpu::MTLBuffer *allocate_aligned(uint64_t size, uint alignment, bool cpu_visible);
+  gpu::MTLBuffer *allocate_with_data(uint64_t size, bool cpu_visible, const void *data = nullptr);
+  gpu::MTLBuffer *allocate_aligned_with_data(uint64_t size,
+                                             uint alignment,
+                                             bool cpu_visible,
+                                             const void *data = nullptr);
+  bool free_buffer(gpu::MTLBuffer *buffer);
+
+  /* Flush MTLSafeFreeList buffers, for completed lists in `completed_safelist_queue_`,
+   * back to the memory pools. */
+  void update_memory_pools();
+
+  /* Access and control over the active MTLSafeFreeList. */
+  MTLSafeFreeList *get_current_safe_list();
+  void begin_new_safe_list();
+
+  /* Add a completed MTLSafeFreeList to `completed_safelist_queue_`. */
+  void push_completed_safe_list(MTLSafeFreeList *list);
+
+ private:
+  void ensure_buffer_pool(MTLResourceOptions options);
+  void insert_buffer_into_pool(MTLResourceOptions options, gpu::MTLBuffer *buffer);
+  void free();
+};
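For a sense of the re-use window the 1.26 threshold factor creates, a small standalone check (plain C++; the request and buffer sizes are hypothetical examples):

#include <cstdint>
#include <iostream>

/* Mirrors the pool's acceptance test with the 1.26 threshold factor. */
static bool pool_buffer_is_suitable(uint64_t found_size, uint64_t requested_size)
{
  constexpr float threshold_factor = 1.26f;
  return found_size >= requested_size &&
         found_size <= uint64_t(requested_size * threshold_factor);
}

int main()
{
  /* A 1 MiB request may re-use any cached buffer between 1 MiB and ~1.26 MiB. */
  const uint64_t request = 1024 * 1024;
  std::cout << pool_buffer_is_suitable(1100 * 1024, request) << "\n"; /* 1: within window. */
  std::cout << pool_buffer_is_suitable(2048 * 1024, request) << "\n"; /* 0: too wasteful. */
  std::cout << pool_buffer_is_suitable(512 * 1024, request) << "\n";  /* 0: too small. */
}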
+
+/* Scratch buffers are circular-buffers used for temporary data within the current frame.
+ * In order to preserve the integrity of contents when having multiple frames in-flight,
+ * we cycle through a collection of scratch buffers which are reset upon next use.
+ *
+ * Below is a series of properties declared to manage scratch buffers. If a scratch buffer
+ * overflows, then the original buffer will be flushed and submitted, with retained references
+ * by usage within the command buffer, and a new buffer will be created.
+ * - The new buffer will grow in size to account for increased demand in temporary memory.
+ */
+class MTLScratchBufferManager {
+
+ private:
+  /* Maximum number of scratch buffers to allocate. This should be the maximum number of
+   * simultaneous frames in flight. */
+  static constexpr uint mtl_max_scratch_buffers_ = MTL_NUM_SAFE_FRAMES;
+
+ public:
+  /* Maximum size of a single scratch buffer allocation. When re-sizing, this is the maximum size
+   * the newly allocated buffers will grow to. Larger allocations are possible if
+   * `MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION` is enabled, but these will instead allocate
+   * new buffers from the memory pools on the fly. */
+  static constexpr uint mtl_scratch_buffer_max_size_ = 128 * 1024 * 1024;
+
+  /* Initial size of circular scratch buffers prior to growth. */
+  static constexpr uint mtl_scratch_buffer_initial_size_ = 16 * 1024 * 1024;
+
+ private:
+  /* Parent MTLContext. */
+  MTLContext &context_;
+  bool initialised_ = false;
+
+  /* Scratch buffer currently in-use. */
+  uint current_scratch_buffer_ = 0;
+
+  /* Scratch buffer pool. */
+  MTLCircularBuffer *scratch_buffers_[mtl_max_scratch_buffers_];
+
+ public:
+  MTLScratchBufferManager(MTLContext &context) : context_(context) {}
+  ~MTLScratchBufferManager();
+
+  /* Explicit initialization and freeing of resources.
+   * Initialization must occur after device creation. */
+  void init();
+  void free();
+
+  /* Allocation functions for creating temporary allocations from the active circular buffer. */
+  MTLTemporaryBuffer scratch_buffer_allocate_range(uint64_t alloc_size);
+  MTLTemporaryBuffer scratch_buffer_allocate_range_aligned(uint64_t alloc_size, uint alignment);
+
+  /* Ensure a new scratch buffer is started if we move onto a new frame.
+   * Called when a new command buffer begins. */
+  void ensure_increment_scratch_buffer();
+
+  /* Flush memory for the active scratch buffer to the GPU.
+   * This call will perform a partial flush of the buffer starting from
+   * the last offset the data was flushed from, to the current offset. */
+  void flush_active_scratch_buffer();
+};
+
+/** \} */
+
+}  // namespace blender::gpu
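Per frame, the circular scratch allocation above behaves as a bump allocator with aligned offsets. A standalone sketch of just that offset math (plain C++; ScratchSim is a hypothetical stand-in that omits growth, flushing, and wrap-around):

#include <cstdint>
#include <iostream>

struct ScratchSim {
  uint64_t size;
  uint64_t offset = 0;

  static uint64_t ceil_to_multiple(uint64_t v, uint64_t align)
  {
    return ((v + align - 1) / align) * align;
  }

  /* Returns the aligned base offset of the new range, or UINT64_MAX when full. */
  uint64_t allocate(uint64_t alloc_size, uint64_t alignment)
  {
    uint64_t base = ceil_to_multiple(offset, alignment);
    uint64_t aligned_size = ceil_to_multiple(alloc_size, alignment);
    if (base + aligned_size >= size) {
      return UINT64_MAX; /* The real buffer would flush, then grow or start anew. */
    }
    offset = base + aligned_size;
    return base;
  }
};

int main()
{
  ScratchSim scratch{16 * 1024 * 1024};
  /* Offsets snap to 256 bytes, matching the buffer offset alignment used above. */
  std::cout << scratch.allocate(100, 256) << "\n";  /* 0 */
  std::cout << scratch.allocate(100, 256) << "\n";  /* 256 */
  std::cout << scratch.allocate(4096, 256) << "\n"; /* 512 */
}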
diff --git a/source/blender/gpu/metal/mtl_memory.mm b/source/blender/gpu/metal/mtl_memory.mm
new file mode 100644
index 00000000000..07da489bdbb
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_memory.mm
@@ -0,0 +1,895 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "BKE_global.h"
+
+#include "DNA_userdef_types.h"
+
+#include "mtl_context.hh"
+#include "mtl_debug.hh"
+#include "mtl_memory.hh"
+
+using namespace blender;
+using namespace blender::gpu;
+
+namespace blender::gpu {
+
+/* -------------------------------------------------------------------- */
+/** \name Memory Management - MTLBufferPool and MTLSafeFreeList implementations
+ * \{ */
+
+void MTLBufferPool::init(id<MTLDevice> mtl_device)
+{
+  if (!ensure_initialised_) {
+    BLI_assert(mtl_device);
+    ensure_initialised_ = true;
+    device_ = mtl_device;
+
+#if MTL_DEBUG_MEMORY_STATISTICS == 1
+    /* Debug statistics. */
+    per_frame_allocation_count_ = 0;
+    allocations_in_pool_ = 0;
+    buffers_in_pool_ = 0;
+#endif
+
+    /* Free pools -- Create the initial safe free pool. */
+    BLI_assert(current_free_list_ == nullptr);
+    this->begin_new_safe_list();
+  }
+}
+
+MTLBufferPool::~MTLBufferPool()
+{
+  this->free();
+}
+
+void MTLBufferPool::free()
+{
+  for (auto buffer : allocations_) {
+    BLI_assert(buffer);
+    delete buffer;
+  }
+  allocations_.clear();
+
+  for (std::multiset<blender::gpu::MTLBufferHandle, blender::gpu::CompareMTLBuffer> *buffer_pool :
+       buffer_pools_.values()) {
+    delete buffer_pool;
+  }
+  buffer_pools_.clear();
+}
+
+gpu::MTLBuffer *MTLBufferPool::allocate(uint64_t size, bool cpu_visible)
+{
+  /* Allocate buffer with the default HW-compatible alignment of 256 bytes.
+   * See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf for more. */
+  return this->allocate_aligned(size, 256, cpu_visible);
+}
+
+gpu::MTLBuffer *MTLBufferPool::allocate_with_data(uint64_t size,
+                                                  bool cpu_visible,
+                                                  const void *data)
+{
+  /* Allocate buffer with the default HW-compatible alignment of 256 bytes.
+   * See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf for more. */
+  return this->allocate_aligned_with_data(size, 256, cpu_visible, data);
+}
+
+gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size, uint alignment, bool cpu_visible)
+{
+  /* Check not required. Main GPU module usage considered thread-safe. */
+  // BLI_assert(BLI_thread_is_main());
+
+  /* Calculate aligned size. */
+  BLI_assert(alignment > 0);
+  uint64_t aligned_alloc_size = ceil_to_multiple_ul(size, alignment);
+
+  /* Allocate new MTL Buffer. */
+  MTLResourceOptions options;
+  if (cpu_visible) {
+    options = ([device_ hasUnifiedMemory]) ? MTLResourceStorageModeShared :
+                                             MTLResourceStorageModeManaged;
+  }
+  else {
+    options = MTLResourceStorageModePrivate;
+  }
+
+  /* Check if we have a suitable buffer. */
+  gpu::MTLBuffer *new_buffer = nullptr;
+  std::multiset<MTLBufferHandle, CompareMTLBuffer> **pool_search = buffer_pools_.lookup_ptr(
+      (uint64_t)options);
+
+  if (pool_search != nullptr) {
+    std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool = *pool_search;
+    MTLBufferHandle size_compare(aligned_alloc_size);
+    auto result = pool->lower_bound(size_compare);
+    if (result != pool->end()) {
+      /* Potential buffer found, check if within size threshold requirements. */
+      gpu::MTLBuffer *found_buffer = result->buffer;
+      BLI_assert(found_buffer);
+      BLI_assert(found_buffer->get_metal_buffer());
+
+      uint64_t found_size = found_buffer->get_size();
+
+      if (found_size >= aligned_alloc_size &&
+          found_size <= (aligned_alloc_size * mtl_buffer_size_threshold_factor_)) {
+        MTL_LOG_INFO(
+            "[MemoryAllocator] Suitable buffer of size %lld found, for requested size: %lld\n",
+            found_size,
+            aligned_alloc_size);
+
+        new_buffer = found_buffer;
+        BLI_assert(!new_buffer->get_in_use());
+
+        /* Remove buffer from free set. */
+        pool->erase(result);
+      }
+      else {
+        MTL_LOG_INFO(
+            "[MemoryAllocator] Buffer of size %lld found, but was incompatible with requested "
+            "size: %lld\n",
+            found_size,
+            aligned_alloc_size);
+        new_buffer = nullptr;
+      }
+    }
+  }
+
+  /* Allocate new buffer. */
+  if (new_buffer == nullptr) {
+    new_buffer = new gpu::MTLBuffer(device_, size, options, alignment);
+
+    /* Track allocation in context. */
+    allocations_.append(new_buffer);
+    total_allocation_bytes_ += aligned_alloc_size;
+  }
+  else {
+    /* Re-use suitable buffer. */
+    new_buffer->set_usage_size(aligned_alloc_size);
+
+#if MTL_DEBUG_MEMORY_STATISTICS == 1
+    /* Debug. */
+    allocations_in_pool_ -= new_buffer->get_size();
+    buffers_in_pool_--;
+    BLI_assert(allocations_in_pool_ >= 0);
+#endif
+
+    /* Ensure buffer memory is correctly backed. */
+    BLI_assert(new_buffer->get_metal_buffer());
+  }
+  /* Flag buffer as actively in-use. */
+  new_buffer->flag_in_use(true);
+
+#if MTL_DEBUG_MEMORY_STATISTICS == 1
+  per_frame_allocation_count_++;
+#endif
+
+  return new_buffer;
+}
+
+gpu::MTLBuffer *MTLBufferPool::allocate_aligned_with_data(uint64_t size,
+                                                          uint alignment,
+                                                          bool cpu_visible,
+                                                          const void *data)
+{
+  gpu::MTLBuffer *buf = this->allocate_aligned(size, alignment, cpu_visible);
+
+  /* Upload initial data. */
+  BLI_assert(data != nullptr);
+  BLI_assert(!(buf->get_resource_options() & MTLResourceStorageModePrivate));
+  BLI_assert(size <= buf->get_size());
+  BLI_assert(size <= [buf->get_metal_buffer() length]);
+  memcpy(buf->get_host_ptr(), data, size);
+  buf->flush_range(0, size);
+  return buf;
+}
+
+bool MTLBufferPool::free_buffer(gpu::MTLBuffer *buffer)
+{
+  /* Ensure buffer is flagged as in-use, i.e. has not already been returned to the memory pools. */
+  bool buffer_in_use = buffer->get_in_use();
+  BLI_assert(buffer_in_use);
+  if (buffer_in_use) {
+
+    /* Fetch the active safe pool from the atomic ptr. */
+    MTLSafeFreeList *current_pool = this->get_current_safe_list();
+
+    /* Place buffer in safe_free_pool before returning to the memory manager buffer pools. */
+    BLI_assert(current_pool);
+    current_pool->insert_buffer(buffer);
+    buffer->flag_in_use(false);
+
+    return true;
+  }
+  return false;
+}
+
+void MTLBufferPool::update_memory_pools()
+{
+  /* Ensure thread-safe access to `completed_safelist_queue_`, which contains
+   * the list of MTLSafeFreeLists whose buffers are ready to be
+   * re-inserted into the memory manager pools. */
+  safelist_lock_.lock();
+
+#if MTL_DEBUG_MEMORY_STATISTICS == 1
+  int num_buffers_added = 0;
+#endif
+
+  /* Always free the oldest MTLSafeFreeList first. */
+  for (int safe_pool_free_index = 0; safe_pool_free_index < completed_safelist_queue_.size();
+       safe_pool_free_index++) {
+    MTLSafeFreeList *current_pool = completed_safelist_queue_[safe_pool_free_index];
+
+    /* Iterate through all MTLSafeFreeList linked-chunks. */
+    while (current_pool != nullptr) {
+      current_pool->lock_.lock();
+      BLI_assert(current_pool);
+      BLI_assert(current_pool->in_free_queue_);
+      int counter = 0;
+      int size = min_ii(current_pool->current_list_index_, MTLSafeFreeList::MAX_NUM_BUFFERS_);
+
+      /* Re-add all buffers within the frame index to the memory manager pools. */
+      while (counter < size) {
+
+        gpu::MTLBuffer *buf = current_pool->safe_free_pool_[counter];
+
+        /* Insert buffer back into open pools. */
+        BLI_assert(buf->get_in_use() == false);
+        this->insert_buffer_into_pool(buf->get_resource_options(), buf);
+        counter++;
+
+#if MTL_DEBUG_MEMORY_STATISTICS == 1
+        num_buffers_added++;
+#endif
+      }
+
+      /* Fetch the next MTLSafeFreeList chunk, if any. */
+      MTLSafeFreeList *next_list = nullptr;
+      if (current_pool->has_next_pool_ > 0) {
+        next_list = current_pool->next_.load();
+      }
+
+      /* Delete the current MTLSafeFreeList. */
+      current_pool->lock_.unlock();
+      delete current_pool;
+      current_pool = nullptr;
+
+      /* Move onto the next chunk. */
+      if (next_list != nullptr) {
+        current_pool = next_list;
+      }
+    }
+  }
+
+#if MTL_DEBUG_MEMORY_STATISTICS == 1
+  printf("--- Allocation Stats ---\n");
+  printf("  Num buffers processed in pool (this frame): %u\n", num_buffers_added);
+
+  uint framealloc = (uint)per_frame_allocation_count_;
+  printf("  Allocations in frame: %u\n", framealloc);
+  printf("  Total Buffers allocated: %u\n", (uint)allocations_.size());
+  printf("  Total Memory allocated: %u MB\n", (uint)total_allocation_bytes_ / (1024 * 1024));
+
+  uint allocs = (uint)(allocations_in_pool_) / 1024 / 1024;
+  printf("  Free memory in pools: %u MB\n", allocs);
+
+  uint buffs = (uint)buffers_in_pool_;
+  printf("  Buffers in pools: %u\n", buffs);
+
+  printf("  Pools %u:\n", (uint)buffer_pools_.size());
+  auto key_iterator = buffer_pools_.keys().begin();
+  auto value_iterator = buffer_pools_.values().begin();
+  while (key_iterator != buffer_pools_.keys().end()) {
+    uint64_t mem_in_pool = 0;
+    uint64_t iters = 0;
+    for (auto it = (*value_iterator)->begin(); it != (*value_iterator)->end(); it++) {
+      mem_in_pool += it->buffer_size;
+      iters++;
+    }
+
+    printf("    Buffers in pool (%u)(%llu): %u (%u MB)\n",
+           (uint)*key_iterator,
+           iters,
+           (uint)((*value_iterator)->size()),
+           (uint)mem_in_pool / 1024 / 1024);
+    ++key_iterator;
+    ++value_iterator;
+  }
+
+  per_frame_allocation_count_ = 0;
+#endif
+
+  /* Clear safe pools list. */
+  completed_safelist_queue_.clear();
+  safelist_lock_.unlock();
+}
+
+void MTLBufferPool::push_completed_safe_list(MTLSafeFreeList *safe_list)
+{
+  /* When an MTLSafeFreeList has been released by the GPU, and buffers are ready to
+   * be re-inserted into the memory manager pools for future use, add the MTLSafeFreeList
+   * to the `completed_safelist_queue_` for flushing at a controlled point in time. */
+  safe_list->lock_.lock();
+  BLI_assert(safe_list);
+  BLI_assert(safe_list->reference_count_ == 0 &&
+             "Pool must be fully dereferenced by all in-use cmd buffers before returning.\n");
+  BLI_assert(safe_list->in_free_queue_ == false && "Pool must not already be in queue");
+
+  /* Flag MTLSafeFreeList as having been added, and insert into the SafeFreePool queue. */
+  safe_list->flag_in_queue();
+  safelist_lock_.lock();
+  completed_safelist_queue_.append(safe_list);
+  safelist_lock_.unlock();
+  safe_list->lock_.unlock();
+}
+
+MTLSafeFreeList *MTLBufferPool::get_current_safe_list()
+{
+  /* Thread-safe access via atomic ptr. */
+  return current_free_list_;
+}
+
+void MTLBufferPool::begin_new_safe_list()
+{
+  safelist_lock_.lock();
+  current_free_list_ = new MTLSafeFreeList();
+  safelist_lock_.unlock();
+}
+
+void MTLBufferPool::ensure_buffer_pool(MTLResourceOptions options)
+{
+  std::multiset<MTLBufferHandle, CompareMTLBuffer> **pool_search = buffer_pools_.lookup_ptr(
+      (uint64_t)options);
+  if (pool_search == nullptr) {
+    std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool =
+        new std::multiset<MTLBufferHandle, CompareMTLBuffer>();
+    buffer_pools_.add_new((uint64_t)options, pool);
+  }
+}
+
+void MTLBufferPool::insert_buffer_into_pool(MTLResourceOptions options, gpu::MTLBuffer *buffer)
+{
+  /* Ensure `safelist_lock_` is locked in calling code before modifying. */
+  BLI_assert(buffer);
+
+  /* Reset usage size to the actual size of the allocation. */
+  buffer->set_usage_size(buffer->get_size());
+
+  /* Ensure pool exists. */
+  this->ensure_buffer_pool(options);
+
+  /* TODO(Metal): Support purgeability - Allow a buffer in the pool to have its memory taken
+   * back by the OS if needed. As we keep allocations around, they may not actually be in use,
+   * but we can ensure they do not block other apps from using memory. Upon a buffer being
+   * needed again, we can reset this state.
+   * TODO(Metal): Purgeability state does not update instantly, so this requires a deferral. */
+  BLI_assert(buffer->get_metal_buffer());
+  /* buffer->metal_buffer); [buffer->metal_buffer setPurgeableState:MTLPurgeableStateVolatile]; */
+
+  std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool = buffer_pools_.lookup(options);
+  pool->insert(MTLBufferHandle(buffer));
+
+#if MTL_DEBUG_MEMORY_STATISTICS == 1
+  /* Debug statistics. */
+  allocations_in_pool_ += buffer->get_size();
+  buffers_in_pool_++;
+#endif
+}
+
+MTLSafeFreeList::MTLSafeFreeList()
+{
+  reference_count_ = 1;
+  in_free_queue_ = false;
+  current_list_index_ = 0;
+  next_ = nullptr;
+  has_next_pool_ = 0;
+}
+
+void MTLSafeFreeList::insert_buffer(gpu::MTLBuffer *buffer)
+{
+  BLI_assert(in_free_queue_ == false);
+
+  /* Lockless list insert. */
+  uint insert_index = current_list_index_++;
+
+  /* If the current MTLSafeFreeList size is exceeded, we ripple down the linked-list chain and
+   * insert the buffer into the next available chunk. */
+  if (insert_index >= MTLSafeFreeList::MAX_NUM_BUFFERS_) {
+
+    /* Check if first caller to generate the next pool. */
+    int has_next = has_next_pool_++;
+    if (has_next == 0) {
+      next_ = new MTLSafeFreeList();
+    }
+    MTLSafeFreeList *next_list = next_.load();
+    BLI_assert(next_list);
+    next_list->insert_buffer(buffer);
+
+    /* Clamp index to chunk limit if overflowing. */
+    current_list_index_ = MTLSafeFreeList::MAX_NUM_BUFFERS_;
+    return;
+  }
+
+  safe_free_pool_[insert_index] = buffer;
+}
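The lockless insert above hands out slots with an atomic fetch-add and spills into a freshly created chunk when full. A standalone sketch of the same idea (plain C++; the chunk size is shrunk to 4, and unlike the original, racing writers spin until the next chunk is published rather than asserting):

#include <atomic>
#include <iostream>

template<int N> struct Chunk {
  std::atomic<int> count{0};
  std::atomic<Chunk *> next{nullptr};
  std::atomic<int> next_claimed{0};
  int slots[N] = {};

  void insert(int value)
  {
    int index = count.fetch_add(1); /* Lockless slot hand-out. */
    if (index >= N) {
      if (next_claimed.fetch_add(1) == 0) {
        next.store(new Chunk()); /* Only the first overflowing writer allocates. */
      }
      Chunk *n;
      while ((n = next.load()) == nullptr) {
        /* Spin until the winning thread publishes the new chunk. */
      }
      n->insert(value);
      return;
    }
    slots[index] = value;
  }
};

int main()
{
  Chunk<4> head;
  for (int i = 0; i < 6; i++) {
    head.insert(i * 10); /* Last two values spill into the next chunk. */
  }
  std::cout << "head full: " << (head.count.load() >= 4) << ", spilled into next chunk\n";
  delete head.next.load();
}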
+
+/* Increments from the active GPUContext thread. */
+void MTLSafeFreeList::increment_reference()
+{
+  lock_.lock();
+  BLI_assert(in_free_queue_ == false);
+  reference_count_++;
+  lock_.unlock();
+}
+
+/* Reference decrements and addition to the completed list queue can occur from the
+ * MTLCommandBuffer completion callback thread. */
+void MTLSafeFreeList::decrement_reference()
+{
+  lock_.lock();
+  BLI_assert(in_free_queue_ == false);
+  int ref_count = --reference_count_;
+
+  if (ref_count == 0) {
+    MTLContext::get_global_memory_manager().push_completed_safe_list(this);
+  }
+  lock_.unlock();
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name MTLBuffer wrapper class implementation.
+ * \{ */
+
+/* Construct a gpu::MTLBuffer wrapper around a newly created metal::MTLBuffer. */
+MTLBuffer::MTLBuffer(id<MTLDevice> mtl_device,
+                     uint64_t size,
+                     MTLResourceOptions options,
+                     uint alignment)
+{
+  /* Calculate aligned allocation size. */
+  BLI_assert(alignment > 0);
+  uint64_t aligned_alloc_size = ceil_to_multiple_ul(size, alignment);
+
+  alignment_ = alignment;
+  device_ = mtl_device;
+  is_external_ = false;
+
+  options_ = options;
+  this->flag_in_use(false);
+
+  metal_buffer_ = [device_ newBufferWithLength:aligned_alloc_size options:options];
+  BLI_assert(metal_buffer_);
+  [metal_buffer_ retain];
+
+  size_ = aligned_alloc_size;
+  this->set_usage_size(size_);
+  if (!(options_ & MTLResourceStorageModePrivate)) {
+    data_ = [metal_buffer_ contents];
+  }
+  else {
+    data_ = nullptr;
+  }
+}
+
+MTLBuffer::MTLBuffer(id<MTLBuffer> external_buffer)
+{
+  BLI_assert(external_buffer != nil);
+
+  /* Ensure external_buffer remains referenced while in-use. */
+  metal_buffer_ = external_buffer;
+  [metal_buffer_ retain];
+
+  /* Extract properties. */
+  is_external_ = true;
+  device_ = nil;
+  alignment_ = 1;
+  options_ = [metal_buffer_ resourceOptions];
+  size_ = [metal_buffer_ allocatedSize];
+  this->set_usage_size(size_);
+  data_ = [metal_buffer_ contents];
+  in_use_ = true;
+}
+
+gpu::MTLBuffer::~MTLBuffer()
+{
+  if (metal_buffer_ != nil) {
+    [metal_buffer_ release];
+    metal_buffer_ = nil;
+  }
+}
+
+void gpu::MTLBuffer::free()
+{
+  if (!is_external_) {
+    MTLContext::get_global_memory_manager().free_buffer(this);
+  }
+  else {
+    if (metal_buffer_ != nil) {
+      [metal_buffer_ release];
+      metal_buffer_ = nil;
+    }
+  }
+}
+
+id<MTLBuffer> gpu::MTLBuffer::get_metal_buffer() const
+{
+  return metal_buffer_;
+}
+
+void *gpu::MTLBuffer::get_host_ptr() const
+{
+  BLI_assert(!(options_ & MTLResourceStorageModePrivate));
+  BLI_assert(data_);
+  return data_;
+}
+
+uint64_t gpu::MTLBuffer::get_size() const
+{
+  return size_;
+}
+
+uint64_t gpu::MTLBuffer::get_size_used() const
+{
+  return usage_size_;
+}
+
+bool gpu::MTLBuffer::requires_flush()
+{
+  /* We do not need to flush shared memory, as the addressable buffer is shared. */
+  return options_ & MTLResourceStorageModeManaged;
+}
+
+void gpu::MTLBuffer::set_label(NSString *str)
+{
+  metal_buffer_.label = str;
+}
+
+void gpu::MTLBuffer::debug_ensure_used()
+{
+  /* Debug: If the buffer is not flagged as in-use, this is a problem. */
+  BLI_assert(in_use_ &&
+             "Buffer should be marked as 'in-use' if being actively used by an instance. Buffer "
+             "has likely already been freed.");
+}
+
+void gpu::MTLBuffer::flush()
+{
+  this->debug_ensure_used();
+  if (this->requires_flush()) {
+    [metal_buffer_ didModifyRange:NSMakeRange(0, size_)];
+  }
+}
+
+void gpu::MTLBuffer::flush_range(uint64_t offset, uint64_t length)
+{
+  this->debug_ensure_used();
+  if (this->requires_flush()) {
+    BLI_assert((offset + length) <= size_);
+    [metal_buffer_ didModifyRange:NSMakeRange(offset, length)];
+  }
+}
+
+void gpu::MTLBuffer::flag_in_use(bool used)
+{
+  in_use_ = used;
+}
+
+bool gpu::MTLBuffer::get_in_use()
+{
+  return in_use_;
+}
+
+void gpu::MTLBuffer::set_usage_size(uint64_t size_used)
+{
+  BLI_assert(size_used > 0 && size_used <= size_);
+  usage_size_ = size_used;
+}
+
+MTLResourceOptions gpu::MTLBuffer::get_resource_options()
+{
+  return options_;
+}
+
+uint64_t gpu::MTLBuffer::get_alignment()
+{
+  return alignment_;
+}
+
+bool MTLBufferRange::requires_flush()
+{
+  /* We do not need to flush shared memory. */
+  return this->options & MTLResourceStorageModeManaged;
+}
+
+void MTLBufferRange::flush()
+{
+  if (this->requires_flush()) {
+    BLI_assert(this->metal_buffer);
+    BLI_assert((this->buffer_offset + this->size) <= [this->metal_buffer length]);
+    BLI_assert(this->buffer_offset >= 0);
+    /* `size` is the length of the range view, so flush the whole view rather than
+     * `size - buffer_offset` bytes. */
+    [this->metal_buffer didModifyRange:NSMakeRange(this->buffer_offset, this->size)];
+  }
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name MTLScratchBufferManager and MTLCircularBuffer implementation.
+ * \{ */
+
+MTLScratchBufferManager::~MTLScratchBufferManager()
+{
+  this->free();
+}
+
+void MTLScratchBufferManager::init()
+{
+  if (!this->initialised_) {
+    BLI_assert(context_.device);
+
+    /* Initialize scratch buffers. */
+    for (int sb = 0; sb < mtl_max_scratch_buffers_; sb++) {
+      scratch_buffers_[sb] = new MTLCircularBuffer(
+          context_, mtl_scratch_buffer_initial_size_, true);
+      BLI_assert(scratch_buffers_[sb]);
+      BLI_assert(&(scratch_buffers_[sb]->own_context_) == &context_);
+    }
+    current_scratch_buffer_ = 0;
+    initialised_ = true;
+  }
+}
+
+void MTLScratchBufferManager::free()
+{
+  initialised_ = false;
+
+  /* Release scratch buffers. */
+  for (int sb = 0; sb < mtl_max_scratch_buffers_; sb++) {
+    delete scratch_buffers_[sb];
+    scratch_buffers_[sb] = nullptr;
+  }
+  current_scratch_buffer_ = 0;
+}
+
+MTLTemporaryBuffer MTLScratchBufferManager::scratch_buffer_allocate_range(uint64_t alloc_size)
+{
+  return this->scratch_buffer_allocate_range_aligned(alloc_size, 1);
+}
+
+MTLTemporaryBuffer MTLScratchBufferManager::scratch_buffer_allocate_range_aligned(
+    uint64_t alloc_size, uint alignment)
+{
+  /* Ensure scratch buffer allocation alignment adheres to offset alignment requirements. */
+  alignment = max_uu(alignment, 256);
+
+  BLI_assert(current_scratch_buffer_ >= 0 && "Scratch buffer index not set");
+  MTLCircularBuffer *current_scratch_buff = this->scratch_buffers_[current_scratch_buffer_];
+  BLI_assert(current_scratch_buff != nullptr && "Scratch buffer does not exist");
+  MTLTemporaryBuffer allocated_range = current_scratch_buff->allocate_range_aligned(alloc_size,
+                                                                                    alignment);
+  BLI_assert(allocated_range.size >= alloc_size && allocated_range.size <= alloc_size + alignment);
+  BLI_assert(allocated_range.metal_buffer != nil);
+  return allocated_range;
+}
+
+void MTLScratchBufferManager::ensure_increment_scratch_buffer()
+{
+  /* Fetch the active scratch buffer. */
+  MTLCircularBuffer *active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
+  BLI_assert(&active_scratch_buf->own_context_ == &context_);
+
+  /* Ensure the existing scratch buffer is no longer in use. `mtl_max_scratch_buffers_` specifies
+   * the number of allocated scratch buffers. This value should be equal to the number of
+   * simultaneous frames in-flight, i.e. the maximum number of scratch buffers which are
+   * simultaneously in-use. */
+  if (active_scratch_buf->used_frame_index_ < context_.get_current_frame_index()) {
+    current_scratch_buffer_ = (current_scratch_buffer_ + 1) % mtl_max_scratch_buffers_;
+    active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
+    active_scratch_buf->reset();
+    BLI_assert(&active_scratch_buf->own_context_ == &context_);
+    MTL_LOG_INFO("Scratch buffer %d reset - (ctx %p)(Frame index: %d)\n",
+                 current_scratch_buffer_,
+                 &context_,
+                 context_.get_current_frame_index());
+  }
+}
+
+void MTLScratchBufferManager::flush_active_scratch_buffer()
+{
+  /* Fetch the active scratch buffer and verify context. */
+  MTLCircularBuffer *active_scratch_buf = scratch_buffers_[current_scratch_buffer_];
+  BLI_assert(&active_scratch_buf->own_context_ == &context_);
+  active_scratch_buf->flush();
+}
+
+/* MTLCircularBuffer implementation. */
+MTLCircularBuffer::MTLCircularBuffer(MTLContext &ctx, uint64_t initial_size, bool allow_grow)
+    : own_context_(ctx)
+{
+  BLI_assert(this);
+  MTLResourceOptions options = ([own_context_.device hasUnifiedMemory]) ?
+                                   MTLResourceStorageModeShared :
+                                   MTLResourceStorageModeManaged;
+  cbuffer_ = new gpu::MTLBuffer(own_context_.device, initial_size, options, 256);
+  current_offset_ = 0;
+  can_resize_ = allow_grow;
+  cbuffer_->flag_in_use(true);
+
+  used_frame_index_ = ctx.get_current_frame_index();
+  last_flush_base_offset_ = 0;
+
+  /* Debug label. */
+  if (G.debug & G_DEBUG_GPU) {
+    cbuffer_->set_label(@"Circular Scratch Buffer");
+  }
+}
+
+MTLCircularBuffer::~MTLCircularBuffer()
+{
+  delete cbuffer_;
+}
+
+MTLTemporaryBuffer MTLCircularBuffer::allocate_range(uint64_t alloc_size)
+{
+  return this->allocate_range_aligned(alloc_size, 1);
+}
+
+MTLTemporaryBuffer MTLCircularBuffer::allocate_range_aligned(uint64_t alloc_size, uint alignment)
+{
+  BLI_assert(this);
+
+  /* Ensure alignment of an allocation is aligned to compatible offset boundaries. */
+  BLI_assert(alignment > 0);
+  alignment = max_ulul(alignment, 256);
+
+  /* Align the current offset and allocation size to the desired alignment. */
+  uint64_t aligned_current_offset = ceil_to_multiple_ul(current_offset_, alignment);
+  uint64_t aligned_alloc_size = ceil_to_multiple_ul(alloc_size, alignment);
+  bool can_allocate = (aligned_current_offset + aligned_alloc_size) < cbuffer_->get_size();
+
+  BLI_assert(aligned_current_offset >= current_offset_);
+  BLI_assert(aligned_alloc_size >= alloc_size);
+
+  BLI_assert(aligned_current_offset % alignment == 0);
+  BLI_assert(aligned_alloc_size % alignment == 0);
+
+  /* Recreate the buffer if the allocation does not fit. */
+  if (!can_allocate) {
+    uint64_t new_size = cbuffer_->get_size();
+    if (can_resize_) {
+      /* Resize to the maximum of the basic resize heuristic OR the size of the current offset +
+       * requested allocation -- we want the buffer to grow to a large enough size such that it
+       * does not need to resize mid-frame. */
+      new_size = max_ulul(
+          min_ulul(MTLScratchBufferManager::mtl_scratch_buffer_max_size_, new_size * 1.2),
+          aligned_current_offset + aligned_alloc_size);
+
+#if MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION == 1
+      /* If a requested allocation EXCEEDS the maximum supported size, temporarily allocate up to
+       * this, but shrink down ASAP. */
+      if (new_size > MTLScratchBufferManager::mtl_scratch_buffer_max_size_) {
+
+        /* If the new requested allocation is bigger than the maximum allowed size, temporarily
+         * resize to the maximum allocation size -- Otherwise, clamp the buffer size back down to
+         * the defined maximum. */
+        if (aligned_alloc_size > MTLScratchBufferManager::mtl_scratch_buffer_max_size_) {
+          new_size = aligned_alloc_size;
+          MTL_LOG_INFO("Temporarily growing Scratch buffer to %d MB\n",
+                       (int)new_size / 1024 / 1024);
+        }
+        else {
+          new_size = MTLScratchBufferManager::mtl_scratch_buffer_max_size_;
+          MTL_LOG_INFO("Shrinking Scratch buffer back to %d MB\n", (int)new_size / 1024 / 1024);
+        }
+      }
+      BLI_assert(aligned_alloc_size <= new_size);
+#else
+      new_size = min_ulul(MTLScratchBufferManager::mtl_scratch_buffer_max_size_, new_size);
+
+      if (aligned_alloc_size > new_size) {
+        BLI_assert(false);
+
+        /* Cannot allocate: return a null range. */
+        MTLTemporaryBuffer alloc_range;
+        alloc_range.metal_buffer = nil;
+        alloc_range.data = nullptr;
+        alloc_range.buffer_offset = 0;
+        alloc_range.size = 0;
+        alloc_range.options = cbuffer_->get_resource_options();
+        return alloc_range;
+      }
+#endif
+    }
+    else {
+      MTL_LOG_WARNING(
+          "Performance Warning: Reached the end of circular buffer of size: %llu, but cannot "
+          "resize. Starting new buffer\n",
+          cbuffer_->get_size());
+      BLI_assert(aligned_alloc_size <= new_size);
+
+      /* Cannot grow: fall through and start a new buffer of the same size below. The in-flight
+       * buffer is kept alive by Metal's reference counting of resources used by submitted
+       * command buffers. */
+    }
+
+    /* Flush the current buffer to ensure changes are visible on the GPU. */
+    this->flush();
+
+    /* Discard the old buffer and create a new one - relying on Metal reference counting to track
+     * in-use buffers. */
+    MTLResourceOptions prev_options = cbuffer_->get_resource_options();
+    uint prev_alignment = cbuffer_->get_alignment();
+    delete cbuffer_;
+    cbuffer_ = new gpu::MTLBuffer(own_context_.device, new_size, prev_options, prev_alignment);
+    cbuffer_->flag_in_use(true);
+    current_offset_ = 0;
+    last_flush_base_offset_ = 0;
+
+    /* Debug label. */
+    if (G.debug & G_DEBUG_GPU) {
+      cbuffer_->set_label(@"Circular Scratch Buffer");
+    }
+    MTL_LOG_INFO("Resized Metal circular buffer to %llu bytes\n", new_size);
+
+    /* Reset allocation status. */
+    aligned_current_offset = 0;
+    BLI_assert((aligned_current_offset + aligned_alloc_size) <= cbuffer_->get_size());
+  }
+
+  /* Allocate chunk. */
+  MTLTemporaryBuffer alloc_range;
+  alloc_range.metal_buffer = cbuffer_->get_metal_buffer();
+  alloc_range.data = (void *)((uint8_t *)([alloc_range.metal_buffer contents]) +
+                              aligned_current_offset);
+  alloc_range.buffer_offset = aligned_current_offset;
+  alloc_range.size = aligned_alloc_size;
+  alloc_range.options = cbuffer_->get_resource_options();
+  BLI_assert(alloc_range.data);
+
+  /* Shift offset to match alignment. */
+  current_offset_ = aligned_current_offset + aligned_alloc_size;
+  BLI_assert(current_offset_ <= cbuffer_->get_size());
+  return alloc_range;
+}
+
+void MTLCircularBuffer::flush()
+{
+  BLI_assert(this);
+
+  uint64_t len = current_offset_ - last_flush_base_offset_;
+  if (len > 0) {
+    cbuffer_->flush_range(last_flush_base_offset_, len);
+    last_flush_base_offset_ = current_offset_;
+  }
+}
+
+void MTLCircularBuffer::reset()
+{
+  BLI_assert(this);
+
+  /* If the circular buffer has data written to it, the offset will be greater than zero. */
+  if (current_offset_ > 0) {
+
+    /* Ensure the circular buffer is no longer being used by an in-flight frame. */
+    BLI_assert((own_context_.get_current_frame_index() >=
+                (used_frame_index_ + MTL_NUM_SAFE_FRAMES - 1)) &&
+               "Trying to reset a circular scratch buffer while its data is still being used by "
+               "an in-flight frame");
+
+    current_offset_ = 0;
+    last_flush_base_offset_ = 0;
+  }
+
+  /* Update used frame index to current. */
+  used_frame_index_ = own_context_.get_current_frame_index();
+}
+
+/** \} */
+
+}  // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_query.hh b/source/blender/gpu/metal/mtl_query.hh
new file mode 100644
index 00000000000..c1ec9a2a0f5
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_query.hh
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#pragma once
+
+#include "BLI_vector.hh"
+
+#include "gpu_query.hh"
+#include "mtl_context.hh"
+
+namespace blender::gpu {
+
+class MTLQueryPool : public QueryPool {
+ private:
+  /** Number of queries that have been issued since last initialization. */
+  uint32_t query_issued_;
+  /** Type of this query pool. */
+  GPUQueryType type_;
+  /** Can only be initialized once. */
+  bool initialized_ = false;
+  MTLVisibilityResultMode mtl_type_;
+  Vector<gpu::MTLBuffer *> buffer_;
+
+  void allocate_buffer();
+
+ public:
+  MTLQueryPool();
+  ~MTLQueryPool();
+
+  void init(GPUQueryType type) override;
+
+  void begin_query() override;
+  void end_query() override;
+
+  void get_occlusion_result(MutableSpan<uint32_t> r_values) override;
+};
+}  // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_query.mm b/source/blender/gpu/metal/mtl_query.mm
new file mode 100644
index 00000000000..dfda0a8de7f
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_query.mm
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#include "mtl_query.hh"
+
+namespace blender::gpu {
+
+static const size_t VISIBILITY_COUNT_PER_BUFFER = 512;
+/* Defined in the documentation but not queryable programmatically:
+ * https://developer.apple.com/documentation/metal/mtlvisibilityresultmode/mtlvisibilityresultmodeboolean?language=objc
+ */
+static const size_t VISIBILITY_RESULT_SIZE_IN_BYTES = 8;
+
+MTLQueryPool::MTLQueryPool()
+{
+  allocate_buffer();
+}
+MTLQueryPool::~MTLQueryPool()
+{
+  for (gpu::MTLBuffer *buf : buffer_) {
+    BLI_assert(buf);
+    buf->free();
+  }
+}
+
+void MTLQueryPool::allocate_buffer()
+{
+  /* Allocate a Metal buffer for visibility results. */
+  size_t buffer_size_in_bytes = VISIBILITY_COUNT_PER_BUFFER * VISIBILITY_RESULT_SIZE_IN_BYTES;
+  gpu::MTLBuffer *buffer = MTLContext::get_global_memory_manager().allocate(
+      buffer_size_in_bytes, true);
+  BLI_assert(buffer);
+  buffer_.append(buffer);
+}
+
+static inline MTLVisibilityResultMode to_mtl_type(GPUQueryType type)
+{
+  if (type == GPU_QUERY_OCCLUSION) {
+    return MTLVisibilityResultModeBoolean;
+  }
+  BLI_assert(0);
+  return MTLVisibilityResultModeBoolean;
+}
+
+void MTLQueryPool::init(GPUQueryType type)
+{
+  BLI_assert(initialized_ == false);
+  initialized_ = true;
+  type_ = type;
+  mtl_type_ = to_mtl_type(type);
+  query_issued_ = 0;
+}
+
+void MTLQueryPool::begin_query()
+{
+  MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+
+  /* Ensure our allocated buffer pool has enough space for the current queries. */
+  int query_id = query_issued_;
+  int requested_buffer = query_id / VISIBILITY_COUNT_PER_BUFFER;
+  if (requested_buffer >= buffer_.size()) {
+    allocate_buffer();
+  }
+
+  BLI_assert(requested_buffer < buffer_.size());
+  gpu::MTLBuffer *buffer = buffer_[requested_buffer];
+
+  /* Ensure the visibility buffer is set on the context. If the visibility buffer changes,
+   * we need to begin a new render pass with an updated reference in the
+   * MTLRenderPassDescriptor. */
+  ctx->set_visibility_buffer(buffer);
+
+  ctx->ensure_begin_render_pass();
+  id<MTLRenderCommandEncoder> rec = ctx->main_command_buffer.get_active_render_command_encoder();
+  [rec setVisibilityResultMode:mtl_type_
+                        offset:(query_id % VISIBILITY_COUNT_PER_BUFFER) *
+                               VISIBILITY_RESULT_SIZE_IN_BYTES];
+  query_issued_ += 1;
+}
+
+void MTLQueryPool::end_query()
+{
+  MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+
+  id<MTLRenderCommandEncoder> rec = ctx->main_command_buffer.get_active_render_command_encoder();
+  [rec setVisibilityResultMode:MTLVisibilityResultModeDisabled offset:0];
+}
+
+void MTLQueryPool::get_occlusion_result(MutableSpan<uint32_t> r_values)
+{
+  MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
+
+  /* Create a blit encoder to synchronize the query buffer results between
+   * GPU and CPU when not using shared memory. */
+  if ([ctx->device hasUnifiedMemory] == false) {
+    id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
+    BLI_assert(blit_encoder);
+    for (gpu::MTLBuffer *buf : buffer_) {
+      [blit_encoder synchronizeResource:buf->get_metal_buffer()];
+    }
+    BLI_assert(ctx->get_inside_frame());
+  }
+
+  /* Wait for GPU operations to complete and for query buffer contents
+   * to be synchronized back to host memory. */
+  GPU_finish();
+
+  /* Iterate through all possible visibility buffers and copy results into the provided
+   * container. */
+  for (const int i : IndexRange(query_issued_)) {
+    int requested_buffer = i / VISIBILITY_COUNT_PER_BUFFER;
+    const uint64_t *queries = static_cast<const uint64_t *>(
+        buffer_[requested_buffer]->get_host_ptr());
+    r_values[i] = static_cast<uint32_t>(queries[i % VISIBILITY_COUNT_PER_BUFFER]);
+  }
+  ctx->set_visibility_buffer(nullptr);
+}
+
+}  // namespace blender::gpu
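For how a query index maps onto these pooled visibility buffers, a standalone sketch of the offset math used by begin_query() and get_occlusion_result() (plain C++; the query ids are arbitrary examples):

#include <cstddef>
#include <iostream>

static const size_t VISIBILITY_COUNT_PER_BUFFER = 512;
static const size_t VISIBILITY_RESULT_SIZE_IN_BYTES = 8;

int main()
{
  const size_t ids[] = {0, 1, 511, 512, 1000}; /* Arbitrary example query ids. */
  for (size_t query_id : ids) {
    /* Which pooled buffer the query lands in, and at what byte offset. */
    size_t buffer_index = query_id / VISIBILITY_COUNT_PER_BUFFER;
    size_t byte_offset = (query_id % VISIBILITY_COUNT_PER_BUFFER) *
                         VISIBILITY_RESULT_SIZE_IN_BYTES;
    std::cout << "query " << query_id << " -> buffer " << buffer_index << ", offset "
              << byte_offset << "\n";
  }
}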
diff --git a/source/blender/gpu/metal/mtl_state.hh b/source/blender/gpu/metal/mtl_state.hh
index 23bf8600ddd..e6472491b35 100644
--- a/source/blender/gpu/metal/mtl_state.hh
+++ b/source/blender/gpu/metal/mtl_state.hh
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
 /** \file
  * \ingroup gpu
  */
@@ -30,18 +32,18 @@ class MTLStateManager : public StateManager {
  public:
   MTLStateManager(MTLContext *ctx);
 
-  void apply_state(void) override;
-  void force_state(void) override;
+  void apply_state() override;
+  void force_state() override;
 
   void issue_barrier(eGPUBarrier barrier_bits) override;
 
   void texture_bind(Texture *tex, eGPUSamplerState sampler, int unit) override;
   void texture_unbind(Texture *tex) override;
-  void texture_unbind_all(void) override;
+  void texture_unbind_all() override;
 
   void image_bind(Texture *tex, int unit) override;
   void image_unbind(Texture *tex) override;
-  void image_unbind_all(void) override;
+  void image_unbind_all() override;
 
   void texture_unpack_row_length_set(uint len) override;
 
diff --git a/source/blender/gpu/metal/mtl_state.mm b/source/blender/gpu/metal/mtl_state.mm
index cf7fbdba6b9..0f2d4d7dc48 100644
--- a/source/blender/gpu/metal/mtl_state.mm
+++ b/source/blender/gpu/metal/mtl_state.mm
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
 /** \file
  * \ingroup gpu
  */
@@ -17,7 +19,7 @@ namespace blender::gpu {
 /** \name MTLStateManager
  * \{ */
 
-void MTLStateManager::mtl_state_init(void)
+void MTLStateManager::mtl_state_init()
 {
   BLI_assert(context_);
   context_->pipeline_state_init();
@@ -36,7 +38,7 @@ MTLStateManager::MTLStateManager(MTLContext *ctx) : StateManager()
   set_mutable_state(mutable_state);
 }
 
-void MTLStateManager::apply_state(void)
+void MTLStateManager::apply_state()
 {
   this->set_state(this->state);
   this->set_mutable_state(this->mutable_state);
@@ -45,7 +47,7 @@ void MTLStateManager::apply_state(void)
   static_cast<MTLFrameBuffer *>(context_->active_fb)->apply_state();
 };
 
-void MTLStateManager::force_state(void)
+void MTLStateManager::force_state()
 {
   /* Little exception for clip distances since they need to keep the old count correct. */
   uint32_t clip_distances = current_.clip_distances;
@@ -548,7 +550,7 @@ void MTLStateManager::issue_barrier(eGPUBarrier barrier_bits)
 
   /* Apple Silicon does not support memory barriers.
    * We do not currently need these due to implicit API guarantees.
-   * Note(Metal): MTLFence/MTLEvent may be required to synchronize work if
+   * NOTE(Metal): MTLFence/MTLEvent may be required to synchronize work if
    * untracked resources are ever used. */
   if ([ctx->device hasUnifiedMemory]) {
     return;
   }
@@ -600,7 +602,7 @@ void MTLStateManager::texture_unbind(Texture *tex_)
   ctx->texture_unbind(mtl_tex);
 }
 
-void MTLStateManager::texture_unbind_all(void)
+void MTLStateManager::texture_unbind_all()
 {
   MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
   BLI_assert(ctx);
@@ -623,7 +625,7 @@ void MTLStateManager::image_unbind(Texture *tex_)
   this->texture_unbind(tex_);
 }
 
-void MTLStateManager::image_unbind_all(void)
+void MTLStateManager::image_unbind_all()
 {
   this->texture_unbind_all();
 }
diff --git a/source/blender/gpu/metal/mtl_texture.hh b/source/blender/gpu/metal/mtl_texture.hh
index 0f908995a93..82a7a20a310 100644
--- a/source/blender/gpu/metal/mtl_texture.hh
+++ b/source/blender/gpu/metal/mtl_texture.hh
@@ -93,7 +93,7 @@ struct TextureReadRoutineSpecialisation {
    * 0 = Not a Depth format,
    * 1 = FLOAT DEPTH,
    * 2 = 24Bit Integer Depth,
-   * 4 = 32bit uinteger Depth. */
+   * 4 = 32bit Unsigned-Integer Depth. */
   int depth_format_mode;
 
   bool operator==(const TextureReadRoutineSpecialisation &other) const
@@ -237,7 +237,7 @@ class MTLTexture : public Texture {
   void update_sub(
       int mip, int offset[3], int extent[3], eGPUDataFormat type, const void *data) override;
 
-  void generate_mipmap(void) override;
+  void generate_mipmap() override;
   void copy_to(Texture *dst) override;
   void clear(eGPUDataFormat format, const void *data) override;
   void swizzle_set(const char swizzle_mask[4]) override;
@@ -248,7 +248,7 @@ class MTLTexture : public Texture {
   void *read(int mip, eGPUDataFormat type) override;
 
   /* Remove once no longer required -- will just return 0 for now in MTL path. */
-  uint gl_bindcode_get(void) const override;
+  uint gl_bindcode_get() const override;
 
   bool texture_is_baked();
   const char *get_name()
   {
     return name_;
   }
 
  protected:
-  bool init_internal(void) override;
+  bool init_internal() override;
   bool init_internal(GPUVertBuf *vbo) override;
   bool init_internal(const GPUTexture *src,
                      int mip_offset,
diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm
index ff2c2fce235..0cb38a3a2b7 100644
--- a/source/blender/gpu/metal/mtl_texture.mm
+++ b/source/blender/gpu/metal/mtl_texture.mm
@@ -478,23 +478,6 @@ void gpu::MTLTexture::update_sub(
   MTLPixelFormat destination_format = gpu_texture_format_to_metal(format_);
   int expected_dst_bytes_per_pixel = get_mtl_format_bytesize(destination_format);
   int destination_num_channels = get_mtl_format_num_components(destination_format);
-  int destination_totalsize = 0;
-  switch (this->dimensions_count()) {
-    case 1:
-      destination_totalsize = expected_dst_bytes_per_pixel * max_ii(expected_update_w, 1);
-      break;
-    case 2:
-      destination_totalsize = expected_dst_bytes_per_pixel * max_ii(expected_update_w, 1) *
-                              max_ii(extent[1], 1);
-      break;
-    case 3:
-      destination_totalsize = expected_dst_bytes_per_pixel * max_ii(expected_update_w, 1) *
-                              max_ii(extent[1], 1) * max_ii(extent[2], 1);
-      break;
-    default:
-      BLI_assert(false);
-      break;
-  }
 
   /* Prepare specialisation struct (For texture update routine). */
   TextureUpdateRoutineSpecialisation compute_specialisation_kernel = {
@@ -568,12 +551,12 @@ void gpu::MTLTexture::update_sub(
 
       /* Prepare staging buffer for data. */
       id<MTLBuffer> staging_buffer = nil;
-      unsigned long long staging_buffer_offset = 0;
+      uint64_t staging_buffer_offset = 0;
 
       /* Fetch allocation from scratch buffer. */
-      MTLTemporaryBufferRange allocation; /* TODO(Metal): Metal Memory manager. */
-      /* = ctx->get_memory_manager().scratch_buffer_allocate_range_aligned(totalsize, 256);*/
-      memcpy(allocation.host_ptr, data, totalsize);
+      MTLTemporaryBuffer allocation =
+          ctx->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(totalsize, 256);
+      memcpy(allocation.data, data, totalsize);
 
       staging_buffer = allocation.metal_buffer;
       staging_buffer_offset = allocation.buffer_offset;
@@ -915,7 +898,7 @@ void gpu::MTLTexture::ensure_mipmaps(int miplvl)
   this->mip_range_set(0, mipmaps_);
 }
 
-void gpu::MTLTexture::generate_mipmap(void)
+void gpu::MTLTexture::generate_mipmap()
 {
   /* Fetch Active Context. */
   MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get());
@@ -1230,7 +1213,7 @@ void gpu::MTLTexture::read_internal(int mip,
     destination_buffer = [ctx->device newBufferWithLength:max_ii(total_bytes, 256)
                                                   options:bufferOptions];
     destination_offset = 0;
-    destination_buffer_host_ptr = (void *)((unsigned char *)([destination_buffer contents]) +
+    destination_buffer_host_ptr = (void *)((uint8_t *)([destination_buffer contents]) +
                                            destination_offset);
 
     /* Prepare specialisation struct (For non-trivial texture read routine). */
@@ -1444,12 +1427,12 @@ void gpu::MTLTexture::read_internal(int mip,
 }
 
 /* Remove once no longer required -- will just return 0 for now in MTL path. */
-uint gpu::MTLTexture::gl_bindcode_get(void) const
+uint gpu::MTLTexture::gl_bindcode_get() const
 {
   return 0;
 }
 
-bool gpu::MTLTexture::init_internal(void)
+bool gpu::MTLTexture::init_internal()
 {
   if (format_ == GPU_DEPTH24_STENCIL8) {
     /* Apple Silicon requires GPU_DEPTH32F_STENCIL8 instead of GPU_DEPTH24_STENCIL8. */
diff --git a/source/blender/gpu/metal/mtl_uniform_buffer.hh b/source/blender/gpu/metal/mtl_uniform_buffer.hh
new file mode 100644
index 00000000000..789a85f0a92
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_uniform_buffer.hh
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#pragma once
+
+#include "MEM_guardedalloc.h"
+#include "gpu_uniform_buffer_private.hh"
+
+#include "mtl_context.hh"
+
+namespace blender::gpu {
+
+/**
+ * Implementation of uniform buffers using Metal.
+ */
+class MTLUniformBuf : public UniformBuf {
+ private:
+  /* Allocation handle. */
+  gpu::MTLBuffer *metal_buffer_ = nullptr;
+
+  /* Whether the buffer has contents; if false, no GPU buffer will
+   * have yet been allocated. */
+  bool has_data_ = false;
+
+  /* Bind-state tracking. */
+  int bind_slot_ = -1;
+  MTLContext *bound_ctx_ = nullptr;
+
+ public:
+  MTLUniformBuf(size_t size, const char *name);
+  ~MTLUniformBuf();
+
+  void update(const void *data) override;
+  void bind(int slot) override;
+  void unbind() override;
+
+  id<MTLBuffer> get_metal_buffer(int *r_offset);
+  int get_size();
+  const char *get_name()
+  {
+    return name_;
+  }
+
+  MEM_CXX_CLASS_ALLOC_FUNCS("MTLUniformBuf");
+};
+
+}  // namespace blender::gpu
diff --git a/source/blender/gpu/metal/mtl_uniform_buffer.mm b/source/blender/gpu/metal/mtl_uniform_buffer.mm
new file mode 100644
index 00000000000..4893014dedf
--- /dev/null
+++ b/source/blender/gpu/metal/mtl_uniform_buffer.mm
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup gpu
+ */
+
+#include "BKE_global.h"
+
+#include "BLI_string.h"
+
+#include "gpu_backend.hh"
+#include "gpu_context_private.hh"
+
+#include "mtl_backend.hh"
+#include "mtl_context.hh"
+#include "mtl_debug.hh"
+#include "mtl_uniform_buffer.hh"
+
+namespace blender::gpu {
+
+MTLUniformBuf::MTLUniformBuf(size_t size, const char *name) : UniformBuf(size, name)
+{
+}
+
+MTLUniformBuf::~MTLUniformBuf()
+{
+  if (metal_buffer_ != nullptr) {
+    metal_buffer_->free();
+    metal_buffer_ = nullptr;
+  }
+  has_data_ = false;
+
+  /* Ensure the UBO is not bound to the active context.
+   * UBO bindings are reset upon context-switch, so we do not need
+   * to check deactivated contexts. */
+  MTLContext *ctx = MTLContext::get();
+  if (ctx) {
+    for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) {
+      MTLUniformBufferBinding &slot = ctx->pipeline_state.ubo_bindings[i];
+      if (slot.bound && slot.ubo == this) {
+        slot.bound = false;
+        slot.ubo = nullptr;
+      }
+    }
+  }
+}
+
+void MTLUniformBuf::update(const void *data)
+{
+  BLI_assert(this);
+  BLI_assert(size_in_bytes_ > 0);
+
+  /* Free the existing allocation.
+   * The previous UBO resource will be tracked by the memory manager,
+   * in case dependent GPU work is still executing. */
+  if (metal_buffer_ != nullptr) {
+    metal_buffer_->free();
+    metal_buffer_ = nullptr;
+  }
+
+  /* Allocate MTL buffer. */
+  MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
+  BLI_assert(ctx);
+  BLI_assert(ctx->device);
+  UNUSED_VARS_NDEBUG(ctx);
+
+  if (data != nullptr) {
+    metal_buffer_ = MTLContext::get_global_memory_manager().allocate_with_data(
+        size_in_bytes_, true, data);
+    has_data_ = true;
+
+    metal_buffer_->set_label(@"Uniform Buffer");
+    BLI_assert(metal_buffer_ != nullptr);
+    BLI_assert(metal_buffer_->get_metal_buffer() != nil);
+  }
+  else {
+    /* If data is not yet present, no buffer will be allocated and MTLContext will use an empty
+     * null buffer, containing zeroes, if the UBO is bound. */
+    metal_buffer_ = nullptr;
+    has_data_ = false;
+  }
+}
+
+void MTLUniformBuf::bind(int slot)
+{
+  if (slot < 0) {
+    MTL_LOG_WARNING("Failed to bind UBO %p. Uniform location %d invalid.\n", this, slot);
+    return;
+  }
+
+  BLI_assert(slot < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
+
+  /* Bind current UBO to the active context. */
+  MTLContext *ctx = MTLContext::get();
+  BLI_assert(ctx);
+
+  MTLUniformBufferBinding &ctx_ubo_bind_slot = ctx->pipeline_state.ubo_bindings[slot];
+  ctx_ubo_bind_slot.ubo = this;
+  ctx_ubo_bind_slot.bound = true;
+
+  bind_slot_ = slot;
+  bound_ctx_ = ctx;
+
+  /* Check if we have any deferred data to upload. */
+  if (data_ != nullptr) {
+    this->update(data_);
+    MEM_SAFE_FREE(data_);
+  }
+
+  /* Ensure there is at least an empty dummy buffer. */
+  if (metal_buffer_ == nullptr) {
+    this->update(nullptr);
+  }
+}
+
+void MTLUniformBuf::unbind()
+{
+  /* Unbind in debug mode to validate missing binds.
+   * Otherwise, only perform a full unbind upon destruction
+   * to ensure no lingering references. */
+#ifndef NDEBUG
+  if (true) {
+#else
+  if (G.debug & G_DEBUG_GPU) {
+#endif
+    if (bound_ctx_ != nullptr && bind_slot_ > -1) {
+      MTLUniformBufferBinding &ctx_ubo_bind_slot =
+          bound_ctx_->pipeline_state.ubo_bindings[bind_slot_];
+      if (ctx_ubo_bind_slot.bound && ctx_ubo_bind_slot.ubo == this) {
+        ctx_ubo_bind_slot.bound = false;
+        ctx_ubo_bind_slot.ubo = nullptr;
+      }
+    }
+  }
+
+  /* Reset bind index. */
+  bind_slot_ = -1;
+  bound_ctx_ = nullptr;
+}
+
+id<MTLBuffer> MTLUniformBuf::get_metal_buffer(int *r_offset)
+{
+  BLI_assert(this);
+  *r_offset = 0;
+  if (metal_buffer_ != nullptr && has_data_) {
+    metal_buffer_->debug_ensure_used();
+    return metal_buffer_->get_metal_buffer();
+  }
+  return nil;
+}
+
+int MTLUniformBuf::get_size()
+{
+  BLI_assert(this);
+  return size_in_bytes_;
+}
+
+}  // namespace blender::gpu
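The deferred-upload flow above (stage data while no GPU buffer exists, upload on first bind, retire the old allocation on update) can be sketched as a standalone simulation (plain C++; UniformBufSim is a hypothetical stand-in that ignores contexts and the safe free list):

#include <cstddef>
#include <iostream>
#include <vector>

struct UniformBufSim {
  std::vector<unsigned char> staged; /* Mirrors the deferred `data_` pointer. */
  std::vector<unsigned char> gpu;    /* Mirrors the MTLBuffer contents. */

  void set_data(const void *data, size_t size) /* Before bind: stage only. */
  {
    const unsigned char *bytes = static_cast<const unsigned char *>(data);
    staged.assign(bytes, bytes + size);
  }

  void bind()
  {
    if (!staged.empty()) {
      gpu = staged; /* update(data_) + MEM_SAFE_FREE(data_) in the real code. */
      staged.clear();
    }
  }
};

int main()
{
  UniformBufSim ubo;
  const float values[4] = {0.0f, 0.25f, 0.5f, 1.0f};
  ubo.set_data(values, sizeof(values));
  std::cout << "gpu bytes before bind: " << ubo.gpu.size() << "\n"; /* 0 */
  ubo.bind();
  std::cout << "gpu bytes after bind: " << ubo.gpu.size() << "\n"; /* 16 */
}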
diff --git a/source/blender/gpu/opengl/gl_backend.hh b/source/blender/gpu/opengl/gl_backend.hh
index 29249111294..e425b87afe8 100644
--- a/source/blender/gpu/opengl/gl_backend.hh
+++ b/source/blender/gpu/opengl/gl_backend.hh
@@ -42,11 +42,15 @@ class GLBackend : public GPUBackend {
   }
   ~GLBackend()
   {
-    GLTexture::samplers_free();
-
     GLBackend::platform_exit();
   }
 
+  void delete_resources() override
+  {
+    /* Delete any resources with the context active. */
+    GLTexture::samplers_free();
+  }
+
   static GLBackend *get()
   {
     return static_cast<GLBackend *>(GPUBackend::get());
diff --git a/source/blender/gpu/opengl/gl_batch.hh b/source/blender/gpu/opengl/gl_batch.hh
index a25e495b3b1..1a18572c683 100644
--- a/source/blender/gpu/opengl/gl_batch.hh
+++ b/source/blender/gpu/opengl/gl_batch.hh
@@ -35,9 +35,9 @@ class GLShaderInterface;
 class GLVaoCache {
  private:
   /** Context for which the vao_cache_ was generated. */
-  GLContext *context_ = NULL;
+  GLContext *context_ = nullptr;
   /** Last interface this batch was drawn with. */
-  GLShaderInterface *interface_ = NULL;
+  GLShaderInterface *interface_ = nullptr;
   /** Cached VAO for the last interface. */
   GLuint vao_id_ = 0;
   /** Used when arb_base_instance is not supported. */
diff --git a/source/blender/gpu/opengl/gl_debug.cc b/source/blender/gpu/opengl/gl_debug.cc
index f82138e0d65..79b28642a67 100644
--- a/source/blender/gpu/opengl/gl_debug.cc
+++ b/source/blender/gpu/opengl/gl_debug.cc
@@ -189,7 +189,7 @@ void check_gl_error(const char *info)
     case err: { \
       char msg[256]; \
       SNPRINTF(msg, "%s : %s", #err, info); \
-      debug_callback(0, GL_DEBUG_TYPE_ERROR, 0, GL_DEBUG_SEVERITY_HIGH, 0, msg, NULL); \
+      debug_callback(0, GL_DEBUG_TYPE_ERROR, 0, GL_DEBUG_SEVERITY_HIGH, 0, msg, nullptr); \
       break; \
     }
diff --git a/source/blender/gpu/opengl/gl_framebuffer.hh b/source/blender/gpu/opengl/gl_framebuffer.hh
index 224c7a92c0a..2dc0936d0fe 100644
--- a/source/blender/gpu/opengl/gl_framebuffer.hh
+++ b/source/blender/gpu/opengl/gl_framebuffer.hh
@@ -30,9 +30,9 @@ class GLFrameBuffer : public FrameBuffer {
   /** OpenGL handle. */
   GLuint fbo_id_ = 0;
   /** Context the handle is from. Frame-buffers are not shared across contexts. */
-  GLContext *context_ = NULL;
+  GLContext *context_ = nullptr;
   /** State Manager of the same contexts. */
-  GLStateManager *state_manager_ = NULL;
+  GLStateManager *state_manager_ = nullptr;
   /** Copy of the GL state. Contains ONLY color attachments enums for slot binding. */
   GLenum gl_attachments_[GPU_FB_MAX_COLOR_ATTACHMENT];
   /** Internal frame-buffers are immutable. */
diff --git a/source/blender/gpu/opengl/gl_texture.cc b/source/blender/gpu/opengl/gl_texture.cc
index 055c8d104e2..cfb3184c4a5 100644
--- a/source/blender/gpu/opengl/gl_texture.cc
+++ b/source/blender/gpu/opengl/gl_texture.cc
@@ -310,6 +310,12 @@ void GLTexture::update_sub(
  */
 void GLTexture::generate_mipmap()
 {
+  /* Allow users to provide mipmaps stored in compressed textures.
+   * Skip generating mipmaps to avoid overriding the existing ones. */
+  if (format_flag_ & GPU_FORMAT_COMPRESSED) {
+    return;
+  }
+
   /* Some drivers have bugs when using #glGenerateMipmap with depth textures (see T56789).
    * In this case we just create a complete texture with mipmaps manually without
    * down-sampling. You must initialize the texture levels using other methods like
diff --git a/source/blender/gpu/opengl/gl_texture.hh b/source/blender/gpu/opengl/gl_texture.hh
index e5b879f1f15..aeb9fc0e6b7 100644
--- a/source/blender/gpu/opengl/gl_texture.hh
+++ b/source/blender/gpu/opengl/gl_texture.hh
@@ -33,7 +33,7 @@ class GLTexture : public Texture {
   /** opengl identifier for texture. */
   GLuint tex_id_ = 0;
   /** Legacy workaround for texture copy. Created when using framebuffer_get(). */
-  struct GPUFrameBuffer *framebuffer_ = NULL;
+  struct GPUFrameBuffer *framebuffer_ = nullptr;
   /** True if this texture is bound to at least one texture unit. */
   /* TODO(fclem): How do we ensure thread safety here? */
   bool is_bound_ = false;
diff --git a/source/blender/gpu/tests/gpu_testing.cc b/source/blender/gpu/tests/gpu_testing.cc
index 4e93e062b50..224a9afcf59 100644
--- a/source/blender/gpu/tests/gpu_testing.cc
+++ b/source/blender/gpu/tests/gpu_testing.cc
@@ -26,7 +26,6 @@ void GPUTest::SetUp()
 void GPUTest::TearDown()
 {
   GPU_exit();
-  GPU_backend_exit();
   GPU_context_discard(context);
   GHOST_DisposeOpenGLContext(ghost_system, ghost_context);
   GHOST_DisposeSystem(ghost_system);
 }
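The gl_texture.cc change above is an early-out guard: compressed textures arrive with their mip chain already uploaded, so regenerating would overwrite it. A standalone sketch of just that guard (plain C++; the flag enum is a hypothetical stand-in for the real format flags):

#include <cstdio>

enum FormatFlag { FORMAT_NONE = 0, FORMAT_COMPRESSED = 1 << 0 };

static void generate_mipmap(int format_flag)
{
  /* Compressed textures ship with author-provided mip levels; generating here
   * would overwrite them, so return early. */
  if (format_flag & FORMAT_COMPRESSED) {
    printf("skip: keep existing mips\n");
    return;
  }
  printf("generate mips\n");
}

int main()
{
  generate_mipmap(FORMAT_COMPRESSED);
  generate_mipmap(FORMAT_NONE);
}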