Diffstat (limited to 'source/blender/gpu')
252 files changed, 20513 insertions, 2546 deletions
diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index faf68cf2197..47d4feb7ec9 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -5,7 +5,7 @@ # to more easily highlight code-paths in other libraries that need to be refactored, # bf_gpu is allowed to have opengl regardless of this option. -if(NOT WITH_OPENGL AND NOT WITH_METAL_BACKEND) +if(NOT WITH_OPENGL AND NOT WITH_METAL_BACKEND AND NOT WITH_HEADLESS) add_definitions(-DWITH_OPENGL) endif() @@ -21,24 +21,26 @@ set(INC ../imbuf ../makesdna ../makesrna - ../windowmanager + # For theme color access. ../editors/include - # For node muting stuff... + # For *_info.hh includes. + ../draw/engines/eevee_next + ../draw/intern + + # For node muting stuff. ../nodes - ../nodes/intern ../../../intern/atomic ../../../intern/clog ../../../intern/ghost - ../../../intern/glew-mx ../../../intern/guardedalloc ../../../intern/mantaflow/extern ) set(INC_SYS - ${GLEW_INCLUDE_PATH} + ${Epoxy_INCLUDE_DIRS} ) set(SRC @@ -91,12 +93,10 @@ set(SRC GPU_debug.h GPU_drawlist.h GPU_framebuffer.h - GPU_glew.h GPU_immediate.h GPU_immediate_util.h GPU_index_buffer.h GPU_init_exit.h - GPU_legacy_stubs.h GPU_material.h GPU_matrix.h GPU_platform.h @@ -188,19 +188,40 @@ set(OPENGL_SRC set(METAL_SRC metal/mtl_backend.mm + metal/mtl_command_buffer.mm metal/mtl_context.mm metal/mtl_debug.mm + metal/mtl_framebuffer.mm + metal/mtl_index_buffer.mm + metal/mtl_memory.mm + metal/mtl_query.mm + metal/mtl_shader.mm + metal/mtl_shader_generator.mm + metal/mtl_shader_interface.mm metal/mtl_state.mm metal/mtl_texture.mm metal/mtl_texture_util.mm + metal/mtl_uniform_buffer.mm metal/mtl_backend.hh metal/mtl_capabilities.hh metal/mtl_common.hh metal/mtl_context.hh metal/mtl_debug.hh + metal/mtl_framebuffer.hh + metal/mtl_index_buffer.hh + metal/mtl_memory.hh + metal/mtl_primitive.hh + metal/mtl_pso_descriptor_state.hh + metal/mtl_query.hh + metal/mtl_shader.hh + metal/mtl_shader_generator.hh + metal/mtl_shader_interface.hh + metal/mtl_shader_interface_type.hh + metal/mtl_shader_shared.h metal/mtl_state.hh metal/mtl_texture.hh + metal/mtl_uniform_buffer.hh ) # Select Backend source based on availability @@ -213,16 +234,12 @@ if(WITH_METAL_BACKEND) endif() set(LIB - ${BLENDER_GL_LIBRARIES} + ${Epoxy_LIBRARIES} ) -if(NOT WITH_SYSTEM_GLEW) - list(APPEND LIB - ${BLENDER_GLEW_LIBRARIES} - ) -endif() - set(MSL_SRC + shaders/metal/mtl_shader_defines.msl + shaders/metal/mtl_shader_common.msl metal/kernels/compute_texture_update.msl metal/kernels/compute_texture_read.msl @@ -255,11 +272,7 @@ set(GLSL_SRC shaders/gpu_shader_2D_widget_shadow_frag.glsl shaders/gpu_shader_2D_nodelink_frag.glsl shaders/gpu_shader_2D_nodelink_vert.glsl - shaders/gpu_shader_2D_flat_color_vert.glsl - shaders/gpu_shader_2D_line_dashed_uniform_color_vert.glsl shaders/gpu_shader_2D_line_dashed_frag.glsl - shaders/gpu_shader_2D_smooth_color_vert.glsl - shaders/gpu_shader_2D_smooth_color_frag.glsl shaders/gpu_shader_2D_image_vert.glsl shaders/gpu_shader_2D_image_rect_vert.glsl shaders/gpu_shader_2D_image_multi_rect_vert.glsl @@ -267,7 +280,6 @@ set(GLSL_SRC shaders/gpu_shader_image_desaturate_frag.glsl shaders/gpu_shader_image_overlays_merge_frag.glsl shaders/gpu_shader_image_overlays_stereo_merge_frag.glsl - shaders/gpu_shader_image_modulate_alpha_frag.glsl shaders/gpu_shader_image_shuffle_color_frag.glsl shaders/gpu_shader_image_color_frag.glsl shaders/gpu_shader_image_varying_color_frag.glsl @@ -312,6 +324,55 @@ set(GLSL_SRC 
shaders/common/gpu_shader_common_math_utils.glsl shaders/common/gpu_shader_common_mix_rgb.glsl + shaders/compositor/compositor_alpha_crop.glsl + shaders/compositor/compositor_bilateral_blur.glsl + shaders/compositor/compositor_bokeh_image.glsl + shaders/compositor/compositor_box_mask.glsl + shaders/compositor/compositor_convert.glsl + shaders/compositor/compositor_despeckle.glsl + shaders/compositor/compositor_directional_blur.glsl + shaders/compositor/compositor_edge_filter.glsl + shaders/compositor/compositor_ellipse_mask.glsl + shaders/compositor/compositor_filter.glsl + shaders/compositor/compositor_flip.glsl + shaders/compositor/compositor_image_crop.glsl + shaders/compositor/compositor_morphological_distance.glsl + shaders/compositor/compositor_morphological_distance_feather.glsl + shaders/compositor/compositor_morphological_distance_threshold.glsl + shaders/compositor/compositor_morphological_step.glsl + shaders/compositor/compositor_projector_lens_distortion.glsl + shaders/compositor/compositor_realize_on_domain.glsl + shaders/compositor/compositor_screen_lens_distortion.glsl + shaders/compositor/compositor_set_alpha.glsl + shaders/compositor/compositor_split_viewer.glsl + + shaders/compositor/library/gpu_shader_compositor_alpha_over.glsl + shaders/compositor/library/gpu_shader_compositor_bright_contrast.glsl + shaders/compositor/library/gpu_shader_compositor_channel_matte.glsl + shaders/compositor/library/gpu_shader_compositor_chroma_matte.glsl + shaders/compositor/library/gpu_shader_compositor_color_balance.glsl + shaders/compositor/library/gpu_shader_compositor_color_correction.glsl + shaders/compositor/library/gpu_shader_compositor_color_matte.glsl + shaders/compositor/library/gpu_shader_compositor_color_spill.glsl + shaders/compositor/library/gpu_shader_compositor_color_to_luminance.glsl + shaders/compositor/library/gpu_shader_compositor_difference_matte.glsl + shaders/compositor/library/gpu_shader_compositor_distance_matte.glsl + shaders/compositor/library/gpu_shader_compositor_exposure.glsl + shaders/compositor/library/gpu_shader_compositor_gamma.glsl + shaders/compositor/library/gpu_shader_compositor_hue_correct.glsl + shaders/compositor/library/gpu_shader_compositor_hue_saturation_value.glsl + shaders/compositor/library/gpu_shader_compositor_invert.glsl + shaders/compositor/library/gpu_shader_compositor_luminance_matte.glsl + shaders/compositor/library/gpu_shader_compositor_main.glsl + shaders/compositor/library/gpu_shader_compositor_map_value.glsl + shaders/compositor/library/gpu_shader_compositor_normal.glsl + shaders/compositor/library/gpu_shader_compositor_posterize.glsl + shaders/compositor/library/gpu_shader_compositor_separate_combine.glsl + shaders/compositor/library/gpu_shader_compositor_set_alpha.glsl + shaders/compositor/library/gpu_shader_compositor_store_output.glsl + shaders/compositor/library/gpu_shader_compositor_texture_utilities.glsl + shaders/compositor/library/gpu_shader_compositor_type_conversion.glsl + shaders/material/gpu_shader_material_add_shader.glsl shaders/material/gpu_shader_material_ambient_occlusion.glsl shaders/material/gpu_shader_material_anisotropic.glsl @@ -348,6 +409,7 @@ set(GLSL_SRC shaders/material/gpu_shader_material_light_path.glsl shaders/material/gpu_shader_material_mapping.glsl shaders/material/gpu_shader_material_map_range.glsl + shaders/material/gpu_shader_material_mix_color.glsl shaders/material/gpu_shader_material_mix_shader.glsl shaders/material/gpu_shader_material_noise.glsl shaders/material/gpu_shader_material_normal.glsl 
@@ -409,21 +471,44 @@ set(GLSL_SRC GPU_shader_shared_utils.h ) -set(GLSL_C) -foreach(GLSL_FILE ${GLSL_SRC}) - data_to_c_simple(${GLSL_FILE} GLSL_C) -endforeach() +set(MTL_BACKEND_GLSL_SRC + metal/kernels/compute_texture_update.msl + metal/kernels/compute_texture_read.msl + metal/kernels/depth_2d_update_float_frag.glsl + metal/kernels/depth_2d_update_int24_frag.glsl + metal/kernels/depth_2d_update_int32_frag.glsl + metal/kernels/depth_2d_update_vert.glsl + metal/kernels/gpu_shader_fullscreen_blit_vert.glsl + metal/kernels/gpu_shader_fullscreen_blit_frag.glsl +) +set(MSL_SRC + shaders/metal/mtl_shader_defines.msl + shaders/metal/mtl_shader_common.msl + metal/mtl_shader_shared.h +) if(WITH_METAL_BACKEND) + list(APPEND GLSL_SRC ${MTL_BACKEND_GLSL_SRC}) + set(MSL_C) foreach(MSL_FILE ${MSL_SRC}) data_to_c_simple(${MSL_FILE} MSL_C) endforeach() - list(APPEND GLSL_C ${MSL_C}) endif() -blender_add_lib(bf_gpu_shaders "${GLSL_C}" "" "" "") +set(GLSL_C) +foreach(GLSL_FILE ${GLSL_SRC}) + data_to_c_simple(${GLSL_FILE} GLSL_C) +endforeach() + +set(SHADER_C) +list(APPEND SHADER_C ${GLSL_C}) +if(WITH_METAL_BACKEND) + list(APPEND SHADER_C ${MSL_C}) +endif() + +blender_add_lib(bf_gpu_shaders "${SHADER_C}" "" "" "") list(APPEND LIB bf_gpu_shaders @@ -443,7 +528,12 @@ list(APPEND INC ${CMAKE_CURRENT_BINARY_DIR}) set(SRC_SHADER_CREATE_INFOS ../draw/engines/basic/shaders/infos/basic_depth_info.hh + ../draw/engines/eevee_next/shaders/infos/eevee_depth_of_field_info.hh + ../draw/engines/eevee_next/shaders/infos/eevee_film_info.hh + ../draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh + ../draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh ../draw/engines/eevee_next/shaders/infos/eevee_material_info.hh + ../draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh ../draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh ../draw/engines/gpencil/shaders/infos/gpencil_info.hh ../draw/engines/gpencil/shaders/infos/gpencil_vfx_info.hh @@ -456,6 +546,7 @@ set(SRC_SHADER_CREATE_INFOS ../draw/engines/overlay/shaders/infos/overlay_grid_info.hh ../draw/engines/overlay/shaders/infos/overlay_outline_info.hh ../draw/engines/overlay/shaders/infos/overlay_paint_info.hh + ../draw/engines/overlay/shaders/infos/overlay_sculpt_curves_info.hh ../draw/engines/overlay/shaders/infos/overlay_sculpt_info.hh ../draw/engines/overlay/shaders/infos/overlay_volume_info.hh ../draw/engines/overlay/shaders/infos/overlay_wireframe_info.hh @@ -471,6 +562,7 @@ set(SRC_SHADER_CREATE_INFOS ../draw/engines/workbench/shaders/infos/workbench_transparent_resolve_info.hh ../draw/engines/workbench/shaders/infos/workbench_volume_info.hh ../draw/engines/image/shaders/infos/engine_image_info.hh + ../draw/intern/shaders/draw_debug_info.hh ../draw/intern/shaders/draw_fullscreen_info.hh ../draw/intern/shaders/draw_hair_refine_info.hh ../draw/intern/shaders/draw_object_infos_info.hh @@ -480,8 +572,6 @@ set(SRC_SHADER_CREATE_INFOS shaders/infos/gpu_shader_2D_area_borders_info.hh shaders/infos/gpu_shader_2D_checker_info.hh shaders/infos/gpu_shader_2D_diag_stripes_info.hh - shaders/infos/gpu_shader_2D_flat_color_info.hh - shaders/infos/gpu_shader_2D_image_color_info.hh shaders/infos/gpu_shader_2D_image_desaturate_color_info.hh shaders/infos/gpu_shader_2D_image_info.hh shaders/infos/gpu_shader_2D_image_multi_rect_color_info.hh @@ -493,13 +583,10 @@ set(SRC_SHADER_CREATE_INFOS shaders/infos/gpu_shader_2D_point_uniform_size_uniform_color_aa_info.hh shaders/infos/gpu_shader_2D_point_uniform_size_uniform_color_outline_aa_info.hh 
shaders/infos/gpu_shader_2D_point_varying_size_varying_color_info.hh - shaders/infos/gpu_shader_2D_smooth_color_info.hh - shaders/infos/gpu_shader_2D_uniform_color_info.hh shaders/infos/gpu_shader_2D_widget_info.hh shaders/infos/gpu_shader_3D_depth_only_info.hh shaders/infos/gpu_shader_3D_flat_color_info.hh shaders/infos/gpu_shader_3D_image_info.hh - shaders/infos/gpu_shader_3D_image_modulate_alpha_info.hh shaders/infos/gpu_shader_3D_point_info.hh shaders/infos/gpu_shader_3D_polyline_info.hh shaders/infos/gpu_shader_3D_smooth_color_info.hh @@ -511,8 +598,40 @@ set(SRC_SHADER_CREATE_INFOS shaders/infos/gpu_shader_simple_lighting_info.hh shaders/infos/gpu_shader_text_info.hh shaders/infos/gpu_srgb_to_framebuffer_space_info.hh + + shaders/compositor/infos/compositor_alpha_crop_info.hh + shaders/compositor/infos/compositor_bilateral_blur_info.hh + shaders/compositor/infos/compositor_bokeh_image_info.hh + shaders/compositor/infos/compositor_box_mask_info.hh + shaders/compositor/infos/compositor_convert_info.hh + shaders/compositor/infos/compositor_despeckle_info.hh + shaders/compositor/infos/compositor_directional_blur_info.hh + shaders/compositor/infos/compositor_edge_filter_info.hh + shaders/compositor/infos/compositor_ellipse_mask_info.hh + shaders/compositor/infos/compositor_filter_info.hh + shaders/compositor/infos/compositor_flip_info.hh + shaders/compositor/infos/compositor_image_crop_info.hh + shaders/compositor/infos/compositor_morphological_distance_feather_info.hh + shaders/compositor/infos/compositor_morphological_distance_info.hh + shaders/compositor/infos/compositor_morphological_distance_threshold_info.hh + shaders/compositor/infos/compositor_morphological_step_info.hh + shaders/compositor/infos/compositor_projector_lens_distortion_info.hh + shaders/compositor/infos/compositor_realize_on_domain_info.hh + shaders/compositor/infos/compositor_screen_lens_distortion_info.hh + shaders/compositor/infos/compositor_set_alpha_info.hh + shaders/compositor/infos/compositor_split_viewer_info.hh ) +set(SRC_SHADER_CREATE_INFOS_MTL + metal/kernels/depth_2d_update_info.hh + metal/kernels/gpu_shader_fullscreen_blit_info.hh +) + +if(WITH_METAL_BACKEND) + list(APPEND SRC_SHADER_CREATE_INFOS ${SRC_SHADER_CREATE_INFOS_MTL}) +endif() + + set(SHADER_CREATE_INFOS_CONTENT "") foreach(DESCRIPTOR_FILE ${SRC_SHADER_CREATE_INFOS}) string(APPEND SHADER_CREATE_INFOS_CONTENT "#include \"${DESCRIPTOR_FILE}\"\n") @@ -525,8 +644,6 @@ if(WITH_MOD_FLUID) add_definitions(-DWITH_FLUID) endif() -add_definitions(${GL_DEFINITIONS}) - if(WITH_IMAGE_DDS) add_definitions(-DWITH_DDS) endif() @@ -552,20 +669,17 @@ endif() -if(WITH_GPU_SHADER_BUILDER) +if(WITH_GPU_BUILDTIME_SHADER_BUILDER) # TODO(@fclem) Fix this mess. 
if(APPLE) add_executable(shader_builder intern/gpu_shader_builder.cc + intern/gpu_shader_builder_stubs.cc ${shader_create_info_list_file} ) setup_platform_linker_flags(shader_builder) - - target_link_libraries(shader_builder PUBLIC - bf_blenkernel - buildinfoobj - ) + target_link_libraries(shader_builder PUBLIC buildinfoobj) else() if(WIN32) # We can re-use the manifest from tests.exe here since it's @@ -580,12 +694,14 @@ if(WITH_GPU_SHADER_BUILDER) ${MANIFEST} ) - target_link_libraries(shader_builder PUBLIC - bf_blenkernel - ${PLATFORM_LINKLIBS} - ) endif() - + target_link_libraries(shader_builder PUBLIC + bf_gpu + bf_intern_clog + bf_blenlib + bf_intern_ghost + ${PLATFORM_LINKLIBS} + ) target_include_directories(shader_builder PRIVATE ${INC} ${CMAKE_CURRENT_BINARY_DIR}) set(SRC_BAKED_CREATE_INFOS_FILE ${CMAKE_CURRENT_BINARY_DIR}/shader_baked.hh) diff --git a/source/blender/gpu/GPU_batch.h b/source/blender/gpu/GPU_batch.h index 7fad8dd23be..4935ced7f48 100644 --- a/source/blender/gpu/GPU_batch.h +++ b/source/blender/gpu/GPU_batch.h @@ -14,6 +14,7 @@ #include "GPU_index_buffer.h" #include "GPU_shader.h" +#include "GPU_storage_buffer.h" #include "GPU_uniform_buffer.h" #include "GPU_vertex_buffer.h" @@ -69,6 +70,8 @@ typedef struct GPUBatch { GPUVertBuf *inst[GPU_BATCH_INST_VBO_MAX_LEN]; /** NULL if element list not needed */ GPUIndexBuf *elem; + /** Resource ID attribute workaround. */ + GPUStorageBuf *resource_id_buf; /** Bookkeeping. */ eGPUBatchFlag flag; /** Type of geometry to draw. */ @@ -92,8 +95,10 @@ void GPU_batch_init_ex(GPUBatch *batch, */ void GPU_batch_copy(GPUBatch *batch_dst, GPUBatch *batch_src); -#define GPU_batch_create(prim, verts, elem) GPU_batch_create_ex(prim, verts, elem, 0) -#define GPU_batch_init(batch, prim, verts, elem) GPU_batch_init_ex(batch, prim, verts, elem, 0) +#define GPU_batch_create(prim, verts, elem) \ + GPU_batch_create_ex(prim, verts, elem, (eGPUBatchFlag)0) +#define GPU_batch_init(batch, prim, verts, elem) \ + GPU_batch_init_ex(batch, prim, verts, elem, (eGPUBatchFlag)0) /** * Same as discard but does not free. (does not call free callback). @@ -123,6 +128,11 @@ bool GPU_batch_vertbuf_has(GPUBatch *, GPUVertBuf *); #define GPU_batch_vertbuf_add(batch, verts) GPU_batch_vertbuf_add_ex(batch, verts, false) +/** + * Set resource id buffer to bind as instance attribute to workaround the lack of gl_BaseInstance. + */ +void GPU_batch_resource_id_buf_set(GPUBatch *batch, GPUStorageBuf *resource_id_buf); + void GPU_batch_set_shader(GPUBatch *batch, GPUShader *shader); /** * Bind program bound to IMM to the batch. @@ -161,6 +171,13 @@ void GPU_batch_program_set_builtin_with_config(GPUBatch *batch, #define GPU_batch_texture_bind(batch, name, tex) \ GPU_texture_bind(tex, GPU_shader_get_texture_binding((batch)->shader, name)); +/** + * Return indirect draw call parameters for this batch. + * NOTE: r_base_index is set to -1 if not using an index buffer. + */ +void GPU_batch_draw_parameter_get( + GPUBatch *batch, int *r_v_count, int *r_v_first, int *r_base_index, int *r_i_count); + void GPU_batch_draw(GPUBatch *batch); void GPU_batch_draw_range(GPUBatch *batch, int v_first, int v_count); /** @@ -171,7 +188,15 @@ void GPU_batch_draw_instanced(GPUBatch *batch, int i_count); /** * This does not bind/unbind shader and does not call GPU_matrix_bind(). 
*/ -void GPU_batch_draw_advanced(GPUBatch *, int v_first, int v_count, int i_first, int i_count); +void GPU_batch_draw_advanced(GPUBatch *batch, int v_first, int v_count, int i_first, int i_count); + +/** + * Issue a draw call using GPU computed arguments. The argument are expected to be valid for the + * type of geometry drawn (index or non-indexed). + */ +void GPU_batch_draw_indirect(GPUBatch *batch, GPUStorageBuf *indirect_buf, intptr_t offset); +void GPU_batch_multi_draw_indirect( + GPUBatch *batch, GPUStorageBuf *indirect_buf, int count, intptr_t offset, intptr_t stride); #if 0 /* future plans */ diff --git a/source/blender/gpu/GPU_buffers.h b/source/blender/gpu/GPU_buffers.h index 1fe3b363687..d1d91cb7508 100644 --- a/source/blender/gpu/GPU_buffers.h +++ b/source/blender/gpu/GPU_buffers.h @@ -22,6 +22,7 @@ struct CCGKey; struct DMFlagMat; struct GSet; struct TableGSet; +struct Mesh; struct MLoop; struct MLoopCol; struct MLoopTri; @@ -46,19 +47,18 @@ typedef struct GPU_PBVH_Buffers GPU_PBVH_Buffers; * * Threaded: do not call any functions that use OpenGL calls! */ -GPU_PBVH_Buffers *GPU_pbvh_mesh_buffers_build(const struct MPoly *mpoly, - const struct MLoop *mloop, +GPU_PBVH_Buffers *GPU_pbvh_mesh_buffers_build(const struct Mesh *mesh, const struct MLoopTri *looptri, - const struct MVert *mvert, - const int *face_indices, const int *sculpt_face_sets, - int face_indices_len, - const struct Mesh *mesh); + const int *face_indices, + int face_indices_len); /** * Threaded: do not call any functions that use OpenGL calls! */ -GPU_PBVH_Buffers *GPU_pbvh_grid_buffers_build(int totgrid, unsigned int **grid_hidden); +GPU_PBVH_Buffers *GPU_pbvh_grid_buffers_build(int totgrid, + unsigned int **grid_hidden, + bool smooth); /** * Threaded: do not call any functions that use OpenGL calls! 
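The GPU_batch.h hunk above introduces storage-buffer-backed indirect draws (GPU_batch_draw_indirect and GPU_batch_multi_draw_indirect). A minimal usage sketch, not part of the patch, assuming `batch`, `shader` and `args_buf` are created and filled elsewhere (for example by a compute pass that writes draw arguments); the helper name and its parameters are illustrative only:

/* Sketch only: issue draw calls whose arguments were computed on the GPU.
 * The buffer is assumed to hold valid arguments for the batch's geometry
 * type (indexed or non-indexed), as required by the new API. */
#include "GPU_batch.h"

static void draw_with_gpu_generated_args(GPUBatch *batch,
                                         GPUShader *shader,
                                         GPUStorageBuf *args_buf,
                                         int draw_count,
                                         intptr_t stride)
{
  GPU_batch_set_shader(batch, shader);
  /* One draw whose arguments live at byte offset 0 of the buffer. */
  GPU_batch_draw_indirect(batch, args_buf, 0);
  /* Or `draw_count` draws packed contiguously with the given stride. */
  GPU_batch_multi_draw_indirect(batch, args_buf, draw_count, 0, stride);
}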
@@ -89,9 +89,8 @@ enum { */ void GPU_pbvh_mesh_buffers_update(PBVHGPUFormat *vbo_id, GPU_PBVH_Buffers *buffers, + const struct Mesh *mesh, const struct MVert *mvert, - const CustomData *vdata, - const CustomData *ldata, const float *vmask, const int *sculpt_face_sets, const int face_sets_color_seed, diff --git a/source/blender/gpu/GPU_capabilities.h b/source/blender/gpu/GPU_capabilities.h index 7fe467de402..61c60f336e1 100644 --- a/source/blender/gpu/GPU_capabilities.h +++ b/source/blender/gpu/GPU_capabilities.h @@ -16,6 +16,7 @@ extern "C" { #endif int GPU_max_texture_size(void); +int GPU_max_texture_3d_size(void); int GPU_max_texture_layers(void); int GPU_max_textures(void); int GPU_max_textures_vert(void); @@ -31,6 +32,7 @@ int GPU_max_vertex_attribs(void); int GPU_max_varying_floats(void); int GPU_max_shader_storage_buffer_bindings(void); int GPU_max_compute_shader_storage_blocks(void); +int GPU_max_samplers(void); int GPU_extensions_len(void); const char *GPU_extension_get(int i); @@ -47,6 +49,7 @@ bool GPU_crappy_amd_driver(void); bool GPU_compute_shader_support(void); bool GPU_shader_storage_buffer_objects_support(void); bool GPU_shader_image_load_store_support(void); +bool GPU_shader_draw_parameters_support(void); bool GPU_mem_stats_supported(void); void GPU_mem_stats_get(int *totalmem, int *freemem); @@ -56,6 +59,9 @@ void GPU_mem_stats_get(int *totalmem, int *freemem); */ bool GPU_stereo_quadbuffer_support(void); +int GPU_minimum_per_vertex_stride(void); +bool GPU_transform_feedback_support(void); + #ifdef __cplusplus } #endif diff --git a/source/blender/gpu/GPU_common_types.h b/source/blender/gpu/GPU_common_types.h index 8c91d60812f..13535a4fb3b 100644 --- a/source/blender/gpu/GPU_common_types.h +++ b/source/blender/gpu/GPU_common_types.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + /** \file * \ingroup gpu */ @@ -8,6 +10,14 @@ extern "C" { #endif +typedef enum eGPULoadOp { + GPU_LOADACTION_CLEAR = 0, + GPU_LOADACTION_LOAD, + GPU_LOADACTION_DONT_CARE +} eGPULoadOp; + +typedef enum eGPUStoreOp { GPU_STOREACTION_STORE = 0, GPU_STOREACTION_DONT_CARE } eGPUStoreOp; + typedef enum eGPUFrontFace { GPU_CLOCKWISE, GPU_COUNTERCLOCKWISE, diff --git a/source/blender/gpu/GPU_compute.h b/source/blender/gpu/GPU_compute.h index 6dfd6f73ae8..ff94620f186 100644 --- a/source/blender/gpu/GPU_compute.h +++ b/source/blender/gpu/GPU_compute.h @@ -20,7 +20,7 @@ void GPU_compute_dispatch(GPUShader *shader, uint groups_y_len, uint groups_z_len); -void GPU_compute_dispatch_indirect(GPUShader *shader, GPUStorageBuf *indirect_buf); +void GPU_compute_dispatch_indirect(GPUShader *shader, GPUStorageBuf *indirect_buf_); #ifdef __cplusplus } diff --git a/source/blender/gpu/GPU_context.h b/source/blender/gpu/GPU_context.h index f3b7f8c29bf..a242bb7cc94 100644 --- a/source/blender/gpu/GPU_context.h +++ b/source/blender/gpu/GPU_context.h @@ -17,10 +17,10 @@ extern "C" { #endif -void GPU_backend_init(eGPUBackendType backend); -void GPU_backend_exit(void); -bool GPU_backend_supported(eGPUBackendType type); - +/* GPU back-ends abstract the differences between different APIs. #GPU_context_create + * automatically initializes the back-end, and #GPU_context_discard frees it when there + * are no more contexts. */ +bool GPU_backend_supported(void); eGPUBackendType GPU_backend_get_type(void); /** Opaque type hiding blender::gpu::Context. 
*/ @@ -38,6 +38,13 @@ void GPU_context_discard(GPUContext *); void GPU_context_active_set(GPUContext *); GPUContext *GPU_context_active_get(void); +/* Begin and end frame are used to mark the singular boundary representing the lifetime of a whole + * frame. This also acts as a divisor for ensuring workload submission and flushing, especially for + * background rendering when there is no call to present. + * This is required by explicit-API's where there is no implicit workload flushing. */ +void GPU_context_begin_frame(GPUContext *ctx); +void GPU_context_end_frame(GPUContext *ctx); + /* Legacy GPU (Intel HD4000 series) do not support sharing GPU objects between GPU * contexts. EEVEE/Workbench can create different contexts for image/preview rendering, baking or * compiling. When a legacy GPU is detected (`GPU_use_main_context_workaround()`) any worker diff --git a/source/blender/gpu/GPU_framebuffer.h b/source/blender/gpu/GPU_framebuffer.h index 4436f7a5a7b..70ec7c19e7c 100644 --- a/source/blender/gpu/GPU_framebuffer.h +++ b/source/blender/gpu/GPU_framebuffer.h @@ -14,6 +14,7 @@ #pragma once +#include "GPU_common_types.h" #include "GPU_texture.h" typedef enum eGPUFrameBufferBits { @@ -52,6 +53,44 @@ void GPU_framebuffer_bind(GPUFrameBuffer *fb); void GPU_framebuffer_bind_no_srgb(GPUFrameBuffer *fb); void GPU_framebuffer_restore(void); +/* Advanced binding control. */ +typedef struct GPULoadStore { + eGPULoadOp load_action; + eGPUStoreOp store_action; +} GPULoadStore; +#define NULL_LOAD_STORE \ + { \ + GPU_LOADACTION_DONT_CARE, GPU_STOREACTION_DONT_CARE \ + } + +/* Load store config array (load_store_actions) matches attachment structure of + * GPU_framebuffer_config_array. This allows us to explicitly specify whether attachment data needs + * to be loaded and stored on a per-attachment basis. This enables a number of bandwidth + * optimizations: + * - No need to load contents if subsequent work is over-writing every pixel. + * - No need to store attachments whose contents are not used beyond this pass e.g. depth buffer. + * - State can be customized at bind-time rather than applying to the frame-buffer object as a + * whole. + * + * Example: + * \code{.c} + * GPU_framebuffer_bind_loadstore(&fb, { + * {GPU_LOADACTION_LOAD, GPU_STOREACTION_DONT_CARE} // must be depth buffer + * {GPU_LOADACTION_LOAD, GPU_STOREACTION_STORE}, // Color attachment 0 + * {GPU_LOADACTION_DONT_CARE, GPU_STOREACTION_STORE}, // Color attachment 1 + * {GPU_LOADACTION_DONT_CARE, GPU_STOREACTION_STORE} // Color attachment 2 + * }) + * \encode + */ +void GPU_framebuffer_bind_loadstore(GPUFrameBuffer *fb, + const GPULoadStore *load_store_actions, + uint actions_len); +#define GPU_framebuffer_bind_ex(_fb, ...) \ + { \ + GPULoadStore actions[] = __VA_ARGS__; \ + GPU_framebuffer_bind_loadstore(_fb, actions, (sizeof(actions) / sizeof(GPULoadStore))); \ + } + bool GPU_framebuffer_bound(GPUFrameBuffer *fb); bool GPU_framebuffer_check_valid(GPUFrameBuffer *fb, char err_out[256]); diff --git a/source/blender/gpu/GPU_glew.h b/source/blender/gpu/GPU_glew.h deleted file mode 100644 index 38209a0eb17..00000000000 --- a/source/blender/gpu/GPU_glew.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2012 Blender Foundation. All rights reserved. 
*/ - -/** \file - * \ingroup gpu - */ - -#pragma once - -#if defined(WITH_OPENGL) -# include "glew-mx.h" -# ifndef WITH_LEGACY_OPENGL -# include "GPU_legacy_stubs.h" -# endif -#endif diff --git a/source/blender/gpu/GPU_index_buffer.h b/source/blender/gpu/GPU_index_buffer.h index bbb431cbc15..e5fefda527d 100644 --- a/source/blender/gpu/GPU_index_buffer.h +++ b/source/blender/gpu/GPU_index_buffer.h @@ -26,14 +26,17 @@ typedef struct GPUIndexBufBuilder { uint index_len; uint index_min; uint index_max; + uint restart_index_value; + bool uses_restart_indices; + GPUPrimType prim_type; uint32_t *data; } GPUIndexBufBuilder; -/* supports all primitive types. */ +/** Supports all primitive types. */ void GPU_indexbuf_init_ex(GPUIndexBufBuilder *, GPUPrimType, uint index_len, uint vertex_len); -/* supports only GPU_PRIM_POINTS, GPU_PRIM_LINES and GPU_PRIM_TRIS. */ +/** Supports only #GPU_PRIM_POINTS, #GPU_PRIM_LINES and #GPU_PRIM_TRIS. */ void GPU_indexbuf_init(GPUIndexBufBuilder *, GPUPrimType, uint prim_len, uint vertex_len); GPUIndexBuf *GPU_indexbuf_build_on_device(uint index_len); diff --git a/source/blender/gpu/GPU_legacy_stubs.h b/source/blender/gpu/GPU_legacy_stubs.h deleted file mode 100644 index 5970738a9b3..00000000000 --- a/source/blender/gpu/GPU_legacy_stubs.h +++ /dev/null @@ -1,497 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2017 Blender Foundation. All rights reserved. */ - -/** \file - * \ingroup gpu - * - * This is to mark the transition to OpenGL core profile - * The idea is to allow Blender 2.8 to be built with OpenGL 3.3 even if it means breaking things - * - * This file should be removed in the future - */ - -#pragma once - -#if defined(__GNUC__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wunused-parameter" -# pragma GCC diagnostic ignored "-Wunused-function" -#endif - -#include <stdlib.h> /* for abort(). */ - -#include "BLI_utildefines.h" - -/** - * Empty function, use for break-point when a deprecated - * OpenGL function is called. 
- */ -static void gl_deprecated(void) -{ - BLI_assert(true); -} - -#define _GL_BOOL BLI_INLINE GLboolean -#define _GL_BOOL_RET \ - { \ - gl_deprecated(); \ - return false; \ - } - -#define _GL_ENUM BLI_INLINE GLenum -#define _GL_ENUM_RET \ - { \ - gl_deprecated(); \ - return 0; \ - } - -#define _GL_INT BLI_INLINE GLint -#define _GL_INT_RET \ - { \ - gl_deprecated(); \ - return 0; \ - } - -#define _GL_UINT BLI_INLINE GLuint -#define _GL_UINT_RET \ - { \ - gl_deprecated(); \ - return 0; \ - } - -#define _GL_VOID BLI_INLINE void -#define _GL_VOID_RET \ - { \ - gl_deprecated(); \ - } - -static bool disable_enable_check(GLenum cap) -{ - const bool is_deprecated = ELEM(cap, - GL_ALPHA_TEST, - GL_LINE_STIPPLE, - GL_POINT_SPRITE, - GL_TEXTURE_1D, - GL_TEXTURE_2D, - GL_TEXTURE_GEN_S, - GL_TEXTURE_GEN_T, - -1); - - if (is_deprecated) { - gl_deprecated(); - } - - return is_deprecated; -} - -_GL_VOID USE_CAREFULLY_glDisable(GLenum cap) -{ - if (!disable_enable_check(cap)) { - glDisable(cap); - } -} -#define glDisable USE_CAREFULLY_glDisable - -_GL_VOID USE_CAREFULLY_glEnable(GLenum cap) -{ - if (!disable_enable_check(cap)) { - glEnable(cap); - } -} -#define glEnable USE_CAREFULLY_glEnable - -/** - * Hand written cases - */ - -_GL_VOID DO_NOT_USE_glClientActiveTexture(GLenum texture) _GL_VOID_RET - -/** - * List automatically generated from `gl-deprecated.h` and `glew.h` - */ - -/** - * ENUM values - */ -#define DO_NOT_USE_GL_CURRENT_FOG_COORDINATE 0 -#define DO_NOT_USE_GL_FOG_COORDINATE 0 -#define DO_NOT_USE_GL_FOG_COORDINATE_ARRAY 0 -#define DO_NOT_USE_GL_FOG_COORDINATE_ARRAY_BUFFER_BINDING 0 -#define DO_NOT_USE_GL_FOG_COORDINATE_ARRAY_POINTER 0 -#define DO_NOT_USE_GL_FOG_COORDINATE_ARRAY_STRIDE 0 -#define DO_NOT_USE_GL_FOG_COORDINATE_ARRAY_TYPE 0 -#define DO_NOT_USE_GL_FOG_COORDINATE_SOURCE 0 -#define DO_NOT_USE_GL_POINT_SIZE_GRANULARITY 0 -#define DO_NOT_USE_GL_POINT_SIZE_RANGE 0 -#define DO_NOT_USE_GL_SOURCE0_ALPHA 0 -#define DO_NOT_USE_GL_SOURCE0_RGB 0 -#define DO_NOT_USE_GL_SOURCE1_ALPHA 0 -#define DO_NOT_USE_GL_SOURCE1_RGB 0 -#define DO_NOT_USE_GL_SOURCE2_ALPHA 0 -#define DO_NOT_USE_GL_SOURCE2_RGB 0 - - /** - * Functions - */ - _GL_VOID DO_NOT_USE_glAccum(GLenum op, GLfloat value) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glAlphaFunc(GLenum func, GLclampf ref) _GL_VOID_RET _GL_BOOL - DO_NOT_USE_glAreTexturesResident(GLsizei n, - const GLuint *textures, - GLboolean *residences) _GL_BOOL_RET _GL_VOID - DO_NOT_USE_glArrayElement(GLint i) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glBegin(GLenum mode) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glBitmap(GLsizei width, - GLsizei height, - GLfloat xorig, - GLfloat yorig, - GLfloat xmove, - GLfloat ymove, - const GLubyte *bitmap) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glCallList(GLuint list) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glCallLists(GLsizei n, GLenum type, const void *lists) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glClearAccum(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha) - _GL_VOID_RET _GL_VOID DO_NOT_USE_glClearIndex(GLfloat c) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glClipPlane(GLenum plane, const GLdouble *equation) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor3b(GLbyte red, GLbyte green, GLbyte blue) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor3bv(const GLbyte *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor3d(GLdouble red, GLdouble green, GLdouble blue) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor3dv(const GLdouble *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor3f(GLfloat red, GLfloat green, GLfloat blue) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor3fv(const GLfloat *v) 
_GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor3i(GLint red, GLint green, GLint blue) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor3iv(const GLint *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor3s(GLshort red, GLshort green, GLshort blue) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor3sv(const GLshort *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor3ub(GLubyte red, GLubyte green, GLubyte blue) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor3ubv(const GLubyte *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor3ui(GLuint red, GLuint green, GLuint blue) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor3uiv(const GLuint *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor3us(GLushort red, GLushort green, GLushort blue) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor3usv(const GLushort *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor4b(GLbyte red, GLbyte green, GLbyte blue, GLbyte alpha) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor4bv(const GLbyte *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor4d(GLdouble red, GLdouble green, GLdouble blue, GLdouble alpha) - _GL_VOID_RET _GL_VOID DO_NOT_USE_glColor4dv(const GLdouble *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor4f(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha) - _GL_VOID_RET _GL_VOID DO_NOT_USE_glColor4fv(const GLfloat *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor4i(GLint red, GLint green, GLint blue, GLint alpha) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor4iv(const GLint *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor4s(GLshort red, GLshort green, GLshort blue, GLshort alpha) - _GL_VOID_RET _GL_VOID DO_NOT_USE_glColor4sv(const GLshort *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor4ub(GLubyte red, GLubyte green, GLubyte blue, GLubyte alpha) - _GL_VOID_RET _GL_VOID DO_NOT_USE_glColor4ubv(const GLubyte *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor4ui(GLuint red, GLuint green, GLuint blue, GLuint alpha) - _GL_VOID_RET _GL_VOID DO_NOT_USE_glColor4uiv(const GLuint *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColor4us(GLushort red, GLushort green, GLushort blue, GLushort alpha) - _GL_VOID_RET _GL_VOID DO_NOT_USE_glColor4usv(const GLushort *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColorMaterial(GLenum face, GLenum mode) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glColorPointer(GLint size, GLenum type, GLsizei stride, const void *pointer) - _GL_VOID_RET _GL_VOID - DO_NOT_USE_glCopyPixels(GLint x, GLint y, GLsizei width, GLsizei height, GLenum type) - _GL_VOID_RET _GL_VOID - DO_NOT_USE_glDeleteLists(GLuint list, GLsizei range) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glDisableClientState(GLenum array) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glDrawPixels(GLsizei width, - GLsizei height, - GLenum format, - GLenum type, - const void *pixels) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glEdgeFlag(GLboolean flag) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glEdgeFlagPointer(GLsizei stride, const void *pointer) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glEdgeFlagv(const GLboolean *flag) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glEnableClientState(GLenum array) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glEnd(void) _GL_VOID_RET _GL_VOID DO_NOT_USE_glEndList(void) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glEvalCoord1d(GLdouble u) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glEvalCoord1dv(const GLdouble *u) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glEvalCoord1f(GLfloat u) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glEvalCoord1fv(const GLfloat *u) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glEvalCoord2d(GLdouble u, GLdouble v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glEvalCoord2dv(const GLdouble *u) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glEvalCoord2f(GLfloat u, GLfloat v) _GL_VOID_RET _GL_VOID - 
DO_NOT_USE_glEvalCoord2fv(const GLfloat *u) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glEvalMesh1(GLenum mode, GLint i1, GLint i2) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glEvalMesh2(GLenum mode, GLint i1, GLint i2, GLint j1, GLint j2) - _GL_VOID_RET _GL_VOID DO_NOT_USE_glEvalPoint1(GLint i) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glEvalPoint2(GLint i, GLint j) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glFeedbackBuffer(GLsizei size, GLenum type, GLfloat *buffer) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glFogf(GLenum pname, GLfloat param) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glFogfv(GLenum pname, const GLfloat *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glFogi(GLenum pname, GLint param) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glFogiv(GLenum pname, const GLint *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glFrustum(GLdouble left, - GLdouble right, - GLdouble bottom, - GLdouble top, - GLdouble zNear, - GLdouble zFar) _GL_VOID_RET _GL_UINT - DO_NOT_USE_glGenLists(GLsizei range) _GL_UINT_RET _GL_VOID - DO_NOT_USE_glGetClipPlane(GLenum plane, GLdouble *equation) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glGetLightfv(GLenum light, GLenum pname, GLfloat *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glGetLightiv(GLenum light, GLenum pname, GLint *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glGetMapdv(GLenum target, GLenum query, GLdouble *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glGetMapfv(GLenum target, GLenum query, GLfloat *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glGetMapiv(GLenum target, GLenum query, GLint *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glGetMaterialfv(GLenum face, GLenum pname, GLfloat *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glGetMaterialiv(GLenum face, GLenum pname, GLint *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glGetPixelMapfv(GLenum map, GLfloat *values) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glGetPixelMapuiv(GLenum map, GLuint *values) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glGetPixelMapusv(GLenum map, GLushort *values) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glGetPolygonStipple(GLubyte *mask) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glGetTexEnvfv(GLenum target, GLenum pname, GLfloat *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glGetTexEnviv(GLenum target, GLenum pname, GLint *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glGetTexGendv(GLenum coord, GLenum pname, GLdouble *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glGetTexGenfv(GLenum coord, GLenum pname, GLfloat *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glGetTexGeniv(GLenum coord, GLenum pname, GLint *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glIndexMask(GLuint mask) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glIndexPointer(GLenum type, - GLsizei stride, - const void *pointer) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glIndexd(GLdouble c) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glIndexdv(const GLdouble *c) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glIndexf(GLfloat c) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glIndexfv(const GLfloat *c) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glIndexi(GLint c) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glIndexiv(const GLint *c) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glIndexs(GLshort c) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glIndexsv(const GLshort *c) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glIndexub(GLubyte c) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glIndexubv(const GLubyte *c) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glInitNames(void) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glInterleavedArrays(GLenum format, - GLsizei stride, - const void *pointer) _GL_VOID_RET _GL_BOOL - DO_NOT_USE_glIsList(GLuint list) _GL_BOOL_RET _GL_VOID - DO_NOT_USE_glLightModelf(GLenum pname, GLfloat param) _GL_VOID_RET _GL_VOID - 
DO_NOT_USE_glLightModelfv(GLenum pname, const GLfloat *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glLightModeli(GLenum pname, GLint param) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glLightModeliv(GLenum pname, const GLint *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glLightf(GLenum light, GLenum pname, GLfloat param) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glLightfv(GLenum light, GLenum pname, const GLfloat *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glLighti(GLenum light, GLenum pname, GLint param) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glLightiv(GLenum light, GLenum pname, const GLint *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glLineStipple(GLint factor, GLushort pattern) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glListBase(GLuint base) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glLoadIdentity(void) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glLoadMatrixd(const GLdouble *m) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glLoadMatrixf(const GLfloat *m) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glLoadName(GLuint name) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glMap1d(GLenum target, - GLdouble u1, - GLdouble u2, - GLint stride, - GLint order, - const GLdouble *points) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glMap1f(GLenum target, - GLfloat u1, - GLfloat u2, - GLint stride, - GLint order, - const GLfloat *points) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glMap2d(GLenum target, - GLdouble u1, - GLdouble u2, - GLint ustride, - GLint uorder, - GLdouble v1, - GLdouble v2, - GLint vstride, - GLint vorder, - const GLdouble *points) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glMap2f(GLenum target, - GLfloat u1, - GLfloat u2, - GLint ustride, - GLint uorder, - GLfloat v1, - GLfloat v2, - GLint vstride, - GLint vorder, - const GLfloat *points) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glMapGrid1d(GLint un, GLdouble u1, GLdouble u2) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glMapGrid1f(GLint un, GLfloat u1, GLfloat u2) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glMapGrid2d(GLint un, GLdouble u1, GLdouble u2, GLint vn, GLdouble v1, GLdouble v2) - _GL_VOID_RET _GL_VOID - DO_NOT_USE_glMapGrid2f(GLint un, GLfloat u1, GLfloat u2, GLint vn, GLfloat v1, GLfloat v2) - _GL_VOID_RET _GL_VOID - DO_NOT_USE_glMaterialf(GLenum face, GLenum pname, GLfloat param) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glMaterialfv(GLenum face, GLenum pname, const GLfloat *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glMateriali(GLenum face, GLenum pname, GLint param) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glMaterialiv(GLenum face, GLenum pname, const GLint *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glMatrixMode(GLenum mode) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glMultMatrixd(const GLdouble *m) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glMultMatrixf(const GLfloat *m) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glNewList(GLuint list, GLenum mode) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glNormal3b(GLbyte nx, GLbyte ny, GLbyte nz) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glNormal3bv(const GLbyte *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glNormal3d(GLdouble nx, GLdouble ny, GLdouble nz) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glNormal3dv(const GLdouble *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glNormal3f(GLfloat nx, GLfloat ny, GLfloat nz) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glNormal3fv(const GLfloat *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glNormal3i(GLint nx, GLint ny, GLint nz) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glNormal3iv(const GLint *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glNormal3s(GLshort nx, GLshort ny, GLshort nz) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glNormal3sv(const GLshort *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glNormalPointer(GLenum type, - GLsizei stride, - const void *pointer) 
_GL_VOID_RET _GL_VOID - DO_NOT_USE_glOrtho(GLdouble left, - GLdouble right, - GLdouble bottom, - GLdouble top, - GLdouble zNear, - GLdouble zFar) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPassThrough(GLfloat token) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPixelMapfv(GLenum map, - GLsizei mapsize, - const GLfloat *values) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPixelMapuiv(GLenum map, - GLsizei mapsize, - const GLuint *values) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPixelMapusv(GLenum map, - GLsizei mapsize, - const GLushort *values) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPixelTransferf(GLenum pname, GLfloat param) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPixelTransferi(GLenum pname, GLint param) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPixelZoom(GLfloat xfactor, GLfloat yfactor) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPolygonStipple(const GLubyte *mask) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPopAttrib(void) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPopClientAttrib(void) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPopMatrix(void) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPopName(void) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPrioritizeTextures(GLsizei n, - const GLuint *textures, - const GLclampf *priorities) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPushAttrib(GLbitfield mask) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPushClientAttrib(GLbitfield mask) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPushMatrix(void) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glPushName(GLuint name) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos2d(GLdouble x, GLdouble y) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos2dv(const GLdouble *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos2f(GLfloat x, GLfloat y) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos2fv(const GLfloat *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos2i(GLint x, GLint y) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos2iv(const GLint *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos2s(GLshort x, GLshort y) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos2sv(const GLshort *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos3d(GLdouble x, GLdouble y, GLdouble z) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos3dv(const GLdouble *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos3f(GLfloat x, GLfloat y, GLfloat z) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos3fv(const GLfloat *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos3i(GLint x, GLint y, GLint z) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos3iv(const GLint *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos3s(GLshort x, GLshort y, GLshort z) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos3sv(const GLshort *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos4d(GLdouble x, GLdouble y, GLdouble z, GLdouble w) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos4dv(const GLdouble *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos4f(GLfloat x, GLfloat y, GLfloat z, GLfloat w) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos4fv(const GLfloat *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos4i(GLint x, GLint y, GLint z, GLint w) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos4iv(const GLint *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos4s(GLshort x, GLshort y, GLshort z, GLshort w) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRasterPos4sv(const GLshort *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRectd(GLdouble x1, GLdouble y1, GLdouble x2, GLdouble y2) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRectdv(const GLdouble *v1, const GLdouble *v2) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRectf(GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRectfv(const GLfloat *v1, const 
GLfloat *v2) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRecti(GLint x1, GLint y1, GLint x2, GLint y2) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRectiv(const GLint *v1, const GLint *v2) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRects(GLshort x1, GLshort y1, GLshort x2, GLshort y2) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRectsv(const GLshort *v1, const GLshort *v2) _GL_VOID_RET _GL_INT - DO_NOT_USE_glRenderMode(GLenum mode) _GL_INT_RET _GL_VOID - DO_NOT_USE_glRotated(GLdouble angle, GLdouble x, GLdouble y, GLdouble z) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glRotatef(GLfloat angle, GLfloat x, GLfloat y, GLfloat z) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glScaled(GLdouble x, GLdouble y, GLdouble z) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glScalef(GLfloat x, GLfloat y, GLfloat z) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glSelectBuffer(GLsizei size, GLuint *buffer) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glShadeModel(GLenum mode) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord1d(GLdouble s) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord1dv(const GLdouble *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord1f(GLfloat s) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord1fv(const GLfloat *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord1i(GLint s) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord1iv(const GLint *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord1s(GLshort s) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord1sv(const GLshort *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord2d(GLdouble s, GLdouble t) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord2dv(const GLdouble *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord2f(GLfloat s, GLfloat t) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord2fv(const GLfloat *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord2i(GLint s, GLint t) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord2iv(const GLint *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord2s(GLshort s, GLshort t) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord2sv(const GLshort *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord3d(GLdouble s, GLdouble t, GLdouble r) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord3dv(const GLdouble *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord3f(GLfloat s, GLfloat t, GLfloat r) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord3fv(const GLfloat *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord3i(GLint s, GLint t, GLint r) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord3iv(const GLint *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord3s(GLshort s, GLshort t, GLshort r) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord3sv(const GLshort *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord4d(GLdouble s, GLdouble t, GLdouble r, GLdouble q) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord4dv(const GLdouble *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord4f(GLfloat s, GLfloat t, GLfloat r, GLfloat q) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord4fv(const GLfloat *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord4i(GLint s, GLint t, GLint r, GLint q) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord4iv(const GLint *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord4s(GLshort s, GLshort t, GLshort r, GLshort q) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoord4sv(const GLshort *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexCoordPointer(GLint size, GLenum type, GLsizei stride, const void *pointer) - _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexEnvf(GLenum target, GLenum pname, GLfloat param) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexEnvfv(GLenum target, GLenum pname, const GLfloat *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexEnvi(GLenum target, GLenum pname, GLint param) 
_GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexEnviv(GLenum target, GLenum pname, const GLint *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexGend(GLenum coord, GLenum pname, GLdouble param) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexGendv(GLenum coord, GLenum pname, const GLdouble *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexGenf(GLenum coord, GLenum pname, GLfloat param) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexGenfv(GLenum coord, GLenum pname, const GLfloat *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexGeni(GLenum coord, GLenum pname, GLint param) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTexGeniv(GLenum coord, GLenum pname, const GLint *params) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTranslated(GLdouble x, GLdouble y, GLdouble z) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glTranslatef(GLfloat x, GLfloat y, GLfloat z) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex2d(GLdouble x, GLdouble y) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex2dv(const GLdouble *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex2f(GLfloat x, GLfloat y) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex2fv(const GLfloat *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex2i(GLint x, GLint y) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex2iv(const GLint *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex2s(GLshort x, GLshort y) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex2sv(const GLshort *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex3d(GLdouble x, GLdouble y, GLdouble z) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex3dv(const GLdouble *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex3f(GLfloat x, GLfloat y, GLfloat z) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex3fv(const GLfloat *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex3i(GLint x, GLint y, GLint z) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex3iv(const GLint *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex3s(GLshort x, GLshort y, GLshort z) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex3sv(const GLshort *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex4d(GLdouble x, GLdouble y, GLdouble z, GLdouble w) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex4dv(const GLdouble *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex4f(GLfloat x, GLfloat y, GLfloat z, GLfloat w) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex4fv(const GLfloat *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex4i(GLint x, GLint y, GLint z, GLint w) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex4iv(const GLint *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex4s(GLshort x, GLshort y, GLshort z, GLshort w) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertex4sv(const GLshort *v) _GL_VOID_RET _GL_VOID - DO_NOT_USE_glVertexPointer(GLint size, GLenum type, GLsizei stride, const void *pointer) - _GL_VOID_RET - -/** - * End of automatically generated list - */ - -#undef _GL_BOOL -#undef _GL_BOOL_RET -#undef _GL_ENUM -#undef _GL_ENUM_RET -#undef _GL_INT -#undef _GL_INT_RET -#undef _GL_UINT -#undef _GL_UINT_RET -#undef _GL_VOID -#undef _GL_VOID_RET - -#if defined(__GNUC__) -# pragma GCC diagnostic pop -#endif diff --git a/source/blender/gpu/GPU_material.h b/source/blender/gpu/GPU_material.h index c0633f0323d..023221543ec 100644 --- a/source/blender/gpu/GPU_material.h +++ b/source/blender/gpu/GPU_material.h @@ -77,12 +77,20 @@ typedef enum eGPUMaterialFlag { GPU_MATFLAG_HOLDOUT = (1 << 6), GPU_MATFLAG_SHADER_TO_RGBA = (1 << 7), GPU_MATFLAG_AO = (1 << 8), + GPU_MATFLAG_CLEARCOAT = (1 << 9), GPU_MATFLAG_OBJECT_INFO = (1 << 10), GPU_MATFLAG_AOV = (1 << 11), GPU_MATFLAG_BARYCENTRIC = (1 << 20), + /* Optimization to only add the branches of the principled shader that are necessary. 
*/ + GPU_MATFLAG_PRINCIPLED_CLEARCOAT = (1 << 21), + GPU_MATFLAG_PRINCIPLED_METALLIC = (1 << 22), + GPU_MATFLAG_PRINCIPLED_DIELECTRIC = (1 << 23), + GPU_MATFLAG_PRINCIPLED_GLASS = (1 << 24), + GPU_MATFLAG_PRINCIPLED_ANY = (1 << 25), + /* Tells the render engine the material was just compiled or updated. */ GPU_MATFLAG_UPDATED = (1 << 29), @@ -121,6 +129,7 @@ typedef struct GPUCodegenOutput { char *surface; char *volume; char *thickness; + char *composite; char *material_functions; GPUShaderCreateInfo *create_info; @@ -131,11 +140,19 @@ typedef void (*GPUCodegenCallbackFn)(void *thunk, GPUMaterial *mat, GPUCodegenOu GPUNodeLink *GPU_constant(const float *num); GPUNodeLink *GPU_uniform(const float *num); GPUNodeLink *GPU_attribute(GPUMaterial *mat, eCustomDataType type, const char *name); +/** + * Add a GPU attribute that refers to the default color attribute on a geometry. + * The name, type, and domain are unknown and do not depend on the material. + */ +GPUNodeLink *GPU_attribute_default_color(GPUMaterial *mat); GPUNodeLink *GPU_attribute_with_default(GPUMaterial *mat, eCustomDataType type, const char *name, eGPUDefaultValue default_value); -GPUNodeLink *GPU_uniform_attribute(GPUMaterial *mat, const char *name, bool use_dupli); +GPUNodeLink *GPU_uniform_attribute(GPUMaterial *mat, + const char *name, + bool use_dupli, + uint32_t *r_hash); GPUNodeLink *GPU_image(GPUMaterial *mat, struct Image *ima, struct ImageUser *iuser, @@ -156,15 +173,11 @@ GPUNodeLink *GPU_differentiate_float_function(const char *function_name); bool GPU_link(GPUMaterial *mat, const char *name, ...); bool GPU_stack_link(GPUMaterial *mat, - struct bNode *node, + const struct bNode *node, const char *name, GPUNodeStack *in, GPUNodeStack *out, ...); -GPUNodeLink *GPU_uniformbuf_link_out(struct GPUMaterial *mat, - struct bNode *node, - struct GPUNodeStack *stack, - int index); void GPU_material_output_surface(GPUMaterial *material, GPUNodeLink *link); void GPU_material_output_volume(GPUMaterial *material, GPUNodeLink *link); @@ -173,6 +186,8 @@ void GPU_material_output_thickness(GPUMaterial *material, GPUNodeLink *link); void GPU_material_add_output_link_aov(GPUMaterial *material, GPUNodeLink *link, int hash); +void GPU_material_add_output_link_composite(GPUMaterial *material, GPUNodeLink *link); + /** * Wrap a part of the material graph into a function. You need then need to call the function by * using something like #GPU_differentiate_float_function. @@ -213,6 +228,7 @@ GPUMaterial *GPU_material_from_nodetree(struct Scene *scene, void *thunk); void GPU_material_compile(GPUMaterial *mat); +void GPU_material_free_single(GPUMaterial *material); void GPU_material_free(struct ListBase *gpumaterial); void GPU_material_acquire(GPUMaterial *mat); @@ -223,6 +239,7 @@ void GPU_materials_free(struct Main *bmain); struct Scene *GPU_material_scene(GPUMaterial *material); struct GPUPass *GPU_material_get_pass(GPUMaterial *material); struct GPUShader *GPU_material_get_shader(GPUMaterial *material); +const char *GPU_material_get_name(GPUMaterial *material); /** * Return can be NULL if it's a world material. */ @@ -266,6 +283,12 @@ typedef struct GPUMaterialAttribute { eGPUDefaultValue default_value; /* Only for volumes attributes. */ int id; int users; + /** + * If true, the corresponding attribute is the specified default color attribute on the mesh, + * if it exists. In that case the type and name data can vary per geometry, so it will not be + * valid here. 
+ */ + bool is_default_color; } GPUMaterialAttribute; typedef struct GPUMaterialTexture { @@ -288,6 +311,10 @@ typedef struct GPUUniformAttr { /* Meaningful part of the attribute set key. */ char name[64]; /* MAX_CUSTOMDATA_LAYER_NAME */ + /** Escaped name with [""]. */ + char name_id_prop[64 * 2 + 4]; + /** Hash of name[64] + use_dupli. */ + uint32_t hash_code; bool use_dupli; /* Helper fields used by code generation. */ @@ -302,12 +329,22 @@ typedef struct GPUUniformAttrList { unsigned int count, hash_code; } GPUUniformAttrList; -GPUUniformAttrList *GPU_material_uniform_attributes(GPUMaterial *material); +const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material); struct GHash *GPU_uniform_attr_list_hash_new(const char *info); -void GPU_uniform_attr_list_copy(GPUUniformAttrList *dest, GPUUniformAttrList *src); +void GPU_uniform_attr_list_copy(GPUUniformAttrList *dest, const GPUUniformAttrList *src); void GPU_uniform_attr_list_free(GPUUniformAttrList *set); +/* A callback passed to GPU_material_from_callbacks to construct the material graph by adding and + * linking the necessary GPU material nodes. */ +typedef void (*ConstructGPUMaterialFn)(void *thunk, GPUMaterial *material); + +/* Construct a GPU material from a set of callbacks. See the callback types for more information. + * The given thunk will be passed as the first parameter of each callback. */ +GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_function_cb, + GPUCodegenCallbackFn generate_code_function_cb, + void *thunk); + #ifdef __cplusplus } #endif diff --git a/source/blender/gpu/GPU_primitive.h b/source/blender/gpu/GPU_primitive.h index 4860b037bfb..de2feac2607 100644 --- a/source/blender/gpu/GPU_primitive.h +++ b/source/blender/gpu/GPU_primitive.h @@ -9,6 +9,7 @@ #pragma once +#include "BLI_assert.h" #include "GPU_common.h" #ifdef __cplusplus @@ -42,6 +43,79 @@ typedef enum { GPU_PRIM_CLASS_ANY = GPU_PRIM_CLASS_POINT | GPU_PRIM_CLASS_LINE | GPU_PRIM_CLASS_SURFACE, } GPUPrimClass; +inline int gpu_get_prim_count_from_type(uint vertex_len, GPUPrimType prim_type) +{ + /* does vertex_len make sense for this primitive type? 
*/ + if (vertex_len == 0) { + return 0; + } + + switch (prim_type) { + case GPU_PRIM_POINTS: + return vertex_len; + + case GPU_PRIM_LINES: + BLI_assert(vertex_len % 2 == 0); + return vertex_len / 2; + + case GPU_PRIM_LINE_STRIP: + return vertex_len - 1; + + case GPU_PRIM_LINE_LOOP: + return vertex_len; + + case GPU_PRIM_LINES_ADJ: + BLI_assert(vertex_len % 4 == 0); + return vertex_len / 4; + + case GPU_PRIM_LINE_STRIP_ADJ: + return vertex_len - 2; + + case GPU_PRIM_TRIS: + BLI_assert(vertex_len % 3 == 0); + return vertex_len / 3; + + case GPU_PRIM_TRI_STRIP: + BLI_assert(vertex_len >= 3); + return vertex_len - 2; + + case GPU_PRIM_TRI_FAN: + BLI_assert(vertex_len >= 3); + return vertex_len - 2; + + case GPU_PRIM_TRIS_ADJ: + BLI_assert(vertex_len % 6 == 0); + return vertex_len / 6; + + default: + BLI_assert_unreachable(); + return 0; + } +} + +inline bool is_restart_compatible(GPUPrimType type) +{ + switch (type) { + case GPU_PRIM_POINTS: + case GPU_PRIM_LINES: + case GPU_PRIM_TRIS: + case GPU_PRIM_LINES_ADJ: + case GPU_PRIM_TRIS_ADJ: + case GPU_PRIM_NONE: + default: { + return false; + } + case GPU_PRIM_LINE_STRIP: + case GPU_PRIM_LINE_LOOP: + case GPU_PRIM_TRI_STRIP: + case GPU_PRIM_TRI_FAN: + case GPU_PRIM_LINE_STRIP_ADJ: { + return true; + } + } + return false; +} + /** * TODO: Improve error checking by validating that the shader is suited for this primitive type. * GPUPrimClass GPU_primtype_class(GPUPrimType); diff --git a/source/blender/gpu/GPU_shader.h b/source/blender/gpu/GPU_shader.h index 3460d33fe68..c1b3b879c34 100644 --- a/source/blender/gpu/GPU_shader.h +++ b/source/blender/gpu/GPU_shader.h @@ -148,11 +148,19 @@ typedef enum { GPU_NUM_UNIFORM_BLOCKS, /* Special value, denotes number of builtin uniforms block. */ } GPUUniformBlockBuiltin; +typedef enum { + GPU_STORAGE_BUFFER_DEBUG_VERTS = 0, /* drw_debug_verts_buf */ + GPU_STORAGE_BUFFER_DEBUG_PRINT, /* drw_debug_print_buf */ + + GPU_NUM_STORAGE_BUFFERS, /* Special value, denotes number of builtin buffer blocks. */ +} GPUStorageBufferBuiltin; + void GPU_shader_set_srgb_uniform(GPUShader *shader); int GPU_shader_get_uniform(GPUShader *shader, const char *name); int GPU_shader_get_builtin_uniform(GPUShader *shader, int builtin); int GPU_shader_get_builtin_block(GPUShader *shader, int builtin); +int GPU_shader_get_builtin_ssbo(GPUShader *shader, int builtin); /** DEPRECATED: Kept only because of Python GPU API. 
*/ int GPU_shader_get_uniform_block(GPUShader *shader, const char *name); int GPU_shader_get_ssbo(GPUShader *shader, const char *name); @@ -177,11 +185,18 @@ void GPU_shader_uniform_4f(GPUShader *sh, const char *name, float x, float y, fl void GPU_shader_uniform_2fv(GPUShader *sh, const char *name, const float data[2]); void GPU_shader_uniform_3fv(GPUShader *sh, const char *name, const float data[3]); void GPU_shader_uniform_4fv(GPUShader *sh, const char *name, const float data[4]); +void GPU_shader_uniform_2iv(GPUShader *sh, const char *name, const int data[2]); void GPU_shader_uniform_mat4(GPUShader *sh, const char *name, const float data[4][4]); +void GPU_shader_uniform_mat3_as_mat4(GPUShader *sh, const char *name, const float data[3][3]); void GPU_shader_uniform_2fv_array(GPUShader *sh, const char *name, int len, const float (*val)[2]); void GPU_shader_uniform_4fv_array(GPUShader *sh, const char *name, int len, const float (*val)[4]); +unsigned int GPU_shader_get_attribute_len(const GPUShader *shader); int GPU_shader_get_attribute(GPUShader *shader, const char *name); +bool GPU_shader_get_attribute_info(const GPUShader *shader, + int attr_location, + char r_name[256], + int *r_type); void GPU_shader_set_framebuffer_srgb_target(int use_srgb_to_linear); @@ -191,30 +206,12 @@ typedef enum eGPUBuiltinShader { GPU_SHADER_TEXT, GPU_SHADER_KEYFRAME_SHAPE, GPU_SHADER_SIMPLE_LIGHTING, - /* for simple 2D drawing */ - /** - * Take a single color for all the vertices and a 2D position for each vertex. - * - * \param color: uniform vec4 - * \param pos: in vec2 - */ - GPU_SHADER_2D_UNIFORM_COLOR, - /** - * Take a 2D position and color for each vertex without color interpolation. - * - * \param color: in vec4 - * \param pos: in vec2 - */ - GPU_SHADER_2D_FLAT_COLOR, /** * Take a 2D position and color for each vertex with linear interpolation in window space. * * \param color: in vec4 * \param pos: in vec2 */ - GPU_SHADER_2D_SMOOTH_COLOR, - GPU_SHADER_2D_IMAGE, - GPU_SHADER_2D_IMAGE_COLOR, GPU_SHADER_2D_IMAGE_DESATURATE_COLOR, GPU_SHADER_2D_IMAGE_RECT_COLOR, GPU_SHADER_2D_IMAGE_MULTI_RECT_COLOR, @@ -290,14 +287,14 @@ typedef enum eGPUBuiltinShader { */ GPU_SHADER_3D_IMAGE, /** - * Draw texture with alpha. Take a 3D position and a 2D texture coordinate for each vertex. + * Take a 3D position and color for each vertex with linear interpolation in window space. * - * \param alpha: uniform float + * \param color: uniform vec4 * \param image: uniform sampler2D * \param texCoord: in vec2 * \param pos: in vec3 */ - GPU_SHADER_3D_IMAGE_MODULATE_ALPHA, + GPU_SHADER_3D_IMAGE_COLOR, /* points */ /** * Draw round points with a constant size. 
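/* A minimal usage sketch for the attribute introspection functions declared above
 * (GPU_shader_get_attribute_len / GPU_shader_get_attribute_info). This is illustrative
 * only, not part of the patch, and it assumes attribute locations are contiguous in
 * [0, GPU_shader_get_attribute_len()), which may not hold on every backend. */
#include <stdio.h>
#include "GPU_shader.h"

static void print_vertex_attributes(const GPUShader *shader)
{
  /* Number of vertex attributes exposed by the shader interface. */
  const unsigned int attr_len = GPU_shader_get_attribute_len(shader);
  for (unsigned int location = 0; location < attr_len; location++) {
    char name[256];
    int type;
    /* Assumed: every index below attr_len is a valid attribute location. */
    if (GPU_shader_get_attribute_info(shader, (int)location, name, &type)) {
      printf("attribute %u: %s (type %d)\n", location, name, type);
    }
  }
}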
@@ -346,7 +343,6 @@ typedef enum eGPUBuiltinShader { */ GPU_SHADER_3D_POINT_VARYING_SIZE_VARYING_COLOR, /* lines */ - GPU_SHADER_2D_LINE_DASHED_UNIFORM_COLOR, GPU_SHADER_3D_LINE_DASHED_UNIFORM_COLOR, /* grease pencil drawing */ GPU_SHADER_GPENCIL_STROKE, diff --git a/source/blender/gpu/GPU_shader_shared_utils.h b/source/blender/gpu/GPU_shader_shared_utils.h index 474549d1f42..96feed9e7d9 100644 --- a/source/blender/gpu/GPU_shader_shared_utils.h +++ b/source/blender/gpu/GPU_shader_shared_utils.h @@ -41,21 +41,25 @@ # define floorf floor # define ceilf ceil # define sqrtf sqrt +# define expf exp -# define float2 vec2 -# define float3 vec3 -# define float4 vec4 -# define float4x4 mat4 -# define int2 ivec2 -# define int3 ivec3 -# define int4 ivec4 -# define uint2 uvec2 -# define uint3 uvec3 -# define uint4 uvec4 # define bool1 bool -# define bool2 bvec2 -# define bool3 bvec3 -# define bool4 bvec4 +/* Type name collision with Metal shading language - These type-names are already defined. */ +# ifndef GPU_METAL +# define float2 vec2 +# define float3 vec3 +# define float4 vec4 +# define float4x4 mat4 +# define int2 ivec2 +# define int3 ivec3 +# define int4 ivec4 +# define uint2 uvec2 +# define uint3 uvec3 +# define uint4 uvec4 +# define bool2 bvec2 +# define bool3 bvec3 +# define bool4 bvec4 +# endif #else /* C / C++ */ # pragma once diff --git a/source/blender/gpu/GPU_storage_buffer.h b/source/blender/gpu/GPU_storage_buffer.h index ca6a848786b..8837a7c7647 100644 --- a/source/blender/gpu/GPU_storage_buffer.h +++ b/source/blender/gpu/GPU_storage_buffer.h @@ -48,6 +48,13 @@ void GPU_storagebuf_clear(GPUStorageBuf *ssbo, void GPU_storagebuf_clear_to_zero(GPUStorageBuf *ssbo); /** + * Read back content of the buffer to CPU for inspection. + * Slow! Only use for inspection / debugging. + * NOTE: Not synchronized. Use appropriate barrier before reading. + */ +void GPU_storagebuf_read(GPUStorageBuf *ssbo, void *data); + +/** * \brief Copy a part of a vertex buffer to a storage buffer. * * \param ssbo: destination storage buffer diff --git a/source/blender/gpu/GPU_texture.h b/source/blender/gpu/GPU_texture.h index 5bd20b7be98..8b54f4c9822 100644 --- a/source/blender/gpu/GPU_texture.h +++ b/source/blender/gpu/GPU_texture.h @@ -49,7 +49,12 @@ typedef enum eGPUSamplerState { * #GPU_SAMPLER_MAX is not a valid enum value, but only a limit. * It also creates a bad mask for the `NOT` operator in #ENUM_OPERATORS. */ +#ifdef __cplusplus +static constexpr eGPUSamplerState GPU_SAMPLER_MAX = eGPUSamplerState(GPU_SAMPLER_ICON + 1); +#else static const int GPU_SAMPLER_MAX = (GPU_SAMPLER_ICON + 1); +#endif + ENUM_OPERATORS(eGPUSamplerState, GPU_SAMPLER_ICON) #ifdef __cplusplus @@ -193,7 +198,7 @@ unsigned int GPU_texture_memory_usage_get(void); * \note \a data is expected to be float. If the \a format is not compatible with float data or if * the data is not in float format, use GPU_texture_update to upload the data with the right data * format. - * \a mips is the number of mip level to allocate. It must be >= 1. + * \a mip_len is the number of mip level to allocate. It must be >= 1. 
*/ GPUTexture *GPU_texture_create_1d( const char *name, int w, int mip_len, eGPUTextureFormat format, const float *data); @@ -331,6 +336,7 @@ int GPU_texture_orig_width(const GPUTexture *tex); int GPU_texture_orig_height(const GPUTexture *tex); void GPU_texture_orig_size_set(GPUTexture *tex, int w, int h); eGPUTextureFormat GPU_texture_format(const GPUTexture *tex); +const char *GPU_texture_format_description(eGPUTextureFormat texture_format); bool GPU_texture_array(const GPUTexture *tex); bool GPU_texture_cube(const GPUTexture *tex); bool GPU_texture_depth(const GPUTexture *tex); diff --git a/source/blender/gpu/GPU_vertex_buffer.h b/source/blender/gpu/GPU_vertex_buffer.h index 722ef878271..d3c1bd8145d 100644 --- a/source/blender/gpu/GPU_vertex_buffer.h +++ b/source/blender/gpu/GPU_vertex_buffer.h @@ -40,12 +40,20 @@ extern "C" { typedef enum { /* can be extended to support more types */ - GPU_USAGE_STREAM, - GPU_USAGE_STATIC, /* do not keep data in memory */ - GPU_USAGE_DYNAMIC, - GPU_USAGE_DEVICE_ONLY, /* Do not do host->device data transfers. */ + GPU_USAGE_STREAM = 0, + GPU_USAGE_STATIC = 1, /* do not keep data in memory */ + GPU_USAGE_DYNAMIC = 2, + GPU_USAGE_DEVICE_ONLY = 3, /* Do not do host->device data transfers. */ + + /** Extended usage flags. */ + /* Flag for vertex buffers used for textures. Skips additional padding/compaction to ensure + * format matches the texture exactly. Can be masked with other properties, and is stripped + * during VertBuf::init. */ + GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY = 1 << 3, } GPUUsageType; +ENUM_OPERATORS(GPUUsageType, GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY); + /** Opaque type hiding blender::gpu::VertBuf. */ typedef struct GPUVertBuf GPUVertBuf; diff --git a/source/blender/gpu/intern/gpu_backend.hh b/source/blender/gpu/intern/gpu_backend.hh index 6e07e6c3229..d2890efee72 100644 --- a/source/blender/gpu/intern/gpu_backend.hh +++ b/source/blender/gpu/intern/gpu_backend.hh @@ -30,6 +30,7 @@ class VertBuf; class GPUBackend { public: virtual ~GPUBackend() = default; + virtual void delete_resources() = 0; static GPUBackend *get(); diff --git a/source/blender/gpu/intern/gpu_batch.cc b/source/blender/gpu/intern/gpu_batch.cc index 32b117dac12..c871004deac 100644 --- a/source/blender/gpu/intern/gpu_batch.cc +++ b/source/blender/gpu/intern/gpu_batch.cc @@ -14,7 +14,6 @@ #include "GPU_batch.h" #include "GPU_batch_presets.h" -#include "GPU_matrix.h" #include "GPU_platform.h" #include "GPU_shader.h" @@ -201,6 +200,13 @@ bool GPU_batch_vertbuf_has(GPUBatch *batch, GPUVertBuf *verts) return false; } +void GPU_batch_resource_id_buf_set(GPUBatch *batch, GPUStorageBuf *resource_id_buf) +{ + BLI_assert(resource_id_buf); + batch->flag |= GPU_BATCH_DIRTY; + batch->resource_id_buf = resource_id_buf; +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -221,6 +227,30 @@ void GPU_batch_set_shader(GPUBatch *batch, GPUShader *shader) /** \name Drawing / Drawcall functions * \{ */ +void GPU_batch_draw_parameter_get( + GPUBatch *gpu_batch, int *r_v_count, int *r_v_first, int *r_base_index, int *r_i_count) +{ + Batch *batch = static_cast<Batch *>(gpu_batch); + + if (batch->elem) { + *r_v_count = batch->elem_()->index_len_get(); + *r_v_first = batch->elem_()->index_start_get(); + *r_base_index = batch->elem_()->index_base_get(); + } + else { + *r_v_count = batch->verts_(0)->vertex_len; + *r_v_first = 0; + *r_base_index = -1; + } + + int i_count = (batch->inst[0]) ? batch->inst_(0)->vertex_len : 1; + /* Meh. 
This is to be able to use different numbers of verts in instance VBO's. */ + if (batch->inst[1] != nullptr) { + i_count = min_ii(i_count, batch->inst_(1)->vertex_len); + } + *r_i_count = i_count; +} + void GPU_batch_draw(GPUBatch *batch) { GPU_shader_bind(batch->shader); @@ -271,6 +301,25 @@ void GPU_batch_draw_advanced( batch->draw(v_first, v_count, i_first, i_count); } +void GPU_batch_draw_indirect(GPUBatch *gpu_batch, GPUStorageBuf *indirect_buf, intptr_t offset) +{ + BLI_assert(Context::get()->shader != nullptr); + BLI_assert(indirect_buf != nullptr); + Batch *batch = static_cast<Batch *>(gpu_batch); + + batch->draw_indirect(indirect_buf, offset); +} + +void GPU_batch_multi_draw_indirect( + GPUBatch *gpu_batch, GPUStorageBuf *indirect_buf, int count, intptr_t offset, intptr_t stride) +{ + BLI_assert(Context::get()->shader != nullptr); + BLI_assert(indirect_buf != nullptr); + Batch *batch = static_cast<Batch *>(gpu_batch); + + batch->multi_draw_indirect(indirect_buf, count, offset, stride); +} + /** \} */ /* -------------------------------------------------------------------- */ diff --git a/source/blender/gpu/intern/gpu_batch_presets.c b/source/blender/gpu/intern/gpu_batch_presets.c index ab5e23a846c..4dff35c3633 100644 --- a/source/blender/gpu/intern/gpu_batch_presets.c +++ b/source/blender/gpu/intern/gpu_batch_presets.c @@ -11,15 +11,8 @@ #include "BLI_utildefines.h" #include "MEM_guardedalloc.h" -#include "DNA_userdef_types.h" - -#include "UI_interface.h" -#include "UI_resources.h" - #include "GPU_batch.h" -#include "GPU_batch_presets.h" /* own include */ -#include "GPU_batch_utils.h" -#include "GPU_context.h" +#include "GPU_batch_presets.h" /* Own include. */ /* -------------------------------------------------------------------- */ /** \name Local Structures @@ -139,7 +132,7 @@ GPUBatch *GPU_batch_preset_sphere_wire(int lod) /** \name Create Sphere (3D) * \{ */ -GPUBatch *gpu_batch_sphere(int lat_res, int lon_res) +static GPUBatch *gpu_batch_sphere(int lat_res, int lon_res) { const float lon_inc = 2 * M_PI / lon_res; const float lat_inc = M_PI / lat_res; diff --git a/source/blender/gpu/intern/gpu_batch_private.hh b/source/blender/gpu/intern/gpu_batch_private.hh index 23052f601d2..59646925d68 100644 --- a/source/blender/gpu/intern/gpu_batch_private.hh +++ b/source/blender/gpu/intern/gpu_batch_private.hh @@ -29,6 +29,11 @@ class Batch : public GPUBatch { virtual ~Batch() = default; virtual void draw(int v_first, int v_count, int i_first, int i_count) = 0; + virtual void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) = 0; + virtual void multi_draw_indirect(GPUStorageBuf *indirect_buf, + int count, + intptr_t offset, + intptr_t stride) = 0; /* Convenience casts. 
*/ IndexBuf *elem_() const diff --git a/source/blender/gpu/intern/gpu_batch_utils.c b/source/blender/gpu/intern/gpu_batch_utils.c index 43a47aab945..10a05fe90b9 100644 --- a/source/blender/gpu/intern/gpu_batch_utils.c +++ b/source/blender/gpu/intern/gpu_batch_utils.c @@ -8,7 +8,6 @@ #include "BLI_math.h" #include "BLI_polyfill_2d.h" -#include "BLI_rect.h" #include "BLI_sort_utils.h" #include "BLI_utildefines.h" diff --git a/source/blender/gpu/intern/gpu_buffers.c b/source/blender/gpu/intern/gpu_buffers.c index f7be2434fbf..8e3058b884d 100644 --- a/source/blender/gpu/intern/gpu_buffers.c +++ b/source/blender/gpu/intern/gpu_buffers.c @@ -14,26 +14,18 @@ #include "MEM_guardedalloc.h" -#include "BLI_alloca.h" -#include "BLI_array.h" #include "BLI_bitmap.h" #include "BLI_ghash.h" -#include "BLI_hash.h" -#include "BLI_math.h" #include "BLI_math_color.h" -#include "BLI_math_color_blend.h" -#include "BLI_string.h" #include "BLI_utildefines.h" #include "DNA_mesh_types.h" #include "DNA_meshdata_types.h" -#include "DNA_userdef_types.h" #include "BKE_DerivedMesh.h" #include "BKE_attribute.h" #include "BKE_ccg.h" #include "BKE_customdata.h" -#include "BKE_global.h" #include "BKE_mesh.h" #include "BKE_paint.h" #include "BKE_pbvh.h" @@ -219,19 +211,18 @@ static void gpu_pbvh_batch_init(GPU_PBVH_Buffers *buffers, GPUPrimType prim) * \{ */ static bool gpu_pbvh_is_looptri_visible(const MLoopTri *lt, - const MVert *mvert, + const bool *hide_vert, const MLoop *mloop, const int *sculpt_face_sets) { - return (!paint_is_face_hidden(lt, mvert, mloop) && sculpt_face_sets && + return (!paint_is_face_hidden(lt, hide_vert, mloop) && sculpt_face_sets && sculpt_face_sets[lt->poly] > SCULPT_FACE_SET_NONE); } void GPU_pbvh_mesh_buffers_update(PBVHGPUFormat *vbo_id, GPU_PBVH_Buffers *buffers, + const Mesh *mesh, const MVert *mvert, - const CustomData *vdata, - const CustomData *ldata, const float *vmask, const int *sculpt_face_sets, int face_sets_color_seed, @@ -242,23 +233,25 @@ void GPU_pbvh_mesh_buffers_update(PBVHGPUFormat *vbo_id, GPUAttrRef vcol_refs[MAX_GPU_ATTR]; GPUAttrRef cd_uvs[MAX_GPU_ATTR]; - Mesh me_query; - BKE_id_attribute_copy_domains_temp(ID_ME, vdata, NULL, ldata, NULL, NULL, &me_query.id); + const bool *hide_vert = (const bool *)CustomData_get_layer_named( + &mesh->vdata, CD_PROP_BOOL, ".hide_vert"); + const int *material_indices = (const int *)CustomData_get_layer_named( + &mesh->pdata, CD_PROP_INT32, "material_index"); - CustomDataLayer *actcol = BKE_id_attributes_active_color_get(&me_query.id); - eAttrDomain actcol_domain = actcol ? BKE_id_attribute_domain(&me_query.id, actcol) : + const CustomDataLayer *actcol = BKE_id_attributes_active_color_get(&mesh->id); + eAttrDomain actcol_domain = actcol ? 
BKE_id_attribute_domain(&mesh->id, actcol) : ATTR_DOMAIN_AUTO; - CustomDataLayer *rendercol = BKE_id_attributes_render_color_get(&me_query.id); + const CustomDataLayer *rendercol = BKE_id_attributes_render_color_get(&mesh->id); int totcol; if (update_flags & GPU_PBVH_BUFFERS_SHOW_VCOL) { totcol = gpu_pbvh_make_attr_offs(ATTR_DOMAIN_MASK_COLOR, CD_MASK_COLOR_ALL, - vdata, + &mesh->vdata, NULL, - ldata, + &mesh->ldata, NULL, vcol_refs, vbo_id->active_attrs_only, @@ -275,14 +268,14 @@ void GPU_pbvh_mesh_buffers_update(PBVHGPUFormat *vbo_id, CD_MASK_MLOOPUV, NULL, NULL, - ldata, + &mesh->ldata, NULL, cd_uvs, vbo_id->active_attrs_only, CD_MLOOPUV, ATTR_DOMAIN_CORNER, - get_active_layer(ldata, CD_MLOOPUV), - get_render_layer(ldata, CD_MLOOPUV)); + get_active_layer(&mesh->ldata, CD_MLOOPUV), + get_render_layer(&mesh->ldata, CD_MLOOPUV)); const bool show_mask = vmask && (update_flags & GPU_PBVH_BUFFERS_SHOW_MASK) != 0; const bool show_face_sets = sculpt_face_sets && @@ -316,13 +309,13 @@ void GPU_pbvh_mesh_buffers_update(PBVHGPUFormat *vbo_id, GPU_vertbuf_attr_get_raw_data(buffers->vert_buf, vbo_id->uv[uv_i], &uv_step); GPUAttrRef *ref = cd_uvs + uv_i; - CustomDataLayer *layer = ldata->layers + ref->layer_idx; + CustomDataLayer *layer = mesh->ldata.layers + ref->layer_idx; MLoopUV *muv = layer->data; for (uint i = 0; i < buffers->face_indices_len; i++) { const MLoopTri *lt = &buffers->looptri[buffers->face_indices[i]]; - if (!gpu_pbvh_is_looptri_visible(lt, mvert, buffers->mloop, sculpt_face_sets)) { + if (!gpu_pbvh_is_looptri_visible(lt, hide_vert, buffers->mloop, sculpt_face_sets)) { continue; } @@ -338,20 +331,20 @@ void GPU_pbvh_mesh_buffers_update(PBVHGPUFormat *vbo_id, for (int col_i = 0; col_i < totcol; col_i++) { GPU_vertbuf_attr_get_raw_data(buffers->vert_buf, vbo_id->col[col_i], &col_step); - MPropCol *pcol = NULL; - MLoopCol *mcol = NULL; + const MPropCol *pcol = NULL; + const MLoopCol *mcol = NULL; GPUAttrRef *ref = vcol_refs + col_i; - const CustomData *cdata = ref->domain == ATTR_DOMAIN_POINT ? vdata : ldata; - CustomDataLayer *layer = cdata->layers + ref->layer_idx; + const CustomData *cdata = ref->domain == ATTR_DOMAIN_POINT ? &mesh->vdata : &mesh->ldata; + const CustomDataLayer *layer = cdata->layers + ref->layer_idx; bool color_loops = ref->domain == ATTR_DOMAIN_CORNER; if (layer->type == CD_PROP_COLOR) { - pcol = (MPropCol *)layer->data; + pcol = (const MPropCol *)layer->data; } else { - mcol = (MLoopCol *)layer->data; + mcol = (const MLoopCol *)layer->data; } for (uint i = 0; i < buffers->face_indices_len; i++) { @@ -362,7 +355,7 @@ void GPU_pbvh_mesh_buffers_update(PBVHGPUFormat *vbo_id, buffers->mloop[lt->tri[2]].v, }; - if (!gpu_pbvh_is_looptri_visible(lt, mvert, buffers->mloop, sculpt_face_sets)) { + if (!gpu_pbvh_is_looptri_visible(lt, hide_vert, buffers->mloop, sculpt_face_sets)) { continue; } @@ -373,7 +366,7 @@ void GPU_pbvh_mesh_buffers_update(PBVHGPUFormat *vbo_id, ushort scol[4] = {USHRT_MAX, USHRT_MAX, USHRT_MAX, USHRT_MAX}; if (pcol) { - MPropCol *pcol2 = pcol + (color_loops ? loop_index : vtri[j]); + const MPropCol *pcol2 = pcol + (color_loops ? 
loop_index : vtri[j]); scol[0] = unit_float_to_ushort_clamp(pcol2->color[0]); scol[1] = unit_float_to_ushort_clamp(pcol2->color[1]); @@ -402,7 +395,7 @@ void GPU_pbvh_mesh_buffers_update(PBVHGPUFormat *vbo_id, buffers->mloop[lt->tri[2]].v, }; - if (!gpu_pbvh_is_looptri_visible(lt, mvert, buffers->mloop, sculpt_face_sets)) { + if (!gpu_pbvh_is_looptri_visible(lt, hide_vert, buffers->mloop, sculpt_face_sets)) { continue; } @@ -458,37 +451,39 @@ void GPU_pbvh_mesh_buffers_update(PBVHGPUFormat *vbo_id, /* Get material index from the first face of this buffer. */ const MLoopTri *lt = &buffers->looptri[buffers->face_indices[0]]; - const MPoly *mp = &buffers->mpoly[lt->poly]; - buffers->material_index = mp->mat_nr; + buffers->material_index = material_indices ? material_indices[lt->poly] : 0; buffers->show_overlay = !empty_mask || !default_face_set; buffers->mvert = mvert; } -GPU_PBVH_Buffers *GPU_pbvh_mesh_buffers_build(const MPoly *mpoly, - const MLoop *mloop, +GPU_PBVH_Buffers *GPU_pbvh_mesh_buffers_build(const Mesh *mesh, const MLoopTri *looptri, - const MVert *mvert, - const int *face_indices, const int *sculpt_face_sets, - const int face_indices_len, - const struct Mesh *mesh) + const int *face_indices, + const int face_indices_len) { GPU_PBVH_Buffers *buffers; int i, tottri; int tot_real_edges = 0; + const MPoly *polys = BKE_mesh_polys(mesh); + const MLoop *loops = BKE_mesh_loops(mesh); + buffers = MEM_callocN(sizeof(GPU_PBVH_Buffers), "GPU_Buffers"); + const bool *hide_vert = (bool *)CustomData_get_layer_named( + &mesh->vdata, CD_PROP_BOOL, ".hide_vert"); + /* smooth or flat for all */ - buffers->smooth = mpoly[looptri[face_indices[0]].poly].flag & ME_SMOOTH; + buffers->smooth = polys[looptri[face_indices[0]].poly].flag & ME_SMOOTH; buffers->show_overlay = false; /* Count the number of visible triangles */ for (i = 0, tottri = 0; i < face_indices_len; i++) { const MLoopTri *lt = &looptri[face_indices[i]]; - if (gpu_pbvh_is_looptri_visible(lt, mvert, mloop, sculpt_face_sets)) { + if (gpu_pbvh_is_looptri_visible(lt, hide_vert, loops, sculpt_face_sets)) { int r_edges[3]; BKE_mesh_looptri_get_real_edges(mesh, lt, r_edges); for (int j = 0; j < 3; j++) { @@ -503,8 +498,8 @@ GPU_PBVH_Buffers *GPU_pbvh_mesh_buffers_build(const MPoly *mpoly, if (tottri == 0) { buffers->tot_tri = 0; - buffers->mpoly = mpoly; - buffers->mloop = mloop; + buffers->mpoly = polys; + buffers->mloop = loops; buffers->looptri = looptri; buffers->face_indices = face_indices; buffers->face_indices_len = 0; @@ -521,7 +516,7 @@ GPU_PBVH_Buffers *GPU_pbvh_mesh_buffers_build(const MPoly *mpoly, const MLoopTri *lt = &looptri[face_indices[i]]; /* Skip hidden faces */ - if (!gpu_pbvh_is_looptri_visible(lt, mvert, mloop, sculpt_face_sets)) { + if (!gpu_pbvh_is_looptri_visible(lt, hide_vert, loops, sculpt_face_sets)) { continue; } @@ -543,8 +538,8 @@ GPU_PBVH_Buffers *GPU_pbvh_mesh_buffers_build(const MPoly *mpoly, buffers->tot_tri = tottri; - buffers->mpoly = mpoly; - buffers->mloop = mloop; + buffers->mpoly = polys; + buffers->mloop = loops; buffers->looptri = looptri; buffers->face_indices = face_indices; @@ -889,13 +884,14 @@ void GPU_pbvh_grid_buffers_update(PBVHGPUFormat *vbo_id, buffers->show_overlay = !empty_mask || !default_face_set; } -GPU_PBVH_Buffers *GPU_pbvh_grid_buffers_build(int totgrid, BLI_bitmap **grid_hidden) +GPU_PBVH_Buffers *GPU_pbvh_grid_buffers_build(int totgrid, BLI_bitmap **grid_hidden, bool smooth) { GPU_PBVH_Buffers *buffers; buffers = MEM_callocN(sizeof(GPU_PBVH_Buffers), "GPU_Buffers"); 
buffers->grid_hidden = grid_hidden; buffers->totgrid = totgrid; + buffers->smooth = smooth; buffers->show_overlay = false; @@ -1189,9 +1185,9 @@ GPU_PBVH_Buffers *GPU_pbvh_bmesh_buffers_build(bool smooth_shading) * Builds a list of attributes from a set of domains and a set of * customdata types. * - * \param active_only Returns only one item, a GPUAttrRef to active_layer - * \param active_layer CustomDataLayer to use for the active layer - * \param active_layer CustomDataLayer to use for the render layer + * \param active_only: Returns only one item, a #GPUAttrRef to active_layer. + * \param active_layer: #CustomDataLayer to use for the active layer. + * \param render_layer: #CustomDataLayer to use for the render layer. */ static int gpu_pbvh_make_attr_offs(eAttrDomainMask domain_mask, eCustomDataMask type_mask, @@ -1237,7 +1233,7 @@ static int gpu_pbvh_make_attr_offs(eAttrDomainMask domain_mask, continue; } - CustomDataLayer *cl = cdata->layers; + const CustomDataLayer *cl = cdata->layers; for (int i = 0; count < MAX_GPU_ATTR && i < cdata->totlayer; i++, cl++) { if ((CD_TYPE_AS_MASK(cl->type) & type_mask) && !(cl->flag & CD_FLAG_TEMPORARY)) { @@ -1253,9 +1249,7 @@ static int gpu_pbvh_make_attr_offs(eAttrDomainMask domain_mask, } } - /* ensure render layer is last - draw cache code seems to need this - */ + /* Ensure render layer is last, draw cache code seems to need this. */ for (int i = 0; i < count; i++) { GPUAttrRef *ref = r_cd_attrs + i; @@ -1326,12 +1320,12 @@ bool GPU_pbvh_attribute_names_update(PBVHType pbvh_type, BKE_id_attribute_copy_domains_temp(ID_ME, vdata, NULL, ldata, NULL, NULL, &me_query.id); - CustomDataLayer *active_color_layer = BKE_id_attributes_active_color_get(&me_query.id); - CustomDataLayer *render_color_layer = BKE_id_attributes_render_color_get(&me_query.id); + const CustomDataLayer *active_color_layer = BKE_id_attributes_active_color_get(&me_query.id); + const CustomDataLayer *render_color_layer = BKE_id_attributes_render_color_get(&me_query.id); eAttrDomain active_color_domain = active_color_layer ? BKE_id_attribute_domain(&me_query.id, active_color_layer) : - ATTR_DOMAIN_NUM; + ATTR_DOMAIN_POINT; GPUAttrRef vcol_layers[MAX_GPU_ATTR]; int totlayer = gpu_pbvh_make_attr_offs(ATTR_DOMAIN_MASK_COLOR, @@ -1381,7 +1375,7 @@ bool GPU_pbvh_attribute_names_update(PBVHType pbvh_type, vbo_id->totuv = 0; if (pbvh_type == PBVH_FACES && ldata && CustomData_has_layer(ldata, CD_MLOOPUV)) { GPUAttrRef uv_layers[MAX_GPU_ATTR]; - CustomDataLayer *active = NULL, *render = NULL; + const CustomDataLayer *active = NULL, *render = NULL; active = get_active_layer(ldata, CD_MLOOPUV); render = get_render_layer(ldata, CD_MLOOPUV); @@ -1407,7 +1401,7 @@ bool GPU_pbvh_attribute_names_update(PBVHType pbvh_type, vbo_id->uv[i] = GPU_vertformat_attr_add( &vbo_id->format, "uvs", GPU_COMP_F32, 2, GPU_FETCH_FLOAT); - CustomDataLayer *cl = ldata->layers + ref->layer_idx; + const CustomDataLayer *cl = ldata->layers + ref->layer_idx; bool is_active = ref->layer_idx == CustomData_get_active_layer_index(ldata, CD_MLOOPUV); DRW_cdlayer_attr_aliases_add(&vbo_id->format, "u", ldata, cl, cl == render, is_active); diff --git a/source/blender/gpu/intern/gpu_capabilities.cc b/source/blender/gpu/intern/gpu_capabilities.cc index eb69a1d2635..e584b757a05 100644 --- a/source/blender/gpu/intern/gpu_capabilities.cc +++ b/source/blender/gpu/intern/gpu_capabilities.cc @@ -8,7 +8,7 @@ * with checks for drivers and GPU support. */ -#include "DNA_userdef_types.h" +#include "DNA_userdef_types.h" /* For `U.glreslimit`.
*/ #include "GPU_capabilities.h" @@ -33,6 +33,11 @@ int GPU_max_texture_size() return GCaps.max_texture_size; } +int GPU_max_texture_3d_size(void) +{ + return GCaps.max_texture_3d_size; +} + int GPU_texture_size_with_limit(int res) { int size = GPU_max_texture_size(); @@ -115,6 +120,11 @@ const char *GPU_extension_get(int i) return GCaps.extension_get ? GCaps.extension_get(i) : "\0"; } +int GPU_max_samplers() +{ + return GCaps.max_samplers; +} + bool GPU_mip_render_workaround() { return GCaps.mip_render_workaround; @@ -161,6 +171,11 @@ bool GPU_shader_image_load_store_support() return GCaps.shader_image_load_store_support; } +bool GPU_shader_draw_parameters_support() +{ + return GCaps.shader_draw_parameters_support; +} + int GPU_max_shader_storage_buffer_bindings() { return GCaps.max_shader_storage_buffer_bindings; @@ -171,6 +186,16 @@ int GPU_max_compute_shader_storage_blocks() return GCaps.max_compute_shader_storage_blocks; } +int GPU_minimum_per_vertex_stride(void) +{ + return GCaps.minimum_per_vertex_stride; +} + +bool GPU_transform_feedback_support(void) +{ + return GCaps.transform_feedback_support; +} + /** \} */ /* -------------------------------------------------------------------- */ diff --git a/source/blender/gpu/intern/gpu_capabilities_private.hh b/source/blender/gpu/intern/gpu_capabilities_private.hh index a17dbe7f8e6..dadd14791e7 100644 --- a/source/blender/gpu/intern/gpu_capabilities_private.hh +++ b/source/blender/gpu/intern/gpu_capabilities_private.hh @@ -44,6 +44,7 @@ struct GPUCapabilities { bool compute_shader_support = false; bool shader_storage_buffer_objects_support = false; bool shader_image_load_store_support = false; + bool shader_draw_parameters_support = false; bool transform_feedback_support = false; /* OpenGL related workarounds. */ diff --git a/source/blender/gpu/intern/gpu_codegen.cc b/source/blender/gpu/intern/gpu_codegen.cc index a5ba0949a83..0102b8db5b2 100644 --- a/source/blender/gpu/intern/gpu_codegen.cc +++ b/source/blender/gpu/intern/gpu_codegen.cc @@ -12,8 +12,6 @@ #include "DNA_customdata_types.h" #include "DNA_image_types.h" -#include "BLI_blenlib.h" -#include "BLI_dynstr.h" #include "BLI_ghash.h" #include "BLI_hash_mm2a.h" #include "BLI_link_utils.h" @@ -23,7 +21,6 @@ #include "PIL_time.h" #include "BKE_material.h" -#include "BKE_world.h" #include "GPU_capabilities.h" #include "GPU_material.h" @@ -35,7 +32,6 @@ #include "BLI_vector.hh" #include "gpu_codegen.h" -#include "gpu_material_library.h" #include "gpu_node_graph.h" #include "gpu_shader_create_info.hh" #include "gpu_shader_dependency_private.h" @@ -56,16 +52,19 @@ using namespace blender::gpu::shader; */ struct GPUCodegenCreateInfo : ShaderCreateInfo { struct NameBuffer { + using NameEntry = std::array<char, 32>; + /** Duplicate attribute names to avoid reference the GPUNodeGraph directly. 
*/ char attr_names[16][GPU_MAX_SAFE_ATTR_NAME + 1]; char var_names[16][8]; - blender::Vector<std::array<char, 32>, 16> sampler_names; + blender::Vector<std::unique_ptr<NameEntry>, 16> sampler_names; /* Returns the appended name memory location */ const char *append_sampler_name(const char name[32]) { - auto index = sampler_names.append_and_get_index(std::array<char, 32>()); - char *name_buffer = sampler_names[index].data(); + auto index = sampler_names.size(); + sampler_names.append(std::make_unique<NameEntry>()); + char *name_buffer = sampler_names[index]->data(); memcpy(name_buffer, name, 32); return name_buffer; } @@ -209,9 +208,10 @@ static std::ostream &operator<<(std::ostream &stream, const GPUConstant *input) stream << input->type << "("; for (int i = 0; i < input->type; i++) { char formated_float[32]; - /* Print with the maximum precision for single precision float using scientific notation. - * See https://stackoverflow.com/questions/16839658/#answer-21162120 */ - SNPRINTF(formated_float, "%.9g", input->vec[i]); + /* Use uint representation to allow exact same bit pattern even if NaN. This is because we can + * pass UINTs as floats for constants. */ + const uint32_t *uint_vec = reinterpret_cast<const uint32_t *>(input->vec); + SNPRINTF(formated_float, "uintBitsToFloat(%uu)", uint_vec[i]); stream << formated_float; if (i < input->type - 1) { stream << ", "; @@ -260,6 +260,7 @@ class GPUCodegen { MEM_SAFE_FREE(output.volume); MEM_SAFE_FREE(output.thickness); MEM_SAFE_FREE(output.displacement); + MEM_SAFE_FREE(output.composite); MEM_SAFE_FREE(output.material_functions); delete create_info; BLI_freelistN(&ubo_inputs_); @@ -281,6 +282,7 @@ class GPUCodegen { void node_serialize(std::stringstream &eval_ss, const GPUNode *node); char *graph_serialize(eGPUNodeTag tree_tag, GPUNodeLink *output_link); + char *graph_serialize(eGPUNodeTag tree_tag); static char *extract_c_str(std::stringstream &stream) { @@ -303,7 +305,7 @@ void GPUCodegen::generate_attribs() info.vertex_out(iface); /* Input declaration, loading / assignment to interface and geometry shader passthrough. */ - std::stringstream decl_ss, iface_ss, load_ss; + std::stringstream load_ss; int slot = 15; LISTBASE_FOREACH (GPUMaterialAttribute *, attr, &graph.attributes) { @@ -352,24 +354,43 @@ void GPUCodegen::generate_resources() { GPUCodegenCreateInfo &info = *create_info; + /* Ref. T98190: Defines are optimizations for old compilers. + * Might become unnecessary with EEVEE-Next. */ + if (GPU_material_flag_get(&mat, GPU_MATFLAG_PRINCIPLED_CLEARCOAT)) { + info.define("PRINCIPLED_CLEARCOAT"); + } + if (GPU_material_flag_get(&mat, GPU_MATFLAG_PRINCIPLED_METALLIC)) { + info.define("PRINCIPLED_METALLIC"); + } + if (GPU_material_flag_get(&mat, GPU_MATFLAG_PRINCIPLED_DIELECTRIC)) { + info.define("PRINCIPLED_DIELECTRIC"); + } + if (GPU_material_flag_get(&mat, GPU_MATFLAG_PRINCIPLED_GLASS)) { + info.define("PRINCIPLED_GLASS"); + } + if (GPU_material_flag_get(&mat, GPU_MATFLAG_PRINCIPLED_ANY)) { + info.define("PRINCIPLED_ANY"); + } + std::stringstream ss; /* Textures. 
*/ + int slot = 0; LISTBASE_FOREACH (GPUMaterialTexture *, tex, &graph.textures) { if (tex->colorband) { const char *name = info.name_buffer.append_sampler_name(tex->sampler_name); - info.sampler(0, ImageType::FLOAT_1D_ARRAY, name, Frequency::BATCH); + info.sampler(slot++, ImageType::FLOAT_1D_ARRAY, name, Frequency::BATCH); } else if (tex->tiled_mapping_name[0] != '\0') { const char *name = info.name_buffer.append_sampler_name(tex->sampler_name); - info.sampler(0, ImageType::FLOAT_2D_ARRAY, name, Frequency::BATCH); + info.sampler(slot++, ImageType::FLOAT_2D_ARRAY, name, Frequency::BATCH); const char *name_mapping = info.name_buffer.append_sampler_name(tex->tiled_mapping_name); - info.sampler(0, ImageType::FLOAT_1D_ARRAY, name_mapping, Frequency::BATCH); + info.sampler(slot++, ImageType::FLOAT_1D_ARRAY, name_mapping, Frequency::BATCH); } else { const char *name = info.name_buffer.append_sampler_name(tex->sampler_name); - info.sampler(0, ImageType::FLOAT_2D, name, Frequency::BATCH); + info.sampler(slot++, ImageType::FLOAT_2D, name, Frequency::BATCH); } } @@ -382,7 +403,7 @@ void GPUCodegen::generate_resources() } ss << "};\n\n"; - info.uniform_buf(0, "NodeTree", GPU_UBO_BLOCK_NAME, Frequency::BATCH); + info.uniform_buf(1, "NodeTree", GPU_UBO_BLOCK_NAME, Frequency::BATCH); } if (!BLI_listbase_is_empty(&graph.uniform_attrs.list)) { @@ -394,7 +415,7 @@ void GPUCodegen::generate_resources() /* TODO(fclem): Use the macro for length. Currently not working for EEVEE. */ /* DRW_RESOURCE_CHUNK_LEN = 512 */ - info.uniform_buf(0, "UniformAttrs", GPU_ATTRIBUTE_UBO_BLOCK_NAME "[512]", Frequency::BATCH); + info.uniform_buf(2, "UniformAttrs", GPU_ATTRIBUTE_UBO_BLOCK_NAME "[512]", Frequency::BATCH); } info.typedef_source_generated = ss.str(); @@ -501,6 +522,19 @@ char *GPUCodegen::graph_serialize(eGPUNodeTag tree_tag, GPUNodeLink *output_link return eval_c_str; } +char *GPUCodegen::graph_serialize(eGPUNodeTag tree_tag) +{ + std::stringstream eval_ss; + LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) { + if (node->tag & tree_tag) { + node_serialize(eval_ss, node); + } + } + char *eval_c_str = extract_c_str(eval_ss); + BLI_hash_mm2a_add(&hm2a_, (uchar *)eval_c_str, eval_ss.str().size()); + return eval_c_str; +} + void GPUCodegen::generate_uniform_buffer() { /* Extract uniform inputs. */ @@ -540,6 +574,9 @@ void GPUCodegen::generate_graphs() output.volume = graph_serialize(GPU_NODE_TAG_VOLUME, graph.outlink_volume); output.displacement = graph_serialize(GPU_NODE_TAG_DISPLACEMENT, graph.outlink_displacement); output.thickness = graph_serialize(GPU_NODE_TAG_THICKNESS, graph.outlink_thickness); + if (!BLI_listbase_is_empty(&graph.outlink_compositor)) { + output.composite = graph_serialize(GPU_NODE_TAG_COMPOSITOR); + } if (!BLI_listbase_is_empty(&graph.material_functions)) { std::stringstream eval_ss; @@ -570,9 +607,10 @@ GPUPass *GPU_generate_pass(GPUMaterial *material, GPUCodegenCallbackFn finalize_source_cb, void *thunk) { - /* Prune the unused nodes and extract attributes before compiling so the - * generated VBOs are ready to accept the future shader. */ gpu_node_graph_prune_unused(graph); + + /* Extract attributes before compiling so the generated VBOs are ready to accept the future + * shader. 
*/ gpu_node_graph_finalize_uniform_attrs(graph); GPUCodegen codegen(material, graph); diff --git a/source/blender/gpu/intern/gpu_compute.cc b/source/blender/gpu/intern/gpu_compute.cc index b45cf8211cb..277f6d22280 100644 --- a/source/blender/gpu/intern/gpu_compute.cc +++ b/source/blender/gpu/intern/gpu_compute.cc @@ -7,7 +7,6 @@ #include "GPU_compute.h" #include "gpu_backend.hh" -#include "gpu_storage_buffer_private.hh" void GPU_compute_dispatch(GPUShader *shader, uint groups_x_len, diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc index c6eaf7defdc..bcc418169b7 100644 --- a/source/blender/gpu/intern/gpu_context.cc +++ b/source/blender/gpu/intern/gpu_context.cc @@ -23,12 +23,11 @@ #include "GPU_context.h" #include "GPU_framebuffer.h" -#include "GHOST_C-api.h" - #include "gpu_backend.hh" #include "gpu_batch_private.hh" #include "gpu_context_private.hh" #include "gpu_matrix_private.h" +#include "gpu_private.h" #ifdef WITH_OPENGL_BACKEND # include "gl_backend.hh" @@ -45,17 +44,27 @@ using namespace blender::gpu; static thread_local Context *active_ctx = nullptr; +static std::mutex backend_users_mutex; +static int num_backend_users = 0; + +static void gpu_backend_create(); +static void gpu_backend_discard(); + /* -------------------------------------------------------------------- */ /** \name gpu::Context methods * \{ */ namespace blender::gpu { +int Context::context_counter = 0; Context::Context() { thread_ = pthread_self(); is_active_ = false; matrix_state = GPU_matrix_state_create(); + + context_id = Context::context_counter; + Context::context_counter++; } Context::~Context() @@ -87,9 +96,13 @@ Context *Context::get() GPUContext *GPU_context_create(void *ghost_window) { - if (GPUBackend::get() == nullptr) { - /* TODO: move where it make sense. */ - GPU_backend_init(GPU_BACKEND_OPENGL); + { + std::scoped_lock lock(backend_users_mutex); + if (num_backend_users == 0) { + /* Automatically create backend when first context is created. */ + gpu_backend_create(); + } + num_backend_users++; } Context *ctx = GPUBackend::get()->context_alloc(ghost_window); @@ -103,6 +116,16 @@ void GPU_context_discard(GPUContext *ctx_) Context *ctx = unwrap(ctx_); delete ctx; active_ctx = nullptr; + + { + std::scoped_lock lock(backend_users_mutex); + num_backend_users--; + BLI_assert(num_backend_users >= 0); + if (num_backend_users == 0) { + /* Discard backend when last context is discarded. 
*/ + gpu_backend_discard(); + } + } } void GPU_context_active_set(GPUContext *ctx_) @@ -125,6 +148,22 @@ GPUContext *GPU_context_active_get() return wrap(Context::get()); } +void GPU_context_begin_frame(GPUContext *ctx) +{ + blender::gpu::Context *_ctx = unwrap(ctx); + if (_ctx) { + _ctx->begin_frame(); + } +} + +void GPU_context_end_frame(GPUContext *ctx) +{ + blender::gpu::Context *_ctx = unwrap(ctx); + if (_ctx) { + _ctx->end_frame(); + } +} + /* -------------------------------------------------------------------- */ /** \name Main context global mutex * @@ -177,11 +216,12 @@ void GPU_render_step() /** \name Backend selection * \{ */ -static GPUBackend *g_backend; +static const eGPUBackendType g_backend_type = GPU_BACKEND_OPENGL; +static GPUBackend *g_backend = nullptr; -bool GPU_backend_supported(eGPUBackendType type) +bool GPU_backend_supported(void) { - switch (type) { + switch (g_backend_type) { case GPU_BACKEND_OPENGL: #ifdef WITH_OPENGL_BACKEND return true; @@ -200,12 +240,12 @@ bool GPU_backend_supported(void) } } -void GPU_backend_init(eGPUBackendType backend_type) +static void gpu_backend_create() { BLI_assert(g_backend == nullptr); - BLI_assert(GPU_backend_supported(backend_type)); + BLI_assert(GPU_backend_supported()); - switch (backend_type) { + switch (g_backend_type) { #ifdef WITH_OPENGL_BACKEND case GPU_BACKEND_OPENGL: g_backend = new GLBackend; @@ -222,10 +262,15 @@ } } -void GPU_backend_exit() +void gpu_backend_delete_resources() +{ + BLI_assert(g_backend); + g_backend->delete_resources(); +} + +void gpu_backend_discard() { - /* TODO: assert no resource left. Currently UI textures are still not freed in their context - * correctly. */ + /* TODO: assert no resource left. */ delete g_backend; g_backend = nullptr; } diff --git a/source/blender/gpu/intern/gpu_context_private.hh b/source/blender/gpu/intern/gpu_context_private.hh index af9791fde88..2217e5262ed 100644 --- a/source/blender/gpu/intern/gpu_context_private.hh +++ b/source/blender/gpu/intern/gpu_context_private.hh @@ -28,11 +28,11 @@ namespace blender::gpu { class Context { public: /** State management */ - Shader *shader = NULL; - FrameBuffer *active_fb = NULL; - GPUMatrixState *matrix_state = NULL; - StateManager *state_manager = NULL; - Immediate *imm = NULL; + Shader *shader = nullptr; + FrameBuffer *active_fb = nullptr; + GPUMatrixState *matrix_state = nullptr; + StateManager *state_manager = nullptr; + Immediate *imm = nullptr; /** * All 4 window frame-buffers. * @@ -41,18 +41,26 @@ class Context { * Front frame-buffers contain (in principle, but not always) the last frame color. * Default frame-buffer is back_left. */ - FrameBuffer *back_left = NULL; - FrameBuffer *front_left = NULL; - FrameBuffer *back_right = NULL; - FrameBuffer *front_right = NULL; + FrameBuffer *back_left = nullptr; + FrameBuffer *front_left = nullptr; + FrameBuffer *back_right = nullptr; + FrameBuffer *front_right = nullptr; DebugStack debug_stack; + /* GPUContext counter used to assign a unique ID to each GPUContext. + * NOTE(Metal): This is required by the Metal Backend, as a bug exists in the global OS shader + * cache wherein compilation of identical source from two distinct threads can result in an + * invalid cache collision, resulting in a broken shader object. Appending the unique context ID + * onto compiled sources ensures the source hashes are different.
*/ + static int context_counter; + int context_id = 0; + protected: /** Thread on which this context is active. */ pthread_t thread_; bool is_active_; - /** Avoid including GHOST headers. Can be NULL for off-screen contexts. */ + /** Avoid including GHOST headers. Can be nullptr for off-screen contexts. */ void *ghost_window_; public: @@ -63,6 +71,8 @@ class Context { virtual void activate() = 0; virtual void deactivate() = 0; + virtual void begin_frame() = 0; + virtual void end_frame() = 0; /* Will push all pending commands to the GPU. */ virtual void flush() = 0; diff --git a/source/blender/gpu/intern/gpu_drawlist.cc b/source/blender/gpu/intern/gpu_drawlist.cc index b5b8d2f90bc..e1699bd0036 100644 --- a/source/blender/gpu/intern/gpu_drawlist.cc +++ b/source/blender/gpu/intern/gpu_drawlist.cc @@ -7,9 +7,6 @@ * Implementation of Multi Draw Indirect. */ -#include "MEM_guardedalloc.h" - -#include "GPU_batch.h" #include "GPU_drawlist.h" #include "gpu_backend.hh" diff --git a/source/blender/gpu/intern/gpu_framebuffer.cc b/source/blender/gpu/intern/gpu_framebuffer.cc index fb3c9549f18..8d93e49d588 100644 --- a/source/blender/gpu/intern/gpu_framebuffer.cc +++ b/source/blender/gpu/intern/gpu_framebuffer.cc @@ -7,7 +7,6 @@ #include "MEM_guardedalloc.h" -#include "BLI_blenlib.h" #include "BLI_math_base.h" #include "BLI_utildefines.h" @@ -18,7 +17,6 @@ #include "gpu_backend.hh" #include "gpu_context_private.hh" -#include "gpu_private.h" #include "gpu_texture_private.hh" #include "gpu_framebuffer_private.hh" @@ -126,6 +124,43 @@ void FrameBuffer::attachment_remove(GPUAttachmentType type) dirty_attachments_ = true; } +void FrameBuffer::load_store_config_array(const GPULoadStore *load_store_actions, uint actions_len) +{ + /* Follows attachment structure of GPU_framebuffer_config_array/GPU_framebuffer_ensure_config */ + const GPULoadStore &depth_action = load_store_actions[0]; + Span<GPULoadStore> color_attachments(load_store_actions + 1, actions_len - 1); + + if (this->attachments_[GPU_FB_DEPTH_STENCIL_ATTACHMENT].tex) { + this->attachment_set_loadstore_op( + GPU_FB_DEPTH_STENCIL_ATTACHMENT, depth_action.load_action, depth_action.store_action); + } + if (this->attachments_[GPU_FB_DEPTH_ATTACHMENT].tex) { + this->attachment_set_loadstore_op( + GPU_FB_DEPTH_ATTACHMENT, depth_action.load_action, depth_action.store_action); + } + + GPUAttachmentType type = GPU_FB_COLOR_ATTACHMENT0; + for (const GPULoadStore &actions : color_attachments) { + if (this->attachments_[type].tex) { + this->attachment_set_loadstore_op(type, actions.load_action, actions.store_action); + } + ++type; + } +} + +unsigned int FrameBuffer::get_bits_per_pixel() +{ + unsigned int total_bits = 0; + for (GPUAttachment &attachment : attachments_) { + Texture *tex = reinterpret_cast<Texture *>(attachment.tex); + if (tex != nullptr) { + int bits = to_bytesize(tex->format_get()) * to_component_len(tex->format_get()); + total_bits += bits; + } + } + return total_bits; +} + void FrameBuffer::recursive_downsample(int max_lvl, void (*callback)(void *userData, int level), void *userData) @@ -151,10 +186,21 @@ void FrameBuffer::recursive_downsample(int max_lvl, attachment.mip = mip_lvl; } } + /* Update the internal attachments and viewport size. */ dirty_attachments_ = true; this->bind(true); + /* Optimize load-store state. 
*/ + GPUAttachmentType type = GPU_FB_DEPTH_ATTACHMENT; + for (GPUAttachment &attachment : attachments_) { + Texture *tex = reinterpret_cast<Texture *>(attachment.tex); + if (tex != nullptr) { + this->attachment_set_loadstore_op(type, GPU_LOADACTION_DONT_CARE, GPU_STOREACTION_STORE); + } + ++type; + } + callback(userData, mip_lvl); } @@ -200,6 +246,18 @@ void GPU_framebuffer_bind(GPUFrameBuffer *gpu_fb) unwrap(gpu_fb)->bind(enable_srgb); } +void GPU_framebuffer_bind_loadstore(GPUFrameBuffer *gpu_fb, + const GPULoadStore *load_store_actions, + uint actions_len) +{ + /* Bind */ + GPU_framebuffer_bind(gpu_fb); + + /* Update load store */ + FrameBuffer *fb = unwrap(gpu_fb); + fb->load_store_config_array(load_store_actions, actions_len); +} + void GPU_framebuffer_bind_no_srgb(GPUFrameBuffer *gpu_fb) { const bool enable_srgb = false; diff --git a/source/blender/gpu/intern/gpu_framebuffer_private.hh b/source/blender/gpu/intern/gpu_framebuffer_private.hh index d218662d17f..8cecc6b8b15 100644 --- a/source/blender/gpu/intern/gpu_framebuffer_private.hh +++ b/source/blender/gpu/intern/gpu_framebuffer_private.hh @@ -114,6 +114,10 @@ class FrameBuffer { eGPUDataFormat data_format, const void *clear_value) = 0; + virtual void attachment_set_loadstore_op(GPUAttachmentType type, + eGPULoadOp load_action, + eGPUStoreOp store_action) = 0; + virtual void read(eGPUFrameBufferBits planes, eGPUDataFormat format, const int area[4], @@ -128,12 +132,15 @@ class FrameBuffer { int dst_offset_x, int dst_offset_y) = 0; + void load_store_config_array(const GPULoadStore *load_store_actions, uint actions_len); + void attachment_set(GPUAttachmentType type, const GPUAttachment &new_attachment); void attachment_remove(GPUAttachmentType type); void recursive_downsample(int max_lvl, void (*callback)(void *userData, int level), void *userData); + uint get_bits_per_pixel(); inline void size_set(int width, int height) { diff --git a/source/blender/gpu/intern/gpu_immediate.cc b/source/blender/gpu/intern/gpu_immediate.cc index 69467e5b28a..3b4accf9cc5 100644 --- a/source/blender/gpu/intern/gpu_immediate.cc +++ b/source/blender/gpu/intern/gpu_immediate.cc @@ -18,7 +18,6 @@ #include "gpu_context_private.hh" #include "gpu_immediate_private.hh" #include "gpu_shader_private.hh" -#include "gpu_vertex_buffer_private.hh" #include "gpu_vertex_format_private.h" using namespace blender::gpu; @@ -132,15 +131,12 @@ static void wide_line_workaround_start(GPUPrimType prim_type) case GPU_SHADER_3D_CLIPPED_UNIFORM_COLOR: polyline_sh = GPU_SHADER_3D_POLYLINE_CLIPPED_UNIFORM_COLOR; break; - case GPU_SHADER_2D_UNIFORM_COLOR: case GPU_SHADER_3D_UNIFORM_COLOR: polyline_sh = GPU_SHADER_3D_POLYLINE_UNIFORM_COLOR; break; - case GPU_SHADER_2D_FLAT_COLOR: case GPU_SHADER_3D_FLAT_COLOR: polyline_sh = GPU_SHADER_3D_POLYLINE_FLAT_COLOR; break; - case GPU_SHADER_2D_SMOOTH_COLOR: case GPU_SHADER_3D_SMOOTH_COLOR: polyline_sh = GPU_SHADER_3D_POLYLINE_SMOOTH_COLOR; break; diff --git a/source/blender/gpu/intern/gpu_immediate_private.hh b/source/blender/gpu/intern/gpu_immediate_private.hh index 6c50fa01071..74ebbdc7ae3 100644 --- a/source/blender/gpu/intern/gpu_immediate_private.hh +++ b/source/blender/gpu/intern/gpu_immediate_private.hh @@ -19,7 +19,7 @@ namespace blender::gpu { class Immediate { public: /** Pointer to the mapped buffer data for the current vertex. */ - uchar *vertex_data = NULL; + uchar *vertex_data = nullptr; /** Current vertex index. */ uint vertex_idx = 0; /** Length of the buffer in vertices. 
*/ @@ -32,12 +32,12 @@ class Immediate { /** Current draw call specification. */ GPUPrimType prim_type = GPU_PRIM_NONE; GPUVertFormat vertex_format = {}; - GPUShader *shader = NULL; + GPUShader *shader = nullptr; /** Enforce strict vertex count (disabled when using #immBeginAtMost). */ bool strict_vertex_len = true; /** Batch in construction when using #immBeginBatch. */ - GPUBatch *batch = NULL; + GPUBatch *batch = nullptr; /** Wide Line workaround. */ diff --git a/source/blender/gpu/intern/gpu_immediate_util.c b/source/blender/gpu/intern/gpu_immediate_util.c index daefd57a5b3..743bc058b45 100644 --- a/source/blender/gpu/intern/gpu_immediate_util.c +++ b/source/blender/gpu/intern/gpu_immediate_util.c @@ -13,7 +13,6 @@ #include "BLI_utildefines.h" #include "GPU_immediate.h" -#include "GPU_immediate_util.h" #include "UI_resources.h" @@ -122,7 +121,7 @@ void immRecti_complete(int x1, int y1, int x2, int y2, const float color[4]) { GPUVertFormat *format = immVertexFormat(); uint pos = add_attr(format, "pos", GPU_COMP_I32, 2, GPU_FETCH_INT_TO_FLOAT); - immBindBuiltinProgram(GPU_SHADER_2D_UNIFORM_COLOR); + immBindBuiltinProgram(GPU_SHADER_3D_UNIFORM_COLOR); immUniformColor4fv(color); immRecti(pos, x1, y1, x2, y2); immUnbindProgram(); @@ -143,7 +142,7 @@ static void imm_draw_circle(GPUPrimType prim_type, int nsegments) { if (prim_type == GPU_PRIM_LINE_LOOP) { - /* Note(Metal/AMD): For small primitives, line list more efficient than line strip.. */ + /* NOTE(Metal/AMD): For small primitives, line list more efficient than line strip.. */ immBegin(GPU_PRIM_LINES, nsegments * 2); immVertex2f(shdr_pos, x + (radius_x * cosf(0.0f)), y + (radius_y * sinf(0.0f))); @@ -240,9 +239,9 @@ void imm_draw_circle_partial_wire_2d( } void imm_draw_circle_partial_wire_3d( - uint pos, float x, float y, float z, float rad, int nsegments, float start, float sweep) + uint pos, float x, float y, float z, float radius, int nsegments, float start, float sweep) { - imm_draw_circle_partial_3d(GPU_PRIM_LINE_STRIP, pos, x, y, z, rad, nsegments, start, sweep); + imm_draw_circle_partial_3d(GPU_PRIM_LINE_STRIP, pos, x, y, z, radius, nsegments, start, sweep); } static void imm_draw_disk_partial(GPUPrimType prim_type, @@ -334,7 +333,7 @@ static void imm_draw_circle_3D( GPUPrimType prim_type, uint pos, float x, float y, float radius, int nsegments) { if (prim_type == GPU_PRIM_LINE_LOOP) { - /* Note(Metal/AMD): For small primitives, line list more efficient than line strip. */ + /* NOTE(Metal/AMD): For small primitives, line list more efficient than line strip. */ immBegin(GPU_PRIM_LINES, nsegments * 2); const float angle = (float)(2 * M_PI) / (float)nsegments; @@ -387,7 +386,7 @@ void imm_draw_circle_fill_3d(uint pos, float x, float y, float radius, int nsegm void imm_draw_box_wire_2d(uint pos, float x1, float y1, float x2, float y2) { - /* Note(Metal/AMD): For small primitives, line list more efficient than line-strip. */ + /* NOTE(Metal/AMD): For small primitives, line list more efficient than line-strip. */ immBegin(GPU_PRIM_LINES, 8); immVertex2f(pos, x1, y1); immVertex2f(pos, x1, y2); @@ -406,7 +405,7 @@ void imm_draw_box_wire_2d(uint pos, float x1, float y1, float x2, float y2) void imm_draw_box_wire_3d(uint pos, float x1, float y1, float x2, float y2) { /* use this version when GPUVertFormat has a vec3 position */ - /* Note(Metal/AMD): For small primitives, line list more efficient than line-strip. */ + /* NOTE(Metal/AMD): For small primitives, line list more efficient than line-strip. 
*/ immBegin(GPU_PRIM_LINES, 8); immVertex3f(pos, x1, y1, 0.0f); immVertex3f(pos, x1, y2, 0.0f); diff --git a/source/blender/gpu/intern/gpu_index_buffer.cc b/source/blender/gpu/intern/gpu_index_buffer.cc index 146461d1dfb..3a66f547403 100644 --- a/source/blender/gpu/intern/gpu_index_buffer.cc +++ b/source/blender/gpu/intern/gpu_index_buffer.cc @@ -16,6 +16,8 @@ #include "gpu_index_buffer_private.hh" +#include "GPU_platform.h" + #include <cstring> #define KEEP_SINGLE_COPY 1 @@ -40,6 +42,28 @@ void GPU_indexbuf_init_ex(GPUIndexBufBuilder *builder, builder->index_min = UINT32_MAX; builder->index_max = 0; builder->prim_type = prim_type; + +#ifdef __APPLE__ + /* Only encode restart indices for restart-compatible primitive types. + * Resolves out-of-bounds read error on macOS. Using 0-index will ensure + * degenerative primitives when skipping primitives is required and will + * incur no additional performance cost for rendering. */ + if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL)) { + /* We will still use restart-indices for point primitives and then + * patch these during IndexBuf::init, as we cannot benefit from degenerative + * primitives to eliminate these. */ + builder->restart_index_value = (is_restart_compatible(prim_type) || + prim_type == GPU_PRIM_POINTS) ? + RESTART_INDEX : + 0; + } + else { + builder->restart_index_value = RESTART_INDEX; + } +#else + builder->restart_index_value = RESTART_INDEX; +#endif + builder->uses_restart_indices = false; builder->data = (uint *)MEM_callocN(builder->max_index_len * sizeof(uint), "GPUIndexBuf data"); } @@ -94,7 +118,8 @@ void GPU_indexbuf_add_primitive_restart(GPUIndexBufBuilder *builder) assert(builder->data != nullptr); assert(builder->index_len < builder->max_index_len); #endif - builder->data[builder->index_len++] = RESTART_INDEX; + builder->data[builder->index_len++] = builder->restart_index_value; + builder->uses_restart_indices = true; } void GPU_indexbuf_add_point_vert(GPUIndexBufBuilder *builder, uint v) @@ -186,8 +211,9 @@ void GPU_indexbuf_set_point_restart(GPUIndexBufBuilder *builder, uint elem) { BLI_assert(builder->prim_type == GPU_PRIM_POINTS); BLI_assert(elem < builder->max_index_len); - builder->data[elem++] = RESTART_INDEX; + builder->data[elem++] = builder->restart_index_value; builder->index_len = MAX2(builder->index_len, elem); + builder->uses_restart_indices = true; } void GPU_indexbuf_set_line_restart(GPUIndexBufBuilder *builder, uint elem) @@ -195,9 +221,10 @@ void GPU_indexbuf_set_line_restart(GPUIndexBufBuilder *builder, uint elem) BLI_assert(builder->prim_type == GPU_PRIM_LINES); BLI_assert((elem + 1) * 2 <= builder->max_index_len); uint idx = elem * 2; - builder->data[idx++] = RESTART_INDEX; - builder->data[idx++] = RESTART_INDEX; + builder->data[idx++] = builder->restart_index_value; + builder->data[idx++] = builder->restart_index_value; builder->index_len = MAX2(builder->index_len, idx); + builder->uses_restart_indices = true; } void GPU_indexbuf_set_tri_restart(GPUIndexBufBuilder *builder, uint elem) @@ -205,10 +232,11 @@ void GPU_indexbuf_set_tri_restart(GPUIndexBufBuilder *builder, uint elem) BLI_assert(builder->prim_type == GPU_PRIM_TRIS); BLI_assert((elem + 1) * 3 <= builder->max_index_len); uint idx = elem * 3; - builder->data[idx++] = RESTART_INDEX; - builder->data[idx++] = RESTART_INDEX; - builder->data[idx++] = RESTART_INDEX; + builder->data[idx++] = builder->restart_index_value; + builder->data[idx++] = builder->restart_index_value; + builder->data[idx++] = 
builder->restart_index_value; builder->index_len = MAX2(builder->index_len, idx); + builder->uses_restart_indices = true; } /** \} */ @@ -226,7 +254,12 @@ IndexBuf::~IndexBuf() } } -void IndexBuf::init(uint indices_len, uint32_t *indices, uint min_index, uint max_index) +void IndexBuf::init(uint indices_len, + uint32_t *indices, + uint min_index, + uint max_index, + GPUPrimType prim_type, + bool uses_restart_indices) { is_init_ = true; data_ = indices; @@ -234,6 +267,21 @@ void IndexBuf::init(uint indices_len, uint32_t *indices, uint min_index, uint ma index_len_ = indices_len; is_empty_ = min_index > max_index; + /* Patch index buffer to remove restart indices from + * non-restart-compatible primitive types. Restart indices + * are situationally added to selectively hide vertices. + * Metal does not support restart-indices for non-restart-compatible + * types, as such we should remove these indices. + * + * We only need to perform this for point primitives, as + * line primitives/triangle primitives can use index 0 for all + * vertices to create a degenerative primitive, where all + * vertices share the same index and skip rendering via HW + * culling. */ + if (prim_type == GPU_PRIM_POINTS && uses_restart_indices) { + this->strip_restart_indices(); + } + #if GPU_TRACK_INDEX_RANGE /* Everything remains 32 bit while building to keep things simple. * Find min/max after, then convert to smallest index type possible. */ @@ -243,7 +291,18 @@ void IndexBuf::init(uint indices_len, uint32_t *indices, uint min_index, uint ma if (range <= 0xFFFF) { index_type_ = GPU_INDEX_U16; - this->squeeze_indices_short(min_index, max_index); + bool do_clamp_indices = false; +# ifdef __APPLE__ + /* NOTE: For the Metal Backend, we use degenerative primitives to hide vertices + * which are not restart compatible. When this is done, we need to ensure + * that compressed index ranges clamp all index values within the valid + * range, rather than maximally clamping against the USHORT restart index + * value of 0xFFFFu, as this will cause an out-of-bounds read during + * vertex assembly. */ + do_clamp_indices = GPU_type_matches_ex( + GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_METAL); +# endif + this->squeeze_indices_short(min_index, max_index, prim_type, do_clamp_indices); } #endif } @@ -302,7 +361,10 @@ uint IndexBuf::index_range(uint *r_min, uint *r_max) return max_value - min_value; } -void IndexBuf::squeeze_indices_short(uint min_idx, uint max_idx) +void IndexBuf::squeeze_indices_short(uint min_idx, + uint max_idx, + GPUPrimType prim_type, + bool clamp_indices_in_range) { /* data will never be *larger* than builder->data... * converting in place to avoid extra allocation */ @@ -311,8 +373,22 @@ void IndexBuf::squeeze_indices_short(uint min_idx, uint max_idx) if (max_idx >= 0xFFFF) { index_base_ = min_idx; + /* NOTE: When using restart_index=0 for degenerative primitives indices, + * the compressed index will go below zero and wrap around when min_idx > 0. + * In order to ensure the resulting index is still within range, we instead + * clamp index to the maximum within the index range. + * + * `clamp_max_idx` represents the maximum possible index to clamp against. If primitive is + * restart-compatible, we can just clamp against the primitive-restart value, otherwise, we + * must assign to a valid index within the range. + * + * NOTE: For OpenGL we skip this by disabling clamping, as we still need to use + * restart index values for point primitives to disable rendering. 
*/ + uint16_t clamp_max_idx = (is_restart_compatible(prim_type) || !clamp_indices_in_range) ? + 0xFFFFu : + (max_idx - min_idx); for (uint i = 0; i < index_len_; i++) { - ushort_idx[i] = (uint16_t)MIN2(0xFFFF, uint_idx[i] - min_idx); + ushort_idx[i] = (uint16_t)MIN2(clamp_max_idx, uint_idx[i] - min_idx); } } else { @@ -363,7 +439,12 @@ void GPU_indexbuf_build_in_place(GPUIndexBufBuilder *builder, GPUIndexBuf *elem) BLI_assert(builder->data != nullptr); /* Transfer data ownership to GPUIndexBuf. * It will be uploaded upon first use. */ - unwrap(elem)->init(builder->index_len, builder->data, builder->index_min, builder->index_max); + unwrap(elem)->init(builder->index_len, + builder->data, + builder->index_min, + builder->index_max, + builder->prim_type, + builder->uses_restart_indices); builder->data = nullptr; } diff --git a/source/blender/gpu/intern/gpu_index_buffer_private.hh b/source/blender/gpu/intern/gpu_index_buffer_private.hh index 6ce62ae852e..4099d6641a6 100644 --- a/source/blender/gpu/intern/gpu_index_buffer_private.hh +++ b/source/blender/gpu/intern/gpu_index_buffer_private.hh @@ -59,7 +59,12 @@ class IndexBuf { IndexBuf(){}; virtual ~IndexBuf(); - void init(uint indices_len, uint32_t *indices, uint min_index, uint max_index); + void init(uint indices_len, + uint32_t *indices, + uint min_index, + uint max_index, + GPUPrimType prim_type, + bool uses_restart_indices); void init_subrange(IndexBuf *elem_src, uint start, uint length); void init_build_on_device(uint index_len); @@ -70,6 +75,14 @@ class IndexBuf { * They can lead to graphical glitches on some systems. (See T96892) */ return is_empty_ ? 0 : index_len_; } + uint32_t index_start_get() const + { + return index_start_; + } + uint32_t index_base_get() const + { + return index_base_; + } /* Return size in byte of the drawable data buffer range. Actual buffer size might be bigger. */ size_t size_get() const { @@ -91,8 +104,12 @@ class IndexBuf { virtual void update_sub(uint start, uint len, const void *data) = 0; private: - inline void squeeze_indices_short(uint min_idx, uint max_idx); + inline void squeeze_indices_short(uint min_idx, + uint max_idx, + GPUPrimType prim_type, + bool clamp_indices_in_range); inline uint index_range(uint *r_min, uint *r_max); + virtual void strip_restart_indices() = 0; }; /* Syntactic sugar. 
*/ diff --git a/source/blender/gpu/intern/gpu_init_exit.c b/source/blender/gpu/intern/gpu_init_exit.c index e97c9e9c829..34b355eefaf 100644 --- a/source/blender/gpu/intern/gpu_init_exit.c +++ b/source/blender/gpu/intern/gpu_init_exit.c @@ -6,15 +6,10 @@ */ #include "GPU_init_exit.h" /* interface */ -#include "BKE_global.h" #include "BLI_sys_types.h" #include "GPU_batch.h" -#include "GPU_buffers.h" -#include "GPU_context.h" -#include "GPU_immediate.h" #include "intern/gpu_codegen.h" -#include "intern/gpu_material_library.h" #include "intern/gpu_private.h" #include "intern/gpu_shader_create_info_private.hh" #include "intern/gpu_shader_dependency_private.h" @@ -60,6 +55,8 @@ void GPU_exit(void) gpu_shader_dependency_exit(); gpu_shader_create_info_exit(); + gpu_backend_delete_resources(); + initialized = false; } diff --git a/source/blender/gpu/intern/gpu_material.c b/source/blender/gpu/intern/gpu_material.c index 5d6651c3e3a..75066b21e7b 100644 --- a/source/blender/gpu/intern/gpu_material.c +++ b/source/blender/gpu/intern/gpu_material.c @@ -19,14 +19,11 @@ #include "BLI_listbase.h" #include "BLI_math.h" #include "BLI_string.h" -#include "BLI_string_utils.h" #include "BLI_utildefines.h" #include "BKE_main.h" #include "BKE_material.h" #include "BKE_node.h" -#include "BKE_scene.h" -#include "BKE_world.h" #include "NOD_shader.h" @@ -94,6 +91,8 @@ struct GPUMaterial { #ifndef NDEBUG char name[64]; +#else + char name[16]; #endif }; @@ -144,7 +143,7 @@ static void gpu_material_ramp_texture_build(GPUMaterial *mat) mat->coba_builder = NULL; } -static void gpu_material_free_single(GPUMaterial *material) +void GPU_material_free_single(GPUMaterial *material) { bool do_free = atomic_sub_and_fetch_uint32(&material->refcount, 1) == 0; if (!do_free) { @@ -176,7 +175,7 @@ void GPU_material_free(ListBase *gpumaterial) LISTBASE_FOREACH (LinkData *, link, gpumaterial) { GPUMaterial *material = link->data; DRW_deferred_shader_remove(material); - gpu_material_free_single(material); + GPU_material_free_single(material); } BLI_freelistN(gpumaterial); } @@ -196,6 +195,11 @@ GPUShader *GPU_material_get_shader(GPUMaterial *material) return material->pass ? GPU_pass_shader_get(material->pass) : NULL; } +const char *GPU_material_get_name(GPUMaterial *material) +{ + return material->name; +} + Material *GPU_material_get_material(GPUMaterial *material) { return material->ma; @@ -208,12 +212,7 @@ GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material) void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs) { -#ifndef NDEBUG - const char *name = material->name; -#else - const char *name = "Material"; -#endif - material->ubo = GPU_uniformbuf_create_from_list(inputs, name); + material->ubo = GPU_uniformbuf_create_from_list(inputs, material->name); } ListBase GPU_material_attributes(GPUMaterial *material) @@ -226,9 +225,9 @@ ListBase GPU_material_textures(GPUMaterial *material) return material->graph.textures; } -GPUUniformAttrList *GPU_material_uniform_attributes(GPUMaterial *material) +const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material) { - GPUUniformAttrList *attrs = &material->graph.uniform_attrs; + const GPUUniformAttrList *attrs = &material->graph.uniform_attrs; return attrs->count > 0 ? 
attrs : NULL; } @@ -541,6 +540,13 @@ void GPU_material_add_output_link_aov(GPUMaterial *material, GPUNodeLink *link, BLI_addtail(&material->graph.outlink_aovs, aov_link); } +void GPU_material_add_output_link_composite(GPUMaterial *material, GPUNodeLink *link) +{ + GPUNodeGraphOutputLink *compositor_link = MEM_callocN(sizeof(GPUNodeGraphOutputLink), __func__); + compositor_link->outlink = link; + BLI_addtail(&material->graph.outlink_compositor, compositor_link); +} + char *GPU_material_split_sub_function(GPUMaterial *material, eGPUType return_type, GPUNodeLink **link) @@ -668,11 +674,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene, mat->graph.used_libraries = BLI_gset_new( BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries"); mat->refcount = 1; -#ifndef NDEBUG STRNCPY(mat->name, name); -#else - UNUSED_VARS(name); -#endif if (is_lookdev) { mat->flag |= GPU_MATFLAG_LOOKDEV_HACK; } @@ -724,7 +726,7 @@ void GPU_material_acquire(GPUMaterial *mat) void GPU_material_release(GPUMaterial *mat) { - gpu_material_free_single(mat); + GPU_material_free_single(mat); } void GPU_material_compile(GPUMaterial *mat) @@ -775,3 +777,42 @@ void GPU_materials_free(Main *bmain) // BKE_world_defaults_free_gpu(); BKE_material_defaults_free_gpu(); } + +GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_function_cb, + GPUCodegenCallbackFn generate_code_function_cb, + void *thunk) +{ + /* Allocate a new material and its material graph, and initialize its reference count. */ + GPUMaterial *material = MEM_callocN(sizeof(GPUMaterial), "GPUMaterial"); + material->graph.used_libraries = BLI_gset_new( + BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries"); + material->refcount = 1; + + /* Construct the material graph by adding and linking the necessary GPU material nodes. */ + construct_function_cb(thunk, material); + + /* Create and initialize the texture storing color bands used by Ramp and Curve nodes. */ + gpu_material_ramp_texture_build(material); + + /* Lookup an existing pass in the cache or generate a new one. */ + material->pass = GPU_generate_pass(material, &material->graph, generate_code_function_cb, thunk); + + /* The pass already exists in the pass cache but its shader already failed to compile. */ + if (material->pass == NULL) { + material->status = GPU_MAT_FAILED; + gpu_node_graph_free(&material->graph); + return material; + } + + /* The pass already exists in the pass cache and its shader is already compiled. */ + GPUShader *shader = GPU_pass_shader_get(material->pass); + if (shader != NULL) { + material->status = GPU_MAT_SUCCESS; + gpu_node_graph_free_nodes(&material->graph); + return material; + } + + /* The material was created successfully but still needs to be compiled. 
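Since the new entry point is easiest to understand from the caller's side, here is a hedged sketch of how it might be used. Only GPU_material_from_callbacks() and GPU_MAT_CREATED are taken from this patch; the callback signatures are inferred from the call sites above (the third parameter of the code-generation callback is an assumption), and GPU_material_status() is the existing status query.

#include "GPU_material.h"

/* Build the node graph: GPU_link()/GPU_stack_link() calls, and e.g.
 * GPU_material_add_output_link_composite(material, link) for the final output. */
static void construct_material(void *thunk, GPUMaterial *material)
{
  (void)thunk;
  (void)material;
}

/* Turn the generated code-gen output into a complete shader. The GPUCodegenOutput
 * parameter type is assumed here. */
static void generate_code(void *thunk, GPUMaterial *material, GPUCodegenOutput *codegen)
{
  (void)thunk;
  (void)material;
  (void)codegen;
}

static GPUMaterial *ensure_material(void)
{
  GPUMaterial *material = GPU_material_from_callbacks(construct_material, generate_code, NULL);
  if (GPU_material_status(material) == GPU_MAT_CREATED) {
    /* Pass-cache miss: the shader still needs to be compiled (possibly deferred). */
  }
  return material;
}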
*/ + material->status = GPU_MAT_CREATED; + return material; +} diff --git a/source/blender/gpu/intern/gpu_node_graph.c b/source/blender/gpu/intern/gpu_node_graph.c index 3c6a03c56d3..f82af7538b5 100644 --- a/source/blender/gpu/intern/gpu_node_graph.c +++ b/source/blender/gpu/intern/gpu_node_graph.c @@ -75,9 +75,26 @@ static void gpu_node_input_link(GPUNode *node, GPUNodeLink *link, const eGPUType if (STR_ELEM(name, "set_value", "set_rgb", "set_rgba") && (input->type == type)) { input = MEM_dupallocN(outnode->inputs.first); + + switch (input->source) { + case GPU_SOURCE_ATTR: + input->attr->users++; + break; + case GPU_SOURCE_UNIFORM_ATTR: + input->uniform_attr->users++; + break; + case GPU_SOURCE_TEX: + case GPU_SOURCE_TEX_TILED_MAPPING: + input->texture->users++; + break; + default: + break; + } + if (input->link) { input->link->users++; } + BLI_addtail(&node->inputs, input); return; } @@ -162,7 +179,7 @@ static const char *gpu_uniform_set_function_from_type(eNodeSocketDatatype type) * This is called for the input/output sockets that are not connected. */ static GPUNodeLink *gpu_uniformbuffer_link(GPUMaterial *mat, - bNode *node, + const bNode *node, GPUNodeStack *stack, const int index, const eNodeSocketInOut in_out) @@ -179,39 +196,25 @@ static GPUNodeLink *gpu_uniformbuffer_link(GPUMaterial *mat, BLI_assert(socket != NULL); BLI_assert(socket->in_out == in_out); - if ((socket->flag & SOCK_HIDE_VALUE) == 0) { - GPUNodeLink *link; - switch (socket->type) { - case SOCK_FLOAT: { - bNodeSocketValueFloat *socket_data = socket->default_value; - link = GPU_uniform(&socket_data->value); - break; - } - case SOCK_VECTOR: { - bNodeSocketValueVector *socket_data = socket->default_value; - link = GPU_uniform(socket_data->value); - break; - } - case SOCK_RGBA: { - bNodeSocketValueRGBA *socket_data = socket->default_value; - link = GPU_uniform(socket_data->value); - break; - } - default: - return NULL; - break; - } + if (socket->flag & SOCK_HIDE_VALUE) { + return NULL; + } - if (in_out == SOCK_IN) { - GPU_link(mat, gpu_uniform_set_function_from_type(socket->type), link, &stack->link); - } - return link; + if (!ELEM(socket->type, SOCK_FLOAT, SOCK_VECTOR, SOCK_RGBA)) { + return NULL; } - return NULL; + + GPUNodeLink *link = GPU_uniform(stack->vec); + + if (in_out == SOCK_IN) { + GPU_link(mat, gpu_uniform_set_function_from_type(socket->type), link, &stack->link); + } + + return link; } static void gpu_node_input_socket( - GPUMaterial *material, bNode *bnode, GPUNode *node, GPUNodeStack *sock, const int index) + GPUMaterial *material, const bNode *bnode, GPUNode *node, GPUNodeStack *sock, const int index) { if (sock->link) { gpu_node_input_link(node, sock->link, sock->type); @@ -289,7 +292,7 @@ struct GHash *GPU_uniform_attr_list_hash_new(const char *info) return BLI_ghash_new(uniform_attr_list_hash, uniform_attr_list_cmp, info); } -void GPU_uniform_attr_list_copy(GPUUniformAttrList *dest, GPUUniformAttrList *src) +void GPU_uniform_attr_list_copy(GPUUniformAttrList *dest, const GPUUniformAttrList *src) { dest->count = src->count; dest->hash_code = src->hash_code; @@ -317,24 +320,20 @@ void gpu_node_graph_finalize_uniform_attrs(GPUNodeGraph *graph) LISTBASE_FOREACH (GPUUniformAttr *, attr, &attrs->list) { attr->id = next_id++; - - attrs->hash_code ^= BLI_ghashutil_strhash_p(attr->name); - - if (attr->use_dupli) { - attrs->hash_code ^= BLI_ghashutil_uinthash(attr->id); - } + attrs->hash_code ^= BLI_ghashutil_uinthash(attr->hash_code + (1 << (attr->id + 1))); } } /* Attributes and Textures */ -static char 
attr_prefix_get(eCustomDataType type) +static char attr_prefix_get(GPUMaterialAttribute *attr) { - switch (type) { + if (attr->is_default_color) { + return 'c'; + } + switch (attr->type) { case CD_TANGENT: return 't'; - case CD_MCOL: - return 'c'; case CD_AUTO_FROM_NAME: return 'a'; case CD_HAIRLENGTH: @@ -353,7 +352,7 @@ static void attr_input_name(GPUMaterialAttribute *attr) STRNCPY(attr->input_name, "orco"); } else { - attr->input_name[0] = attr_prefix_get(attr->type); + attr->input_name[0] = attr_prefix_get(attr); attr->input_name[1] = '\0'; if (attr->name[0] != '\0') { /* XXX FIXME: see notes in mesh_render_data_create() */ @@ -365,21 +364,24 @@ static void attr_input_name(GPUMaterialAttribute *attr) /** Add a new varying attribute of given type and name. Returns NULL if out of slots. */ static GPUMaterialAttribute *gpu_node_graph_add_attribute(GPUNodeGraph *graph, eCustomDataType type, - const char *name) + const char *name, + const bool is_default_color) { /* Find existing attribute. */ int num_attributes = 0; GPUMaterialAttribute *attr = graph->attributes.first; for (; attr; attr = attr->next) { - if (attr->type == type && STREQ(attr->name, name)) { + if (attr->type == type && STREQ(attr->name, name) && + attr->is_default_color == is_default_color) { break; } num_attributes++; } /* Add new requested attribute if it's within GPU limits. */ - if (attr == NULL && num_attributes < GPU_MAX_ATTR) { + if (attr == NULL) { attr = MEM_callocN(sizeof(*attr), __func__); + attr->is_default_color = is_default_color; attr->type = type; STRNCPY(attr->name, name); attr_input_name(attr); @@ -413,7 +415,13 @@ static GPUUniformAttr *gpu_node_graph_add_uniform_attribute(GPUNodeGraph *graph, if (attr == NULL && attrs->count < GPU_MAX_UNIFORM_ATTR) { attr = MEM_callocN(sizeof(*attr), __func__); STRNCPY(attr->name, name); + { + char attr_name_esc[sizeof(attr->name) * 2]; + BLI_str_escape(attr_name_esc, attr->name, sizeof(attr_name_esc)); + SNPRINTF(attr->name_id_prop, "[\"%s\"]", attr_name_esc); + } attr->use_dupli = use_dupli; + attr->hash_code = BLI_ghashutil_strhash_p(attr->name) << 1 | (attr->use_dupli ? 0 : 1); attr->id = -1; BLI_addtail(&attrs->list, attr); attrs->count++; @@ -471,7 +479,7 @@ static GPUMaterialTexture *gpu_node_graph_add_texture(GPUNodeGraph *graph, GPUNodeLink *GPU_attribute(GPUMaterial *mat, const eCustomDataType type, const char *name) { GPUNodeGraph *graph = gpu_material_node_graph(mat); - GPUMaterialAttribute *attr = gpu_node_graph_add_attribute(graph, type, name); + GPUMaterialAttribute *attr = gpu_node_graph_add_attribute(graph, type, name, false); if (type == CD_ORCO) { /* OPTI: orco might be computed from local positions and needs object infos. 
*/ @@ -490,6 +498,21 @@ GPUNodeLink *GPU_attribute(GPUMaterial *mat, const eCustomDataType type, const c return link; } +GPUNodeLink *GPU_attribute_default_color(GPUMaterial *mat) +{ + GPUNodeGraph *graph = gpu_material_node_graph(mat); + GPUMaterialAttribute *attr = gpu_node_graph_add_attribute(graph, CD_AUTO_FROM_NAME, "", true); + if (attr == NULL) { + static const float zero_data[GPU_MAX_CONSTANT_DATA] = {0.0f}; + return GPU_constant(zero_data); + } + attr->is_default_color = true; + GPUNodeLink *link = gpu_node_link_create(); + link->link_type = GPU_NODE_LINK_ATTR; + link->attr = attr; + return link; +} + GPUNodeLink *GPU_attribute_with_default(GPUMaterial *mat, const eCustomDataType type, const char *name, @@ -502,16 +525,21 @@ GPUNodeLink *GPU_attribute_with_default(GPUMaterial *mat, return link; } -GPUNodeLink *GPU_uniform_attribute(GPUMaterial *mat, const char *name, bool use_dupli) +GPUNodeLink *GPU_uniform_attribute(GPUMaterial *mat, + const char *name, + bool use_dupli, + uint32_t *r_hash) { GPUNodeGraph *graph = gpu_material_node_graph(mat); GPUUniformAttr *attr = gpu_node_graph_add_uniform_attribute(graph, name, use_dupli); /* Dummy fallback if out of slots. */ if (attr == NULL) { + *r_hash = 0; static const float zero_data[GPU_MAX_CONSTANT_DATA] = {0.0f}; return GPU_constant(zero_data); } + *r_hash = attr->hash_code; GPUNodeLink *link = gpu_node_link_create(); link->link_type = GPU_NODE_LINK_UNIFORM_ATTR; @@ -630,7 +658,7 @@ bool GPU_link(GPUMaterial *mat, const char *name, ...) } static bool gpu_stack_link_v(GPUMaterial *material, - bNode *bnode, + const bNode *bnode, const char *name, GPUNodeStack *in, GPUNodeStack *out, @@ -702,7 +730,7 @@ static bool gpu_stack_link_v(GPUMaterial *material, } bool GPU_stack_link(GPUMaterial *material, - bNode *bnode, + const bNode *bnode, const char *name, GPUNodeStack *in, GPUNodeStack *out, @@ -716,14 +744,6 @@ bool GPU_stack_link(GPUMaterial *material, return valid; } -GPUNodeLink *GPU_uniformbuf_link_out(GPUMaterial *mat, - bNode *node, - GPUNodeStack *stack, - const int index) -{ - return gpu_uniformbuffer_link(mat, node, stack, index, SOCK_OUT); -} - /* Node Graph */ static void gpu_inputs_free(ListBase *inputs) @@ -784,6 +804,7 @@ void gpu_node_graph_free(GPUNodeGraph *graph) { BLI_freelistN(&graph->outlink_aovs); BLI_freelistN(&graph->material_functions); + BLI_freelistN(&graph->outlink_compositor); gpu_node_graph_free_nodes(graph); BLI_freelistN(&graph->textures); @@ -836,6 +857,9 @@ void gpu_node_graph_prune_unused(GPUNodeGraph *graph) LISTBASE_FOREACH (GPUNodeGraphFunctionLink *, funclink, &graph->material_functions) { gpu_nodes_tag(funclink->outlink, GPU_NODE_TAG_FUNCTION); } + LISTBASE_FOREACH (GPUNodeGraphOutputLink *, compositor_link, &graph->outlink_compositor) { + gpu_nodes_tag(compositor_link->outlink, GPU_NODE_TAG_COMPOSITOR); + } for (GPUNode *node = graph->nodes.first, *next = NULL; node; node = next) { next = node->next; diff --git a/source/blender/gpu/intern/gpu_node_graph.h b/source/blender/gpu/intern/gpu_node_graph.h index ae472d5b7aa..08ff8bbef58 100644 --- a/source/blender/gpu/intern/gpu_node_graph.h +++ b/source/blender/gpu/intern/gpu_node_graph.h @@ -59,6 +59,7 @@ typedef enum { GPU_NODE_TAG_THICKNESS = (1 << 3), GPU_NODE_TAG_AOV = (1 << 4), GPU_NODE_TAG_FUNCTION = (1 << 5), + GPU_NODE_TAG_COMPOSITOR = (1 << 6), } eGPUNodeTag; ENUM_OPERATORS(eGPUNodeTag, GPU_NODE_TAG_FUNCTION) @@ -158,6 +159,8 @@ typedef struct GPUNodeGraph { ListBase outlink_aovs; /* List of GPUNodeGraphFunctionLink */ ListBase 
material_functions; + /* List of GPUNodeGraphOutputLink */ + ListBase outlink_compositor; /* Requested attributes and textures. */ ListBase attributes; diff --git a/source/blender/gpu/intern/gpu_platform.cc b/source/blender/gpu/intern/gpu_platform.cc index d108dd468a0..f8e2c0fe6fc 100644 --- a/source/blender/gpu/intern/gpu_platform.cc +++ b/source/blender/gpu/intern/gpu_platform.cc @@ -79,11 +79,15 @@ void GPUPlatformGlobal::init(eGPUDeviceType gpu_device, this->driver = driver_type; this->support_level = gpu_support_level; - this->vendor = BLI_strdup(vendor_str); - this->renderer = BLI_strdup(renderer_str); - this->version = BLI_strdup(version_str); - this->support_key = create_key(gpu_support_level, vendor_str, renderer_str, version_str); - this->gpu_name = create_gpu_name(vendor_str, renderer_str, version_str); + const char *vendor = vendor_str ? vendor_str : "UNKNOWN"; + const char *renderer = renderer_str ? renderer_str : "UNKNOWN"; + const char *version = version_str ? version_str : "UNKNOWN"; + + this->vendor = BLI_strdup(vendor); + this->renderer = BLI_strdup(renderer); + this->version = BLI_strdup(version); + this->support_key = create_key(gpu_support_level, vendor, renderer, version); + this->gpu_name = create_gpu_name(vendor, renderer, version); this->backend = backend; } diff --git a/source/blender/gpu/intern/gpu_private.h b/source/blender/gpu/intern/gpu_private.h index a8ee5187d98..0e293302086 100644 --- a/source/blender/gpu/intern/gpu_private.h +++ b/source/blender/gpu/intern/gpu_private.h @@ -10,6 +10,10 @@ extern "C" { #endif +/* gpu_backend.cc */ + +void gpu_backend_delete_resources(void); + /* gpu_pbvh.c */ void gpu_pbvh_init(void); diff --git a/source/blender/gpu/intern/gpu_select.c b/source/blender/gpu/intern/gpu_select.c index ac33c5d5ca8..7afba20c2d9 100644 --- a/source/blender/gpu/intern/gpu_select.c +++ b/source/blender/gpu/intern/gpu_select.c @@ -12,12 +12,8 @@ #include "GPU_select.h" -#include "MEM_guardedalloc.h" - #include "BLI_rect.h" -#include "DNA_userdef_types.h" - #include "BLI_utildefines.h" #include "gpu_select_private.h" diff --git a/source/blender/gpu/intern/gpu_select_pick.c b/source/blender/gpu/intern/gpu_select_pick.c index 840201c8c97..b5b2d7fa1a5 100644 --- a/source/blender/gpu/intern/gpu_select_pick.c +++ b/source/blender/gpu/intern/gpu_select_pick.c @@ -13,7 +13,6 @@ #include "GPU_debug.h" #include "GPU_framebuffer.h" -#include "GPU_immediate.h" #include "GPU_select.h" #include "GPU_state.h" diff --git a/source/blender/gpu/intern/gpu_select_sample_query.cc b/source/blender/gpu/intern/gpu_select_sample_query.cc index 26c9ed79d6c..7393dfd0d81 100644 --- a/source/blender/gpu/intern/gpu_select_sample_query.cc +++ b/source/blender/gpu/intern/gpu_select_sample_query.cc @@ -19,7 +19,6 @@ #include "BLI_rect.h" -#include "BLI_bitmap.h" #include "BLI_utildefines.h" #include "BLI_vector.hh" diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc index fe9aacb95f9..4d059ae495e 100644 --- a/source/blender/gpu/intern/gpu_shader.cc +++ b/source/blender/gpu/intern/gpu_shader.cc @@ -7,6 +7,7 @@ #include "MEM_guardedalloc.h" +#include "BLI_math_matrix.h" #include "BLI_string_utils.h" #include "GPU_capabilities.h" @@ -94,6 +95,9 @@ static void standard_defines(Vector<const char *> &sources) case GPU_BACKEND_OPENGL: sources.append("#define GPU_OPENGL\n"); break; + case GPU_BACKEND_METAL: + sources.append("#define GPU_METAL\n"); + break; default: BLI_assert(false && "Invalid GPU Backend Type"); break; @@ -382,6 +386,8 @@ 
GPUShader *GPU_shader_create_from_info(const GPUShaderCreateInfo *_info) sources.append(resources.c_str()); sources.append(layout.c_str()); sources.extend(code); + sources.extend(info.dependencies_generated); + sources.append(info.compute_source_generated.c_str()); shader->compute_shader_from_glsl(sources); } @@ -575,6 +581,12 @@ int GPU_shader_get_builtin_block(GPUShader *shader, int builtin) return interface->ubo_builtin((GPUUniformBlockBuiltin)builtin); } +int GPU_shader_get_builtin_ssbo(GPUShader *shader, int builtin) +{ + ShaderInterface *interface = unwrap(shader)->interface; + return interface->ssbo_builtin((GPUStorageBufferBuiltin)builtin); +} + int GPU_shader_get_ssbo(GPUShader *shader, const char *name) { ShaderInterface *interface = unwrap(shader)->interface; @@ -603,6 +615,12 @@ int GPU_shader_get_texture_binding(GPUShader *shader, const char *name) return tex ? tex->binding : -1; } +uint GPU_shader_get_attribute_len(const GPUShader *shader) +{ + ShaderInterface *interface = unwrap(shader)->interface; + return interface->attr_len_; +} + int GPU_shader_get_attribute(GPUShader *shader, const char *name) { ShaderInterface *interface = unwrap(shader)->interface; @@ -610,6 +628,23 @@ int GPU_shader_get_attribute(GPUShader *shader, const char *name) return attr ? attr->location : -1; } +bool GPU_shader_get_attribute_info(const GPUShader *shader, + int attr_location, + char r_name[256], + int *r_type) +{ + ShaderInterface *interface = unwrap(shader)->interface; + + const ShaderInput *attr = interface->attr_get(attr_location); + if (!attr) { + return false; + } + + BLI_strncpy(r_name, interface->input_name_get(attr), 256); + *r_type = attr->location != -1 ? interface->attr_types_[attr->location] : -1; + return true; +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -702,12 +737,25 @@ void GPU_shader_uniform_4fv(GPUShader *sh, const char *name, const float data[4] GPU_shader_uniform_vector(sh, loc, 4, 1, data); } +void GPU_shader_uniform_2iv(GPUShader *sh, const char *name, const int data[2]) +{ + const int loc = GPU_shader_get_uniform(sh, name); + GPU_shader_uniform_vector_int(sh, loc, 2, 1, data); +} + void GPU_shader_uniform_mat4(GPUShader *sh, const char *name, const float data[4][4]) { const int loc = GPU_shader_get_uniform(sh, name); GPU_shader_uniform_vector(sh, loc, 16, 1, (const float *)data); } +void GPU_shader_uniform_mat3_as_mat4(GPUShader *sh, const char *name, const float data[3][3]) +{ + float matrix[4][4]; + copy_m4_m3(matrix, data); + GPU_shader_uniform_mat4(sh, name, matrix); +} + void GPU_shader_uniform_2fv_array(GPUShader *sh, const char *name, int len, const float (*val)[2]) { const int loc = GPU_shader_get_uniform(sh, name); diff --git a/source/blender/gpu/intern/gpu_shader_builder.cc b/source/blender/gpu/intern/gpu_shader_builder.cc index fc99b892554..9b699c60126 100644 --- a/source/blender/gpu/intern/gpu_shader_builder.cc +++ b/source/blender/gpu/intern/gpu_shader_builder.cc @@ -51,7 +51,6 @@ void ShaderBuilder::init() void ShaderBuilder::exit() { - GPU_backend_exit(); GPU_exit(); GPU_context_discard(gpu_context_); diff --git a/source/blender/gpu/intern/gpu_shader_builder_stubs.cc b/source/blender/gpu/intern/gpu_shader_builder_stubs.cc index 515f65adb73..e15054bd045 100644 --- a/source/blender/gpu/intern/gpu_shader_builder_stubs.cc +++ b/source/blender/gpu/intern/gpu_shader_builder_stubs.cc @@ -12,6 +12,7 @@ #include "IMB_imbuf.h" #include "IMB_imbuf_types.h" +#include "BKE_attribute.h" #include "BKE_customdata.h" 
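Backing up briefly to the attribute introspection added in gpu_shader.cc above (GPU_shader_get_attribute_len() and GPU_shader_get_attribute_info()), here is a hedged sketch of the enumeration pattern it enables, e.g. for rebuilding a GPUVertFormat from a shader in the Python API. Only the two functions and their signatures come from the patch; the rest is illustrative.

#include <stdio.h>

#include "GPU_shader.h"

static void print_vertex_inputs(const GPUShader *shader)
{
  const unsigned int attr_len = GPU_shader_get_attribute_len(shader);
  for (unsigned int i = 0; i < attr_len; i++) {
    char name[256];
    int type;
    if (!GPU_shader_get_attribute_info(shader, (int)i, name, &type)) {
      continue;
    }
    /* `type` indexes the create-info Type enum, or -1 if unavailable. */
    printf("attribute %u: %s (type %d)\n", i, name, type);
  }
}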
#include "BKE_global.h" #include "BKE_material.h" @@ -101,10 +102,42 @@ void UI_GetThemeColorShadeAlpha4ubv(int UNUSED(colorid), /** \} */ /* -------------------------------------------------------------------- */ +/** \name Stubs of BKE_attribute.h + * \{ */ + +void BKE_id_attribute_copy_domains_temp(short UNUSED(id_type), + const struct CustomData *UNUSED(vdata), + const struct CustomData *UNUSED(edata), + const struct CustomData *UNUSED(ldata), + const struct CustomData *UNUSED(pdata), + const struct CustomData *UNUSED(cdata), + struct ID *UNUSED(r_id)) +{ +} + +struct CustomDataLayer *BKE_id_attributes_active_color_get(const struct ID *UNUSED(id)) +{ + return nullptr; +} + +struct CustomDataLayer *BKE_id_attributes_render_color_get(const struct ID *UNUSED(id)) +{ + return nullptr; +} + +eAttrDomain BKE_id_attribute_domain(const struct ID *UNUSED(id), + const struct CustomDataLayer *UNUSED(layer)) +{ + return ATTR_DOMAIN_AUTO; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ /** \name Stubs of BKE_paint.h * \{ */ bool paint_is_face_hidden(const struct MLoopTri *UNUSED(lt), - const struct MVert *UNUSED(mvert), + const bool *UNUSED(hide_vert), const struct MLoop *UNUSED(mloop)) { BLI_assert_unreachable(); @@ -170,6 +203,40 @@ int CustomData_get_offset(const struct CustomData *UNUSED(data), int UNUSED(type return 0; } +int CustomData_get_named_layer_index(const struct CustomData *UNUSED(data), + int UNUSED(type), + const char *UNUSED(name)) +{ + return -1; +} + +int CustomData_get_active_layer_index(const struct CustomData *UNUSED(data), int UNUSED(type)) +{ + return -1; +} + +int CustomData_get_render_layer_index(const struct CustomData *UNUSED(data), int UNUSED(type)) +{ + return -1; +} + +bool CustomData_has_layer(const struct CustomData *UNUSED(data), int UNUSED(type)) +{ + return false; +} + +void *CustomData_get_layer_named(const struct CustomData *UNUSED(data), + int UNUSED(type), + const char *UNUSED(name)) +{ + return nullptr; +} + +void *CustomData_get_layer(const struct CustomData *UNUSED(data), int UNUSED(type)) +{ + return nullptr; +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -237,5 +304,14 @@ void DRW_deferred_shader_remove(struct GPUMaterial *UNUSED(mat)) BLI_assert_unreachable(); } +void DRW_cdlayer_attr_aliases_add(struct GPUVertFormat *UNUSED(format), + const char *UNUSED(base_name), + const struct CustomData *UNUSED(data), + const struct CustomDataLayer *UNUSED(cl), + bool UNUSED(is_active_render), + bool UNUSED(is_active_layer)) +{ +} + /** \} */ } diff --git a/source/blender/gpu/intern/gpu_shader_builtin.c b/source/blender/gpu/intern/gpu_shader_builtin.c index b92fae4a89b..8a6586e06f6 100644 --- a/source/blender/gpu/intern/gpu_shader_builtin.c +++ b/source/blender/gpu/intern/gpu_shader_builtin.c @@ -5,25 +5,9 @@ * \ingroup gpu */ -#include "MEM_guardedalloc.h" - -#include "BLI_math_base.h" -#include "BLI_math_vector.h" -#include "BLI_path_util.h" -#include "BLI_string.h" -#include "BLI_string_utils.h" #include "BLI_utildefines.h" -#include "BKE_appdir.h" -#include "BKE_global.h" - -#include "DNA_space_types.h" - -#include "GPU_matrix.h" -#include "GPU_platform.h" #include "GPU_shader.h" -#include "GPU_texture.h" -#include "GPU_uniform_buffer.h" /* Adjust these constants as needed. 
*/ #define MAX_DEFINE_LENGTH 256 @@ -41,10 +25,7 @@ extern char datatoc_gpu_shader_flat_id_frag_glsl[]; extern char datatoc_gpu_shader_2D_area_borders_vert_glsl[]; extern char datatoc_gpu_shader_2D_area_borders_frag_glsl[]; extern char datatoc_gpu_shader_2D_vert_glsl[]; -extern char datatoc_gpu_shader_2D_flat_color_vert_glsl[]; extern char datatoc_gpu_shader_2D_smooth_color_uniform_alpha_vert_glsl[]; -extern char datatoc_gpu_shader_2D_smooth_color_vert_glsl[]; -extern char datatoc_gpu_shader_2D_smooth_color_frag_glsl[]; extern char datatoc_gpu_shader_2D_image_vert_glsl[]; extern char datatoc_gpu_shader_2D_image_rect_vert_glsl[]; extern char datatoc_gpu_shader_2D_image_multi_rect_vert_glsl[]; @@ -155,10 +136,10 @@ static const GPUShaderStages builtin_shader_stages[GPU_SHADER_BUILTIN_LEN] = { .name = "GPU_SHADER_3D_IMAGE", .create_info = "gpu_shader_3D_image", }, - [GPU_SHADER_3D_IMAGE_MODULATE_ALPHA] = + [GPU_SHADER_3D_IMAGE_COLOR] = { - .name = "GPU_SHADER_3D_IMAGE_MODULATE_ALPHA", - .create_info = "gpu_shader_3D_image_modulate_alpha", + .name = "GPU_SHADER_3D_IMAGE_COLOR", + .create_info = "gpu_shader_3D_image_color", }, [GPU_SHADER_2D_CHECKER] = { @@ -172,21 +153,6 @@ static const GPUShaderStages builtin_shader_stages[GPU_SHADER_BUILTIN_LEN] = { .create_info = "gpu_shader_2D_diag_stripes", }, - [GPU_SHADER_2D_UNIFORM_COLOR] = - { - .name = "GPU_SHADER_2D_UNIFORM_COLOR", - .create_info = "gpu_shader_2D_uniform_color", - }, - [GPU_SHADER_2D_FLAT_COLOR] = - { - .name = "GPU_SHADER_2D_FLAT_COLOR", - .create_info = "gpu_shader_2D_flat_color", - }, - [GPU_SHADER_2D_SMOOTH_COLOR] = - { - .name = "GPU_SHADER_2D_SMOOTH_COLOR", - .create_info = "gpu_shader_2D_smooth_color", - }, [GPU_SHADER_2D_IMAGE_OVERLAYS_MERGE] = { .name = "GPU_SHADER_2D_IMAGE_OVERLAYS_MERGE", @@ -197,16 +163,6 @@ static const GPUShaderStages builtin_shader_stages[GPU_SHADER_BUILTIN_LEN] = { .name = "GPU_SHADER_2D_IMAGE_OVERLAYS_STEREO_MERGE", .create_info = "gpu_shader_2D_image_overlays_stereo_merge", }, - [GPU_SHADER_2D_IMAGE] = - { - .name = "GPU_SHADER_2D_IMAGE", - .create_info = "gpu_shader_2D_image", - }, - [GPU_SHADER_2D_IMAGE_COLOR] = - { - .name = "GPU_SHADER_2D_IMAGE_COLOR", - .create_info = "gpu_shader_2D_image_color", - }, [GPU_SHADER_2D_IMAGE_DESATURATE_COLOR] = { .name = "GPU_SHADER_2D_IMAGE_DESATURATE_COLOR", @@ -279,11 +235,6 @@ static const GPUShaderStages builtin_shader_stages[GPU_SHADER_BUILTIN_LEN] = { .create_info = "gpu_shader_3D_polyline_smooth_color", }, - [GPU_SHADER_2D_LINE_DASHED_UNIFORM_COLOR] = - { - .name = "GPU_SHADER_2D_LINE_DASHED_UNIFORM_COLOR", - .create_info = "gpu_shader_2D_line_dashed_uniform_color", - }, [GPU_SHADER_3D_LINE_DASHED_UNIFORM_COLOR] = { .name = "GPU_SHADER_3D_LINE_DASHED_UNIFORM_COLOR", diff --git a/source/blender/gpu/intern/gpu_shader_create_info.cc b/source/blender/gpu/intern/gpu_shader_create_info.cc index f5b90989481..a18fdcd32df 100644 --- a/source/blender/gpu/intern/gpu_shader_create_info.cc +++ b/source/blender/gpu/intern/gpu_shader_create_info.cc @@ -19,7 +19,6 @@ #include "gpu_shader_create_info.hh" #include "gpu_shader_create_info_private.hh" #include "gpu_shader_dependency_private.h" -#include "gpu_shader_private.hh" #undef GPU_SHADER_INTERFACE_INFO #undef GPU_SHADER_CREATE_INFO @@ -155,13 +154,13 @@ std::string ShaderCreateInfo::check_error() const } else { if (!this->vertex_source_.is_empty()) { - error += "Compute shader has vertex_source_ shader attached in" + this->name_ + ".\n"; + error += "Compute shader has vertex_source_ shader attached in " + this->name_ 
+ ".\n"; } if (!this->geometry_source_.is_empty()) { - error += "Compute shader has geometry_source_ shader attached in" + this->name_ + ".\n"; + error += "Compute shader has geometry_source_ shader attached in " + this->name_ + ".\n"; } if (!this->fragment_source_.is_empty()) { - error += "Compute shader has fragment_source_ shader attached in" + this->name_ + ".\n"; + error += "Compute shader has fragment_source_ shader attached in " + this->name_ + ".\n"; } } @@ -301,12 +300,25 @@ void gpu_shader_create_info_init() draw_modelmat = draw_modelmat_legacy; } + /* WORKAROUND: Replace the use of gpu_BaseInstance by an instance attribute. */ + if (GPU_shader_draw_parameters_support() == false) { + draw_resource_id_new = draw_resource_id_fallback; + } + for (ShaderCreateInfo *info : g_create_infos->values()) { if (info->do_static_compilation_) { info->builtins_ |= gpu_shader_dependency_get_builtins(info->vertex_source_); info->builtins_ |= gpu_shader_dependency_get_builtins(info->fragment_source_); info->builtins_ |= gpu_shader_dependency_get_builtins(info->geometry_source_); info->builtins_ |= gpu_shader_dependency_get_builtins(info->compute_source_); + + /* Automatically amend the create info for ease of use of the debug feature. */ + if ((info->builtins_ & BuiltinBits::USE_DEBUG_DRAW) == BuiltinBits::USE_DEBUG_DRAW) { + info->additional_info("draw_debug_draw"); + } + if ((info->builtins_ & BuiltinBits::USE_DEBUG_PRINT) == BuiltinBits::USE_DEBUG_PRINT) { + info->additional_info("draw_debug_print"); + } } } @@ -334,8 +346,11 @@ bool gpu_shader_create_info_compile_all() int skipped = 0; int total = 0; for (ShaderCreateInfo *info : g_create_infos->values()) { + info->finalize(); if (info->do_static_compilation_) { - if (GPU_compute_shader_support() == false && info->compute_source_ != nullptr) { + if ((GPU_compute_shader_support() == false && info->compute_source_ != nullptr) || + (GPU_shader_image_load_store_support() == false && info->has_resource_image()) || + (GPU_shader_storage_buffer_objects_support() == false && info->has_resource_storage())) { skipped++; continue; } diff --git a/source/blender/gpu/intern/gpu_shader_create_info.hh b/source/blender/gpu/intern/gpu_shader_create_info.hh index 4927ef75a75..25a79dd26ac 100644 --- a/source/blender/gpu/intern/gpu_shader_create_info.hh +++ b/source/blender/gpu/intern/gpu_shader_create_info.hh @@ -32,6 +32,7 @@ namespace blender::gpu::shader { #endif enum class Type { + /* Types supported natively across all GPU back-ends. */ FLOAT = 0, VEC2, VEC3, @@ -47,6 +48,21 @@ enum class Type { IVEC3, IVEC4, BOOL, + /* Additionally supported types to enable data optimization and native + * support in some GPU back-ends. + * NOTE: These types must be representable in all APIs. E.g. `VEC3_101010I2` is aliased as vec3 + * in the GL back-end, as implicit type conversions from packed normal attribute data to vec3 is + * supported. UCHAR/CHAR types are natively supported in Metal and can be used to avoid + * additional data conversions for `GPU_COMP_U8` vertex attributes. 
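As an illustration of the new attribute types, a hypothetical create-info that consumes packed vertex data directly. The info name and shader source names are placeholders; only the Type values and the ShaderCreateInfo builder methods are taken from the sources.

/* Placed in a *_info.hh file so the GPU_SHADER_CREATE_INFO() macro machinery applies. */
#include "gpu_shader_create_info.hh"

GPU_SHADER_CREATE_INFO(gpu_shader_example_packed_attrs)
    .vertex_in(0, Type::VEC3, "pos")
    .vertex_in(1, Type::VEC3_101010I2, "nor") /* Packed normal, read as vec3 in GLSL. */
    .vertex_in(2, Type::UCHAR4, "color")      /* GPU_COMP_U8 data, no conversion needed on Metal. */
    .push_constant(Type::MAT4, "ModelViewProjectionMatrix")
    .fragment_out(0, Type::VEC4, "fragColor")
    .vertex_source("example_vert.glsl")
    .fragment_source("example_frag.glsl")
    .do_static_compilation(true);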
*/ + VEC3_101010I2, + UCHAR, + UCHAR2, + UCHAR3, + UCHAR4, + CHAR, + CHAR2, + CHAR3, + CHAR4 }; /* All of these functions is a bit out of place */ @@ -86,6 +102,40 @@ static inline std::ostream &operator<<(std::ostream &stream, const Type type) return stream << "mat3"; case Type::MAT4: return stream << "mat4"; + case Type::VEC3_101010I2: + return stream << "vec3_1010102_Inorm"; + case Type::UCHAR: + return stream << "uchar"; + case Type::UCHAR2: + return stream << "uchar2"; + case Type::UCHAR3: + return stream << "uchar3"; + case Type::UCHAR4: + return stream << "uchar4"; + case Type::CHAR: + return stream << "char"; + case Type::CHAR2: + return stream << "char2"; + case Type::CHAR3: + return stream << "char3"; + case Type::CHAR4: + return stream << "char4"; + case Type::INT: + return stream << "int"; + case Type::IVEC2: + return stream << "ivec2"; + case Type::IVEC3: + return stream << "ivec3"; + case Type::IVEC4: + return stream << "ivec4"; + case Type::UINT: + return stream << "uint"; + case Type::UVEC2: + return stream << "uvec2"; + case Type::UVEC3: + return stream << "uvec3"; + case Type::UVEC4: + return stream << "uvec4"; default: BLI_assert(0); return stream; @@ -127,8 +177,12 @@ enum class BuiltinBits { VERTEX_ID = (1 << 14), WORK_GROUP_ID = (1 << 15), WORK_GROUP_SIZE = (1 << 16), + + /* Not a builtin but a flag we use to tag shaders that use the debug features. */ + USE_DEBUG_DRAW = (1 << 29), + USE_DEBUG_PRINT = (1 << 30), }; -ENUM_OPERATORS(BuiltinBits, BuiltinBits::WORK_GROUP_SIZE); +ENUM_OPERATORS(BuiltinBits, BuiltinBits::USE_DEBUG_PRINT); /** * Follow convention described in: @@ -224,6 +278,8 @@ enum class PrimitiveOut { POINTS = 0, LINE_STRIP, TRIANGLE_STRIP, + LINES, + TRIANGLES, }; struct StageInterfaceInfo { @@ -268,10 +324,10 @@ struct StageInterfaceInfo { /** * \brief Describe inputs & outputs, stage interfaces, resources and sources of a shader. * If all data is correctly provided, this is all that is needed to create and compile - * a GPUShader. + * a #GPUShader. * * IMPORTANT: All strings are references only. Make sure all the strings used by a - * ShaderCreateInfo are not freed until it is consumed or deleted. + * #ShaderCreateInfo are not freed until it is consumed or deleted. */ struct ShaderCreateInfo { /** Shader name for debugging. */ @@ -290,7 +346,7 @@ struct ShaderCreateInfo { DepthWrite depth_write_ = DepthWrite::ANY; /** * Maximum length of all the resource names including each null terminator. - * Only for names used by gpu::ShaderInterface. + * Only for names used by #gpu::ShaderInterface. */ size_t interface_names_size_ = 0; /** Manually set builtins. */ @@ -298,6 +354,7 @@ struct ShaderCreateInfo { /** Manually set generated code. */ std::string vertex_source_generated = ""; std::string fragment_source_generated = ""; + std::string compute_source_generated = ""; std::string geometry_source_generated = ""; std::string typedef_source_generated = ""; /** Manually set generated dependencies. */ @@ -740,33 +797,16 @@ struct ShaderCreateInfo { * Used to share parts of the infos that are common to many shaders. 
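The additional_info() refactor just below replaces a fixed list of defaulted parameters with a parameter pack. A minimal standalone sketch (not Blender code) of the same recursion pattern, for readers less familiar with variadic templates:

#include <string>
#include <vector>

struct Info {
  std::vector<std::string> additional_infos_;

  /* Base case: record one name. */
  Info &additional_info(const std::string &info_name)
  {
    additional_infos_.push_back(info_name);
    return *this;
  }

  /* Recursive case: peel off the first name, recurse on the rest. */
  template<typename... Args> Info &additional_info(const std::string &info_name, Args... args)
  {
    additional_info(info_name);
    additional_info(args...);
    return *this;
  }
};

/* Any arity now works, e.g.: info.additional_info("draw_modelmat", "draw_debug_draw"); */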
* \{ */ - Self &additional_info(StringRefNull info_name0, - StringRefNull info_name1 = "", - StringRefNull info_name2 = "", - StringRefNull info_name3 = "", - StringRefNull info_name4 = "", - StringRefNull info_name5 = "", - StringRefNull info_name6 = "") - { - additional_infos_.append(info_name0); - if (!info_name1.is_empty()) { - additional_infos_.append(info_name1); - } - if (!info_name2.is_empty()) { - additional_infos_.append(info_name2); - } - if (!info_name3.is_empty()) { - additional_infos_.append(info_name3); - } - if (!info_name4.is_empty()) { - additional_infos_.append(info_name4); - } - if (!info_name5.is_empty()) { - additional_infos_.append(info_name5); - } - if (!info_name6.is_empty()) { - additional_infos_.append(info_name6); - } + Self &additional_info(StringRefNull info_name) + { + additional_infos_.append(info_name); + return *(Self *)this; + } + + template<typename... Args> Self &additional_info(StringRefNull info_name, Args... args) + { + additional_info(info_name); + additional_info(args...); return *(Self *)this; } @@ -818,6 +858,7 @@ struct ShaderCreateInfo { TEST_EQUAL(*this, b, builtins_); TEST_EQUAL(*this, b, vertex_source_generated); TEST_EQUAL(*this, b, fragment_source_generated); + TEST_EQUAL(*this, b, compute_source_generated); TEST_EQUAL(*this, b, typedef_source_generated); TEST_VECTOR_EQUAL(*this, b, vertex_inputs_); TEST_EQUAL(*this, b, geometry_layout_); @@ -872,6 +913,31 @@ struct ShaderCreateInfo { return stream; } + bool has_resource_type(Resource::BindType bind_type) const + { + for (auto &res : batch_resources_) { + if (res.bind_type == bind_type) { + return true; + } + } + for (auto &res : pass_resources_) { + if (res.bind_type == bind_type) { + return true; + } + } + return false; + } + + bool has_resource_image() const + { + return has_resource_type(Resource::BindType::IMAGE); + } + + bool has_resource_storage() const + { + return has_resource_type(Resource::BindType::STORAGE_BUFFER); + } + /** \} */ #undef TEST_EQUAL diff --git a/source/blender/gpu/intern/gpu_shader_dependency.cc b/source/blender/gpu/intern/gpu_shader_dependency.cc index 842914f5bed..2c59cb6e501 100644 --- a/source/blender/gpu/intern/gpu_shader_dependency.cc +++ b/source/blender/gpu/intern/gpu_shader_dependency.cc @@ -11,11 +11,11 @@ #include <algorithm> #include <iomanip> #include <iostream> +#include <regex> #include <sstream> #include "BLI_ghash.h" #include "BLI_map.hh" -#include "BLI_set.hh" #include "BLI_string_ref.hh" #include "gpu_material_library.h" @@ -43,7 +43,7 @@ struct GPUSource { StringRefNull source; Vector<GPUSource *> dependencies; bool dependencies_init = false; - shader::BuiltinBits builtins = (shader::BuiltinBits)0; + shader::BuiltinBits builtins = shader::BuiltinBits::NONE; std::string processed_source; GPUSource(const char *path, @@ -55,46 +55,45 @@ struct GPUSource { /* Scan for builtins. */ /* FIXME: This can trigger false positive caused by disabled #if blocks. */ /* TODO(fclem): Could be made faster by scanning once. 
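The comparison changes below fix a subtle truthiness bug: StringRef::find() returns an offset, with StringRef::not_found (-1) on failure, so the old boolean test was wrong on both edges. A short illustrative snippet, assuming blender::StringRef semantics from BLI_string_ref.hh:

#include "BLI_string_ref.hh"

static void builtin_scan_example(blender::StringRef source)
{
  /* Old pattern: -1 (not found) is truthy and a match at offset 0 is falsy, so absent
   * tokens were flagged as builtins and tokens at the start of the source were missed. */
  if (source.find("gl_FragCoord", 0)) {
    /* Reached even when the token is absent. */
  }

  /* Fixed pattern used below. */
  if (source.find("gl_FragCoord", 0) != blender::StringRef::not_found) {
    /* Reached only when the token is actually present. */
  }
}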
*/ - if (source.find("gl_FragCoord", 0)) { + if (source.find("gl_FragCoord", 0) != StringRef::not_found) { builtins |= shader::BuiltinBits::FRAG_COORD; } - if (source.find("gl_FrontFacing", 0)) { + if (source.find("gl_FrontFacing", 0) != StringRef::not_found) { builtins |= shader::BuiltinBits::FRONT_FACING; } - if (source.find("gl_GlobalInvocationID", 0)) { + if (source.find("gl_GlobalInvocationID", 0) != StringRef::not_found) { builtins |= shader::BuiltinBits::GLOBAL_INVOCATION_ID; } - if (source.find("gl_InstanceID", 0)) { + if (source.find("gl_InstanceID", 0) != StringRef::not_found) { builtins |= shader::BuiltinBits::INSTANCE_ID; } - if (source.find("gl_LocalInvocationID", 0)) { + if (source.find("gl_LocalInvocationID", 0) != StringRef::not_found) { builtins |= shader::BuiltinBits::LOCAL_INVOCATION_ID; } - if (source.find("gl_LocalInvocationIndex", 0)) { + if (source.find("gl_LocalInvocationIndex", 0) != StringRef::not_found) { builtins |= shader::BuiltinBits::LOCAL_INVOCATION_INDEX; } - if (source.find("gl_NumWorkGroup", 0)) { + if (source.find("gl_NumWorkGroup", 0) != StringRef::not_found) { builtins |= shader::BuiltinBits::NUM_WORK_GROUP; } - if (source.find("gl_PointCoord", 0)) { + if (source.find("gl_PointCoord", 0) != StringRef::not_found) { builtins |= shader::BuiltinBits::POINT_COORD; } - if (source.find("gl_PointSize", 0)) { + if (source.find("gl_PointSize", 0) != StringRef::not_found) { builtins |= shader::BuiltinBits::POINT_SIZE; } - if (source.find("gl_PrimitiveID", 0)) { + if (source.find("gl_PrimitiveID", 0) != StringRef::not_found) { builtins |= shader::BuiltinBits::PRIMITIVE_ID; } - if (source.find("gl_VertexID", 0)) { + if (source.find("gl_VertexID", 0) != StringRef::not_found) { builtins |= shader::BuiltinBits::VERTEX_ID; } - if (source.find("gl_WorkGroupID", 0)) { + if (source.find("gl_WorkGroupID", 0) != StringRef::not_found) { builtins |= shader::BuiltinBits::WORK_GROUP_ID; } - if (source.find("gl_WorkGroupSize", 0)) { + if (source.find("gl_WorkGroupSize", 0) != StringRef::not_found) { builtins |= shader::BuiltinBits::WORK_GROUP_SIZE; } - /* TODO(fclem): We could do that at compile time. */ /* Limit to shared header files to avoid the temptation to use C++ syntax in .glsl files. */ if (filename.endswith(".h") || filename.endswith(".hh")) { @@ -102,6 +101,18 @@ struct GPUSource { quote_preprocess(); } else { + if (source.find("'") != StringRef::not_found) { + char_literals_preprocess(); + } + if (source.find("drw_print") != StringRef::not_found) { + string_preprocess(); + } + if ((source.find("drw_debug_") != StringRef::not_found) && + /* Avoid these two files where it makes no sense to add the dependency. */ + (filename != "common_debug_draw_lib.glsl" && + filename != "draw_debug_draw_display_vert.glsl")) { + builtins |= shader::BuiltinBits::USE_DEBUG_DRAW; + } check_no_quotes(); } @@ -523,6 +534,217 @@ struct GPUSource { } } + void char_literals_preprocess() + { + const StringRefNull input = source; + std::stringstream output; + int64_t cursor = -1; + int64_t last_pos = 0; + + while (true) { + cursor = find_token(input, '\'', cursor + 1); + if (cursor == -1) { + break; + } + /* Output anything between 2 print statement. */ + output << input.substr(last_pos, cursor - last_pos); + + /* Extract string. */ + int64_t char_start = cursor + 1; + int64_t char_end = find_token(input, '\'', char_start); + CHECK(char_end, input, cursor, "Malformed char literal. 
Missing ending `'`."); + + StringRef input_char = input.substr(char_start, char_end - char_start); + if (input_char.size() == 0) { + CHECK(-1, input, cursor, "Malformed char literal. Empty character constant"); + } + + uint8_t char_value = input_char[0]; + + if (input_char[0] == '\\') { + if (input_char[1] == 'n') { + char_value = '\n'; + } + else { + CHECK(-1, input, cursor, "Unsupported escaped character"); + } + } + else { + if (input_char.size() > 1) { + CHECK(-1, input, cursor, "Malformed char literal. Multi-character character constant"); + } + } + + char hex[8]; + SNPRINTF(hex, "0x%.2Xu", char_value); + output << hex; + + cursor = last_pos = char_end + 1; + } + /* If nothing has been changed, do not allocate processed_source. */ + if (last_pos == 0) { + return; + } + + if (last_pos != 0) { + output << input.substr(last_pos); + } + processed_source = output.str(); + source = processed_source.c_str(); + } + + /* Replace print(string) by equivalent drw_print_char4() sequence. */ + void string_preprocess() + { + const StringRefNull input = source; + std::stringstream output; + int64_t cursor = -1; + int64_t last_pos = 0; + + while (true) { + cursor = find_keyword(input, "drw_print", cursor + 1); + if (cursor == -1) { + break; + } + + bool do_endl = false; + StringRef func = input.substr(cursor); + if (func.startswith("drw_print(")) { + do_endl = true; + } + else if (func.startswith("drw_print_no_endl(")) { + do_endl = false; + } + else { + continue; + } + + /* Output anything between 2 print statement. */ + output << input.substr(last_pos, cursor - last_pos); + + /* Extract string. */ + int64_t str_start = input.find('(', cursor) + 1; + int64_t semicolon = find_token(input, ';', str_start + 1); + CHECK(semicolon, input, cursor, "Malformed print(). Missing `;` ."); + int64_t str_end = rfind_token(input, ')', semicolon); + if (str_end < str_start) { + CHECK(-1, input, cursor, "Malformed print(). Missing closing `)` ."); + } + + std::stringstream sub_output; + StringRef input_args = input.substr(str_start, str_end - str_start); + + auto print_string = [&](std::string str) -> int { + size_t len_before_pad = str.length(); + /* Pad string to uint size. */ + while (str.length() % 4 != 0) { + str += " "; + } + /* Keep everything in one line to not mess with the shader logs. */ + sub_output << "/* " << str << "*/"; + sub_output << "drw_print_string_start(" << len_before_pad << ");"; + for (size_t i = 0; i < len_before_pad; i += 4) { + uint8_t chars[4] = {*(reinterpret_cast<const uint8_t *>(str.c_str()) + i + 0), + *(reinterpret_cast<const uint8_t *>(str.c_str()) + i + 1), + *(reinterpret_cast<const uint8_t *>(str.c_str()) + i + 2), + *(reinterpret_cast<const uint8_t *>(str.c_str()) + i + 3)}; + if (i + 4 > len_before_pad) { + chars[len_before_pad - i] = '\0'; + } + char uint_hex[12]; + SNPRINTF(uint_hex, "0x%.2X%.2X%.2X%.2Xu", chars[3], chars[2], chars[1], chars[0]); + sub_output << "drw_print_char4(" << StringRefNull(uint_hex) << ");"; + } + return 0; + }; + + std::string func_args = input_args; + /* Workaround to support function call inside prints. We replace commas by a non control + * character `$` in order to use simpler regex later. 
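To make the rewrite concrete, this is roughly what string_preprocess() substitutes for one call (worked out by hand from the code above, so treat the exact constant as illustrative; four characters pack little-endian into each uint argument):

/* Input, as written in a .glsl source: */
drw_print("Hey!", some_value);

/* Output substituted in its place ('H','e','y','!' become 0x21796548u): */
/* Hey!*/drw_print_string_start(4);drw_print_char4(0x21796548u);drw_print_value(some_value);drw_print_newline();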
*/ + bool string_scope = false; + int func_scope = 0; + for (char &c : func_args) { + if (c == '"') { + string_scope = !string_scope; + } + else if (!string_scope) { + if (c == '(') { + func_scope++; + } + else if (c == ')') { + func_scope--; + } + else if (c == ',' && func_scope != 0) { + c = '$'; + } + } + } + + const bool print_as_variable = (input_args[0] != '"') && find_token(input_args, ',') == -1; + if (print_as_variable) { + /* Variable or expression debugging. */ + std::string arg = input_args; + /* Pad align most values. */ + while (arg.length() % 4 != 0) { + arg += " "; + } + print_string(arg); + print_string("= "); + sub_output << "drw_print_value(" << input_args << ");"; + } + else { + const std::regex arg_regex( + /* String args. */ + "[\\s]*\"([^\r\n\t\f\v\"]*)\"" + /* OR. */ + "|" + /* value args. */ + "([^,]+)"); + std::smatch args_match; + std::string::const_iterator args_search_start(func_args.cbegin()); + while (std::regex_search(args_search_start, func_args.cend(), args_match, arg_regex)) { + args_search_start = args_match.suffix().first; + std::string arg_string = args_match[1].str(); + std::string arg_val = args_match[2].str(); + + if (arg_string.empty()) { + for (char &c : arg_val) { + if (c == '$') { + c = ','; + } + } + sub_output << "drw_print_value(" << arg_val << ");"; + } + else { + print_string(arg_string); + } + } + } + + if (do_endl) { + sub_output << "drw_print_newline();"; + } + + output << sub_output.str(); + + cursor = last_pos = str_end + 1; + } + /* If nothing has been changed, do not allocate processed_source. */ + if (last_pos == 0) { + return; + } + + if (filename != "common_debug_print_lib.glsl") { + builtins |= shader::BuiltinBits::USE_DEBUG_PRINT; + } + + if (last_pos != 0) { + output << input.substr(last_pos); + } + processed_source = output.str(); + source = processed_source.c_str(); + } + #undef find_keyword #undef rfind_keyword #undef find_token @@ -538,6 +760,15 @@ struct GPUSource { this->dependencies_init = true; int64_t pos = -1; + using namespace shader; + /* Auto dependency injection for debug capabilities. */ + if ((builtins & BuiltinBits::USE_DEBUG_DRAW) == BuiltinBits::USE_DEBUG_DRAW) { + dependencies.append_non_duplicates(dict.lookup("common_debug_draw_lib.glsl")); + } + if ((builtins & BuiltinBits::USE_DEBUG_PRINT) == BuiltinBits::USE_DEBUG_PRINT) { + dependencies.append_non_duplicates(dict.lookup("common_debug_print_lib.glsl")); + } + while (true) { GPUSource *dependency_source = nullptr; @@ -559,6 +790,7 @@ struct GPUSource { return 1; } } + /* Recursive. 
*/ int result = dependency_source->init_dependencies(dict, g_functions); if (result != 0) { @@ -584,7 +816,7 @@ struct GPUSource { shader::BuiltinBits builtins_get() const { - shader::BuiltinBits out_builtins = shader::BuiltinBits::NONE; + shader::BuiltinBits out_builtins = builtins; for (auto *dep : dependencies) { out_builtins |= dep->builtins; } @@ -594,7 +826,8 @@ struct GPUSource { bool is_from_material_library() const { return (filename.startswith("gpu_shader_material_") || - filename.startswith("gpu_shader_common_")) && + filename.startswith("gpu_shader_common_") || + filename.startswith("gpu_shader_compositor_")) && filename.endswith(".glsl"); } }; diff --git a/source/blender/gpu/intern/gpu_shader_interface.hh b/source/blender/gpu/intern/gpu_shader_interface.hh index ac78af38fcc..41e06569bdc 100644 --- a/source/blender/gpu/intern/gpu_shader_interface.hh +++ b/source/blender/gpu/intern/gpu_shader_interface.hh @@ -18,6 +18,7 @@ #include "BLI_utildefines.h" #include "GPU_shader.h" +#include "GPU_vertex_format.h" /* GPU_VERT_ATTR_MAX_LEN */ #include "gpu_shader_create_info.hh" namespace blender::gpu { @@ -39,9 +40,9 @@ class ShaderInterface { /* TODO(fclem): should be protected. */ public: /** Flat array. In this order: Attributes, Ubos, Uniforms. */ - ShaderInput *inputs_ = NULL; + ShaderInput *inputs_ = nullptr; /** Buffer containing all inputs names separated by '\0'. */ - char *name_buffer_ = NULL; + char *name_buffer_ = nullptr; /** Input counts inside input array. */ uint attr_len_ = 0; uint ubo_len_ = 0; @@ -56,6 +57,14 @@ class ShaderInterface { /** Location of builtin uniforms. Fast access, no lookup needed. */ int32_t builtins_[GPU_NUM_UNIFORMS]; int32_t builtin_blocks_[GPU_NUM_UNIFORM_BLOCKS]; + int32_t builtin_buffers_[GPU_NUM_STORAGE_BUFFERS]; + + /** + * Currently only used for `GPU_shader_get_attribute_info`. + * This utility is useful for automatic creation of `GPUVertFormat` in Python. + * Use `ShaderInput::location` to identify the `Type`. + */ + uint8_t attr_types_[GPU_VERT_ATTR_MAX_LEN]; public: ShaderInterface(); @@ -68,6 +77,10 @@ class ShaderInterface { { return input_lookup(inputs_, attr_len_, name); } + inline const ShaderInput *attr_get(const int binding) const + { + return input_lookup(inputs_, attr_len_, binding); + } inline const ShaderInput *ubo_get(const char *name) const { @@ -116,9 +129,17 @@ class ShaderInterface { return builtin_blocks_[builtin]; } + /* Returns binding position. 
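The interface below also gains builtin storage-buffer slots for the debug buffers. A hedged sketch of how a caller could resolve and bind one of them: GPU_shader_get_builtin_ssbo() comes from this patch, GPU_storagebuf_bind() is the existing bind call, and the -1 "not present" convention is assumed to match the other builtin lookups.

#include "GPU_shader.h"
#include "GPU_storage_buffer.h"

static void bind_debug_print_buffer(GPUShader *shader, GPUStorageBuf *print_buf)
{
  const int slot = GPU_shader_get_builtin_ssbo(shader, GPU_STORAGE_BUFFER_DEBUG_PRINT);
  if (slot != -1) {
    GPU_storagebuf_bind(print_buf, slot);
  }
}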
*/ + inline int32_t ssbo_builtin(const GPUStorageBufferBuiltin builtin) const + { + BLI_assert(builtin >= 0 && builtin < GPU_NUM_STORAGE_BUFFERS); + return builtin_buffers_[builtin]; + } + protected: static inline const char *builtin_uniform_name(GPUUniformBuiltin u); static inline const char *builtin_uniform_block_name(GPUUniformBlockBuiltin u); + static inline const char *builtin_storage_block_name(GPUStorageBufferBuiltin u); inline uint32_t set_input_name(ShaderInput *input, char *name, uint32_t name_len) const; inline void copy_input_name(ShaderInput *input, @@ -187,7 +208,7 @@ inline const char *ShaderInterface::builtin_uniform_name(GPUUniformBuiltin u) return "srgbTarget"; default: - return NULL; + return nullptr; } } @@ -208,7 +229,19 @@ inline const char *ShaderInterface::builtin_uniform_block_name(GPUUniformBlockBu case GPU_UNIFORM_BLOCK_DRW_INFOS: return "drw_infos"; default: - return NULL; + return nullptr; + } +} + +inline const char *ShaderInterface::builtin_storage_block_name(GPUStorageBufferBuiltin u) +{ + switch (u) { + case GPU_STORAGE_BUFFER_DEBUG_VERTS: + return "drw_debug_verts_buf"; + case GPU_STORAGE_BUFFER_DEBUG_PRINT: + return "drw_debug_print_buf"; + default: + return nullptr; } } @@ -258,7 +291,7 @@ inline const ShaderInput *ShaderInterface::input_lookup(const ShaderInput *const return inputs + i; /* not found */ } } - return NULL; /* not found */ + return nullptr; /* not found */ } /* This is a bit dangerous since we could have a hash collision. @@ -268,7 +301,7 @@ inline const ShaderInput *ShaderInterface::input_lookup(const ShaderInput *const return inputs + i; } } - return NULL; /* not found */ + return nullptr; /* not found */ } inline const ShaderInput *ShaderInterface::input_lookup(const ShaderInput *const inputs, @@ -281,7 +314,7 @@ inline const ShaderInput *ShaderInterface::input_lookup(const ShaderInput *const return inputs + i; } } - return NULL; /* not found */ + return nullptr; /* not found */ } } // namespace blender::gpu diff --git a/source/blender/gpu/intern/gpu_shader_log.cc b/source/blender/gpu/intern/gpu_shader_log.cc index 83fc34a3278..dbc36c5afd0 100644 --- a/source/blender/gpu/intern/gpu_shader_log.cc +++ b/source/blender/gpu/intern/gpu_shader_log.cc @@ -15,8 +15,6 @@ #include "gpu_shader_dependency_private.h" #include "gpu_shader_private.hh" -#include "GPU_platform.h" - #include "CLG_log.h" static CLG_LogRef LOG = {"gpu.shader"}; @@ -230,6 +228,7 @@ void Shader::print_log(Span<const char *> sources, log_line = line_end + 1; previous_location = log_item.cursor; } + // printf("%s", sources_combined); MEM_freeN(sources_combined); CLG_Severity severity = error ? 
CLG_SEVERITY_ERROR : CLG_SEVERITY_WARN; diff --git a/source/blender/gpu/intern/gpu_shader_private.hh b/source/blender/gpu/intern/gpu_shader_private.hh index 4d318093c98..a822cd8aa38 100644 --- a/source/blender/gpu/intern/gpu_shader_private.hh +++ b/source/blender/gpu/intern/gpu_shader_private.hh @@ -55,8 +55,6 @@ class Shader { virtual void uniform_float(int location, int comp_len, int array_size, const float *data) = 0; virtual void uniform_int(int location, int comp_len, int array_size, const int *data) = 0; - virtual void vertformat_from_shader(GPUVertFormat *) const = 0; - std::string defines_declare(const shader::ShaderCreateInfo &info) const; virtual std::string resources_declare(const shader::ShaderCreateInfo &info) const = 0; virtual std::string vertex_interface_declare(const shader::ShaderCreateInfo &info) const = 0; diff --git a/source/blender/gpu/intern/gpu_state.cc b/source/blender/gpu/intern/gpu_state.cc index f74d500340d..a1e0b8867a0 100644 --- a/source/blender/gpu/intern/gpu_state.cc +++ b/source/blender/gpu/intern/gpu_state.cc @@ -14,8 +14,6 @@ #include "BLI_math_vector.h" #include "BLI_utildefines.h" -#include "BKE_global.h" - #include "GPU_state.h" #include "gpu_context_private.hh" diff --git a/source/blender/gpu/intern/gpu_storage_buffer.cc b/source/blender/gpu/intern/gpu_storage_buffer.cc index 68020ec66f4..460a643089c 100644 --- a/source/blender/gpu/intern/gpu_storage_buffer.cc +++ b/source/blender/gpu/intern/gpu_storage_buffer.cc @@ -12,7 +12,6 @@ #include "BLI_math_base.h" #include "gpu_backend.hh" -#include "gpu_node_graph.h" #include "GPU_material.h" #include "GPU_vertex_buffer.h" /* For GPUUsageType. */ @@ -110,4 +109,9 @@ void GPU_storagebuf_copy_sub_from_vertbuf( unwrap(ssbo)->copy_sub(unwrap(src), dst_offset, src_offset, copy_size); } +void GPU_storagebuf_read(GPUStorageBuf *ssbo, void *data) +{ + unwrap(ssbo)->read(data); +} + /** \} */ diff --git a/source/blender/gpu/intern/gpu_storage_buffer_private.hh b/source/blender/gpu/intern/gpu_storage_buffer_private.hh index 091e6c2d386..0c96f97ad30 100644 --- a/source/blender/gpu/intern/gpu_storage_buffer_private.hh +++ b/source/blender/gpu/intern/gpu_storage_buffer_private.hh @@ -29,7 +29,7 @@ class StorageBuf { /** Data size in bytes. */ size_t size_in_bytes_; /** Continuous memory block to copy to GPU. This data is owned by the StorageBuf. */ - void *data_ = NULL; + void *data_ = nullptr; /** Debugging name */ char name_[DEBUG_NAME_LEN]; @@ -44,6 +44,7 @@ class StorageBuf { eGPUDataFormat data_format, void *data) = 0; virtual void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) = 0; + virtual void read(void *data) = 0; }; /* Syntactic sugar. 
*/ diff --git a/source/blender/gpu/intern/gpu_texture.cc b/source/blender/gpu/intern/gpu_texture.cc index d78dc845074..bec8b8a0df3 100644 --- a/source/blender/gpu/intern/gpu_texture.cc +++ b/source/blender/gpu/intern/gpu_texture.cc @@ -13,7 +13,6 @@ #include "gpu_backend.hh" #include "gpu_context_private.hh" #include "gpu_framebuffer_private.hh" -#include "gpu_vertex_buffer_private.hh" #include "gpu_texture_private.hh" @@ -52,13 +51,13 @@ Texture::~Texture() #endif } -bool Texture::init_1D(int w, int layers, int mips, eGPUTextureFormat format) +bool Texture::init_1D(int w, int layers, int mip_len, eGPUTextureFormat format) { w_ = w; h_ = layers; d_ = 0; - int mips_max = 1 + floorf(log2f(w)); - mipmaps_ = min_ii(mips, mips_max); + int mip_len_max = 1 + floorf(log2f(w)); + mipmaps_ = min_ii(mip_len, mip_len_max); format_ = format; format_flag_ = to_format_flag(format); type_ = (layers > 0) ? GPU_TEXTURE_1D_ARRAY : GPU_TEXTURE_1D; @@ -68,13 +67,13 @@ bool Texture::init_1D(int w, int layers, int mips, eGPUTextureFormat format) return this->init_internal(); } -bool Texture::init_2D(int w, int h, int layers, int mips, eGPUTextureFormat format) +bool Texture::init_2D(int w, int h, int layers, int mip_len, eGPUTextureFormat format) { w_ = w; h_ = h; d_ = layers; - int mips_max = 1 + floorf(log2f(max_ii(w, h))); - mipmaps_ = min_ii(mips, mips_max); + int mip_len_max = 1 + floorf(log2f(max_ii(w, h))); + mipmaps_ = min_ii(mip_len, mip_len_max); format_ = format; format_flag_ = to_format_flag(format); type_ = (layers > 0) ? GPU_TEXTURE_2D_ARRAY : GPU_TEXTURE_2D; @@ -84,13 +83,13 @@ bool Texture::init_2D(int w, int h, int layers, int mips, eGPUTextureFormat form return this->init_internal(); } -bool Texture::init_3D(int w, int h, int d, int mips, eGPUTextureFormat format) +bool Texture::init_3D(int w, int h, int d, int mip_len, eGPUTextureFormat format) { w_ = w; h_ = h; d_ = d; - int mips_max = 1 + floorf(log2f(max_iii(w, h, d))); - mipmaps_ = min_ii(mips, mips_max); + int mip_len_max = 1 + floorf(log2f(max_iii(w, h, d))); + mipmaps_ = min_ii(mip_len, mip_len_max); format_ = format; format_flag_ = to_format_flag(format); type_ = GPU_TEXTURE_3D; @@ -100,13 +99,13 @@ bool Texture::init_3D(int w, int h, int d, int mips, eGPUTextureFormat format) return this->init_internal(); } -bool Texture::init_cubemap(int w, int layers, int mips, eGPUTextureFormat format) +bool Texture::init_cubemap(int w, int layers, int mip_len, eGPUTextureFormat format) { w_ = w; h_ = w; d_ = max_ii(1, layers) * 6; - int mips_max = 1 + floorf(log2f(w)); - mipmaps_ = min_ii(mips, mips_max); + int mip_len_max = 1 + floorf(log2f(w)); + mipmaps_ = min_ii(mip_len, mip_len_max); format_ = format; format_flag_ = to_format_flag(format); type_ = (layers > 0) ? 
GPU_TEXTURE_CUBE_ARRAY : GPU_TEXTURE_CUBE; @@ -238,29 +237,29 @@ static inline GPUTexture *gpu_texture_create(const char *name, const int h, const int d, const eGPUTextureType type, - int mips, + int mip_len, eGPUTextureFormat tex_format, eGPUDataFormat data_format, const void *pixels) { - BLI_assert(mips > 0); + BLI_assert(mip_len > 0); Texture *tex = GPUBackend::get()->texture_alloc(name); bool success = false; switch (type) { case GPU_TEXTURE_1D: case GPU_TEXTURE_1D_ARRAY: - success = tex->init_1D(w, h, mips, tex_format); + success = tex->init_1D(w, h, mip_len, tex_format); break; case GPU_TEXTURE_2D: case GPU_TEXTURE_2D_ARRAY: - success = tex->init_2D(w, h, d, mips, tex_format); + success = tex->init_2D(w, h, d, mip_len, tex_format); break; case GPU_TEXTURE_3D: - success = tex->init_3D(w, h, d, mips, tex_format); + success = tex->init_3D(w, h, d, mip_len, tex_format); break; case GPU_TEXTURE_CUBE: case GPU_TEXTURE_CUBE_ARRAY: - success = tex->init_cubemap(w, d, mips, tex_format); + success = tex->init_cubemap(w, d, mip_len, tex_format); break; default: break; @@ -361,6 +360,13 @@ GPUTexture *GPU_texture_create_compressed_2d( GPUTexture *GPU_texture_create_from_vertbuf(const char *name, GPUVertBuf *vert) { +#ifndef NDEBUG + /* Vertex buffers used for texture buffers must be flagged with: + * GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY. */ + BLI_assert_msg(unwrap(vert)->extended_usage_ & GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY, + "Vertex Buffers used for textures should have usage flag " + "GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY."); +#endif eGPUTextureFormat tex_format = to_texture_format(GPU_vertbuf_get_format(vert)); Texture *tex = GPUBackend::get()->texture_alloc(name); @@ -642,6 +648,112 @@ eGPUTextureFormat GPU_texture_format(const GPUTexture *tex) return reinterpret_cast<const Texture *>(tex)->format_get(); } +const char *GPU_texture_format_description(eGPUTextureFormat texture_format) +{ + switch (texture_format) { + case GPU_RGBA8UI: + return "RGBA8UI"; + case GPU_RGBA8I: + return "RGBA8I"; + case GPU_RGBA8: + return "RGBA8"; + case GPU_RGBA32UI: + return "RGBA32UI"; + case GPU_RGBA32I: + return "RGBA32I"; + case GPU_RGBA32F: + return "RGBA32F"; + case GPU_RGBA16UI: + return "RGBA16UI"; + case GPU_RGBA16I: + return "RGBA16I"; + case GPU_RGBA16F: + return "RGBA16F"; + case GPU_RGBA16: + return "RGBA16"; + case GPU_RG8UI: + return "RG8UI"; + case GPU_RG8I: + return "RG8I"; + case GPU_RG8: + return "RG8"; + case GPU_RG32UI: + return "RG32UI"; + case GPU_RG32I: + return "RG32I"; + case GPU_RG32F: + return "RG32F"; + case GPU_RG16UI: + return "RG16UI"; + case GPU_RG16I: + return "RG16I"; + case GPU_RG16F: + return "RG16F"; + case GPU_RG16: + return "RG16"; + case GPU_R8UI: + return "R8UI"; + case GPU_R8I: + return "R8I"; + case GPU_R8: + return "R8"; + case GPU_R32UI: + return "R32UI"; + case GPU_R32I: + return "R32I"; + case GPU_R32F: + return "R32F"; + case GPU_R16UI: + return "R16UI"; + case GPU_R16I: + return "R16I"; + case GPU_R16F: + return "R16F"; + case GPU_R16: + return "R16"; + + /* Special formats texture & render-buffer. 
*/ + case GPU_RGB10_A2: + return "RGB10A2"; + case GPU_R11F_G11F_B10F: + return "R11FG11FB10F"; + case GPU_DEPTH32F_STENCIL8: + return "DEPTH32FSTENCIL8"; + case GPU_DEPTH24_STENCIL8: + return "DEPTH24STENCIL8"; + case GPU_SRGB8_A8: + return "SRGB8A8"; + + /* Texture only format */ + case (GPU_RGB16F): + return "RGB16F"; + + /* Special formats texture only */ + case GPU_SRGB8_A8_DXT1: + return "SRGB8_A8_DXT1"; + case GPU_SRGB8_A8_DXT3: + return "SRGB8_A8_DXT3"; + case GPU_SRGB8_A8_DXT5: + return "SRGB8_A8_DXT5"; + case GPU_RGBA8_DXT1: + return "RGBA8_DXT1"; + case GPU_RGBA8_DXT3: + return "RGBA8_DXT3"; + case GPU_RGBA8_DXT5: + return "RGBA8_DXT5"; + + /* Depth Formats */ + case GPU_DEPTH_COMPONENT32F: + return "DEPTH32F"; + case GPU_DEPTH_COMPONENT24: + return "DEPTH24"; + case GPU_DEPTH_COMPONENT16: + return "DEPTH16"; + } + BLI_assert_unreachable(); + return ""; +} + bool GPU_texture_depth(const GPUTexture *tex) { return (reinterpret_cast<const Texture *>(tex)->format_flag_get() & GPU_FORMAT_DEPTH) != 0; @@ -702,7 +814,11 @@ void GPU_texture_get_mipmap_size(GPUTexture *tex, int lvl, int *r_size) void GPU_samplers_update() { - GPUBackend::get()->samplers_update(); + /* Backend may not exist when we are updating preferences from background mode. */ + GPUBackend *backend = GPUBackend::get(); + if (backend) { + backend->samplers_update(); + } } /** \} */ diff --git a/source/blender/gpu/intern/gpu_texture_private.hh b/source/blender/gpu/intern/gpu_texture_private.hh index 00bcc9fac00..8521b0fd77f 100644 --- a/source/blender/gpu/intern/gpu_texture_private.hh +++ b/source/blender/gpu/intern/gpu_texture_private.hh @@ -101,10 +101,10 @@ class Texture { virtual ~Texture(); /* Return true on success. */ - bool init_1D(int w, int layers, int mips, eGPUTextureFormat format); - bool init_2D(int w, int h, int layers, int mips, eGPUTextureFormat format); - bool init_3D(int w, int h, int d, int mips, eGPUTextureFormat format); - bool init_cubemap(int w, int layers, int mips, eGPUTextureFormat format); + bool init_1D(int w, int layers, int mip_len, eGPUTextureFormat format); + bool init_2D(int w, int h, int layers, int mip_len, eGPUTextureFormat format); + bool init_3D(int w, int h, int d, int mip_len, eGPUTextureFormat format); + bool init_cubemap(int w, int layers, int mip_len, eGPUTextureFormat format); bool init_buffer(GPUVertBuf *vbo, eGPUTextureFormat format); bool init_view(const GPUTexture *src, eGPUTextureFormat format, diff --git a/source/blender/gpu/intern/gpu_uniform_buffer_private.hh b/source/blender/gpu/intern/gpu_uniform_buffer_private.hh index 6e3285b6fef..e3d70634ce1 100644 --- a/source/blender/gpu/intern/gpu_uniform_buffer_private.hh +++ b/source/blender/gpu/intern/gpu_uniform_buffer_private.hh @@ -29,7 +29,7 @@ class UniformBuf { /** Data size in bytes. */ size_t size_in_bytes_; /** Continuous memory block to copy to GPU. This data is owned by the UniformBuf. */ - void *data_ = NULL; + void *data_ = nullptr; /** Debugging name */ char name_[DEBUG_NAME_LEN]; diff --git a/source/blender/gpu/intern/gpu_vertex_buffer.cc b/source/blender/gpu/intern/gpu_vertex_buffer.cc index f47970d48d1..a441cfe2fb8 100644 --- a/source/blender/gpu/intern/gpu_vertex_buffer.cc +++ b/source/blender/gpu/intern/gpu_vertex_buffer.cc @@ -12,7 +12,6 @@ #include "gpu_backend.hh" #include "gpu_vertex_format_private.h" -#include "gl_vertex_buffer.hh" /* TODO: remove. */ #include "gpu_context_private.hh" /* TODO: remove. 
*/ #include "gpu_vertex_buffer_private.hh" @@ -41,10 +40,21 @@ VertBuf::~VertBuf() void VertBuf::init(const GPUVertFormat *format, GPUUsageType usage) { - usage_ = usage; + /* Strip extended usage flags. */ + usage_ = usage & ~GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY; +#ifndef NDEBUG + /* Store extended usage. */ + extended_usage_ = usage; +#endif flag = GPU_VERTBUF_DATA_DIRTY; GPU_vertformat_copy(&this->format, format); - if (!format->packed) { + /* Avoid packing vertex formats which are used for texture buffers. + * These cases use singular types and do not need packing. They must + * also not have increased alignment padding to the minimum per-vertex stride. */ + if (usage & GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY) { + VertexFormat_texture_buffer_pack(&this->format); + } + if (!this->format.packed) { VertexFormat_pack(&this->format); } flag |= GPU_VERTBUF_INIT; @@ -63,6 +73,10 @@ VertBuf *VertBuf::duplicate() *dst = *this; /* Almost full copy... */ dst->handle_refcount_ = 1; + /* Metadata. */ +#ifndef NDEBUG + dst->extended_usage_ = extended_usage_; +#endif /* Duplicate all needed implementation specifics data. */ this->duplicate_data(dst); return dst; @@ -193,6 +207,7 @@ void GPU_vertbuf_data_len_set(GPUVertBuf *verts_, uint v_len) void GPU_vertbuf_attr_set(GPUVertBuf *verts_, uint a_idx, uint v_idx, const void *data) { VertBuf *verts = unwrap(verts_); + BLI_assert(verts->get_usage_type() != GPU_USAGE_DEVICE_ONLY); const GPUVertFormat *format = &verts->format; const GPUVertAttr *a = &format->attrs[a_idx]; BLI_assert(v_idx < verts->vertex_alloc); @@ -216,6 +231,7 @@ void GPU_vertbuf_attr_fill(GPUVertBuf *verts_, uint a_idx, const void *data) void GPU_vertbuf_vert_set(GPUVertBuf *verts_, uint v_idx, const void *data) { VertBuf *verts = unwrap(verts_); + BLI_assert(verts->get_usage_type() != GPU_USAGE_DEVICE_ONLY); const GPUVertFormat *format = &verts->format; BLI_assert(v_idx < verts->vertex_alloc); BLI_assert(verts->data != nullptr); @@ -226,6 +242,7 @@ void GPU_vertbuf_vert_set(GPUVertBuf *verts_, uint v_idx, const void *data) void GPU_vertbuf_attr_fill_stride(GPUVertBuf *verts_, uint a_idx, uint stride, const void *data) { VertBuf *verts = unwrap(verts_); + BLI_assert(verts->get_usage_type() != GPU_USAGE_DEVICE_ONLY); const GPUVertFormat *format = &verts->format; const GPUVertAttr *a = &format->attrs[a_idx]; BLI_assert(a_idx < format->attr_len); diff --git a/source/blender/gpu/intern/gpu_vertex_buffer_private.hh b/source/blender/gpu/intern/gpu_vertex_buffer_private.hh index 7a0b53cf958..f20f6caf6de 100644 --- a/source/blender/gpu/intern/gpu_vertex_buffer_private.hh +++ b/source/blender/gpu/intern/gpu_vertex_buffer_private.hh @@ -29,7 +29,12 @@ class VertBuf { /** Status flag. */ GPUVertBufStatus flag = GPU_VERTBUF_INVALID; /** NULL indicates data in VRAM (unmapped) */ - uchar *data = NULL; + uchar *data = nullptr; + +#ifndef NDEBUG + /** Usage including extended usage flags. */ + GPUUsageType extended_usage_ = GPU_USAGE_STATIC; +#endif protected: /** Usage hint for GL optimization. 
*/ @@ -83,6 +88,11 @@ class VertBuf { } } + GPUUsageType get_usage_type() const + { + return usage_; + } + virtual void update_sub(uint start, uint len, const void *data) = 0; virtual const void *read() const = 0; virtual void *unmap(const void *mapped_data) const = 0; diff --git a/source/blender/gpu/intern/gpu_vertex_format.cc b/source/blender/gpu/intern/gpu_vertex_format.cc index 59ae862aa51..897e80293bf 100644 --- a/source/blender/gpu/intern/gpu_vertex_format.cc +++ b/source/blender/gpu/intern/gpu_vertex_format.cc @@ -8,6 +8,9 @@ */ #include "GPU_vertex_format.h" +#include "GPU_capabilities.h" + +#include "gpu_shader_create_info.hh" #include "gpu_shader_private.hh" #include "gpu_vertex_format_private.h" @@ -25,6 +28,7 @@ #endif using namespace blender::gpu; +using namespace blender::gpu::shader; void GPU_vertformat_clear(GPUVertFormat *format) { @@ -66,7 +70,7 @@ static uint attr_size(const GPUVertAttr *a) return a->comp_len * comp_size(static_cast<GPUVertCompType>(a->comp_type)); } -static uint attr_align(const GPUVertAttr *a) +static uint attr_align(const GPUVertAttr *a, uint minimum_stride) { if (a->comp_type == GPU_COMP_I10) { return 4; /* always packed as 10_10_10_2 */ @@ -76,7 +80,10 @@ static uint attr_align(const GPUVertAttr *a) return 4 * c; /* AMD HW can't fetch these well, so pad it out (other vendors too?) */ } - return c; /* most fetches are ok if components are naturally aligned */ + /* Most fetches are ok if components are naturally aligned. + * However, in Metal, the minimum supported per-vertex stride is 4, + * so we must query the GPU and pad out the size accordingly. */ + return max_ii(minimum_stride, c); } uint vertex_buffer_size(const GPUVertFormat *format, uint vertex_len) @@ -306,7 +313,7 @@ static void show_pack(uint a_idx, uint size, uint pad) } #endif -void VertexFormat_pack(GPUVertFormat *format) +static void VertexFormat_pack_impl(GPUVertFormat *format, uint minimum_stride) { GPUVertAttr *a0 = &format->attrs[0]; a0->offset = 0; @@ -318,7 +325,7 @@ for (uint a_idx = 1; a_idx < format->attr_len; a_idx++) { GPUVertAttr *a = &format->attrs[a_idx]; - uint mid_padding = padding(offset, attr_align(a)); + uint mid_padding = padding(offset, attr_align(a, minimum_stride)); offset += mid_padding; a->offset = offset; offset += a->size; @@ -328,7 +335,7 @@ #endif } - uint end_padding = padding(offset, attr_align(a0)); + uint end_padding = padding(offset, attr_align(a0, minimum_stride)); #if PACK_DEBUG show_pack(0, 0, end_padding); @@ -338,8 +345,106 @@ format->packed = true; } +void VertexFormat_pack(GPUVertFormat *format) +{ + /* Perform standard vertex packing, ensuring vertex format satisfies + * minimum stride requirements for vertex assembly. */ + VertexFormat_pack_impl(format, GPU_minimum_per_vertex_stride()); +} + +void VertexFormat_texture_buffer_pack(GPUVertFormat *format) +{ + /* Validates packing for vertex formats used with texture buffers. + * In these cases, there must only be a single vertex attribute. + * This attribute should be tightly packed without padding, to ensure + * it aligns with the backing texture data format, skipping + * minimum per-vertex stride, which mandates 4-byte alignment in Metal. + * This additional alignment padding caused smaller data types, e.g. U16, + * to mis-align.
*/ + BLI_assert_msg(format->attr_len == 1, + "Texture buffer mode should only use a single vertex attribute."); + + /* Pack vertex format without minimum stride, as this is not required by texture buffers. */ + VertexFormat_pack_impl(format, 1); +} + +static uint component_size_get(const Type gpu_type) +{ + switch (gpu_type) { + case Type::VEC2: + case Type::IVEC2: + case Type::UVEC2: + return 2; + case Type::VEC3: + case Type::IVEC3: + case Type::UVEC3: + return 3; + case Type::VEC4: + case Type::IVEC4: + case Type::UVEC4: + return 4; + case Type::MAT3: + return 12; + case Type::MAT4: + return 16; + default: + return 1; + } +} + +static void recommended_fetch_mode_and_comp_type(Type gpu_type, + GPUVertCompType *r_comp_type, + GPUVertFetchMode *r_fetch_mode) +{ + switch (gpu_type) { + case Type::FLOAT: + case Type::VEC2: + case Type::VEC3: + case Type::VEC4: + case Type::MAT3: + case Type::MAT4: + *r_comp_type = GPU_COMP_F32; + *r_fetch_mode = GPU_FETCH_FLOAT; + break; + case Type::INT: + case Type::IVEC2: + case Type::IVEC3: + case Type::IVEC4: + *r_comp_type = GPU_COMP_I32; + *r_fetch_mode = GPU_FETCH_INT; + break; + case Type::UINT: + case Type::UVEC2: + case Type::UVEC3: + case Type::UVEC4: + *r_comp_type = GPU_COMP_U32; + *r_fetch_mode = GPU_FETCH_INT; + break; + default: + BLI_assert(0); + } +} + void GPU_vertformat_from_shader(GPUVertFormat *format, const struct GPUShader *gpushader) { - const Shader *shader = reinterpret_cast<const Shader *>(gpushader); - shader->vertformat_from_shader(format); + GPU_vertformat_clear(format); + + uint attr_len = GPU_shader_get_attribute_len(gpushader); + int location_test = 0, attrs_added = 0; + while (attrs_added < attr_len) { + char name[256]; + Type gpu_type; + if (!GPU_shader_get_attribute_info(gpushader, location_test++, name, (int *)&gpu_type)) { + continue; + } + + GPUVertCompType comp_type; + GPUVertFetchMode fetch_mode; + recommended_fetch_mode_and_comp_type(gpu_type, &comp_type, &fetch_mode); + + int comp_len = component_size_get(gpu_type); + + GPU_vertformat_attr_add(format, name, comp_type, comp_len, fetch_mode); + attrs_added++; + } } diff --git a/source/blender/gpu/intern/gpu_vertex_format_private.h b/source/blender/gpu/intern/gpu_vertex_format_private.h index 0f8a869f6df..430008b4cb9 100644 --- a/source/blender/gpu/intern/gpu_vertex_format_private.h +++ b/source/blender/gpu/intern/gpu_vertex_format_private.h @@ -16,6 +16,7 @@ extern "C" { struct GPUVertFormat; void VertexFormat_pack(struct GPUVertFormat *format); +void VertexFormat_texture_buffer_pack(struct GPUVertFormat *format); uint padding(uint offset, uint alignment); uint vertex_buffer_size(const struct GPUVertFormat *format, uint vertex_len); diff --git a/source/blender/gpu/intern/gpu_viewport.c b/source/blender/gpu/intern/gpu_viewport.c index c3118ca320c..71bdf9e336b 100644 --- a/source/blender/gpu/intern/gpu_viewport.c +++ b/source/blender/gpu/intern/gpu_viewport.c @@ -9,16 +9,13 @@ #include <string.h> -#include "BLI_listbase.h" #include "BLI_math_vector.h" -#include "BLI_memblock.h" #include "BLI_rect.h" #include "BKE_colortools.h" #include "IMB_colormanagement.h" -#include "DNA_userdef_types.h" #include "DNA_vec_types.h" #include "GPU_capabilities.h" diff --git a/source/blender/gpu/metal/kernels/compute_texture_read.msl b/source/blender/gpu/metal/kernels/compute_texture_read.msl index 4bfb48567f9..7b0760d7620 100644 --- a/source/blender/gpu/metal/kernels/compute_texture_read.msl +++ b/source/blender/gpu/metal/kernels/compute_texture_read.msl @@ -74,7 +74,7 @@ template<> 
uchar convert_type<uchar>(float val) template<> uint convert_type<uint>(float val) { - return uint(val * double(0xFFFFFFFFu)); + return uint(val * float(0xFFFFFFFFu)); } struct TextureReadParams { diff --git a/source/blender/gpu/metal/kernels/compute_texture_update.msl b/source/blender/gpu/metal/kernels/compute_texture_update.msl index 095c495ac54..43c746e0afa 100644 --- a/source/blender/gpu/metal/kernels/compute_texture_update.msl +++ b/source/blender/gpu/metal/kernels/compute_texture_update.msl @@ -38,22 +38,6 @@ using namespace metal; # define POSITION_TYPE uint3 #endif -float3 mtl_linear_to_srgb_attr(float3 c) -{ - c = max(c, float3(0.0)); - float3 c1 = c * 12.92; - float3 c2 = 1.055 * pow(c, float3(1.0 / 2.4)) - 0.055; - return mix(c1, c2, step(float3(0.0031308), c)); -} - -float3 mtl_srgb_to_linear_attr(float3 c) -{ - c = max(c, float3(0.0)); - float3 c1 = c * (1.0 / 12.92); - float3 c2 = pow((c + 0.055) * (1.0 / 1.055), float3(2.4)); - return mix(c1, c2, step(float3(0.04045), c)); -} - struct TextureUpdateParams { int mip_index; int extent[3]; diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_float_frag.glsl b/source/blender/gpu/metal/kernels/depth_2d_update_float_frag.glsl index 9fd54f3f31f..374aedff90d 100644 --- a/source/blender/gpu/metal/kernels/depth_2d_update_float_frag.glsl +++ b/source/blender/gpu/metal/kernels/depth_2d_update_float_frag.glsl @@ -1,9 +1,4 @@ -uniform sampler2D source_data; -uniform int mip; - -in vec2 texCoord_interp; - void main() { gl_FragDepth = textureLod(source_data, texCoord_interp, mip).r; diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_info.hh b/source/blender/gpu/metal/kernels/depth_2d_update_info.hh new file mode 100644 index 00000000000..0a3281a98f2 --- /dev/null +++ b/source/blender/gpu/metal/kernels/depth_2d_update_info.hh @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_INTERFACE_INFO(depth_2d_update_iface, "").smooth(Type::VEC2, "texCoord_interp"); + +GPU_SHADER_CREATE_INFO(depth_2d_update_info_base) + .vertex_in(0, Type::VEC2, "pos") + .vertex_out(depth_2d_update_iface) + .fragment_out(0, Type::VEC4, "fragColor") + .push_constant(Type::VEC2, "extent") + .push_constant(Type::VEC2, "offset") + .push_constant(Type::VEC2, "size") + .push_constant(Type::INT, "mip") + .sampler(0, ImageType::FLOAT_2D, "source_data", Frequency::PASS) + .vertex_source("depth_2d_update_vert.glsl"); + +GPU_SHADER_CREATE_INFO(depth_2d_update_float) + .fragment_source("depth_2d_update_float_frag.glsl") + .additional_info("depth_2d_update_info_base") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(depth_2d_update_int24) + .fragment_source("depth_2d_update_int24_frag.glsl") + .additional_info("depth_2d_update_info_base") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(depth_2d_update_int32) + .fragment_source("depth_2d_update_int32_frag.glsl") + .additional_info("depth_2d_update_info_base") + .do_static_compilation(true); diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_int24_frag.glsl b/source/blender/gpu/metal/kernels/depth_2d_update_int24_frag.glsl index 7483343503f..a4d9e35d491 100644 --- a/source/blender/gpu/metal/kernels/depth_2d_update_int24_frag.glsl +++ b/source/blender/gpu/metal/kernels/depth_2d_update_int24_frag.glsl @@ -1,8 +1,4 @@ -uniform isampler2D source_data; -uniform int mip; - -in vec2 texCoord_interp; void main() { diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_int32_frag.glsl 
b/source/blender/gpu/metal/kernels/depth_2d_update_int32_frag.glsl index 75d42c57f73..421c25a2e5c 100644 --- a/source/blender/gpu/metal/kernels/depth_2d_update_int32_frag.glsl +++ b/source/blender/gpu/metal/kernels/depth_2d_update_int32_frag.glsl @@ -1,9 +1,4 @@ -uniform isampler2D source_data; -uniform int mip; - -in vec2 texCoord_interp; - void main() { uint val = textureLod(source_data, texCoord_interp, mip).r; diff --git a/source/blender/gpu/metal/kernels/depth_2d_update_vert.glsl b/source/blender/gpu/metal/kernels/depth_2d_update_vert.glsl index faae68d2f55..def0c1ae9de 100644 --- a/source/blender/gpu/metal/kernels/depth_2d_update_vert.glsl +++ b/source/blender/gpu/metal/kernels/depth_2d_update_vert.glsl @@ -1,10 +1,4 @@ -uniform vec2 extent; -uniform vec2 offset; -uniform vec2 size; -out vec2 texCoord_interp; -in vec2 pos; - void main() { vec4 rect = vec4(offset.x, offset.y, offset.x + extent.x, offset.y + extent.y); diff --git a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_frag.glsl b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_frag.glsl index b1353478593..8c81c5c0d83 100644 --- a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_frag.glsl +++ b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_frag.glsl @@ -1,10 +1,5 @@ -in vec4 uvcoordsvar; -uniform sampler2D imageTexture; -uniform int mip; -out vec4 fragColor; - void main() { vec4 tex_color = textureLod(imageTexture, uvcoordsvar.xy, mip); diff --git a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_info.hh b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_info.hh new file mode 100644 index 00000000000..6af67ad44d2 --- /dev/null +++ b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_info.hh @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_INTERFACE_INFO(fullscreen_blit_iface, "").smooth(Type::VEC4, "uvcoordsvar"); + +GPU_SHADER_CREATE_INFO(fullscreen_blit) + .vertex_in(0, Type::VEC2, "pos") + .vertex_out(fullscreen_blit_iface) + .fragment_out(0, Type::VEC4, "fragColor") + .push_constant(Type::VEC2, "fullscreen") + .push_constant(Type::VEC2, "size") + .push_constant(Type::VEC2, "dst_offset") + .push_constant(Type::VEC2, "src_offset") + .push_constant(Type::INT, "mip") + .sampler(0, ImageType::FLOAT_2D, "imageTexture", Frequency::PASS) + .vertex_source("gpu_shader_fullscreen_blit_vert.glsl") + .fragment_source("gpu_shader_fullscreen_blit_frag.glsl") + .do_static_compilation(true);
\ No newline at end of file diff --git a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_vert.glsl b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_vert.glsl index 8e52868f67d..5d5a0e2ab5f 100644 --- a/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_vert.glsl +++ b/source/blender/gpu/metal/kernels/gpu_shader_fullscreen_blit_vert.glsl @@ -1,12 +1,4 @@ -out vec4 uvcoordsvar; - -in vec2 pos; -uniform vec2 fullscreen; -uniform vec2 size; -uniform vec2 dst_offset; -uniform vec2 src_offset; - void main() { /* The position represents a 0-1 square, we first scale it by the size we want to have it on diff --git a/source/blender/gpu/metal/mtl_backend.hh b/source/blender/gpu/metal/mtl_backend.hh index 9044d8517ab..214a5d738a9 100644 --- a/source/blender/gpu/metal/mtl_backend.hh +++ b/source/blender/gpu/metal/mtl_backend.hh @@ -16,7 +16,6 @@ namespace blender::gpu { class Batch; class DrawList; class FrameBuffer; -class IndexBuf; class QueryPool; class Shader; class UniformBuf; @@ -35,19 +34,24 @@ class MTLBackend : public GPUBackend { return MTLBackend::capabilities; } - inline ~MTLBackend() + ~MTLBackend() { MTLBackend::platform_exit(); } + void delete_resources() override + { + /* Delete any resources with context active. */ + } + static bool metal_is_supported(); - inline static MTLBackend *get() + static MTLBackend *get() { return static_cast<MTLBackend *>(GPUBackend::get()); } void samplers_update() override; - inline void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) override + void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) override { /* Placeholder */ } diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm index 1064091a036..3cd7794f6c9 100644 --- a/source/blender/gpu/metal/mtl_backend.mm +++ b/source/blender/gpu/metal/mtl_backend.mm @@ -9,6 +9,11 @@ #include "gpu_backend.hh" #include "mtl_backend.hh" #include "mtl_context.hh" +#include "mtl_framebuffer.hh" +#include "mtl_index_buffer.hh" +#include "mtl_query.hh" +#include "mtl_shader.hh" +#include "mtl_uniform_buffer.hh" #include "gpu_capabilities_private.hh" #include "gpu_platform_private.hh" @@ -50,26 +55,25 @@ DrawList *MTLBackend::drawlist_alloc(int list_length) FrameBuffer *MTLBackend::framebuffer_alloc(const char *name) { - /* TODO(Metal): Implement MTLFrameBuffer. */ - return nullptr; + MTLContext *mtl_context = static_cast<MTLContext *>( + reinterpret_cast<Context *>(GPU_context_active_get())); + return new MTLFrameBuffer(mtl_context, name); }; IndexBuf *MTLBackend::indexbuf_alloc() { - /* TODO(Metal): Implement MTLIndexBuf. */ - return nullptr; + return new MTLIndexBuf(); }; QueryPool *MTLBackend::querypool_alloc() { - /* TODO(Metal): Implement MTLQueryPool. */ - return nullptr; + return new MTLQueryPool(); }; Shader *MTLBackend::shader_alloc(const char *name) { - /* TODO(Metal): Implement MTLShader. */ - return nullptr; + MTLContext *mtl_context = MTLContext::get(); + return new MTLShader(mtl_context, name); }; Texture *MTLBackend::texture_alloc(const char *name) @@ -79,8 +83,7 @@ Texture *MTLBackend::texture_alloc(const char *name) UniformBuf *MTLBackend::uniformbuf_alloc(int size, const char *name) { - /* TODO(Metal): Implement MTLUniformBuf. 
*/ - return nullptr; + return new MTLUniformBuf(size, name); }; StorageBuf *MTLBackend::storagebuf_alloc(int size, GPUUsageType usage, const char *name) @@ -125,7 +128,21 @@ void MTLBackend::render_end() void MTLBackend::render_step() { - /* Placeholder */ + /* NOTE(Metal): Primarily called from main thread, but below data-structures + * and operations are thread-safe, and GPUContext rendering coordination + * is also thread-safe. */ + + /* Flush any MTLSafeFreeLists which have previously been released by any MTLContext. */ + MTLContext::get_global_memory_manager().update_memory_pools(); + + /* End existing MTLSafeFreeList and begin new list -- + * Buffers won't `free` until all associated in-flight command buffers have completed. + * Decrement final reference count for ensuring the previous list is certainly + * released. */ + MTLSafeFreeList *cmd_free_buffer_list = + MTLContext::get_global_memory_manager().get_current_safe_list(); + MTLContext::get_global_memory_manager().begin_new_safe_list(); + cmd_free_buffer_list->decrement_reference(); } bool MTLBackend::is_inside_render_boundary() @@ -152,7 +169,7 @@ void MTLBackend::platform_init(MTLContext *ctx) eGPUSupportLevel support_level = GPU_SUPPORT_LEVEL_SUPPORTED; BLI_assert(ctx); - id<MTLDevice> mtl_device = nil; /*ctx->device; TODO(Metal): Implement MTLContext. */ + id<MTLDevice> mtl_device = ctx->device; BLI_assert(device); NSString *gpu_name = [mtl_device name]; @@ -171,7 +188,7 @@ void MTLBackend::platform_init(MTLContext *ctx) os = GPU_OS_UNIX; #endif - BLI_assert(os == GPU_OS_MAC && "Platform must be macOS"); + BLI_assert_msg(os == GPU_OS_MAC, "Platform must be macOS"); /* Determine Vendor from name. */ if (strstr(vendor, "ATI") || strstr(vendor, "AMD")) { @@ -318,7 +335,7 @@ bool MTLBackend::metal_is_supported() void MTLBackend::capabilities_init(MTLContext *ctx) { BLI_assert(ctx); - id<MTLDevice> device = nil; /*ctx->device TODO(Metal): Implement MTLContext. */ + id<MTLDevice> device = ctx->device; BLI_assert(device); /* Initialize Capabilities. */ @@ -365,6 +382,8 @@ void MTLBackend::capabilities_init(MTLContext *ctx) GCaps.shader_image_load_store_support = ([device supportsFamily:MTLGPUFamilyApple3] || MTLBackend::capabilities.supports_family_mac1 || MTLBackend::capabilities.supports_family_mac2); + /* TODO(Metal): Add support? */ + GCaps.shader_draw_parameters_support = false; GCaps.compute_shader_support = false; /* TODO(Metal): Add compute support. */ GCaps.shader_storage_buffer_objects_support = false; /* TODO(Metal): implement Storage Buffer support. */ @@ -380,11 +399,10 @@ void MTLBackend::capabilities_init(MTLContext *ctx) /* In Metal, total_thread_count is 512 or 1024, such that * threadgroup `width*height*depth <= total_thread_count` */ - unsigned int max_threads_per_threadgroup_per_dim = - ([device supportsFamily:MTLGPUFamilyApple4] || - MTLBackend::capabilities.supports_family_mac1) ? - 1024 : - 512; + uint max_threads_per_threadgroup_per_dim = ([device supportsFamily:MTLGPUFamilyApple4] || + MTLBackend::capabilities.supports_family_mac1) ?
+ 1024 : + 512; GCaps.max_work_group_size[0] = max_threads_per_threadgroup_per_dim; GCaps.max_work_group_size[1] = max_threads_per_threadgroup_per_dim; GCaps.max_work_group_size[2] = max_threads_per_threadgroup_per_dim; diff --git a/source/blender/gpu/metal/mtl_capabilities.hh b/source/blender/gpu/metal/mtl_capabilities.hh index 3afa6e31ccb..36536438bf5 100644 --- a/source/blender/gpu/metal/mtl_capabilities.hh +++ b/source/blender/gpu/metal/mtl_capabilities.hh @@ -14,12 +14,14 @@ namespace gpu { #define MTL_MAX_TEXTURE_SLOTS 128 #define MTL_MAX_SAMPLER_SLOTS MTL_MAX_TEXTURE_SLOTS +/* Max limit without using bind-less for samplers. */ +#define MTL_MAX_DEFAULT_SAMPLERS 16 #define MTL_MAX_UNIFORM_BUFFER_BINDINGS 31 #define MTL_MAX_VERTEX_INPUT_ATTRIBUTES 31 #define MTL_MAX_UNIFORMS_PER_BLOCK 64 /* Context-specific limits -- populated in 'MTLBackend::platform_init' */ -typedef struct MTLCapabilities { +struct MTLCapabilities { /* Variable Limits & feature sets. */ int max_color_render_targets = 4; /* Minimum = 4 */ @@ -40,8 +42,7 @@ typedef struct MTLCapabilities { bool supports_family_mac2 = false; bool supports_family_mac_catalyst1 = false; bool supports_family_mac_catalyst2 = false; - -} MTLCapabilities; +}; } // namespace gpu } // namespace blender diff --git a/source/blender/gpu/metal/mtl_command_buffer.mm b/source/blender/gpu/metal/mtl_command_buffer.mm new file mode 100644 index 00000000000..0e13e8d4690 --- /dev/null +++ b/source/blender/gpu/metal/mtl_command_buffer.mm @@ -0,0 +1,652 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "DNA_userdef_types.h" + +#include "mtl_backend.hh" +#include "mtl_common.hh" +#include "mtl_context.hh" +#include "mtl_debug.hh" +#include "mtl_framebuffer.hh" + +#include <fstream> + +using namespace blender; +using namespace blender::gpu; + +namespace blender::gpu { + +/* Global sync event used across MTLContext's. + * This resolves flickering artifacts from command buffer + * dependencies not being honored for work submitted between + * different GPUContext's. */ +id<MTLEvent> MTLCommandBufferManager::sync_event = nil; +uint64_t MTLCommandBufferManager::event_signal_val = 0; + +/* Counter for active command buffers. */ +int MTLCommandBufferManager::num_active_cmd_bufs = 0; + +/* -------------------------------------------------------------------- */ +/** \name MTLCommandBuffer initialization and render coordination. + * \{ */ + +void MTLCommandBufferManager::prepare(bool supports_render) +{ + render_pass_state_.reset_state(); +} + +void MTLCommandBufferManager::register_encoder_counters() +{ + encoder_count_++; + empty_ = false; +} + +id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin() +{ + if (active_command_buffer_ == nil) { + + /* Verify number of active command buffers is below limit. + * Exceeding this limit will mean we either have a leak/GPU hang + * or we should increase the command buffer limit during MTLQueue creation */ + BLI_assert(MTLCommandBufferManager::num_active_cmd_bufs < MTL_MAX_COMMAND_BUFFERS); + + if (G.debug & G_DEBUG_GPU) { + /* Debug: Enable Advanced Errors for GPU work execution. 
*/ + MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init]; + desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus; + desc.retainedReferences = YES; + active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc]; + } + else { + active_command_buffer_ = [context_.queue commandBuffer]; + } + [active_command_buffer_ retain]; + MTLCommandBufferManager::num_active_cmd_bufs++; + + /* Ensure command buffers execute in submission order across multiple MTLContext's. */ + if (this->sync_event != nil) { + [active_command_buffer_ encodeWaitForEvent:this->sync_event value:this->event_signal_val]; + } + + /* Ensure we begin new Scratch Buffer if we are on a new frame. */ + MTLScratchBufferManager &mem = context_.memory_manager; + mem.ensure_increment_scratch_buffer(); + + /* Reset Command buffer heuristics. */ + this->reset_counters(); + } + BLI_assert(active_command_buffer_ != nil); + return active_command_buffer_; +} + +/* If wait is true, CPU will stall until GPU work has completed. */ +bool MTLCommandBufferManager::submit(bool wait) +{ + /* Skip submission if command buffer is empty. */ + if (empty_ || active_command_buffer_ == nil) { + return false; + } + + /* Ensure current encoders are finished. */ + this->end_active_command_encoder(); + BLI_assert(active_command_encoder_type_ == MTL_NO_COMMAND_ENCODER); + + /* Flush active ScratchBuffer associated with parent MTLContext. */ + context_.memory_manager.flush_active_scratch_buffer(); + + /*** Submit Command Buffer. ***/ + /* Strict ordering ensures command buffers are guaranteed to execute after a previous + * one has completed. Resolves flickering when command buffers are submitted from + * different MTLContext's. */ + if (MTLCommandBufferManager::sync_event == nil) { + MTLCommandBufferManager::sync_event = [context_.device newEvent]; + BLI_assert(MTLCommandBufferManager::sync_event); + [MTLCommandBufferManager::sync_event retain]; + } + BLI_assert(MTLCommandBufferManager::sync_event != nil); + MTLCommandBufferManager::event_signal_val++; + + [active_command_buffer_ encodeSignalEvent:MTLCommandBufferManager::sync_event + value:MTLCommandBufferManager::event_signal_val]; + + /* Command buffer lifetime tracking. */ + /* Increment current MTLSafeFreeList reference counter to flag MTLBuffers freed within + * the current command buffer lifetime as used. + * This ensures that in-use resources are not prematurely de-referenced and returned to the + * available buffer pool while they are in-use by the GPU. */ + MTLSafeFreeList *cmd_free_buffer_list = + MTLContext::get_global_memory_manager().get_current_safe_list(); + BLI_assert(cmd_free_buffer_list); + cmd_free_buffer_list->increment_reference(); + + id<MTLCommandBuffer> cmd_buffer_ref = active_command_buffer_; + [cmd_buffer_ref retain]; + + [cmd_buffer_ref addCompletedHandler:^(id<MTLCommandBuffer> cb) { + /* Upon command buffer completion, decrement MTLSafeFreeList reference count + * to allow buffers no longer in use by this CommandBuffer to be freed. */ + cmd_free_buffer_list->decrement_reference(); + + /* Release command buffer after completion callback handled. */ + [cmd_buffer_ref release]; + + /* Decrement count. */ + MTLCommandBufferManager::num_active_cmd_bufs--; + }]; + + /* Submit command buffer to GPU. */ + [active_command_buffer_ commit]; + + if (wait || (G.debug & G_DEBUG_GPU)) { + /* Wait until current GPU work has finished executing. 
*/ + [active_command_buffer_ waitUntilCompleted]; + + /* Command buffer execution debugging can return an error message if + * execution has failed or encountered GPU-side errors. */ + if (G.debug & G_DEBUG_GPU) { + + NSError *error = [active_command_buffer_ error]; + if (error != nil) { + NSLog(@"%@", error); + BLI_assert(false); + + @autoreleasepool { + const char *stringAsChar = [[NSString stringWithFormat:@"%@", error] UTF8String]; + + std::ofstream outfile; + outfile.open("command_buffer_error.txt", std::fstream::out | std::fstream::app); + outfile << stringAsChar; + outfile.close(); + } + } + } + } + + /* Release previous frames command buffer and reset active cmd buffer. */ + if (last_submitted_command_buffer_ != nil) { + + BLI_assert(MTLBackend::get()->is_inside_render_boundary()); + [last_submitted_command_buffer_ autorelease]; + last_submitted_command_buffer_ = nil; + } + last_submitted_command_buffer_ = active_command_buffer_; + active_command_buffer_ = nil; + + return true; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Render Command Encoder Utility and management functions. + * \{ */ + +/* Fetch/query current encoder. */ +bool MTLCommandBufferManager::is_inside_render_pass() +{ + return (active_command_encoder_type_ == MTL_RENDER_COMMAND_ENCODER); +} + +bool MTLCommandBufferManager::is_inside_blit() +{ + return (active_command_encoder_type_ == MTL_BLIT_COMMAND_ENCODER); +} + +bool MTLCommandBufferManager::is_inside_compute() +{ + return (active_command_encoder_type_ == MTL_COMPUTE_COMMAND_ENCODER); +} + +id<MTLRenderCommandEncoder> MTLCommandBufferManager::get_active_render_command_encoder() +{ + /* Calling code should check if inside render pass. Otherwise nil. */ + return active_render_command_encoder_; +} + +id<MTLBlitCommandEncoder> MTLCommandBufferManager::get_active_blit_command_encoder() +{ + /* Calling code should check if inside render pass. Otherwise nil. */ + return active_blit_command_encoder_; +} + +id<MTLComputeCommandEncoder> MTLCommandBufferManager::get_active_compute_command_encoder() +{ + /* Calling code should check if inside render pass. Otherwise nil. */ + return active_compute_command_encoder_; +} + +MTLFrameBuffer *MTLCommandBufferManager::get_active_framebuffer() +{ + /* If outside of RenderPass, nullptr will be returned. */ + if (this->is_inside_render_pass()) { + return active_frame_buffer_; + } + return nullptr; +} + +/* Encoder and Pass management. */ +/* End currently active MTLCommandEncoder. */ +bool MTLCommandBufferManager::end_active_command_encoder() +{ + + /* End active encoder if one is active. */ + if (active_command_encoder_type_ != MTL_NO_COMMAND_ENCODER) { + + switch (active_command_encoder_type_) { + case MTL_RENDER_COMMAND_ENCODER: { + /* Verify a RenderCommandEncoder is active and end. */ + BLI_assert(active_render_command_encoder_ != nil); + + /* Complete Encoding. */ + [active_render_command_encoder_ endEncoding]; + [active_render_command_encoder_ release]; + active_render_command_encoder_ = nil; + active_command_encoder_type_ = MTL_NO_COMMAND_ENCODER; + + /* Reset associated frame-buffer flag. */ + active_frame_buffer_ = nullptr; + active_pass_descriptor_ = nullptr; + return true; + } + + case MTL_BLIT_COMMAND_ENCODER: { + /* Verify a RenderCommandEncoder is active and end. 
*/ + BLI_assert(active_blit_command_encoder_ != nil); + [active_blit_command_encoder_ endEncoding]; + [active_blit_command_encoder_ release]; + active_blit_command_encoder_ = nil; + active_command_encoder_type_ = MTL_NO_COMMAND_ENCODER; + return true; + } + + case MTL_COMPUTE_COMMAND_ENCODER: { + /* Verify a RenderCommandEncoder is active and end. */ + BLI_assert(active_compute_command_encoder_ != nil); + [active_compute_command_encoder_ endEncoding]; + [active_compute_command_encoder_ release]; + active_compute_command_encoder_ = nil; + active_command_encoder_type_ = MTL_NO_COMMAND_ENCODER; + return true; + } + + default: { + BLI_assert(false && "Invalid command encoder type"); + return false; + } + }; + } + else { + /* MTL_NO_COMMAND_ENCODER. */ + BLI_assert(active_render_command_encoder_ == nil); + BLI_assert(active_blit_command_encoder_ == nil); + BLI_assert(active_compute_command_encoder_ == nil); + return false; + } +} + +id<MTLRenderCommandEncoder> MTLCommandBufferManager::ensure_begin_render_command_encoder( + MTLFrameBuffer *ctx_framebuffer, bool force_begin, bool *new_pass) +{ + /* Ensure valid frame-buffer. */ + BLI_assert(ctx_framebuffer != nullptr); + + /* Ensure active command buffer. */ + id<MTLCommandBuffer> cmd_buf = this->ensure_begin(); + BLI_assert(cmd_buf); + + /* Begin new command encoder if the currently active one is + * incompatible or requires updating. */ + if (active_command_encoder_type_ != MTL_RENDER_COMMAND_ENCODER || + active_frame_buffer_ != ctx_framebuffer || force_begin) { + this->end_active_command_encoder(); + + /* Determine if this is a re-bind of the same frame-buffer. */ + bool is_rebind = (active_frame_buffer_ == ctx_framebuffer); + + /* Generate RenderPassDescriptor from bound frame-buffer. */ + BLI_assert(ctx_framebuffer); + active_frame_buffer_ = ctx_framebuffer; + active_pass_descriptor_ = active_frame_buffer_->bake_render_pass_descriptor( + is_rebind && (!active_frame_buffer_->get_pending_clear())); + + /* Determine if there is a visibility buffer assigned to the context. */ + gpu::MTLBuffer *visibility_buffer = context_.get_visibility_buffer(); + this->active_pass_descriptor_.visibilityResultBuffer = + (visibility_buffer) ? visibility_buffer->get_metal_buffer() : nil; + context_.clear_visibility_dirty(); + + /* Ensure we have already cleaned up our previous render command encoder. */ + BLI_assert(active_render_command_encoder_ == nil); + + /* Create new RenderCommandEncoder based on descriptor (and begin encoding). */ + active_render_command_encoder_ = [cmd_buf + renderCommandEncoderWithDescriptor:active_pass_descriptor_]; + [active_render_command_encoder_ retain]; + active_command_encoder_type_ = MTL_RENDER_COMMAND_ENCODER; + + /* Update command buffer encoder heuristics. */ + this->register_encoder_counters(); + + /* Apply initial state. */ + /* Update Viewport and Scissor State */ + active_frame_buffer_->apply_state(); + + /* FLAG FRAMEBUFFER AS CLEARED -- A clear only lasts as long as one has been specified. + * After this, resets to Load attachments to parallel GL behavior. */ + active_frame_buffer_->mark_cleared(); + + /* Reset RenderPassState to ensure resource bindings are re-applied. */ + render_pass_state_.reset_state(); + + /* Return true as new pass started. */ + *new_pass = true; + } + else { + /* No new pass. 
*/ + *new_pass = false; + } + + BLI_assert(active_render_command_encoder_ != nil); + return active_render_command_encoder_; +} + +id<MTLBlitCommandEncoder> MTLCommandBufferManager::ensure_begin_blit_encoder() +{ + /* Ensure active command buffer. */ + id<MTLCommandBuffer> cmd_buf = this->ensure_begin(); + BLI_assert(cmd_buf); + + /* Ensure no existing command encoder of a different type is active. */ + if (active_command_encoder_type_ != MTL_BLIT_COMMAND_ENCODER) { + this->end_active_command_encoder(); + } + + /* Begin new Blit Encoder. */ + if (active_blit_command_encoder_ == nil) { + active_blit_command_encoder_ = [cmd_buf blitCommandEncoder]; + BLI_assert(active_blit_command_encoder_ != nil); + [active_blit_command_encoder_ retain]; + active_command_encoder_type_ = MTL_BLIT_COMMAND_ENCODER; + + /* Update command buffer encoder heuristics. */ + this->register_encoder_counters(); + } + BLI_assert(active_blit_command_encoder_ != nil); + return active_blit_command_encoder_; +} + +id<MTLComputeCommandEncoder> MTLCommandBufferManager::ensure_begin_compute_encoder() +{ + /* Ensure active command buffer. */ + id<MTLCommandBuffer> cmd_buf = this->ensure_begin(); + BLI_assert(cmd_buf); + + /* Ensure no existing command encoder of a different type is active. */ + if (active_command_encoder_type_ != MTL_COMPUTE_COMMAND_ENCODER) { + this->end_active_command_encoder(); + } + + /* Begin new Compute Encoder. */ + if (active_compute_command_encoder_ == nil) { + active_compute_command_encoder_ = [cmd_buf computeCommandEncoder]; + BLI_assert(active_compute_command_encoder_ != nil); + [active_compute_command_encoder_ retain]; + active_command_encoder_type_ = MTL_COMPUTE_COMMAND_ENCODER; + + /* Update command buffer encoder heuristics. */ + this->register_encoder_counters(); + } + BLI_assert(active_compute_command_encoder_ != nil); + return active_compute_command_encoder_; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Command buffer heuristics. + * \{ */ + +/* Rendering Heuristics. */ +void MTLCommandBufferManager::register_draw_counters(int vertex_submission) +{ + current_draw_call_count_++; + vertex_submitted_count_ += vertex_submission; + empty_ = false; +} + +/* Reset workload counters. */ +void MTLCommandBufferManager::reset_counters() +{ + empty_ = true; + current_draw_call_count_ = 0; + encoder_count_ = 0; + vertex_submitted_count_ = 0; +} + +/* Workload evaluation. */ +bool MTLCommandBufferManager::do_break_submission() +{ + /* Skip if no active command buffer. */ + if (active_command_buffer_ == nil) { + return false; + } + + /* Use optimized heuristic to split heavy command buffer submissions to better saturate the + * hardware and also reduce stalling from individual large submissions. */ + if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_ANY, GPU_DRIVER_ANY) || + GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_ANY, GPU_DRIVER_ANY)) { + return ((current_draw_call_count_ > 30000) || (vertex_submitted_count_ > 100000000) || + (encoder_count_ > 25)); + } + else { + /* Apple Silicon is less efficient if splitting submissions. */ + return false; + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Command buffer debugging. + * \{ */ + +/* Debug. 
*/ +void MTLCommandBufferManager::push_debug_group(const char *name, int index) +{ + id<MTLCommandBuffer> cmd = this->ensure_begin(); + if (cmd != nil) { + [cmd pushDebugGroup:[NSString stringWithFormat:@"%s_%d", name, index]]; + } +} + +void MTLCommandBufferManager::pop_debug_group() +{ + id<MTLCommandBuffer> cmd = this->ensure_begin(); + if (cmd != nil) { + [cmd popDebugGroup]; + } +} + +/* Workload Synchronization. */ +bool MTLCommandBufferManager::insert_memory_barrier(eGPUBarrier barrier_bits, + eGPUStageBarrierBits before_stages, + eGPUStageBarrierBits after_stages) +{ + /* Only supporting Metal on 10.14 onward anyway - Check required for warnings. */ + if (@available(macOS 10.14, *)) { + + /* Resolve scope. */ + MTLBarrierScope scope = 0; + if (barrier_bits & GPU_BARRIER_SHADER_IMAGE_ACCESS || + barrier_bits & GPU_BARRIER_TEXTURE_FETCH) { + scope = scope | MTLBarrierScopeTextures | MTLBarrierScopeRenderTargets; + } + if (barrier_bits & GPU_BARRIER_SHADER_STORAGE || + barrier_bits & GPU_BARRIER_VERTEX_ATTRIB_ARRAY || + barrier_bits & GPU_BARRIER_ELEMENT_ARRAY) { + scope = scope | MTLBarrierScopeBuffers; + } + + if (scope != 0) { + /* Issue barrier based on encoder. */ + switch (active_command_encoder_type_) { + case MTL_NO_COMMAND_ENCODER: + case MTL_BLIT_COMMAND_ENCODER: { + /* No barrier to be inserted. */ + return false; + } + + /* Rendering. */ + case MTL_RENDER_COMMAND_ENCODER: { + /* Currently flagging both stages -- can use bits above to filter on stage type -- + * though full barrier is safe for now*/ + MTLRenderStages before_stage_flags = 0; + MTLRenderStages after_stage_flags = 0; + if (before_stages & GPU_BARRIER_STAGE_VERTEX && + !(before_stages & GPU_BARRIER_STAGE_FRAGMENT)) { + before_stage_flags = before_stage_flags | MTLRenderStageVertex; + } + if (before_stages & GPU_BARRIER_STAGE_FRAGMENT) { + before_stage_flags = before_stage_flags | MTLRenderStageFragment; + } + if (after_stages & GPU_BARRIER_STAGE_VERTEX) { + after_stage_flags = after_stage_flags | MTLRenderStageVertex; + } + if (after_stages & GPU_BARRIER_STAGE_FRAGMENT) { + after_stage_flags = MTLRenderStageFragment; + } + + id<MTLRenderCommandEncoder> rec = this->get_active_render_command_encoder(); + BLI_assert(rec != nil); + [rec memoryBarrierWithScope:scope + afterStages:after_stage_flags + beforeStages:before_stage_flags]; + return true; + } + + /* Compute. */ + case MTL_COMPUTE_COMMAND_ENCODER: { + id<MTLComputeCommandEncoder> rec = this->get_active_compute_command_encoder(); + BLI_assert(rec != nil); + [rec memoryBarrierWithScope:scope]; + return true; + } + } + } + } + /* No barrier support. */ + return false; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Render Pass State for active RenderCommandEncoder + * \{ */ +/* Reset binding state when a new RenderCommandEncoder is bound, to ensure + * pipeline resources are re-applied to the new Encoder. + * NOTE: In Metal, state is only persistent within an MTLCommandEncoder, + * not globally. */ +void MTLRenderPassState::reset_state() +{ + /* Reset Cached pipeline state. */ + this->bound_pso = nil; + this->bound_ds_state = nil; + + /* Clear shader binding. */ + this->last_bound_shader_state.set(nullptr, 0); + + /* Other states. */ + MTLFrameBuffer *fb = this->cmd.get_active_framebuffer(); + this->last_used_stencil_ref_value = 0; + this->last_scissor_rect = {0, + 0, + (uint)((fb != nullptr) ? fb->get_width() : 0), + (uint)((fb != nullptr) ? 
fb->get_height() : 0)}; + + /* Reset cached resource binding state */ + for (int ubo = 0; ubo < MTL_MAX_UNIFORM_BUFFER_BINDINGS; ubo++) { + this->cached_vertex_buffer_bindings[ubo].is_bytes = false; + this->cached_vertex_buffer_bindings[ubo].metal_buffer = nil; + this->cached_vertex_buffer_bindings[ubo].offset = -1; + + this->cached_fragment_buffer_bindings[ubo].is_bytes = false; + this->cached_fragment_buffer_bindings[ubo].metal_buffer = nil; + this->cached_fragment_buffer_bindings[ubo].offset = -1; + } + + /* Reset cached texture and sampler state binding state. */ + for (int tex = 0; tex < MTL_MAX_TEXTURE_SLOTS; tex++) { + this->cached_vertex_texture_bindings[tex].metal_texture = nil; + this->cached_vertex_sampler_state_bindings[tex].sampler_state = nil; + this->cached_vertex_sampler_state_bindings[tex].is_arg_buffer_binding = false; + + this->cached_fragment_texture_bindings[tex].metal_texture = nil; + this->cached_fragment_sampler_state_bindings[tex].sampler_state = nil; + this->cached_fragment_sampler_state_bindings[tex].is_arg_buffer_binding = false; + } +} + +/* Bind Texture to current RenderCommandEncoder. */ +void MTLRenderPassState::bind_vertex_texture(id<MTLTexture> tex, uint slot) +{ + if (this->cached_vertex_texture_bindings[slot].metal_texture != tex) { + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + BLI_assert(rec != nil); + [rec setVertexTexture:tex atIndex:slot]; + this->cached_vertex_texture_bindings[slot].metal_texture = tex; + } +} + +void MTLRenderPassState::bind_fragment_texture(id<MTLTexture> tex, uint slot) +{ + if (this->cached_fragment_texture_bindings[slot].metal_texture != tex) { + id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder(); + BLI_assert(rec != nil); + [rec setFragmentTexture:tex atIndex:slot]; + this->cached_fragment_texture_bindings[slot].metal_texture = tex; + } +} + +void MTLRenderPassState::bind_vertex_sampler(MTLSamplerBinding &sampler_binding, + bool use_argument_buffer_for_samplers, + uint slot) +{ + /* TODO(Metal): Implement RenderCommandEncoder vertex sampler binding utility. This will be + * implemented alongside MTLShader. */ +} + +void MTLRenderPassState::bind_fragment_sampler(MTLSamplerBinding &sampler_binding, + bool use_argument_buffer_for_samplers, + uint slot) +{ + /* TODO(Metal): Implement RenderCommandEncoder fragment sampler binding utility. This will be + * implemented alongside MTLShader. */ +} + +void MTLRenderPassState::bind_vertex_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index) +{ + /* TODO(Metal): Implement RenderCommandEncoder vertex buffer binding utility. This will be + * implemented alongside the full MTLMemoryManager. */ +} + +void MTLRenderPassState::bind_fragment_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index) +{ + /* TODO(Metal): Implement RenderCommandEncoder fragment buffer binding utility. This will be + * implemented alongside the full MTLMemoryManager. */ +} + +void MTLRenderPassState::bind_vertex_bytes(void *bytes, uint length, uint index) +{ + /* TODO(Metal): Implement RenderCommandEncoder vertex bytes binding utility. This will be + * implemented alongside the full MTLMemoryManager. */ +} + +void MTLRenderPassState::bind_fragment_bytes(void *bytes, uint length, uint index) +{ + /* TODO(Metal): Implement RenderCommandEncoder fragment bytes binding utility. This will be + * implemented alongside the full MTLMemoryManager. 
*/ +} + +/** \} */ + +} // blender::gpu diff --git a/source/blender/gpu/metal/mtl_common.hh b/source/blender/gpu/metal/mtl_common.hh index aa60d3aff61..b6f9c0050a9 100644 --- a/source/blender/gpu/metal/mtl_common.hh +++ b/source/blender/gpu/metal/mtl_common.hh @@ -4,7 +4,15 @@ #define __MTL_COMMON // -- Renderer Options -- +#define MTL_MAX_DRAWABLES 3 #define MTL_MAX_SET_BYTES_SIZE 4096 #define MTL_FORCE_WAIT_IDLE 0 +#define MTL_MAX_COMMAND_BUFFERS 64 +/* Number of frames for which we retain in-flight resources such as scratch buffers. + * Set as number of GPU frames in flight, plus an additional value for extra possible CPU frame. */ +#define MTL_NUM_SAFE_FRAMES (MTL_MAX_DRAWABLES + 1) + +/* Display debug information about missing attributes and incorrect vertex formats. */ +#define MTL_DEBUG_SHADER_ATTRIBUTES 0 #endif diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh index 1849a04ea48..e996193e722 100644 --- a/source/blender/gpu/metal/mtl_context.hh +++ b/source/blender/gpu/metal/mtl_context.hh @@ -3,6 +3,8 @@ /** \file * \ingroup gpu */ +#pragma once + #include "MEM_guardedalloc.h" #include "gpu_context_private.hh" @@ -10,7 +12,13 @@ #include "GPU_common_types.h" #include "GPU_context.h" +#include "mtl_backend.hh" #include "mtl_capabilities.hh" +#include "mtl_common.hh" +#include "mtl_framebuffer.hh" +#include "mtl_memory.hh" +#include "mtl_shader.hh" +#include "mtl_shader_interface.hh" #include "mtl_texture.hh" #include <Cocoa/Cocoa.h> @@ -23,12 +31,117 @@ namespace blender::gpu { -class MTLShader; +/* Forward Declarations */ +class MTLContext; +class MTLCommandBufferManager; class MTLUniformBuf; -class MTLBuffer; + +/* Structs containing information on current binding state for textures and samplers. */ +struct MTLTextureBinding { + bool used; + + /* Same value as index in bindings array. */ + uint slot_index; + gpu::MTLTexture *texture_resource; +}; + +struct MTLSamplerBinding { + bool used; + MTLSamplerState state; + + bool operator==(MTLSamplerBinding const &other) const + { + return (used == other.used && state == other.state); + } +}; + +/* Metal Context Render Pass State -- Used to track active RenderCommandEncoder state based on + * bound MTLFrameBuffer's. Owned by MTLContext. */ +class MTLRenderPassState { + friend class MTLContext; + + public: + MTLRenderPassState(MTLContext &context, MTLCommandBufferManager &command_buffer_manager) + : ctx(context), cmd(command_buffer_manager){}; + + /* Given a RenderPassState is associated with a live RenderCommandEncoder, + * this state sits within the MTLCommandBufferManager. */ + MTLContext &ctx; + MTLCommandBufferManager &cmd; + + /* Caching of resource bindings for active MTLRenderCommandEncoder. + * In Metal, resource bindings are local to the MTLCommandEncoder, + * not globally to the whole pipeline/cmd buffer. */ + struct MTLBoundShaderState { + MTLShader *shader_ = nullptr; + uint pso_index_; + void set(MTLShader *shader, uint pso_index) + { + shader_ = shader; + pso_index_ = pso_index; + } + }; + + MTLBoundShaderState last_bound_shader_state; + id<MTLRenderPipelineState> bound_pso = nil; + id<MTLDepthStencilState> bound_ds_state = nil; + uint last_used_stencil_ref_value = 0; + MTLScissorRect last_scissor_rect; + + /* Caching of CommandEncoder Vertex/Fragment buffer bindings. */ + struct BufferBindingCached { + /* Whether the given binding slot uses byte data (Push Constant equivalent) + * or an MTLBuffer.
*/ + bool is_bytes; + id<MTLBuffer> metal_buffer; + int offset; + }; + + BufferBindingCached cached_vertex_buffer_bindings[MTL_MAX_UNIFORM_BUFFER_BINDINGS]; + BufferBindingCached cached_fragment_buffer_bindings[MTL_MAX_UNIFORM_BUFFER_BINDINGS]; + + /* Caching of CommandEncoder textures bindings. */ + struct TextureBindingCached { + id<MTLTexture> metal_texture; + }; + + TextureBindingCached cached_vertex_texture_bindings[MTL_MAX_TEXTURE_SLOTS]; + TextureBindingCached cached_fragment_texture_bindings[MTL_MAX_TEXTURE_SLOTS]; + + /* Cached of CommandEncoder sampler states. */ + struct SamplerStateBindingCached { + MTLSamplerState binding_state; + id<MTLSamplerState> sampler_state; + bool is_arg_buffer_binding; + }; + + SamplerStateBindingCached cached_vertex_sampler_state_bindings[MTL_MAX_TEXTURE_SLOTS]; + SamplerStateBindingCached cached_fragment_sampler_state_bindings[MTL_MAX_TEXTURE_SLOTS]; + + /* Reset RenderCommandEncoder binding state. */ + void reset_state(); + + /* Texture Binding (RenderCommandEncoder). */ + void bind_vertex_texture(id<MTLTexture> tex, uint slot); + void bind_fragment_texture(id<MTLTexture> tex, uint slot); + + /* Sampler Binding (RenderCommandEncoder). */ + void bind_vertex_sampler(MTLSamplerBinding &sampler_binding, + bool use_argument_buffer_for_samplers, + uint slot); + void bind_fragment_sampler(MTLSamplerBinding &sampler_binding, + bool use_argument_buffer_for_samplers, + uint slot); + + /* Buffer binding (RenderCommandEncoder). */ + void bind_vertex_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index); + void bind_fragment_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index); + void bind_vertex_bytes(void *bytes, uint length, uint index); + void bind_fragment_bytes(void *bytes, uint length, uint index); +}; /* Depth Stencil State */ -typedef struct MTLContextDepthStencilState { +struct MTLContextDepthStencilState { /* Depth State. */ bool depth_write_enable; @@ -44,9 +157,9 @@ typedef struct MTLContextDepthStencilState { /* Stencil State. */ bool stencil_test_enabled; - unsigned int stencil_read_mask; - unsigned int stencil_write_mask; - unsigned int stencil_ref; + uint stencil_read_mask; + uint stencil_write_mask; + uint stencil_ref; MTLCompareFunction stencil_func; MTLStencilOperation stencil_op_front_stencil_fail; @@ -62,10 +175,10 @@ typedef struct MTLContextDepthStencilState { bool has_depth_target; bool has_stencil_target; - /* TODO(Metal): Consider optimizing this function using memcmp. + /* TODO(Metal): Consider optimizing this function using `memcmp`. * Un-used, but differing, stencil state leads to over-generation - * of state objects when doing trivial compare. */ - inline bool operator==(const MTLContextDepthStencilState &other) const + * of state objects when doing trivial compare. */ + bool operator==(const MTLContextDepthStencilState &other) const { bool depth_state_equality = (has_depth_target == other.has_depth_target && depth_write_enable == other.depth_write_enable && @@ -98,7 +211,7 @@ typedef struct MTLContextDepthStencilState { * - setStencilReferenceValue: * - setDepthBias:slopeScale:clamp: */ - inline std::size_t hash() const + std::size_t hash() const { std::size_t boolean_bitmask = (this->depth_write_enable ? 1 : 0) | ((this->depth_test_enabled ? 
1 : 0) << 1) | @@ -127,9 +240,9 @@ typedef struct MTLContextDepthStencilState { std::size_t final_hash = (main_hash << 8) | boolean_bitmask; return final_hash; } -} MTLContextDepthStencilState; +}; -typedef struct MTLContextTextureUtils { +struct MTLContextTextureUtils { /* Depth Update Utilities */ /* Depth texture updates are not directly supported with Blit operations, similarly, we cannot @@ -174,8 +287,7 @@ typedef struct MTLContextTextureUtils { blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>> texture_buffer_update_compute_psos; - template<typename T> - inline void free_cached_pso_map(blender::Map<T, id<MTLComputePipelineState>> &map) + template<typename T> void free_cached_pso_map(blender::Map<T, id<MTLComputePipelineState>> &map) { for (typename blender::Map<T, id<MTLComputePipelineState>>::MutableItem item : map.items()) { [item.value release]; @@ -183,12 +295,12 @@ typedef struct MTLContextTextureUtils { map.clear(); } - inline void init() + void init() { fullscreen_blit_shader = nullptr; } - inline void cleanup() + void cleanup() { if (fullscreen_blit_shader) { GPU_shader_free(fullscreen_blit_shader); @@ -213,37 +325,16 @@ typedef struct MTLContextTextureUtils { free_cached_pso_map(texture_cube_array_update_compute_psos); free_cached_pso_map(texture_buffer_update_compute_psos); } - -} MTLContextTextureUtils; - -/* Structs containing information on current binding state for textures and samplers. */ -typedef struct MTLTextureBinding { - bool used; - - /* Same value as index in bindings array. */ - unsigned int texture_slot_index; - gpu::MTLTexture *texture_resource; - -} MTLTextureBinding; - -typedef struct MTLSamplerBinding { - bool used; - MTLSamplerState state; - - bool operator==(MTLSamplerBinding const &other) const - { - return (used == other.used && state == other.state); - } -} MTLSamplerBinding; +}; /* Combined sampler state configuration for Argument Buffer caching. */ struct MTLSamplerArray { - unsigned int num_samplers; + uint num_samplers; /* MTLSamplerState permutations between 0..256 - slightly more than a byte. */ MTLSamplerState mtl_sampler_flags[MTL_MAX_TEXTURE_SLOTS]; id<MTLSamplerState> mtl_sampler[MTL_MAX_TEXTURE_SLOTS]; - inline bool operator==(const MTLSamplerArray &other) const + bool operator==(const MTLSamplerArray &other) const { if (this->num_samplers != other.num_samplers) { return false; @@ -253,7 +344,7 @@ struct MTLSamplerArray { sizeof(MTLSamplerState) * this->num_samplers) == 0); } - inline uint32_t hash() const + uint32_t hash() const { uint32_t hash = this->num_samplers; for (int i = 0; i < this->num_samplers; i++) { @@ -267,7 +358,7 @@ typedef enum MTLPipelineStateDirtyFlag { MTL_PIPELINE_STATE_NULL_FLAG = 0, /* Whether we need to call setViewport. */ MTL_PIPELINE_STATE_VIEWPORT_FLAG = (1 << 0), - /* Whether we need to call setScissor.*/ + /* Whether we need to call setScissor. */ MTL_PIPELINE_STATE_SCISSOR_FLAG = (1 << 1), /* Whether we need to update/rebind active depth stencil state. */ MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG = (1 << 2), @@ -287,12 +378,12 @@ typedef enum MTLPipelineStateDirtyFlag { /* Ignore full flag bit-mask `MTL_PIPELINE_STATE_ALL_FLAG`. 
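Both MTLContextDepthStencilState and MTLSamplerArray pair operator== with a hash() so they can key caches of immutable Metal state objects: an identical configuration re-uses the object created earlier instead of generating a new one. A reduced sketch of that lookup, using std::unordered_map in place of blender::Map and a key trimmed to two fields.

#include <unordered_map>
#import <Metal/Metal.h>

/* Trimmed stand-in for the real depth-stencil state key. */
struct DepthStencilKeySketch {
  bool depth_write_enable = false;
  MTLCompareFunction depth_function = MTLCompareFunctionAlways;

  bool operator==(const DepthStencilKeySketch &other) const
  {
    return depth_write_enable == other.depth_write_enable &&
           depth_function == other.depth_function;
  }
  std::size_t hash() const
  {
    return (std::size_t(depth_function) << 1) | (depth_write_enable ? 1 : 0);
  }
};

struct DepthStencilKeyHasher {
  std::size_t operator()(const DepthStencilKeySketch &k) const { return k.hash(); }
};

/* Cache lookup: only create a new MTLDepthStencilState when the key is unseen. */
static id<MTLDepthStencilState> cached_ds_state_sketch(
    id<MTLDevice> device,
    std::unordered_map<DepthStencilKeySketch, id<MTLDepthStencilState>, DepthStencilKeyHasher> &cache,
    const DepthStencilKeySketch &key)
{
  auto it = cache.find(key);
  if (it != cache.end()) {
    return it->second;
  }
  MTLDepthStencilDescriptor *desc = [[MTLDepthStencilDescriptor alloc] init];
  desc.depthWriteEnabled = key.depth_write_enable;
  desc.depthCompareFunction = key.depth_function;
  id<MTLDepthStencilState> state = [device newDepthStencilStateWithDescriptor:desc];
  [desc release];
  cache[key] = state;
  return state;
}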
*/ ENUM_OPERATORS(MTLPipelineStateDirtyFlag, MTL_PIPELINE_STATE_CULLMODE_FLAG); -typedef struct MTLUniformBufferBinding { +struct MTLUniformBufferBinding { bool bound; MTLUniformBuf *ubo; -} MTLUniformBufferBinding; +}; -typedef struct MTLContextGlobalShaderPipelineState { +struct MTLContextGlobalShaderPipelineState { bool initialised; /* Whether the pipeline state has been modified since application. @@ -358,35 +449,140 @@ typedef struct MTLContextGlobalShaderPipelineState { /* Render parameters. */ float point_size = 1.0f; float line_width = 1.0f; +}; -} MTLContextGlobalShaderPipelineState; +/* Command Buffer Manager - Owned by MTLContext. + * The MTLCommandBufferManager represents all work associated with + * a command buffer of a given identity. This manager is a fixed-state + * on the context, which coordinates the lifetime of command buffers + * for particular categories of work. + * + * This ensures operations on command buffers, and the state associated, + * is correctly tracked and managed. Workload submission and MTLCommandEncoder + * coordination is managed from here. + * + * There is currently only one MTLCommandBufferManager for managing submission + * of the "main" rendering commands. A secondary upload command buffer track, + * or asynchronous compute command buffer track may be added in the future. */ +class MTLCommandBufferManager { + friend class MTLContext; -/* Metal Buffer */ -typedef struct MTLTemporaryBufferRange { - id<MTLBuffer> metal_buffer; - void *host_ptr; - unsigned long long buffer_offset; - unsigned long long size; - MTLResourceOptions options; + public: + /* Event to coordinate sequential execution across all "main" command buffers. */ + static id<MTLEvent> sync_event; + static uint64_t event_signal_val; + + /* Counter for active command buffers. */ + static int num_active_cmd_bufs; + + private: + /* Associated Context and properties. */ + MTLContext &context_; + bool supports_render_ = false; + + /* CommandBuffer tracking. */ + id<MTLCommandBuffer> active_command_buffer_ = nil; + id<MTLCommandBuffer> last_submitted_command_buffer_ = nil; + + /* Active MTLCommandEncoders. */ + enum { + MTL_NO_COMMAND_ENCODER = 0, + MTL_RENDER_COMMAND_ENCODER = 1, + MTL_BLIT_COMMAND_ENCODER = 2, + MTL_COMPUTE_COMMAND_ENCODER = 3 + } active_command_encoder_type_ = MTL_NO_COMMAND_ENCODER; + + id<MTLRenderCommandEncoder> active_render_command_encoder_ = nil; + id<MTLBlitCommandEncoder> active_blit_command_encoder_ = nil; + id<MTLComputeCommandEncoder> active_compute_command_encoder_ = nil; + + /* State associated with active RenderCommandEncoder. */ + MTLRenderPassState render_pass_state_; + MTLFrameBuffer *active_frame_buffer_ = nullptr; + MTLRenderPassDescriptor *active_pass_descriptor_ = nullptr; + + /* Workload heuristics - We may need to split command buffers to optimize workload and balancing. + */ + int current_draw_call_count_ = 0; + int encoder_count_ = 0; + int vertex_submitted_count_ = 0; + bool empty_ = true; - void flush(); - bool requires_flush(); -} MTLTemporaryBufferRange; + public: + MTLCommandBufferManager(MTLContext &context) + : context_(context), render_pass_state_(context, *this){}; + void prepare(bool supports_render = true); + + /* If wait is true, CPU will stall until GPU work has completed. */ + bool submit(bool wait); + + /* Fetch/query current encoder. 
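MTLCommandBufferManager centralizes the begin/encode/submit lifecycle so that at most one command encoder is open at any time and a submission can optionally stall until the GPU finishes. Stripped of the event signaling and workload heuristics the class adds, the flow reduces to roughly this sketch.

#import <Metal/Metal.h>

struct CommandBufferSketch {
  id<MTLCommandQueue> queue = nil;
  id<MTLCommandBuffer> active_cmd_buf = nil;
  id<MTLRenderCommandEncoder> active_render_enc = nil;

  id<MTLCommandBuffer> ensure_begin()
  {
    if (active_cmd_buf == nil) {
      active_cmd_buf = [queue commandBuffer];
    }
    return active_cmd_buf;
  }

  id<MTLRenderCommandEncoder> ensure_render_encoder(MTLRenderPassDescriptor *pass)
  {
    ensure_begin();
    if (active_render_enc == nil) {
      active_render_enc = [active_cmd_buf renderCommandEncoderWithDescriptor:pass];
    }
    return active_render_enc;
  }

  void end_active_encoder()
  {
    if (active_render_enc != nil) {
      [active_render_enc endEncoding];
      active_render_enc = nil;
    }
  }

  /* If `wait` is true the CPU stalls until the GPU has finished this buffer. */
  void submit(bool wait)
  {
    end_active_encoder();
    if (active_cmd_buf == nil) {
      return;
    }
    [active_cmd_buf commit];
    if (wait) {
      [active_cmd_buf waitUntilCompleted];
    }
    active_cmd_buf = nil;
  }
};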
*/ + bool is_inside_render_pass(); + bool is_inside_blit(); + bool is_inside_compute(); + id<MTLRenderCommandEncoder> get_active_render_command_encoder(); + id<MTLBlitCommandEncoder> get_active_blit_command_encoder(); + id<MTLComputeCommandEncoder> get_active_compute_command_encoder(); + MTLFrameBuffer *get_active_framebuffer(); + + /* RenderPassState for RenderCommandEncoder. */ + MTLRenderPassState &get_render_pass_state() + { + /* Render pass state should only be valid if we are inside a render pass. */ + BLI_assert(this->is_inside_render_pass()); + return render_pass_state_; + } + + /* Rendering Heuristics. */ + void register_draw_counters(int vertex_submission); + void reset_counters(); + bool do_break_submission(); + + /* Encoder and Pass management. */ + /* End currently active MTLCommandEncoder. */ + bool end_active_command_encoder(); + id<MTLRenderCommandEncoder> ensure_begin_render_command_encoder(MTLFrameBuffer *ctx_framebuffer, + bool force_begin, + bool *new_pass); + id<MTLBlitCommandEncoder> ensure_begin_blit_encoder(); + id<MTLComputeCommandEncoder> ensure_begin_compute_encoder(); + + /* Workload Synchronization. */ + bool insert_memory_barrier(eGPUBarrier barrier_bits, + eGPUStageBarrierBits before_stages, + eGPUStageBarrierBits after_stages); + /* TODO(Metal): Support fences in command buffer class. */ + + /* Debug. */ + void push_debug_group(const char *name, int index); + void pop_debug_group(); + + private: + /* Begin new command buffer. */ + id<MTLCommandBuffer> ensure_begin(); + + void register_encoder_counters(); +}; /** MTLContext -- Core render loop and state management. **/ -/* NOTE(Metal): Partial MTLContext stub to provide wrapper functionality - * for work-in-progress MTL* classes. */ +/* NOTE(Metal): Partial #MTLContext stub to provide wrapper functionality + * for work-in-progress `MTL*` classes. */ class MTLContext : public Context { friend class MTLBackend; private: + /* Null buffers for empty/uninitialized bindings. + * Null attribute buffer follows default attribute format of OpenGL Back-end. */ + id<MTLBuffer> null_buffer_; /* All zero's. */ + id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */ + /* Compute and specialization caches. */ MTLContextTextureUtils texture_utils_; /* Texture Samplers. */ - /* Cache of generated MTLSamplerState objects based on permutations of `eGPUSamplerState`. */ - id<MTLSamplerState> sampler_state_cache_[GPU_SAMPLER_MAX] = {0}; + /* Cache of generated #MTLSamplerState objects based on permutations of `eGPUSamplerState`. */ + id<MTLSamplerState> sampler_state_cache_[GPU_SAMPLER_MAX]; id<MTLSamplerState> default_sampler_state_ = nil; /* When texture sampler count exceeds the resource bind limit, an @@ -397,6 +593,14 @@ class MTLContext : public Context { MTLSamplerArray samplers_; blender::Map<MTLSamplerArray, gpu::MTLBuffer *> cached_sampler_buffers_; + /* Frame. */ + bool is_inside_frame_ = false; + uint current_frame_index_; + + /* Visibility buffer for MTLQuery results. */ + gpu::MTLBuffer *visibility_buffer_ = nullptr; + bool visibility_is_dirty_ = false; + public: /* Shaders and Pipeline state. */ MTLContextGlobalShaderPipelineState pipeline_state; @@ -405,22 +609,36 @@ class MTLContext : public Context { id<MTLCommandQueue> queue = nil; id<MTLDevice> device = nil; + /* Memory Management */ + MTLScratchBufferManager memory_manager; + static MTLBufferPool global_memory_manager; + + /* CommandBuffer managers. */ + MTLCommandBufferManager main_command_buffer; + /* GPUContext interface. 
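The draw and vertex counters above feed do_break_submission(), a heuristic that splits oversized command buffers so the GPU can start consuming work earlier. The thresholds are not defined in this header; the values in the sketch below are purely hypothetical and only illustrate the shape of the check.

/* Hypothetical thresholds -- the real limits are a tuning decision not shown here. */
static const int max_draws_per_submission_sketch = 10000;
static const long long max_vertices_per_submission_sketch = 32ll * 1024 * 1024;

static bool do_break_submission_sketch(bool empty, int draw_count, long long vertex_count)
{
  /* Never split an empty command buffer; otherwise split once either budget is exceeded. */
  if (empty) {
    return false;
  }
  return draw_count > max_draws_per_submission_sketch ||
         vertex_count > max_vertices_per_submission_sketch;
}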
*/ MTLContext(void *ghost_window); ~MTLContext(); static void check_error(const char *info); - void activate(void) override; - void deactivate(void) override; + void activate() override; + void deactivate() override; + void begin_frame() override; + void end_frame() override; - void flush(void) override; - void finish(void) override; + void flush() override; + void finish() override; void memory_statistics_get(int *total_mem, int *free_mem) override; + static MTLContext *get() + { + return static_cast<MTLContext *>(Context::get()); + } + void debug_group_begin(const char *name, int index) override; - void debug_group_end(void) override; + void debug_group_end() override; /*** MTLContext Utility functions. */ /* @@ -428,38 +646,83 @@ class MTLContext : public Context { * rendering, binding resources, setting global state, resource management etc; */ - /* Metal Context Core functions. */ - /* Command Buffer Management. */ - id<MTLCommandBuffer> get_active_command_buffer(); + /** Metal Context Core functions. **/ - /* Render Pass State and Management. */ - void begin_render_pass(); - void end_render_pass(); - bool is_render_pass_active(); + /* Bind frame-buffer to context. */ + void framebuffer_bind(MTLFrameBuffer *framebuffer); - /* Texture Binding. */ - void texture_bind(gpu::MTLTexture *mtl_texture, unsigned int texture_unit); - void sampler_bind(MTLSamplerState, unsigned int sampler_unit); + /* Restore frame-buffer used by active context to default back-buffer. */ + void framebuffer_restore(); + + /* Ensure a render-pass using the Context frame-buffer (active_fb_) is in progress. */ + id<MTLRenderCommandEncoder> ensure_begin_render_pass(); + + MTLFrameBuffer *get_current_framebuffer(); + MTLFrameBuffer *get_default_framebuffer(); + + /* Context Global-State Texture Binding. */ + void texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit); + void sampler_bind(MTLSamplerState, uint sampler_unit); void texture_unbind(gpu::MTLTexture *mtl_texture); - void texture_unbind_all(void); + void texture_unbind_all(); id<MTLSamplerState> get_sampler_from_state(MTLSamplerState state); id<MTLSamplerState> generate_sampler_from_state(MTLSamplerState state); id<MTLSamplerState> get_default_sampler_state(); /* Metal Context pipeline state. */ - void pipeline_state_init(void); - MTLShader *get_active_shader(void); + void pipeline_state_init(); + MTLShader *get_active_shader(); /* State assignment. */ void set_viewport(int origin_x, int origin_y, int width, int height); void set_scissor(int scissor_x, int scissor_y, int scissor_width, int scissor_height); void set_scissor_enabled(bool scissor_enabled); + /* Visibility buffer control. */ + void set_visibility_buffer(gpu::MTLBuffer *buffer); + gpu::MTLBuffer *get_visibility_buffer() const; + + /* Flag whether the visibility buffer for query results + * has changed. This requires a new RenderPass in order + * to update. */ + bool is_visibility_dirty() const; + + /* Reset dirty flag state for visibility buffer. */ + void clear_visibility_dirty(); + /* Texture utilities. 
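From the caller's perspective this interface makes frame-buffer binding cheap: framebuffer_bind() only records active_fb, and the MTLRenderCommandEncoder is created lazily by ensure_begin_render_pass() once something actually needs encoding. A hedged usage sketch, assuming the MTLContext/MTLFrameBuffer types from this patch; the helper function and omitted error handling are illustrative.

/* Sketch of the deferred-begin pattern; `ctx` and `fb` are assumed valid. */
static void draw_into_sketch(MTLContext *ctx, MTLFrameBuffer *fb)
{
  /* Cheap: only records the frame-buffer as active, no Metal work yet. */
  ctx->framebuffer_bind(fb);

  /* The expensive part happens here: a render pass (and encoder) is begun only
   * when something actually needs to be encoded. */
  id<MTLRenderCommandEncoder> enc = ctx->ensure_begin_render_pass();
  (void)enc; /* Draw commands would be encoded through `enc`. */

  /* Returning to the default back-buffer defers again until the next draw. */
  ctx->framebuffer_restore();
}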
*/ MTLContextTextureUtils &get_texture_utils() { - return this->texture_utils_; + return texture_utils_; + } + + bool get_active() + { + return is_active_; + } + + bool get_inside_frame() + { + return is_inside_frame_; + } + + uint get_current_frame_index() + { + return current_frame_index_; + } + + MTLScratchBufferManager &get_scratchbuffer_manager() + { + return this->memory_manager; + } + + static MTLBufferPool &get_global_memory_manager() + { + return MTLContext::global_memory_manager; } + /* Uniform Buffer Bindings to command encoders. */ + id<MTLBuffer> get_null_buffer(); + id<MTLBuffer> get_null_attribute_buffer(); }; } // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm index 94f5682b11b..a66645e5fb5 100644 --- a/source/blender/gpu/metal/mtl_context.mm +++ b/source/blender/gpu/metal/mtl_context.mm @@ -5,6 +5,8 @@ */ #include "mtl_context.hh" #include "mtl_debug.hh" +#include "mtl_shader.hh" +#include "mtl_shader_interface.hh" #include "mtl_state.hh" #include "DNA_userdef_types.h" @@ -16,48 +18,125 @@ using namespace blender::gpu; namespace blender::gpu { -/* -------------------------------------------------------------------- */ -/** \name Memory Management - * \{ */ - -bool MTLTemporaryBufferRange::requires_flush() -{ - /* We do not need to flush shared memory */ - return this->options & MTLResourceStorageModeManaged; -} - -void MTLTemporaryBufferRange::flush() -{ - if (this->requires_flush()) { - BLI_assert(this->metal_buffer); - BLI_assert((this->buffer_offset + this->size) <= [this->metal_buffer length]); - BLI_assert(this->buffer_offset >= 0); - [this->metal_buffer - didModifyRange:NSMakeRange(this->buffer_offset, this->size - this->buffer_offset)]; - } -} - -/** \} */ +/* Global memory manager. */ +MTLBufferPool MTLContext::global_memory_manager; /* -------------------------------------------------------------------- */ /** \name MTLContext * \{ */ /* Placeholder functions */ -MTLContext::MTLContext(void *ghost_window) +MTLContext::MTLContext(void *ghost_window) : memory_manager(*this), main_command_buffer(*this) { /* Init debug. */ debug::mtl_debug_init(); + /* Device creation. + * TODO(Metal): This is a temporary initialization path to enable testing of features + * and shader compilation tests. Future functionality should fetch the existing device + * from GHOST_ContextCGL.mm. Plumbing to be updated in future. */ + this->device = MTLCreateSystemDefaultDevice(); + + /* Initialize command buffer state. */ + this->main_command_buffer.prepare(); + + /* Initialize IMM and pipeline state */ + this->pipeline_state.initialised = false; + + /* Frame management. */ + is_inside_frame_ = false; + current_frame_index_ = 0; + + /* Prepare null data buffer */ + null_buffer_ = nil; + null_attribute_buffer_ = nil; + + /* Create FrameBuffer handles. */ + MTLFrameBuffer *mtl_front_left = new MTLFrameBuffer(this, "front_left"); + MTLFrameBuffer *mtl_back_left = new MTLFrameBuffer(this, "back_left"); + this->front_left = mtl_front_left; + this->back_left = mtl_back_left; + this->active_fb = this->back_left; + + /* Prepare platform and capabilities. (NOTE: With METAL, this needs to be done after CTX + * initialization). */ + MTLBackend::platform_init(this); + MTLBackend::capabilities_init(this); + /* Initialize Metal modules. */ + this->memory_manager.init(); this->state_manager = new MTLStateManager(this); - /* TODO(Metal): Implement. */ + /* Ensure global memory manager is initialized. 
*/ + MTLContext::global_memory_manager.init(this->device); + + /* Initialize texture read/update structures. */ + this->get_texture_utils().init(); + + /* Bound Samplers struct. */ + for (int i = 0; i < MTL_MAX_TEXTURE_SLOTS; i++) { + samplers_.mtl_sampler[i] = nil; + samplers_.mtl_sampler_flags[i] = DEFAULT_SAMPLER_STATE; + } + + /* Initialize samplers. */ + for (uint i = 0; i < GPU_SAMPLER_MAX; i++) { + MTLSamplerState state; + state.state = static_cast<eGPUSamplerState>(i); + sampler_state_cache_[i] = this->generate_sampler_from_state(state); + } } MTLContext::~MTLContext() { - /* TODO(Metal): Implement. */ + BLI_assert(this == reinterpret_cast<MTLContext *>(GPU_context_active_get())); + /* Ensure rendering is complete command encoders/command buffers are freed. */ + if (MTLBackend::get()->is_inside_render_boundary()) { + this->finish(); + + /* End frame. */ + if (this->get_inside_frame()) { + this->end_frame(); + } + } + /* Release update/blit shaders. */ + this->get_texture_utils().cleanup(); + + /* Release Sampler States. */ + for (int i = 0; i < GPU_SAMPLER_MAX; i++) { + if (sampler_state_cache_[i] != nil) { + [sampler_state_cache_[i] release]; + sampler_state_cache_[i] = nil; + } + } + if (null_buffer_) { + [null_buffer_ release]; + } + if (null_attribute_buffer_) { + [null_attribute_buffer_ release]; + } +} + +void MTLContext::begin_frame() +{ + BLI_assert(MTLBackend::get()->is_inside_render_boundary()); + if (this->get_inside_frame()) { + return; + } + + /* Begin Command buffer for next frame. */ + is_inside_frame_ = true; +} + +void MTLContext::end_frame() +{ + BLI_assert(this->get_inside_frame()); + + /* Ensure pre-present work is committed. */ + this->flush(); + + /* Increment frame counter. */ + is_inside_frame_ = false; } void MTLContext::check_error(const char *info) @@ -65,20 +144,20 @@ void MTLContext::check_error(const char *info) /* TODO(Metal): Implement. */ } -void MTLContext::activate(void) +void MTLContext::activate() { /* TODO(Metal): Implement. */ } -void MTLContext::deactivate(void) +void MTLContext::deactivate() { /* TODO(Metal): Implement. */ } -void MTLContext::flush(void) +void MTLContext::flush() { /* TODO(Metal): Implement. */ } -void MTLContext::finish(void) +void MTLContext::finish() { /* TODO(Metal): Implement. */ } @@ -90,26 +169,128 @@ void MTLContext::memory_statistics_get(int *total_mem, int *free_mem) *free_mem = 0; } -id<MTLCommandBuffer> MTLContext::get_active_command_buffer() +void MTLContext::framebuffer_bind(MTLFrameBuffer *framebuffer) { - /* TODO(Metal): Implement. */ - return nil; + /* We do not yet begin the pass -- We defer beginning the pass until a draw is requested. */ + BLI_assert(framebuffer); + this->active_fb = framebuffer; } -/* Render Pass State and Management */ -void MTLContext::begin_render_pass() +void MTLContext::framebuffer_restore() { - /* TODO(Metal): Implement. */ + /* Bind default framebuffer from context -- + * We defer beginning the pass until a draw is requested. */ + this->active_fb = this->back_left; } -void MTLContext::end_render_pass() + +id<MTLRenderCommandEncoder> MTLContext::ensure_begin_render_pass() { - /* TODO(Metal): Implement. */ + BLI_assert(this); + + /* Ensure the rendering frame has started. */ + if (!this->get_inside_frame()) { + this->begin_frame(); + } + + /* Check whether a framebuffer is bound. 
*/ + if (!this->active_fb) { + BLI_assert(false && "No framebuffer is bound!"); + return this->main_command_buffer.get_active_render_command_encoder(); + } + + /* Ensure command buffer workload submissions are optimal -- + * Though do not split a batch mid-IMM recording. */ + /* TODO(Metal): Add IMM Check once MTLImmediate has been implemented. */ + if (this->main_command_buffer.do_break_submission()/*&& + !((MTLImmediate *)(this->imm))->imm_is_recording()*/) { + this->flush(); + } + + /* Begin pass or perform a pass switch if the active framebuffer has been changed, or if the + * framebuffer state has been modified (is_dirty). */ + if (!this->main_command_buffer.is_inside_render_pass() || + this->active_fb != this->main_command_buffer.get_active_framebuffer() || + this->main_command_buffer.get_active_framebuffer()->get_dirty() || + this->is_visibility_dirty()) { + + /* Validate bound framebuffer before beginning render pass. */ + if (!static_cast<MTLFrameBuffer *>(this->active_fb)->validate_render_pass()) { + MTL_LOG_WARNING("Framebuffer validation failed, falling back to default framebuffer\n"); + this->framebuffer_restore(); + + if (!static_cast<MTLFrameBuffer *>(this->active_fb)->validate_render_pass()) { + MTL_LOG_ERROR("CRITICAL: DEFAULT FRAMEBUFFER FAIL VALIDATION!!\n"); + } + } + + /* Begin RenderCommandEncoder on main CommandBuffer. */ + bool new_render_pass = false; + id<MTLRenderCommandEncoder> new_enc = + this->main_command_buffer.ensure_begin_render_command_encoder( + static_cast<MTLFrameBuffer *>(this->active_fb), true, &new_render_pass); + if (new_render_pass) { + /* Flag context pipeline state as dirty - dynamic pipeline state need re-applying. */ + this->pipeline_state.dirty_flags = MTL_PIPELINE_STATE_ALL_FLAG; + } + return new_enc; + } + BLI_assert(!this->main_command_buffer.get_active_framebuffer()->get_dirty()); + return this->main_command_buffer.get_active_render_command_encoder(); } -bool MTLContext::is_render_pass_active() +MTLFrameBuffer *MTLContext::get_current_framebuffer() { - /* TODO(Metal): Implement. */ - return false; + MTLFrameBuffer *last_bound = static_cast<MTLFrameBuffer *>(this->active_fb); + return last_bound ? last_bound : this->get_default_framebuffer(); +} + +MTLFrameBuffer *MTLContext::get_default_framebuffer() +{ + return static_cast<MTLFrameBuffer *>(this->back_left); +} + +MTLShader *MTLContext::get_active_shader() +{ + return this->pipeline_state.active_shader; +} + +id<MTLBuffer> MTLContext::get_null_buffer() +{ + if (null_buffer_ != nil) { + return null_buffer_; + } + + static const int null_buffer_size = 4096; + null_buffer_ = [this->device newBufferWithLength:null_buffer_size + options:MTLResourceStorageModeManaged]; + [null_buffer_ retain]; + uint32_t *null_data = (uint32_t *)calloc(0, null_buffer_size); + memcpy([null_buffer_ contents], null_data, null_buffer_size); + [null_buffer_ didModifyRange:NSMakeRange(0, null_buffer_size)]; + free(null_data); + + BLI_assert(null_buffer_ != nil); + return null_buffer_; +} + +id<MTLBuffer> MTLContext::get_null_attribute_buffer() +{ + if (null_attribute_buffer_ != nil) { + return null_attribute_buffer_; + } + + /* Allocate Null buffer if it has not yet been created. + * Min buffer size is 256 bytes -- though we only need 64 bytes of data. 
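get_null_buffer() above lazily creates a zero-filled managed buffer that stands in for missing bindings. Note that calloc(0, null_buffer_size) as written allocates zero bytes, so the zero-fill presumably intended there is sketched below by writing into the buffer contents directly; the helper name is illustrative.

#import <Metal/Metal.h>
#include <cstring>

/* Illustrative: create a zero-filled managed buffer and publish the CPU-side write. */
static id<MTLBuffer> make_null_buffer_sketch(id<MTLDevice> device, NSUInteger size)
{
  id<MTLBuffer> buf = [device newBufferWithLength:size
                                          options:MTLResourceStorageModeManaged];
  memset([buf contents], 0, size);
  /* Managed storage: CPU writes must be flushed to the GPU copy explicitly. */
  [buf didModifyRange:NSMakeRange(0, size)];
  return buf;
}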
*/ + static const int null_buffer_size = 256; + null_attribute_buffer_ = [this->device newBufferWithLength:null_buffer_size + options:MTLResourceStorageModeManaged]; + BLI_assert(null_attribute_buffer_ != nil); + [null_attribute_buffer_ retain]; + float data[4] = {0.0f, 0.0f, 0.0f, 1.0f}; + memcpy([null_attribute_buffer_ contents], data, sizeof(float) * 4); + [null_attribute_buffer_ didModifyRange:NSMakeRange(0, null_buffer_size)]; + + return null_attribute_buffer_; } /** \} */ @@ -124,20 +305,20 @@ void MTLContext::pipeline_state_init() /*** Initialize state only once. ***/ if (!this->pipeline_state.initialised) { this->pipeline_state.initialised = true; - this->pipeline_state.active_shader = NULL; + this->pipeline_state.active_shader = nullptr; /* Clear bindings state. */ for (int t = 0; t < GPU_max_textures(); t++) { this->pipeline_state.texture_bindings[t].used = false; - this->pipeline_state.texture_bindings[t].texture_slot_index = t; - this->pipeline_state.texture_bindings[t].texture_resource = NULL; + this->pipeline_state.texture_bindings[t].slot_index = -1; + this->pipeline_state.texture_bindings[t].texture_resource = nullptr; } for (int s = 0; s < MTL_MAX_SAMPLER_SLOTS; s++) { this->pipeline_state.sampler_bindings[s].used = false; } for (int u = 0; u < MTL_MAX_UNIFORM_BUFFER_BINDINGS; u++) { this->pipeline_state.ubo_bindings[u].bound = false; - this->pipeline_state.ubo_bindings[u].ubo = NULL; + this->pipeline_state.ubo_bindings[u].ubo = nullptr; } } @@ -200,13 +381,107 @@ void MTLContext::pipeline_state_init() MTLStencilOperationKeep; } +void MTLContext::set_viewport(int origin_x, int origin_y, int width, int height) +{ + BLI_assert(this); + BLI_assert(width > 0); + BLI_assert(height > 0); + BLI_assert(origin_x >= 0); + BLI_assert(origin_y >= 0); + bool changed = (this->pipeline_state.viewport_offset_x != origin_x) || + (this->pipeline_state.viewport_offset_y != origin_y) || + (this->pipeline_state.viewport_width != width) || + (this->pipeline_state.viewport_height != height); + this->pipeline_state.viewport_offset_x = origin_x; + this->pipeline_state.viewport_offset_y = origin_y; + this->pipeline_state.viewport_width = width; + this->pipeline_state.viewport_height = height; + if (changed) { + this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags | + MTL_PIPELINE_STATE_VIEWPORT_FLAG); + } +} + +void MTLContext::set_scissor(int scissor_x, int scissor_y, int scissor_width, int scissor_height) +{ + BLI_assert(this); + bool changed = (this->pipeline_state.scissor_x != scissor_x) || + (this->pipeline_state.scissor_y != scissor_y) || + (this->pipeline_state.scissor_width != scissor_width) || + (this->pipeline_state.scissor_height != scissor_height) || + (this->pipeline_state.scissor_enabled != true); + this->pipeline_state.scissor_x = scissor_x; + this->pipeline_state.scissor_y = scissor_y; + this->pipeline_state.scissor_width = scissor_width; + this->pipeline_state.scissor_height = scissor_height; + this->pipeline_state.scissor_enabled = (scissor_width > 0 && scissor_height > 0); + + if (changed) { + this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags | + MTL_PIPELINE_STATE_SCISSOR_FLAG); + } +} + +void MTLContext::set_scissor_enabled(bool scissor_enabled) +{ + /* Only turn on Scissor if requested scissor region is valid */ + scissor_enabled = scissor_enabled && (this->pipeline_state.scissor_width > 0 && + this->pipeline_state.scissor_height > 0); + + bool changed = (this->pipeline_state.scissor_enabled != scissor_enabled); + 
this->pipeline_state.scissor_enabled = scissor_enabled; + if (changed) { + this->pipeline_state.dirty_flags = (this->pipeline_state.dirty_flags | + MTL_PIPELINE_STATE_SCISSOR_FLAG); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Visibility buffer control for MTLQueryPool. + * \{ */ + +void MTLContext::set_visibility_buffer(gpu::MTLBuffer *buffer) +{ + /* Flag visibility buffer as dirty if the buffer being used for visibility has changed -- + * This is required by the render pass, and we will break the pass if the results destination + * buffer is modified. */ + if (buffer) { + visibility_is_dirty_ = (buffer != visibility_buffer_) || visibility_is_dirty_; + visibility_buffer_ = buffer; + visibility_buffer_->debug_ensure_used(); + } + else { + /* If buffer is null, reset visibility state, mark dirty to break render pass if results are no + * longer needed. */ + visibility_is_dirty_ = (visibility_buffer_ != nullptr) || visibility_is_dirty_; + visibility_buffer_ = nullptr; + } +} + +gpu::MTLBuffer *MTLContext::get_visibility_buffer() const +{ + return visibility_buffer_; +} + +void MTLContext::clear_visibility_dirty() +{ + visibility_is_dirty_ = false; +} + +bool MTLContext::is_visibility_dirty() const +{ + return visibility_is_dirty_; +} + /** \} */ /* -------------------------------------------------------------------- */ /** \name Texture State Management * \{ */ -void MTLContext::texture_bind(gpu::MTLTexture *mtl_texture, unsigned int texture_unit) +void MTLContext::texture_bind(gpu::MTLTexture *mtl_texture, uint texture_unit) { BLI_assert(this); BLI_assert(mtl_texture); @@ -226,7 +501,7 @@ void MTLContext::texture_bind(gpu::MTLTexture *mtl_texture, unsigned int texture mtl_texture->is_bound_ = true; } -void MTLContext::sampler_bind(MTLSamplerState sampler_state, unsigned int sampler_unit) +void MTLContext::sampler_bind(MTLSamplerState sampler_state, uint sampler_unit) { BLI_assert(this); if (sampler_unit < 0 || sampler_unit >= GPU_max_textures() || @@ -271,67 +546,61 @@ void MTLContext::texture_unbind_all() id<MTLSamplerState> MTLContext::get_sampler_from_state(MTLSamplerState sampler_state) { - BLI_assert((unsigned int)sampler_state >= 0 && ((unsigned int)sampler_state) < GPU_SAMPLER_MAX); - return this->sampler_state_cache_[(unsigned int)sampler_state]; + BLI_assert((uint)sampler_state >= 0 && ((uint)sampler_state) < GPU_SAMPLER_MAX); + return sampler_state_cache_[(uint)sampler_state]; } id<MTLSamplerState> MTLContext::generate_sampler_from_state(MTLSamplerState sampler_state) { /* Check if sampler already exists for given state. */ - id<MTLSamplerState> st = this->sampler_state_cache_[(unsigned int)sampler_state]; - if (st != nil) { - return st; - } - else { - MTLSamplerDescriptor *descriptor = [[MTLSamplerDescriptor alloc] init]; - descriptor.normalizedCoordinates = true; - - MTLSamplerAddressMode clamp_type = (sampler_state.state & GPU_SAMPLER_CLAMP_BORDER) ? - MTLSamplerAddressModeClampToBorderColor : - MTLSamplerAddressModeClampToEdge; - descriptor.rAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_R) ? - MTLSamplerAddressModeRepeat : - clamp_type; - descriptor.sAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_S) ? - MTLSamplerAddressModeRepeat : - clamp_type; - descriptor.tAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_T) ? 
- MTLSamplerAddressModeRepeat : - clamp_type; - descriptor.borderColor = MTLSamplerBorderColorTransparentBlack; - descriptor.minFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ? - MTLSamplerMinMagFilterLinear : - MTLSamplerMinMagFilterNearest; - descriptor.magFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ? - MTLSamplerMinMagFilterLinear : - MTLSamplerMinMagFilterNearest; - descriptor.mipFilter = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? - MTLSamplerMipFilterLinear : - MTLSamplerMipFilterNotMipmapped; - descriptor.lodMinClamp = -1000; - descriptor.lodMaxClamp = 1000; - float aniso_filter = max_ff(16, U.anisotropic_filter); - descriptor.maxAnisotropy = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? aniso_filter : 1; - descriptor.compareFunction = (sampler_state.state & GPU_SAMPLER_COMPARE) ? - MTLCompareFunctionLessEqual : - MTLCompareFunctionAlways; - descriptor.supportArgumentBuffers = true; - - id<MTLSamplerState> state = [this->device newSamplerStateWithDescriptor:descriptor]; - this->sampler_state_cache_[(unsigned int)sampler_state] = state; - - BLI_assert(state != nil); - [descriptor autorelease]; - return state; - } + MTLSamplerDescriptor *descriptor = [[MTLSamplerDescriptor alloc] init]; + descriptor.normalizedCoordinates = true; + + MTLSamplerAddressMode clamp_type = (sampler_state.state & GPU_SAMPLER_CLAMP_BORDER) ? + MTLSamplerAddressModeClampToBorderColor : + MTLSamplerAddressModeClampToEdge; + descriptor.rAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_R) ? + MTLSamplerAddressModeRepeat : + clamp_type; + descriptor.sAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_S) ? + MTLSamplerAddressModeRepeat : + clamp_type; + descriptor.tAddressMode = (sampler_state.state & GPU_SAMPLER_REPEAT_T) ? + MTLSamplerAddressModeRepeat : + clamp_type; + descriptor.borderColor = MTLSamplerBorderColorTransparentBlack; + descriptor.minFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ? + MTLSamplerMinMagFilterLinear : + MTLSamplerMinMagFilterNearest; + descriptor.magFilter = (sampler_state.state & GPU_SAMPLER_FILTER) ? + MTLSamplerMinMagFilterLinear : + MTLSamplerMinMagFilterNearest; + descriptor.mipFilter = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? + MTLSamplerMipFilterLinear : + MTLSamplerMipFilterNotMipmapped; + descriptor.lodMinClamp = -1000; + descriptor.lodMaxClamp = 1000; + float aniso_filter = max_ff(16, U.anisotropic_filter); + descriptor.maxAnisotropy = (sampler_state.state & GPU_SAMPLER_MIPMAP) ? aniso_filter : 1; + descriptor.compareFunction = (sampler_state.state & GPU_SAMPLER_COMPARE) ? 
+ MTLCompareFunctionLessEqual : + MTLCompareFunctionAlways; + descriptor.supportArgumentBuffers = true; + + id<MTLSamplerState> state = [this->device newSamplerStateWithDescriptor:descriptor]; + sampler_state_cache_[(uint)sampler_state] = state; + + BLI_assert(state != nil); + [descriptor autorelease]; + return state; } id<MTLSamplerState> MTLContext::get_default_sampler_state() { - if (this->default_sampler_state_ == nil) { - this->default_sampler_state_ = this->get_sampler_from_state(DEFAULT_SAMPLER_STATE); + if (default_sampler_state_ == nil) { + default_sampler_state_ = this->get_sampler_from_state(DEFAULT_SAMPLER_STATE); } - return this->default_sampler_state_; + return default_sampler_state_; } /** \} */ diff --git a/source/blender/gpu/metal/mtl_debug.mm b/source/blender/gpu/metal/mtl_debug.mm index 9d67a1f4f04..8ca4a0cc6e3 100644 --- a/source/blender/gpu/metal/mtl_debug.mm +++ b/source/blender/gpu/metal/mtl_debug.mm @@ -46,20 +46,14 @@ namespace blender::gpu { void MTLContext::debug_group_begin(const char *name, int index) { if (G.debug & G_DEBUG_GPU) { - id<MTLCommandBuffer> cmd = this->get_active_command_buffer(); - if (cmd != nil) { - [cmd pushDebugGroup:[NSString stringWithFormat:@"%s_%d", name, index]]; - } + this->main_command_buffer.push_debug_group(name, index); } } void MTLContext::debug_group_end() { if (G.debug & G_DEBUG_GPU) { - id<MTLCommandBuffer> cmd = this->get_active_command_buffer(); - if (cmd != nil) { - [cmd popDebugGroup]; - } + this->main_command_buffer.pop_debug_group(); } } diff --git a/source/blender/gpu/metal/mtl_framebuffer.hh b/source/blender/gpu/metal/mtl_framebuffer.hh new file mode 100644 index 00000000000..434d1a15b43 --- /dev/null +++ b/source/blender/gpu/metal/mtl_framebuffer.hh @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * Encapsulation of Frame-buffer states (attached textures, viewport, scissors). + */ + +#pragma once + +#include "GPU_common_types.h" +#include "MEM_guardedalloc.h" + +#include "gpu_framebuffer_private.hh" +#include "mtl_texture.hh" +#include <Metal/Metal.h> + +namespace blender::gpu { + +class MTLContext; + +struct MTLAttachment { + bool used; + gpu::MTLTexture *texture; + union { + float color[4]; + float depth; + uint stencil; + } clear_value; + + eGPULoadOp load_action; + eGPUStoreOp store_action; + uint mip; + uint slice; + uint depth_plane; + + /* If Array Length is larger than zero, use multilayered rendering. */ + uint render_target_array_length; +}; + +/** + * Implementation of FrameBuffer object using Metal. + */ +class MTLFrameBuffer : public FrameBuffer { + private: + /* Context Handle. */ + MTLContext *context_; + + /* Metal Attachment properties. */ + uint colour_attachment_count_; + MTLAttachment mtl_color_attachments_[GPU_FB_MAX_COLOR_ATTACHMENT]; + MTLAttachment mtl_depth_attachment_; + MTLAttachment mtl_stencil_attachment_; + bool use_multilayered_rendering_ = false; + + /* State. */ + + /** + * Whether global frame-buffer properties have changed and require + * re-generation of #MTLRenderPassDescriptor / #RenderCommandEncoders. + */ + bool is_dirty_; + + /** Whether `loadstore` properties have changed (only affects certain cached configurations). */ + bool is_loadstore_dirty_; + + /** + * Context that the latest modified state was last applied to. + * If this does not match current ctx, re-apply state. 
+ */ + MTLContext *dirty_state_ctx_; + + /** + * Whether a clear is pending -- Used to toggle between clear and load FB configurations + * (without dirtying the state) - Frame-buffer load config is used if no `GPU_clear_*` command + * was issued after binding the #FrameBuffer. + */ + bool has_pending_clear_; + + /** + * Render Pass Descriptors: + * There are 3 #MTLRenderPassDescriptors for different ways in which a frame-buffer + * can be configured: + * [0] = CLEAR CONFIG -- Used when a GPU_framebuffer_clear_* command has been issued. + * [1] = LOAD CONFIG -- Used if bound, but no clear is required. + * [2] = CUSTOM CONFIG -- When using GPU_framebuffer_bind_ex to manually specify + * load-store configuration for optimal bandwidth utilization. + * -- We cache these different configs to avoid re-generation -- + */ + typedef enum { + MTL_FB_CONFIG_CLEAR = 0, + MTL_FB_CONFIG_LOAD = 1, + MTL_FB_CONFIG_CUSTOM = 2 + } MTL_FB_CONFIG; +#define MTL_FB_CONFIG_MAX (MTL_FB_CONFIG_CUSTOM + 1) + + MTLRenderPassDescriptor *framebuffer_descriptor_[MTL_FB_CONFIG_MAX]; + MTLRenderPassColorAttachmentDescriptor + *colour_attachment_descriptors_[GPU_FB_MAX_COLOR_ATTACHMENT]; + /** Whether `MTLRenderPassDescriptor[N]` requires updating with latest state. */ + bool descriptor_dirty_[MTL_FB_CONFIG_MAX]; + /** Whether SRGB is enabled for this frame-buffer configuration. */ + bool srgb_enabled_; + /** Whether the primary Frame-buffer attachment is an SRGB target or not. */ + bool is_srgb_; + + public: + /** + * Create a conventional framebuffer to attach texture to. + */ + MTLFrameBuffer(MTLContext *ctx, const char *name); + + ~MTLFrameBuffer(); + + void bind(bool enabled_srgb) override; + + bool check(char err_out[256]) override; + + void clear(eGPUFrameBufferBits buffers, + const float clear_col[4], + float clear_depth, + uint clear_stencil) override; + void clear_multi(const float (*clear_cols)[4]) override; + void clear_attachment(GPUAttachmentType type, + eGPUDataFormat data_format, + const void *clear_value) override; + + void attachment_set_loadstore_op(GPUAttachmentType type, + eGPULoadOp load_action, + eGPUStoreOp store_action) override; + + void read(eGPUFrameBufferBits planes, + eGPUDataFormat format, + const int area[4], + int channel_len, + int slot, + void *r_data) override; + + void blit_to(eGPUFrameBufferBits planes, + int src_slot, + FrameBuffer *dst, + int dst_slot, + int dst_offset_x, + int dst_offset_y) override; + + void apply_state(); + + /* State. */ + /* Flag MTLFramebuffer configuration as having changed. */ + void mark_dirty(); + void mark_loadstore_dirty(); + /* Mark that a pending clear has been performed. */ + void mark_cleared(); + /* Mark that we have a pending clear. */ + void mark_do_clear(); + + /* Attachment management. */ + /* When dirty_attachments_ is true, we need to reprocess attachments to extract Metal + * information. */ + void update_attachments(bool update_viewport); + bool add_color_attachment(gpu::MTLTexture *texture, uint slot, int miplevel, int layer); + bool add_depth_attachment(gpu::MTLTexture *texture, int miplevel, int layer); + bool add_stencil_attachment(gpu::MTLTexture *texture, int miplevel, int layer); + bool remove_color_attachment(uint slot); + bool remove_depth_attachment(); + bool remove_stencil_attachment(); + void remove_all_attachments(); + void ensure_render_target_size(); + + /* Clear values -> Load/store actions. 
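The three cached descriptor configurations (clear / load / custom) differ mainly in the per-attachment load and store actions they request. A small sketch of how a colour attachment descriptor might be filled from generic load/store intents; the enums on the input side are simplified stand-ins, not the real eGPULoadOp/eGPUStoreOp values.

#import <Metal/Metal.h>

/* Simplified stand-ins for the GPU module's load/store enums. */
enum class LoadIntentSketch { Clear, Load, DontCare };
enum class StoreIntentSketch { Store, DontCare };

static void configure_color_attachment_sketch(MTLRenderPassColorAttachmentDescriptor *att,
                                               id<MTLTexture> target,
                                               LoadIntentSketch load,
                                               StoreIntentSketch store,
                                               const float clear_color[4])
{
  att.texture = target;
  switch (load) {
    case LoadIntentSketch::Clear:
      att.loadAction = MTLLoadActionClear;
      att.clearColor = MTLClearColorMake(
          clear_color[0], clear_color[1], clear_color[2], clear_color[3]);
      break;
    case LoadIntentSketch::Load:
      att.loadAction = MTLLoadActionLoad;
      break;
    case LoadIntentSketch::DontCare:
      att.loadAction = MTLLoadActionDontCare;
      break;
  }
  att.storeAction = (store == StoreIntentSketch::Store) ? MTLStoreActionStore :
                                                          MTLStoreActionDontCare;
}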
*/ + bool set_color_attachment_clear_color(uint slot, const float clear_color[4]); + bool set_depth_attachment_clear_value(float depth_clear); + bool set_stencil_attachment_clear_value(uint stencil_clear); + bool set_color_loadstore_op(uint slot, eGPULoadOp load_action, eGPUStoreOp store_action); + bool set_depth_loadstore_op(eGPULoadOp load_action, eGPUStoreOp store_action); + bool set_stencil_loadstore_op(eGPULoadOp load_action, eGPUStoreOp store_action); + + /* Remove any pending clears - Ensure "load" configuration is used. */ + bool reset_clear_state(); + + /* Fetch values */ + bool has_attachment_at_slot(uint slot); + bool has_color_attachment_with_texture(gpu::MTLTexture *texture); + bool has_depth_attachment(); + bool has_stencil_attachment(); + int get_color_attachment_slot_from_texture(gpu::MTLTexture *texture); + uint get_attachment_count(); + uint get_attachment_limit() + { + return GPU_FB_MAX_COLOR_ATTACHMENT; + }; + MTLAttachment get_color_attachment(uint slot); + MTLAttachment get_depth_attachment(); + MTLAttachment get_stencil_attachment(); + + /* Metal API resources and validation. */ + bool validate_render_pass(); + MTLRenderPassDescriptor *bake_render_pass_descriptor(bool load_contents); + + /* Blitting. */ + void blit(uint read_slot, + uint src_x_offset, + uint src_y_offset, + MTLFrameBuffer *metal_fb_write, + uint write_slot, + uint dst_x_offset, + uint dst_y_offset, + uint width, + uint height, + eGPUFrameBufferBits blit_buffers); + + int get_width(); + int get_height(); + bool get_dirty() + { + return is_dirty_ || is_loadstore_dirty_; + } + + bool get_pending_clear() + { + return has_pending_clear_; + } + + bool get_srgb_enabled() + { + return srgb_enabled_; + } + + bool get_is_srgb() + { + return is_srgb_; + } + + private: + /* Clears a render target by force-opening a render pass. */ + void force_clear(); + + MEM_CXX_CLASS_ALLOC_FUNCS("MTLFrameBuffer"); +}; + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_framebuffer.mm b/source/blender/gpu/metal/mtl_framebuffer.mm new file mode 100644 index 00000000000..975e78fc466 --- /dev/null +++ b/source/blender/gpu/metal/mtl_framebuffer.mm @@ -0,0 +1,1899 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#include "BKE_global.h" + +#include "mtl_context.hh" +#include "mtl_debug.hh" +#include "mtl_framebuffer.hh" +#include "mtl_texture.hh" +#import <Availability.h> + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Creation & Deletion + * \{ */ + +MTLFrameBuffer::MTLFrameBuffer(MTLContext *ctx, const char *name) : FrameBuffer(name) +{ + + context_ = ctx; + is_dirty_ = true; + is_loadstore_dirty_ = true; + dirty_state_ctx_ = nullptr; + has_pending_clear_ = false; + colour_attachment_count_ = 0; + srgb_enabled_ = false; + is_srgb_ = false; + + for (int i = 0; i < GPU_FB_MAX_COLOR_ATTACHMENT; i++) { + mtl_color_attachments_[i].used = false; + } + mtl_depth_attachment_.used = false; + mtl_stencil_attachment_.used = false; + + for (int i = 0; i < MTL_FB_CONFIG_MAX; i++) { + framebuffer_descriptor_[i] = [[MTLRenderPassDescriptor alloc] init]; + descriptor_dirty_[i] = true; + } + + for (int i = 0; i < GPU_FB_MAX_COLOR_ATTACHMENT; i++) { + colour_attachment_descriptors_[i] = [[MTLRenderPassColorAttachmentDescriptor alloc] init]; + } + + /* Initial state. 
*/ + this->size_set(0, 0); + this->viewport_reset(); + this->scissor_reset(); +} + +MTLFrameBuffer::~MTLFrameBuffer() +{ + /* If FrameBuffer is associated with a currently open RenderPass, end. */ + if (context_->main_command_buffer.get_active_framebuffer() == this) { + context_->main_command_buffer.end_active_command_encoder(); + } + + /* Restore default frame-buffer if this frame-buffer was bound. */ + if (context_->active_fb == this && context_->back_left != this) { + /* If this assert triggers it means the frame-buffer is being freed while in use by another + * context which, by the way, is TOTALLY UNSAFE!!! (Copy from GL behavior). */ + BLI_assert(context_ == static_cast<MTLContext *>(unwrap(GPU_context_active_get()))); + GPU_framebuffer_restore(); + } + + /* Free Render Pass Descriptors. */ + for (int config = 0; config < MTL_FB_CONFIG_MAX; config++) { + if (framebuffer_descriptor_[config] != nil) { + [framebuffer_descriptor_[config] release]; + framebuffer_descriptor_[config] = nil; + } + } + + /* Free colour attachment descriptors. */ + for (int i = 0; i < GPU_FB_MAX_COLOR_ATTACHMENT; i++) { + if (colour_attachment_descriptors_[i] != nil) { + [colour_attachment_descriptors_[i] release]; + colour_attachment_descriptors_[i] = nil; + } + } + + /* Remove attachments - release FB texture references. */ + this->remove_all_attachments(); + + if (context_ == nullptr) { + return; + } +} + +void MTLFrameBuffer::bind(bool enabled_srgb) +{ + + /* Verify Context is valid. */ + if (context_ != static_cast<MTLContext *>(unwrap(GPU_context_active_get()))) { + BLI_assert(false && "Trying to use the same frame-buffer in multiple context's."); + return; + } + + /* Ensure SRGB state is up-to-date and valid. */ + bool srgb_state_changed = srgb_enabled_ != enabled_srgb; + if (context_->active_fb != this || srgb_state_changed) { + if (srgb_state_changed) { + this->mark_dirty(); + } + srgb_enabled_ = enabled_srgb; + GPU_shader_set_framebuffer_srgb_target(srgb_enabled_ && is_srgb_); + } + + /* Ensure local MTLAttachment data is up to date. */ + this->update_attachments(true); + + /* Reset clear state on bind -- Clears and load/store ops are set after binding. */ + this->reset_clear_state(); + + /* Bind to active context. */ + MTLContext *mtl_context = reinterpret_cast<MTLContext *>(GPU_context_active_get()); + if (mtl_context) { + mtl_context->framebuffer_bind(this); + dirty_state_ = true; + } + else { + MTL_LOG_WARNING("Attempting to bind FrameBuffer, but no context is active\n"); + } +} + +bool MTLFrameBuffer::check(char err_out[256]) +{ + /* Ensure local MTLAttachment data is up to date. */ + this->update_attachments(true); + + /* Ensure there is at least one attachment. */ + bool valid = (this->get_attachment_count() > 0 || + this->has_depth_attachment() | this->has_stencil_attachment()); + if (!valid) { + const char *format = "Framebuffer %s does not have any attachments.\n"; + if (err_out) { + BLI_snprintf(err_out, 256, format, name_); + } + else { + MTL_LOG_ERROR(format, name_); + } + return false; + } + + /* Ensure all attachments have identical dimensions. */ + /* Ensure all attachments are render-targets. 
*/ + bool first = true; + uint dim_x = 0; + uint dim_y = 0; + for (int col_att = 0; col_att < this->get_attachment_count(); col_att++) { + MTLAttachment att = this->get_color_attachment(col_att); + if (att.used) { + if (att.texture->gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT) { + if (first) { + dim_x = att.texture->width_get(); + dim_y = att.texture->height_get(); + first = false; + } + else { + if (dim_x != att.texture->width_get() || dim_y != att.texture->height_get()) { + const char *format = + "Framebuffer %s: Color attachment dimensions do not match those of previous " + "attachment\n"; + if (err_out) { + BLI_snprintf(err_out, 256, format, name_); + } + else { + fprintf(stderr, format, name_); + MTL_LOG_ERROR(format, name_); + } + return false; + } + } + } + else { + const char *format = + "Framebuffer %s: Color attachment texture does not have usage flag " + "'GPU_TEXTURE_USAGE_ATTACHMENT'\n"; + if (err_out) { + BLI_snprintf(err_out, 256, format, name_); + } + else { + fprintf(stderr, format, name_); + MTL_LOG_ERROR(format, name_); + } + return false; + } + } + } + MTLAttachment depth_att = this->get_depth_attachment(); + MTLAttachment stencil_att = this->get_stencil_attachment(); + if (depth_att.used) { + if (first) { + dim_x = depth_att.texture->width_get(); + dim_y = depth_att.texture->height_get(); + first = false; + valid = (depth_att.texture->gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT); + + if (!valid) { + const char *format = + "Framebuffer %n: Depth attachment does not have usage " + "'GPU_TEXTURE_USAGE_ATTACHMENT'\n"; + if (err_out) { + BLI_snprintf(err_out, 256, format, name_); + } + else { + fprintf(stderr, format, name_); + MTL_LOG_ERROR(format, name_); + } + return false; + } + } + else { + if (dim_x != depth_att.texture->width_get() || dim_y != depth_att.texture->height_get()) { + const char *format = + "Framebuffer %n: Depth attachment dimensions do not match that of previous " + "attachment\n"; + if (err_out) { + BLI_snprintf(err_out, 256, format, name_); + } + else { + fprintf(stderr, format, name_); + MTL_LOG_ERROR(format, name_); + } + return false; + } + } + } + if (stencil_att.used) { + if (first) { + dim_x = stencil_att.texture->width_get(); + dim_y = stencil_att.texture->height_get(); + first = false; + valid = (stencil_att.texture->gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT); + if (!valid) { + const char *format = + "Framebuffer %s: Stencil attachment does not have usage " + "'GPU_TEXTURE_USAGE_ATTACHMENT'\n"; + if (err_out) { + BLI_snprintf(err_out, 256, format, name_); + } + else { + fprintf(stderr, format, name_); + MTL_LOG_ERROR(format, name_); + } + return false; + } + } + else { + if (dim_x != stencil_att.texture->width_get() || + dim_y != stencil_att.texture->height_get()) { + const char *format = + "Framebuffer %s: Stencil attachment dimensions do not match that of previous " + "attachment"; + if (err_out) { + BLI_snprintf(err_out, 256, format, name_); + } + else { + fprintf(stderr, format, name_); + MTL_LOG_ERROR(format, name_); + } + return false; + } + } + } + + BLI_assert(valid); + return valid; +} + +void MTLFrameBuffer::force_clear() +{ + /* Perform clear by ending current and starting a new render pass. */ + MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + MTLFrameBuffer *current_framebuffer = mtl_context->get_current_framebuffer(); + if (current_framebuffer) { + BLI_assert(current_framebuffer == this); + /* End current render-pass. 
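Clears are expressed through load actions rather than draws: a clear issued after binding marks has_pending_clear_, which selects the MTL_FB_CONFIG_CLEAR descriptor when the next render pass is baked, while force_clear() simply ends the current encoder and begins a new pass so those clear values take effect immediately. A deliberately small sketch of that selection; the descriptors stand for the cached config entries.

/* Sketch: choose between the cached descriptor configurations based on pending clear state. */
static MTLRenderPassDescriptor *select_pass_config_sketch(bool has_pending_clear,
                                                          MTLRenderPassDescriptor *clear_config,
                                                          MTLRenderPassDescriptor *load_config)
{
  /* With a pending clear, attachments are cleared on load; otherwise previous
   * contents are preserved via MTLLoadActionLoad in the load configuration. */
  return has_pending_clear ? clear_config : load_config;
}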
*/ + if (mtl_context->main_command_buffer.is_inside_render_pass()) { + mtl_context->main_command_buffer.end_active_command_encoder(); + } + mtl_context->ensure_begin_render_pass(); + BLI_assert(has_pending_clear_ == false); + } +} + +void MTLFrameBuffer::clear(eGPUFrameBufferBits buffers, + const float clear_col[4], + float clear_depth, + uint clear_stencil) +{ + + BLI_assert(unwrap(GPU_context_active_get()) == context_); + BLI_assert(context_->active_fb == this); + + /* Ensure attachments are up to date. */ + this->update_attachments(true); + + /* If we had no previous clear pending, reset clear state. */ + if (!has_pending_clear_) { + this->reset_clear_state(); + } + + /* Ensure we only clear if attachments exist for given buffer bits. */ + bool do_clear = false; + if (buffers & GPU_COLOR_BIT) { + for (int i = 0; i < colour_attachment_count_; i++) { + this->set_color_attachment_clear_color(i, clear_col); + do_clear = true; + } + } + + if (buffers & GPU_DEPTH_BIT) { + this->set_depth_attachment_clear_value(clear_depth); + do_clear = do_clear || this->has_depth_attachment(); + } + if (buffers & GPU_STENCIL_BIT) { + this->set_stencil_attachment_clear_value(clear_stencil); + do_clear = do_clear || this->has_stencil_attachment(); + } + + if (do_clear) { + has_pending_clear_ = true; + + /* Apply state before clear. */ + this->apply_state(); + + /* TODO(Metal): Optimize - Currently force-clear always used. Consider moving clear state to + * MTLTexture instead. */ + /* Force clear if RP is not yet active -- not the most efficient, but there is no distinction + * between clears where no draws occur. Can optimize at the high-level by using explicit + * load-store flags. */ + this->force_clear(); + } +} + +void MTLFrameBuffer::clear_multi(const float (*clear_cols)[4]) +{ + /* If we had no previous clear pending, reset clear state. */ + if (!has_pending_clear_) { + this->reset_clear_state(); + } + + bool do_clear = false; + for (int i = 0; i < this->get_attachment_limit(); i++) { + if (this->has_attachment_at_slot(i)) { + this->set_color_attachment_clear_color(i, clear_cols[i]); + do_clear = true; + } + } + + if (do_clear) { + has_pending_clear_ = true; + + /* Apply state before clear. */ + this->apply_state(); + + /* TODO(Metal): Optimize - Currently force-clear always used. Consider moving clear state to + * MTLTexture instead. */ + /* Force clear if RP is not yet active -- not the most efficient, but there is no distinction + * between clears where no draws occur. Can optimize at the high-level by using explicit + * load-store flags. */ + this->force_clear(); + } +} + +void MTLFrameBuffer::clear_attachment(GPUAttachmentType type, + eGPUDataFormat data_format, + const void *clear_value) +{ + BLI_assert(static_cast<MTLContext *>(unwrap(GPU_context_active_get())) == context_); + BLI_assert(context_->active_fb == this); + + /* If we had no previous clear pending, reset clear state. 
*/ + if (!has_pending_clear_) { + this->reset_clear_state(); + } + + bool do_clear = false; + + if (type == GPU_FB_DEPTH_STENCIL_ATTACHMENT) { + if (this->has_depth_attachment() || this->has_stencil_attachment()) { + BLI_assert(data_format == GPU_DATA_UINT_24_8); + float depth = ((*(uint32_t *)clear_value) & 0x00FFFFFFu) / (float)0x00FFFFFFu; + int stencil = ((*(uint32_t *)clear_value) >> 24); + this->set_depth_attachment_clear_value(depth); + this->set_stencil_attachment_clear_value(stencil); + do_clear = true; + } + } + else if (type == GPU_FB_DEPTH_ATTACHMENT) { + if (this->has_depth_attachment()) { + if (data_format == GPU_DATA_FLOAT) { + this->set_depth_attachment_clear_value(*(float *)clear_value); + } + else { + float depth = *(uint32_t *)clear_value / (float)0xFFFFFFFFu; + this->set_depth_attachment_clear_value(depth); + } + do_clear = true; + } + } + else { + int slot = type - GPU_FB_COLOR_ATTACHMENT0; + if (this->has_attachment_at_slot(slot)) { + float col_clear_val[4] = {0.0}; + switch (data_format) { + case GPU_DATA_FLOAT: { + const float *vals = (float *)clear_value; + col_clear_val[0] = vals[0]; + col_clear_val[1] = vals[1]; + col_clear_val[2] = vals[2]; + col_clear_val[3] = vals[3]; + } break; + case GPU_DATA_UINT: { + const uint *vals = (uint *)clear_value; + col_clear_val[0] = (float)(vals[0]); + col_clear_val[1] = (float)(vals[1]); + col_clear_val[2] = (float)(vals[2]); + col_clear_val[3] = (float)(vals[3]); + } break; + case GPU_DATA_INT: { + const int *vals = (int *)clear_value; + col_clear_val[0] = (float)(vals[0]); + col_clear_val[1] = (float)(vals[1]); + col_clear_val[2] = (float)(vals[2]); + col_clear_val[3] = (float)(vals[3]); + } break; + default: + BLI_assert_msg(0, "Unhandled data format"); + break; + } + this->set_color_attachment_clear_color(slot, col_clear_val); + do_clear = true; + } + } + + if (do_clear) { + has_pending_clear_ = true; + + /* Apply state before clear. */ + this->apply_state(); + + /* TODO(Metal): Optimize - Currently force-clear always used. Consider moving clear state to + * MTLTexture instead. */ + /* Force clear if RP is not yet active -- not the most efficient, but there is no distinction + * between clears where no draws occur. Can optimize at the high-level by using explicit + * load-store flags. 
*/ + this->force_clear(); + } +} + +void MTLFrameBuffer::read(eGPUFrameBufferBits planes, + eGPUDataFormat format, + const int area[4], + int channel_len, + int slot, + void *r_data) +{ + + BLI_assert((planes & GPU_STENCIL_BIT) == 0); + BLI_assert(area[2] > 0); + BLI_assert(area[3] > 0); + + switch (planes) { + case GPU_DEPTH_BIT: { + if (this->has_depth_attachment()) { + MTLAttachment depth = this->get_depth_attachment(); + gpu::MTLTexture *tex = depth.texture; + if (tex) { + size_t sample_len = area[2] * area[3]; + size_t sample_size = to_bytesize(tex->format_, format); + int debug_data_size = sample_len * sample_size; + tex->read_internal(0, + area[0], + area[1], + 0, + area[2], + area[3], + 1, + format, + channel_len, + debug_data_size, + r_data); + } + } + else { + MTL_LOG_ERROR( + "Attempting to read depth from a framebuffer which does not have a depth " + "attachment!\n"); + } + } + return; + + case GPU_COLOR_BIT: { + if (this->has_attachment_at_slot(slot)) { + MTLAttachment color = this->get_color_attachment(slot); + gpu::MTLTexture *tex = color.texture; + if (tex) { + size_t sample_len = area[2] * area[3]; + size_t sample_size = to_bytesize(tex->format_, format); + int debug_data_size = sample_len * sample_size * channel_len; + tex->read_internal(0, + area[0], + area[1], + 0, + area[2], + area[3], + 1, + format, + channel_len, + debug_data_size, + r_data); + } + } + } + return; + + case GPU_STENCIL_BIT: + MTL_LOG_ERROR("GPUFramebuffer: Error: Trying to read stencil bit. Unsupported.\n"); + return; + } +} + +void MTLFrameBuffer::blit_to(eGPUFrameBufferBits planes, + int src_slot, + FrameBuffer *dst, + int dst_slot, + int dst_offset_x, + int dst_offset_y) +{ + this->update_attachments(true); + static_cast<MTLFrameBuffer *>(dst)->update_attachments(true); + + BLI_assert(planes != 0); + + MTLFrameBuffer *metal_fb_write = static_cast<MTLFrameBuffer *>(dst); + + BLI_assert(this); + BLI_assert(metal_fb_write); + + /* Get width/height from attachment. */ + MTLAttachment src_attachment; + const bool do_color = (planes & GPU_COLOR_BIT); + const bool do_depth = (planes & GPU_DEPTH_BIT); + const bool do_stencil = (planes & GPU_STENCIL_BIT); + + if (do_color) { + BLI_assert(!do_depth && !do_stencil); + src_attachment = this->get_color_attachment(src_slot); + } + else if (do_depth) { + BLI_assert(!do_color && !do_stencil); + src_attachment = this->get_depth_attachment(); + } + else if (do_stencil) { + BLI_assert(!do_color && !do_depth); + src_attachment = this->get_stencil_attachment(); + } + + BLI_assert(src_attachment.used); + this->blit(src_slot, + 0, + 0, + metal_fb_write, + dst_slot, + dst_offset_x, + dst_offset_y, + src_attachment.texture->width_get(), + src_attachment.texture->height_get(), + planes); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \ Private METAL implementation functions + * \{ */ + +void MTLFrameBuffer::mark_dirty() +{ + is_dirty_ = true; + is_loadstore_dirty_ = true; +} + +void MTLFrameBuffer::mark_loadstore_dirty() +{ + is_loadstore_dirty_ = true; +} + +void MTLFrameBuffer::mark_cleared() +{ + has_pending_clear_ = false; +} + +void MTLFrameBuffer::mark_do_clear() +{ + has_pending_clear_ = true; +} + +void MTLFrameBuffer::update_attachments(bool update_viewport) +{ + if (!dirty_attachments_) { + return; + } + + /* Cache viewport and scissor (If we have existing attachments). 
*/ + int t_viewport[4], t_scissor[4]; + update_viewport = update_viewport && + (this->get_attachment_count() > 0 && this->has_depth_attachment() && + this->has_stencil_attachment()); + if (update_viewport) { + this->viewport_get(t_viewport); + this->scissor_get(t_scissor); + } + + /* Clear current attachments state. */ + this->remove_all_attachments(); + + /* Reset framebuffer options. */ + use_multilayered_rendering_ = false; + + /* Track first attachment for SRGB property extraction. */ + GPUAttachmentType first_attachment = GPU_FB_MAX_ATTACHMENT; + MTLAttachment first_attachment_mtl; + + /* Scan through changes to attachments and populate local structures. */ + bool depth_added = false; + for (GPUAttachmentType type = GPU_FB_MAX_ATTACHMENT - 1; type >= 0; --type) { + GPUAttachment &attach = attachments_[type]; + + switch (type) { + case GPU_FB_DEPTH_ATTACHMENT: + case GPU_FB_DEPTH_STENCIL_ATTACHMENT: { + /* If one of the DEPTH types has added a texture, we avoid running this again, as it would + * only remove the target. */ + if (depth_added) { + break; + } + if (attach.tex) { + /* If we already had a depth attachment, preserve load/clear-state parameters, + * but remove existing and add new attachment. */ + if (this->has_depth_attachment()) { + MTLAttachment depth_attachment_prev = this->get_depth_attachment(); + this->remove_depth_attachment(); + this->add_depth_attachment( + static_cast<gpu::MTLTexture *>(unwrap(attach.tex)), attach.mip, attach.layer); + this->set_depth_attachment_clear_value(depth_attachment_prev.clear_value.depth); + this->set_depth_loadstore_op(depth_attachment_prev.load_action, + depth_attachment_prev.store_action); + } + else { + this->add_depth_attachment( + static_cast<gpu::MTLTexture *>(unwrap(attach.tex)), attach.mip, attach.layer); + } + + /* Check stencil component -- if supplied texture format supports stencil. */ + eGPUTextureFormat format = GPU_texture_format(attach.tex); + bool use_stencil = (type == GPU_FB_DEPTH_STENCIL_ATTACHMENT) && + (format == GPU_DEPTH32F_STENCIL8 || format == GPU_DEPTH24_STENCIL8); + if (use_stencil) { + if (this->has_stencil_attachment()) { + MTLAttachment stencil_attachment_prev = this->get_stencil_attachment(); + this->remove_stencil_attachment(); + this->add_stencil_attachment( + static_cast<gpu::MTLTexture *>(unwrap(attach.tex)), attach.mip, attach.layer); + this->set_stencil_attachment_clear_value( + stencil_attachment_prev.clear_value.stencil); + this->set_stencil_loadstore_op(stencil_attachment_prev.load_action, + stencil_attachment_prev.store_action); + } + else { + this->add_stencil_attachment( + static_cast<gpu::MTLTexture *>(unwrap(attach.tex)), attach.mip, attach.layer); + } + } + + /* Flag depth as added -- mirrors the behavior in gl_framebuffer.cc to exit the for-loop + * after GPU_FB_DEPTH_STENCIL_ATTACHMENT has executed. */ + depth_added = true; + + if (first_attachment == GPU_FB_MAX_ATTACHMENT) { + /* Only use depth texture to get information if there is no color attachment. 
*/ + first_attachment = type; + first_attachment_mtl = this->get_depth_attachment(); + } + } + else { + this->remove_depth_attachment(); + if (type == GPU_FB_DEPTH_STENCIL_ATTACHMENT && this->has_stencil_attachment()) { + this->remove_stencil_attachment(); + } + } + } break; + case GPU_FB_COLOR_ATTACHMENT0: + case GPU_FB_COLOR_ATTACHMENT1: + case GPU_FB_COLOR_ATTACHMENT2: + case GPU_FB_COLOR_ATTACHMENT3: + case GPU_FB_COLOR_ATTACHMENT4: + case GPU_FB_COLOR_ATTACHMENT5: { + int color_slot_ind = type - GPU_FB_COLOR_ATTACHMENT0; + if (attach.tex) { + /* If we already had a colour attachment, preserve load/clear-state parameters, + * but remove existing and add new attachment. */ + if (this->has_attachment_at_slot(color_slot_ind)) { + MTLAttachment color_attachment_prev = this->get_color_attachment(color_slot_ind); + + this->remove_color_attachment(color_slot_ind); + this->add_color_attachment(static_cast<gpu::MTLTexture *>(unwrap(attach.tex)), + color_slot_ind, + attach.mip, + attach.layer); + this->set_color_attachment_clear_color(color_slot_ind, + color_attachment_prev.clear_value.color); + this->set_color_loadstore_op(color_slot_ind, + color_attachment_prev.load_action, + color_attachment_prev.store_action); + } + else { + this->add_color_attachment(static_cast<gpu::MTLTexture *>(unwrap(attach.tex)), + color_slot_ind, + attach.mip, + attach.layer); + } + first_attachment = type; + first_attachment_mtl = this->get_color_attachment(color_slot_ind); + } + else { + this->remove_color_attachment(color_slot_ind); + } + } break; + default: + /* Non-attachment parameters. */ + break; + } + } + + /* Check whether the first attachment is SRGB. */ + if (first_attachment != GPU_FB_MAX_ATTACHMENT) { + is_srgb_ = (first_attachment_mtl.texture->format_get() == GPU_SRGB8_A8); + } + + /* Reset viewport and Scissor (If viewport is smaller or equal to the framebuffer size). */ + if (update_viewport && t_viewport[2] <= width_ && t_viewport[3] <= height_) { + + this->viewport_set(t_viewport); + this->scissor_set(t_viewport); + } + else { + this->viewport_reset(); + this->scissor_reset(); + } + + /* We have now updated our internal structures. */ + dirty_attachments_ = false; +} + +void MTLFrameBuffer::apply_state() +{ + MTLContext *mtl_ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(mtl_ctx); + if (mtl_ctx->active_fb == this) { + if (dirty_state_ == false && dirty_state_ctx_ == mtl_ctx) { + return; + } + + /* Ensure viewport has been set. NOTE: This should no longer happen, but kept for safety to + * track bugs. */ + if (viewport_[2] == 0 || viewport_[3] == 0) { + MTL_LOG_WARNING( + "Viewport had width and height of (0,0) -- Updating -- DEBUG Safety check\n"); + viewport_reset(); + } + + /* Update Context State. 
*/ + mtl_ctx->set_viewport(viewport_[0], viewport_[1], viewport_[2], viewport_[3]); + mtl_ctx->set_scissor(scissor_[0], scissor_[1], scissor_[2], scissor_[3]); + mtl_ctx->set_scissor_enabled(scissor_test_); + + dirty_state_ = false; + dirty_state_ctx_ = mtl_ctx; + } + else { + MTL_LOG_ERROR( + "Attempting to set FrameBuffer State (VIEWPORT, SCISSOR), But FrameBuffer is not bound to " + "current Context.\n"); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \ Adding and Removing attachments + * \{ */ + +bool MTLFrameBuffer::add_color_attachment(gpu::MTLTexture *texture, + uint slot, + int miplevel, + int layer) +{ + BLI_assert(this); + BLI_assert(slot >= 0 && slot < this->get_attachment_limit()); + + if (texture) { + if (miplevel < 0 || miplevel >= MTL_MAX_MIPMAP_COUNT) { + MTL_LOG_WARNING("Attachment specified with invalid mip level %u\n", miplevel); + miplevel = 0; + } + + /* Check if slot is in-use. */ + /* Assume attachment load by default. */ + colour_attachment_count_ += (!mtl_color_attachments_[slot].used) ? 1 : 0; + mtl_color_attachments_[slot].used = true; + mtl_color_attachments_[slot].texture = texture; + mtl_color_attachments_[slot].mip = miplevel; + mtl_color_attachments_[slot].load_action = GPU_LOADACTION_LOAD; + mtl_color_attachments_[slot].store_action = GPU_STOREACTION_STORE; + mtl_color_attachments_[slot].render_target_array_length = 0; + + /* Determine whether array slice or depth plane based on texture type. */ + switch (texture->type_) { + case GPU_TEXTURE_1D: + case GPU_TEXTURE_2D: + BLI_assert(layer <= 0); + mtl_color_attachments_[slot].slice = 0; + mtl_color_attachments_[slot].depth_plane = 0; + break; + case GPU_TEXTURE_1D_ARRAY: + if (layer < 0) { + layer = 0; + MTL_LOG_WARNING("TODO: Support layered rendering for 1D array textures, if needed.\n"); + } + BLI_assert(layer < texture->h_); + mtl_color_attachments_[slot].slice = layer; + mtl_color_attachments_[slot].depth_plane = 0; + break; + case GPU_TEXTURE_2D_ARRAY: + BLI_assert(layer < texture->d_); + mtl_color_attachments_[slot].slice = layer; + mtl_color_attachments_[slot].depth_plane = 0; + if (layer == -1) { + mtl_color_attachments_[slot].slice = 0; + mtl_color_attachments_[slot].render_target_array_length = texture->d_; + use_multilayered_rendering_ = true; + } + break; + case GPU_TEXTURE_3D: + BLI_assert(layer < texture->d_); + mtl_color_attachments_[slot].slice = 0; + mtl_color_attachments_[slot].depth_plane = layer; + if (layer == -1) { + mtl_color_attachments_[slot].depth_plane = 0; + mtl_color_attachments_[slot].render_target_array_length = texture->d_; + use_multilayered_rendering_ = true; + } + break; + case GPU_TEXTURE_CUBE: + BLI_assert(layer < 6); + mtl_color_attachments_[slot].slice = layer; + mtl_color_attachments_[slot].depth_plane = 0; + if (layer == -1) { + mtl_color_attachments_[slot].slice = 0; + mtl_color_attachments_[slot].depth_plane = 0; + mtl_color_attachments_[slot].render_target_array_length = 6; + use_multilayered_rendering_ = true; + } + break; + case GPU_TEXTURE_CUBE_ARRAY: + BLI_assert(layer < 6 * texture->d_); + /* TODO(Metal): Verify multilayered rendering for Cube arrays. 
*/ + mtl_color_attachments_[slot].slice = layer; + mtl_color_attachments_[slot].depth_plane = 0; + if (layer == -1) { + mtl_color_attachments_[slot].slice = 0; + mtl_color_attachments_[slot].depth_plane = 0; + mtl_color_attachments_[slot].render_target_array_length = texture->d_; + use_multilayered_rendering_ = true; + } + break; + case GPU_TEXTURE_BUFFER: + mtl_color_attachments_[slot].slice = 0; + mtl_color_attachments_[slot].depth_plane = 0; + break; + default: + MTL_LOG_ERROR("MTLFrameBuffer::add_color_attachment Unrecognized texture type %u\n", + texture->type_); + break; + } + + /* Update Frame-buffer Resolution. */ + int width_of_miplayer, height_of_miplayer; + if (miplevel <= 0) { + width_of_miplayer = texture->width_get(); + height_of_miplayer = texture->height_get(); + } + else { + width_of_miplayer = max_ii(texture->width_get() >> miplevel, 1); + height_of_miplayer = max_ii(texture->height_get() >> miplevel, 1); + } + + if (width_ == 0 || height_ == 0) { + this->size_set(width_of_miplayer, height_of_miplayer); + this->scissor_reset(); + this->viewport_reset(); + BLI_assert(width_ > 0); + BLI_assert(height_ > 0); + } + else { + BLI_assert(width_ == width_of_miplayer); + BLI_assert(height_ == height_of_miplayer); + } + + /* Flag as dirty. */ + this->mark_dirty(); + } + else { + MTL_LOG_ERROR( + "Passing in null texture to MTLFrameBuffer::addColourAttachment (This could be due to not " + "all texture types being supported).\n"); + } + return true; +} + +bool MTLFrameBuffer::add_depth_attachment(gpu::MTLTexture *texture, int miplevel, int layer) +{ + BLI_assert(this); + + if (texture) { + if (miplevel < 0 || miplevel >= MTL_MAX_MIPMAP_COUNT) { + MTL_LOG_WARNING("Attachment specified with invalid mip level %u\n", miplevel); + miplevel = 0; + } + + /* Assume attachment load by default. */ + mtl_depth_attachment_.used = true; + mtl_depth_attachment_.texture = texture; + mtl_depth_attachment_.mip = miplevel; + mtl_depth_attachment_.load_action = GPU_LOADACTION_LOAD; + mtl_depth_attachment_.store_action = GPU_STOREACTION_STORE; + mtl_depth_attachment_.render_target_array_length = 0; + + /* Determine whether array slice or depth plane based on texture type. 
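For reference, the switch in add_color_attachment() above resolves the layer argument roughly as follows; the 64-layer GPU_TEXTURE_2D_ARRAY used here is a hypothetical example:

    /* layer = 3  -> slice = 3, depth_plane = 0 (render into a single layer).     */
    /* layer = -1 -> slice = 0, render_target_array_length = 64 (texture->d_),    */
    /*               use_multilayered_rendering_ = true (layered render target).  */

For GPU_TEXTURE_3D targets the same layer value selects depth_plane instead of slice; on the high-level API side a specific layer is typically requested with GPU_ATTACHMENT_TEXTURE_LAYER(), while GPU_ATTACHMENT_TEXTURE() leaves the layer at -1.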
*/ + switch (texture->type_) { + case GPU_TEXTURE_1D: + case GPU_TEXTURE_2D: + BLI_assert(layer <= 0); + mtl_depth_attachment_.slice = 0; + mtl_depth_attachment_.depth_plane = 0; + break; + case GPU_TEXTURE_1D_ARRAY: + if (layer < 0) { + layer = 0; + MTL_LOG_WARNING("TODO: Support layered rendering for 1D array textures, if needed\n"); + } + BLI_assert(layer < texture->h_); + mtl_depth_attachment_.slice = layer; + mtl_depth_attachment_.depth_plane = 0; + break; + case GPU_TEXTURE_2D_ARRAY: + BLI_assert(layer < texture->d_); + mtl_depth_attachment_.slice = layer; + mtl_depth_attachment_.depth_plane = 0; + if (layer == -1) { + mtl_depth_attachment_.slice = 0; + mtl_depth_attachment_.render_target_array_length = texture->d_; + use_multilayered_rendering_ = true; + } + break; + case GPU_TEXTURE_3D: + BLI_assert(layer < texture->d_); + mtl_depth_attachment_.slice = 0; + mtl_depth_attachment_.depth_plane = layer; + if (layer == -1) { + mtl_depth_attachment_.depth_plane = 0; + mtl_depth_attachment_.render_target_array_length = texture->d_; + use_multilayered_rendering_ = true; + } + break; + case GPU_TEXTURE_CUBE: + BLI_assert(layer < 6); + mtl_depth_attachment_.slice = layer; + mtl_depth_attachment_.depth_plane = 0; + if (layer == -1) { + mtl_depth_attachment_.slice = 0; + mtl_depth_attachment_.depth_plane = 0; + mtl_depth_attachment_.render_target_array_length = 1; + use_multilayered_rendering_ = true; + } + break; + case GPU_TEXTURE_CUBE_ARRAY: + /* TODO(Metal): Verify multilayered rendering for Cube arrays. */ + BLI_assert(layer < 6 * texture->d_); + mtl_depth_attachment_.slice = layer; + mtl_depth_attachment_.depth_plane = 0; + if (layer == -1) { + mtl_depth_attachment_.slice = 0; + mtl_depth_attachment_.depth_plane = 0; + mtl_depth_attachment_.render_target_array_length = texture->d_; + use_multilayered_rendering_ = true; + } + break; + case GPU_TEXTURE_BUFFER: + mtl_depth_attachment_.slice = 0; + mtl_depth_attachment_.depth_plane = 0; + break; + default: + BLI_assert(false && "Unrecognized texture type"); + break; + } + + /* Update Frame-buffer Resolution. */ + int width_of_miplayer, height_of_miplayer; + if (miplevel <= 0) { + width_of_miplayer = texture->width_get(); + height_of_miplayer = texture->height_get(); + } + else { + width_of_miplayer = max_ii(texture->width_get() >> miplevel, 1); + height_of_miplayer = max_ii(texture->height_get() >> miplevel, 1); + } + + /* Update Frame-buffer Resolution. */ + if (width_ == 0 || height_ == 0) { + this->size_set(width_of_miplayer, height_of_miplayer); + this->scissor_reset(); + this->viewport_reset(); + BLI_assert(width_ > 0); + BLI_assert(height_ > 0); + } + else { + BLI_assert(width_ == texture->width_get()); + BLI_assert(height_ == texture->height_get()); + } + + /* Flag as dirty after attachments changed. */ + this->mark_dirty(); + } + else { + MTL_LOG_ERROR( + "Passing in null texture to MTLFrameBuffer::addDepthAttachment (This could be due to not " + "all texture types being supported)."); + } + return true; +} + +bool MTLFrameBuffer::add_stencil_attachment(gpu::MTLTexture *texture, int miplevel, int layer) +{ + BLI_assert(this); + + if (texture) { + if (miplevel < 0 || miplevel >= MTL_MAX_MIPMAP_COUNT) { + MTL_LOG_WARNING("Attachment specified with invalid mip level %u\n", miplevel); + miplevel = 0; + } + + /* Assume attachment load by default. 
*/ + mtl_stencil_attachment_.used = true; + mtl_stencil_attachment_.texture = texture; + mtl_stencil_attachment_.mip = miplevel; + mtl_stencil_attachment_.load_action = GPU_LOADACTION_LOAD; + mtl_stencil_attachment_.store_action = GPU_STOREACTION_STORE; + mtl_stencil_attachment_.render_target_array_length = 0; + + /* Determine whether array slice or depth plane based on texture type. */ + switch (texture->type_) { + case GPU_TEXTURE_1D: + case GPU_TEXTURE_2D: + BLI_assert(layer <= 0); + mtl_stencil_attachment_.slice = 0; + mtl_stencil_attachment_.depth_plane = 0; + break; + case GPU_TEXTURE_1D_ARRAY: + if (layer < 0) { + layer = 0; + MTL_LOG_WARNING("TODO: Support layered rendering for 1D array textures, if needed\n"); + } + BLI_assert(layer < texture->h_); + mtl_stencil_attachment_.slice = layer; + mtl_stencil_attachment_.depth_plane = 0; + break; + case GPU_TEXTURE_2D_ARRAY: + BLI_assert(layer < texture->d_); + mtl_stencil_attachment_.slice = layer; + mtl_stencil_attachment_.depth_plane = 0; + if (layer == -1) { + mtl_stencil_attachment_.slice = 0; + mtl_stencil_attachment_.render_target_array_length = texture->d_; + use_multilayered_rendering_ = true; + } + break; + case GPU_TEXTURE_3D: + BLI_assert(layer < texture->d_); + mtl_stencil_attachment_.slice = 0; + mtl_stencil_attachment_.depth_plane = layer; + if (layer == -1) { + mtl_stencil_attachment_.depth_plane = 0; + mtl_stencil_attachment_.render_target_array_length = texture->d_; + use_multilayered_rendering_ = true; + } + break; + case GPU_TEXTURE_CUBE: + BLI_assert(layer < 6); + mtl_stencil_attachment_.slice = layer; + mtl_stencil_attachment_.depth_plane = 0; + if (layer == -1) { + mtl_stencil_attachment_.slice = 0; + mtl_stencil_attachment_.depth_plane = 0; + mtl_stencil_attachment_.render_target_array_length = 1; + use_multilayered_rendering_ = true; + } + break; + case GPU_TEXTURE_CUBE_ARRAY: + /* TODO(Metal): Verify multilayered rendering for Cube arrays. */ + BLI_assert(layer < 6 * texture->d_); + mtl_stencil_attachment_.slice = layer; + mtl_stencil_attachment_.depth_plane = 0; + if (layer == -1) { + mtl_stencil_attachment_.slice = 0; + mtl_stencil_attachment_.depth_plane = 0; + mtl_stencil_attachment_.render_target_array_length = texture->d_; + use_multilayered_rendering_ = true; + } + break; + case GPU_TEXTURE_BUFFER: + mtl_stencil_attachment_.slice = 0; + mtl_stencil_attachment_.depth_plane = 0; + break; + default: + BLI_assert(false && "Unrecognized texture type"); + break; + } + + /* Update Frame-buffer Resolution. */ + int width_of_miplayer, height_of_miplayer; + if (miplevel <= 0) { + width_of_miplayer = texture->width_get(); + height_of_miplayer = texture->height_get(); + } + else { + width_of_miplayer = max_ii(texture->width_get() >> miplevel, 1); + height_of_miplayer = max_ii(texture->height_get() >> miplevel, 1); + } + + /* Update Frame-buffer Resolution. */ + if (width_ == 0 || height_ == 0) { + this->size_set(width_of_miplayer, height_of_miplayer); + this->scissor_reset(); + this->viewport_reset(); + BLI_assert(width_ > 0); + BLI_assert(height_ > 0); + } + else { + BLI_assert(width_ == texture->width_get()); + BLI_assert(height_ == texture->height_get()); + } + + /* Flag as dirty after attachments changed. 
*/ + this->mark_dirty(); + } + else { + MTL_LOG_ERROR( + "Passing in null texture to MTLFrameBuffer::addStencilAttachment (This could be due to " + "not all texture types being supported)."); + } + return true; +} + +bool MTLFrameBuffer::remove_color_attachment(uint slot) +{ + BLI_assert(this); + BLI_assert(slot >= 0 && slot < this->get_attachment_limit()); + + if (this->has_attachment_at_slot(slot)) { + colour_attachment_count_ -= (mtl_color_attachments_[slot].used) ? 1 : 0; + mtl_color_attachments_[slot].used = false; + this->ensure_render_target_size(); + this->mark_dirty(); + return true; + } + + return false; +} + +bool MTLFrameBuffer::remove_depth_attachment() +{ + BLI_assert(this); + + mtl_depth_attachment_.used = false; + mtl_depth_attachment_.texture = nullptr; + this->ensure_render_target_size(); + this->mark_dirty(); + + return true; +} + +bool MTLFrameBuffer::remove_stencil_attachment() +{ + BLI_assert(this); + + mtl_stencil_attachment_.used = false; + mtl_stencil_attachment_.texture = nullptr; + this->ensure_render_target_size(); + this->mark_dirty(); + + return true; +} + +void MTLFrameBuffer::remove_all_attachments() +{ + BLI_assert(this); + + for (int attachment = 0; attachment < GPU_FB_MAX_COLOR_ATTACHMENT; attachment++) { + this->remove_color_attachment(attachment); + } + this->remove_depth_attachment(); + this->remove_stencil_attachment(); + colour_attachment_count_ = 0; + this->mark_dirty(); + + /* Verify height. */ + this->ensure_render_target_size(); + + /* Flag attachments as no longer being dirty. */ + dirty_attachments_ = false; +} + +void MTLFrameBuffer::ensure_render_target_size() +{ + /* If we have no attachments, reset width and height to zero. */ + if (colour_attachment_count_ == 0 && !this->has_depth_attachment() && + !this->has_stencil_attachment()) { + + /* Reset Viewport and Scissor for NULL framebuffer. */ + this->size_set(0, 0); + this->scissor_reset(); + this->viewport_reset(); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \ Clear values and Load-store actions + * \{ */ + +void MTLFrameBuffer::attachment_set_loadstore_op(GPUAttachmentType type, + eGPULoadOp load_action, + eGPUStoreOp store_action) +{ + if (type >= GPU_FB_COLOR_ATTACHMENT0) { + int slot = type - GPU_FB_COLOR_ATTACHMENT0; + this->set_color_loadstore_op(slot, load_action, store_action); + } + else if (type == GPU_FB_DEPTH_STENCIL_ATTACHMENT) { + this->set_depth_loadstore_op(load_action, store_action); + this->set_stencil_loadstore_op(load_action, store_action); + } + else if (type == GPU_FB_DEPTH_ATTACHMENT) { + this->set_depth_loadstore_op(load_action, store_action); + } +} + +bool MTLFrameBuffer::set_color_attachment_clear_color(uint slot, const float clear_color[4]) +{ + BLI_assert(this); + BLI_assert(slot >= 0 && slot < this->get_attachment_limit()); + + /* Only mark as dirty if values have changed. 
*/ + bool changed = mtl_color_attachments_[slot].load_action != GPU_LOADACTION_CLEAR; + changed = changed || (memcmp(mtl_color_attachments_[slot].clear_value.color, + clear_color, + sizeof(float) * 4) != 0); + if (changed) { + memcpy(mtl_color_attachments_[slot].clear_value.color, clear_color, sizeof(float) * 4); + } + mtl_color_attachments_[slot].load_action = GPU_LOADACTION_CLEAR; + + if (changed) { + this->mark_loadstore_dirty(); + } + return true; +} + +bool MTLFrameBuffer::set_depth_attachment_clear_value(float depth_clear) +{ + BLI_assert(this); + + if (mtl_depth_attachment_.clear_value.depth != depth_clear || + mtl_depth_attachment_.load_action != GPU_LOADACTION_CLEAR) { + mtl_depth_attachment_.clear_value.depth = depth_clear; + mtl_depth_attachment_.load_action = GPU_LOADACTION_CLEAR; + this->mark_loadstore_dirty(); + } + return true; +} + +bool MTLFrameBuffer::set_stencil_attachment_clear_value(uint stencil_clear) +{ + BLI_assert(this); + + if (mtl_stencil_attachment_.clear_value.stencil != stencil_clear || + mtl_stencil_attachment_.load_action != GPU_LOADACTION_CLEAR) { + mtl_stencil_attachment_.clear_value.stencil = stencil_clear; + mtl_stencil_attachment_.load_action = GPU_LOADACTION_CLEAR; + this->mark_loadstore_dirty(); + } + return true; +} + +bool MTLFrameBuffer::set_color_loadstore_op(uint slot, + eGPULoadOp load_action, + eGPUStoreOp store_action) +{ + BLI_assert(this); + eGPULoadOp prev_load_action = mtl_color_attachments_[slot].load_action; + eGPUStoreOp prev_store_action = mtl_color_attachments_[slot].store_action; + mtl_color_attachments_[slot].load_action = load_action; + mtl_color_attachments_[slot].store_action = store_action; + + bool changed = (mtl_color_attachments_[slot].load_action != prev_load_action || + mtl_color_attachments_[slot].store_action != prev_store_action); + if (changed) { + this->mark_loadstore_dirty(); + } + + return changed; +} + +bool MTLFrameBuffer::set_depth_loadstore_op(eGPULoadOp load_action, eGPUStoreOp store_action) +{ + BLI_assert(this); + eGPULoadOp prev_load_action = mtl_depth_attachment_.load_action; + eGPUStoreOp prev_store_action = mtl_depth_attachment_.store_action; + mtl_depth_attachment_.load_action = load_action; + mtl_depth_attachment_.store_action = store_action; + + bool changed = (mtl_depth_attachment_.load_action != prev_load_action || + mtl_depth_attachment_.store_action != prev_store_action); + if (changed) { + this->mark_loadstore_dirty(); + } + + return changed; +} + +bool MTLFrameBuffer::set_stencil_loadstore_op(eGPULoadOp load_action, eGPUStoreOp store_action) +{ + BLI_assert(this); + eGPULoadOp prev_load_action = mtl_stencil_attachment_.load_action; + eGPUStoreOp prev_store_action = mtl_stencil_attachment_.store_action; + mtl_stencil_attachment_.load_action = load_action; + mtl_stencil_attachment_.store_action = store_action; + + bool changed = (mtl_stencil_attachment_.load_action != prev_load_action || + mtl_stencil_attachment_.store_action != prev_store_action); + if (changed) { + this->mark_loadstore_dirty(); + } + + return changed; +} + +bool MTLFrameBuffer::reset_clear_state() +{ + for (int slot = 0; slot < colour_attachment_count_; slot++) { + this->set_color_loadstore_op(slot, GPU_LOADACTION_LOAD, GPU_STOREACTION_STORE); + } + this->set_depth_loadstore_op(GPU_LOADACTION_LOAD, GPU_STOREACTION_STORE); + this->set_stencil_loadstore_op(GPU_LOADACTION_LOAD, GPU_STOREACTION_STORE); + return true; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \ Fetch values and 
Frame-buffer status + * \{ */ + +bool MTLFrameBuffer::has_attachment_at_slot(uint slot) +{ + BLI_assert(this); + + if (slot >= 0 && slot < this->get_attachment_limit()) { + return mtl_color_attachments_[slot].used; + } + return false; +} + +bool MTLFrameBuffer::has_color_attachment_with_texture(gpu::MTLTexture *texture) +{ + BLI_assert(this); + + for (int attachment = 0; attachment < this->get_attachment_limit(); attachment++) { + if (mtl_color_attachments_[attachment].used && + mtl_color_attachments_[attachment].texture == texture) { + return true; + } + } + return false; +} + +bool MTLFrameBuffer::has_depth_attachment() +{ + BLI_assert(this); + return mtl_depth_attachment_.used; +} + +bool MTLFrameBuffer::has_stencil_attachment() +{ + BLI_assert(this); + return mtl_stencil_attachment_.used; +} + +int MTLFrameBuffer::get_color_attachment_slot_from_texture(gpu::MTLTexture *texture) +{ + BLI_assert(this); + BLI_assert(texture); + + for (int attachment = 0; attachment < this->get_attachment_limit(); attachment++) { + if (mtl_color_attachments_[attachment].used && + (mtl_color_attachments_[attachment].texture == texture)) { + return attachment; + } + } + return -1; +} + +uint MTLFrameBuffer::get_attachment_count() +{ + BLI_assert(this); + return colour_attachment_count_; +} + +MTLAttachment MTLFrameBuffer::get_color_attachment(uint slot) +{ + BLI_assert(this); + if (slot >= 0 && slot < GPU_FB_MAX_COLOR_ATTACHMENT) { + return mtl_color_attachments_[slot]; + } + MTLAttachment null_attachment; + null_attachment.used = false; + return null_attachment; +} + +MTLAttachment MTLFrameBuffer::get_depth_attachment() +{ + BLI_assert(this); + return mtl_depth_attachment_; +} + +MTLAttachment MTLFrameBuffer::get_stencil_attachment() +{ + BLI_assert(this); + return mtl_stencil_attachment_; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \ METAL API Resources and Validation + * \{ */ +bool MTLFrameBuffer::validate_render_pass() +{ + BLI_assert(this); + + /* First update attachments if dirty. */ + this->update_attachments(true); + + /* Verify attachment count. */ + int used_attachments = 0; + for (int attachment = 0; attachment < GPU_FB_MAX_COLOR_ATTACHMENT; attachment++) { + if (mtl_color_attachments_[attachment].used) { + used_attachments++; + } + } + used_attachments += (mtl_depth_attachment_.used) ? 1 : 0; + used_attachments += (mtl_stencil_attachment_.used) ? 1 : 0; + return (used_attachments > 0); +} + +MTLLoadAction mtl_load_action_from_gpu(eGPULoadOp action) +{ + return (action == GPU_LOADACTION_LOAD) ? + MTLLoadActionLoad : + ((action == GPU_LOADACTION_CLEAR) ? MTLLoadActionClear : MTLLoadActionDontCare); +} + +MTLStoreAction mtl_store_action_from_gpu(eGPUStoreOp action) +{ + return (action == GPU_STOREACTION_STORE) ? MTLStoreActionStore : MTLStoreActionDontCare; +} + +MTLRenderPassDescriptor *MTLFrameBuffer::bake_render_pass_descriptor(bool load_contents) +{ + BLI_assert(this); + if (load_contents) { + /* Only force-load contents if there is no clear pending. */ + BLI_assert(!has_pending_clear_); + } + + /* Ensure we are inside a frame boundary. */ + MTLContext *metal_ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(metal_ctx && metal_ctx->get_inside_frame()); + UNUSED_VARS_NDEBUG(metal_ctx); + + /* If Frame-buffer has been modified, regenerate descriptor. */ + if (is_dirty_) { + /* Clear all configs. 
*/ + for (int config = 0; config < 3; config++) { + descriptor_dirty_[config] = true; + } + } + else if (is_loadstore_dirty_) { + /* Load config always has load ops, so we only need to re-generate custom and clear state. */ + descriptor_dirty_[MTL_FB_CONFIG_CLEAR] = true; + descriptor_dirty_[MTL_FB_CONFIG_CUSTOM] = true; + } + + /* If we need to populate descriptor" */ + /* Select config based on FrameBuffer state: + * [0] {MTL_FB_CONFIG_CLEAR} = Clear config -- we have a pending clear so should perform our + * configured clear. + * [1] {MTL_FB_CONFIG_LOAD} = Load config -- We need to re-load ALL attachments, + * used for re-binding/pass-breaks. + * [2] {MTL_FB_CONFIG_CUSTOM} = Custom config -- Use this when a custom binding config is + * specified. + */ + uint descriptor_config = (load_contents) ? MTL_FB_CONFIG_LOAD : + ((this->get_pending_clear()) ? MTL_FB_CONFIG_CLEAR : + MTL_FB_CONFIG_CUSTOM); + if (descriptor_dirty_[descriptor_config] || framebuffer_descriptor_[descriptor_config] == nil) { + + /* Create descriptor if it does not exist. */ + if (framebuffer_descriptor_[descriptor_config] == nil) { + framebuffer_descriptor_[descriptor_config] = [[MTLRenderPassDescriptor alloc] init]; + } + +#if defined(MAC_OS_X_VERSION_11_0) && __MAC_OS_X_VERSION_MAX_ALLOWED > MAC_OS_X_VERSION_11_0 + if (@available(macOS 11.00, *)) { + /* Optimization: Use smaller tile size on Apple Silicon if exceeding a certain bpp limit. */ + bool is_tile_based_gpu = [metal_ctx->device hasUnifiedMemory]; + if (is_tile_based_gpu) { + uint framebuffer_bpp = this->get_bits_per_pixel(); + bool use_small_tiles = (framebuffer_bpp > 64); + + if (use_small_tiles) { + framebuffer_descriptor_[descriptor_config].tileWidth = 16; + framebuffer_descriptor_[descriptor_config].tileHeight = 16; + } + } + } +#endif + + /* Configure multilayered rendering. */ + if (use_multilayered_rendering_) { + /* Ensure all targets have the same length. */ + int len = 0; + bool valid = true; + + for (int attachment_ind = 0; attachment_ind < GPU_FB_MAX_COLOR_ATTACHMENT; + attachment_ind++) { + if (mtl_color_attachments_[attachment_ind].used) { + if (len == 0) { + len = mtl_color_attachments_[attachment_ind].render_target_array_length; + } + else { + valid = valid && + (len == mtl_color_attachments_[attachment_ind].render_target_array_length); + } + } + } + + if (mtl_depth_attachment_.used) { + if (len == 0) { + len = mtl_depth_attachment_.render_target_array_length; + } + else { + valid = valid && (len == mtl_depth_attachment_.render_target_array_length); + } + } + + if (mtl_stencil_attachment_.used) { + if (len == 0) { + len = mtl_stencil_attachment_.render_target_array_length; + } + else { + valid = valid && (len == mtl_stencil_attachment_.render_target_array_length); + } + } + + BLI_assert(len > 0); + BLI_assert(valid); + framebuffer_descriptor_[descriptor_config].renderTargetArrayLength = len; + } + else { + framebuffer_descriptor_[descriptor_config].renderTargetArrayLength = 0; + } + + /* Color attachments. */ + int colour_attachments = 0; + for (int attachment_ind = 0; attachment_ind < GPU_FB_MAX_COLOR_ATTACHMENT; attachment_ind++) { + + if (mtl_color_attachments_[attachment_ind].used) { + + /* Create attachment descriptor. 
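Combining the config selection above with the per-attachment setup that follows, the effective Metal load action resolves as summarized below (a recap of this function's behavior, not additional code from the commit):

    /* MTL_FB_CONFIG_LOAD   -> loadAction is forced to MTLLoadActionLoad.             */
    /* MTL_FB_CONFIG_CLEAR  -> the configured action is used as-is, so a pending      */
    /*                         GPU_LOADACTION_CLEAR becomes MTLLoadActionClear.       */
    /* MTL_FB_CONFIG_CUSTOM -> GPU_LOADACTION_CLEAR is demoted to a load; load and    */
    /*                         dont-care map directly via mtl_load_action_from_gpu(). */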
*/ + MTLRenderPassColorAttachmentDescriptor *attachment = + colour_attachment_descriptors_[attachment_ind]; + BLI_assert(attachment != nil); + + id<MTLTexture> texture = + mtl_color_attachments_[attachment_ind].texture->get_metal_handle_base(); + if (texture == nil) { + MTL_LOG_ERROR("Attempting to assign invalid texture as attachment\n"); + } + + /* IF SRGB is enabled, but we are rendering with SRGB disabled, sample texture view. */ + /* TODO(Metal): Consider caching SRGB texture view. */ + id<MTLTexture> source_color_texture = texture; + if (this->get_is_srgb() && !this->get_srgb_enabled()) { + source_color_texture = [texture newTextureViewWithPixelFormat:MTLPixelFormatRGBA8Unorm]; + } + + /* Resolve appropriate load action -- IF force load, perform load. + * If clear but framebuffer has no pending clear, also load. */ + eGPULoadOp load_action = mtl_color_attachments_[attachment_ind].load_action; + if (descriptor_config == MTL_FB_CONFIG_LOAD) { + /* MTL_FB_CONFIG_LOAD must always load. */ + load_action = GPU_LOADACTION_LOAD; + } + else if (descriptor_config == MTL_FB_CONFIG_CUSTOM && + load_action == GPU_LOADACTION_CLEAR) { + /* Custom config should be LOAD or DONT_CARE only. */ + load_action = GPU_LOADACTION_LOAD; + } + attachment.texture = source_color_texture; + attachment.loadAction = mtl_load_action_from_gpu(load_action); + attachment.clearColor = + (load_action == GPU_LOADACTION_CLEAR) ? + MTLClearColorMake(mtl_color_attachments_[attachment_ind].clear_value.color[0], + mtl_color_attachments_[attachment_ind].clear_value.color[1], + mtl_color_attachments_[attachment_ind].clear_value.color[2], + mtl_color_attachments_[attachment_ind].clear_value.color[3]) : + MTLClearColorMake(0.0, 0.0, 0.0, 0.0); + attachment.storeAction = mtl_store_action_from_gpu( + mtl_color_attachments_[attachment_ind].store_action); + attachment.level = mtl_color_attachments_[attachment_ind].mip; + attachment.slice = mtl_color_attachments_[attachment_ind].slice; + attachment.depthPlane = mtl_color_attachments_[attachment_ind].depth_plane; + colour_attachments++; + + /* Copy attachment info back in. */ + [framebuffer_descriptor_[descriptor_config].colorAttachments setObject:attachment + atIndexedSubscript:attachment_ind]; + } + else { + /* Disable colour attachment. */ + [framebuffer_descriptor_[descriptor_config].colorAttachments setObject:nil + atIndexedSubscript:attachment_ind]; + } + } + BLI_assert(colour_attachments == colour_attachment_count_); + + /* Depth attachment. */ + if (mtl_depth_attachment_.used) { + framebuffer_descriptor_[descriptor_config].depthAttachment.texture = + (id<MTLTexture>)mtl_depth_attachment_.texture->get_metal_handle_base(); + + /* Resolve appropriate load action -- IF force load, perform load. + * If clear but framebuffer has no pending clear, also load. */ + eGPULoadOp load_action = mtl_depth_attachment_.load_action; + if (descriptor_config == MTL_FB_CONFIG_LOAD) { + /* MTL_FB_CONFIG_LOAD must always load. */ + load_action = GPU_LOADACTION_LOAD; + } + else if (descriptor_config == MTL_FB_CONFIG_CUSTOM && load_action == GPU_LOADACTION_CLEAR) { + /* Custom config should be LOAD or DONT_CARE only. */ + load_action = GPU_LOADACTION_LOAD; + } + framebuffer_descriptor_[descriptor_config].depthAttachment.loadAction = + mtl_load_action_from_gpu(load_action); + framebuffer_descriptor_[descriptor_config].depthAttachment.clearDepth = + (load_action == GPU_LOADACTION_CLEAR) ? 
mtl_depth_attachment_.clear_value.depth : 0; + framebuffer_descriptor_[descriptor_config].depthAttachment.storeAction = + mtl_store_action_from_gpu(mtl_depth_attachment_.store_action); + framebuffer_descriptor_[descriptor_config].depthAttachment.level = mtl_depth_attachment_.mip; + framebuffer_descriptor_[descriptor_config].depthAttachment.slice = + mtl_depth_attachment_.slice; + framebuffer_descriptor_[descriptor_config].depthAttachment.depthPlane = + mtl_depth_attachment_.depth_plane; + } + else { + framebuffer_descriptor_[descriptor_config].depthAttachment.texture = nil; + } + + /* Stencil attachment. */ + if (mtl_stencil_attachment_.used) { + framebuffer_descriptor_[descriptor_config].stencilAttachment.texture = + (id<MTLTexture>)mtl_stencil_attachment_.texture->get_metal_handle_base(); + + /* Resolve appropriate load action -- IF force load, perform load. + * If clear but framebuffer has no pending clear, also load. */ + eGPULoadOp load_action = mtl_stencil_attachment_.load_action; + if (descriptor_config == MTL_FB_CONFIG_LOAD) { + /* MTL_FB_CONFIG_LOAD must always load. */ + load_action = GPU_LOADACTION_LOAD; + } + else if (descriptor_config == MTL_FB_CONFIG_CUSTOM && load_action == GPU_LOADACTION_CLEAR) { + /* Custom config should be LOAD or DONT_CARE only. */ + load_action = GPU_LOADACTION_LOAD; + } + framebuffer_descriptor_[descriptor_config].stencilAttachment.loadAction = + mtl_load_action_from_gpu(load_action); + framebuffer_descriptor_[descriptor_config].stencilAttachment.clearStencil = + (load_action == GPU_LOADACTION_CLEAR) ? mtl_stencil_attachment_.clear_value.stencil : 0; + framebuffer_descriptor_[descriptor_config].stencilAttachment.storeAction = + mtl_store_action_from_gpu(mtl_stencil_attachment_.store_action); + framebuffer_descriptor_[descriptor_config].stencilAttachment.level = + mtl_stencil_attachment_.mip; + framebuffer_descriptor_[descriptor_config].stencilAttachment.slice = + mtl_stencil_attachment_.slice; + framebuffer_descriptor_[descriptor_config].stencilAttachment.depthPlane = + mtl_stencil_attachment_.depth_plane; + } + else { + framebuffer_descriptor_[descriptor_config].stencilAttachment.texture = nil; + } + descriptor_dirty_[descriptor_config] = false; + } + is_dirty_ = false; + is_loadstore_dirty_ = false; + return framebuffer_descriptor_[descriptor_config]; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \ Blitting + * \{ */ + +void MTLFrameBuffer::blit(uint read_slot, + uint src_x_offset, + uint src_y_offset, + MTLFrameBuffer *metal_fb_write, + uint write_slot, + uint dst_x_offset, + uint dst_y_offset, + uint width, + uint height, + eGPUFrameBufferBits blit_buffers) +{ + BLI_assert(this); + BLI_assert(metal_fb_write); + if (!(this && metal_fb_write)) { + return; + } + MTLContext *mtl_context = reinterpret_cast<MTLContext *>(GPU_context_active_get()); + + const bool do_color = (blit_buffers & GPU_COLOR_BIT); + const bool do_depth = (blit_buffers & GPU_DEPTH_BIT); + const bool do_stencil = (blit_buffers & GPU_STENCIL_BIT); + + /* Early exit if there is no blit to do. */ + if (!(do_color || do_depth || do_stencil)) { + MTL_LOG_WARNING( + " MTLFrameBuffer: requested blit but no color, depth or stencil flag was set\n"); + return; + } + + id<MTLBlitCommandEncoder> blit_encoder = nil; + + /* If the color format is not the same, we cannot use the BlitCommandEncoder, and instead use + * a Graphics-based blit. 
*/ + if (do_color && (this->get_color_attachment(read_slot).texture->format_get() != + metal_fb_write->get_color_attachment(read_slot).texture->format_get())) { + + MTLAttachment src_attachment = this->get_color_attachment(read_slot); + MTLAttachment dst_attachment = metal_fb_write->get_color_attachment(write_slot); + assert(src_attachment.slice == 0 && + "currently only supporting slice 0 for graphics framebuffer blit"); + + src_attachment.texture->blit(dst_attachment.texture, + src_x_offset, + src_y_offset, + dst_x_offset, + dst_y_offset, + src_attachment.mip, + dst_attachment.mip, + dst_attachment.slice, + width, + height); + } + else { + + /* Setup blit encoder. */ + blit_encoder = mtl_context->main_command_buffer.ensure_begin_blit_encoder(); + + if (do_color) { + MTLAttachment src_attachment = this->get_color_attachment(read_slot); + MTLAttachment dst_attachment = metal_fb_write->get_color_attachment(write_slot); + + if (src_attachment.used && dst_attachment.used) { + + /* TODO(Metal): Support depth(z) offset in blit if needed. */ + src_attachment.texture->blit(blit_encoder, + src_x_offset, + src_y_offset, + 0, + src_attachment.slice, + src_attachment.mip, + dst_attachment.texture, + dst_x_offset, + dst_y_offset, + 0, + dst_attachment.slice, + dst_attachment.mip, + width, + height, + 1); + } + else { + MTL_LOG_ERROR("Failed performing colour blit\n"); + } + } + } + if ((do_depth || do_stencil) && blit_encoder == nil) { + blit_encoder = mtl_context->main_command_buffer.ensure_begin_blit_encoder(); + } + + if (do_depth) { + MTLAttachment src_attachment = this->get_depth_attachment(); + MTLAttachment dst_attachment = metal_fb_write->get_depth_attachment(); + + if (src_attachment.used && dst_attachment.used) { + + /* TODO(Metal): Support depth(z) offset in blit if needed. */ + src_attachment.texture->blit(blit_encoder, + src_x_offset, + src_y_offset, + 0, + src_attachment.slice, + src_attachment.mip, + dst_attachment.texture, + dst_x_offset, + dst_y_offset, + 0, + dst_attachment.slice, + dst_attachment.mip, + width, + height, + 1); + } + else { + MTL_LOG_ERROR("Failed performing depth blit\n"); + } + } + + /* Stencil attachment blit. */ + if (do_stencil) { + MTLAttachment src_attachment = this->get_stencil_attachment(); + MTLAttachment dst_attachment = metal_fb_write->get_stencil_attachment(); + + if (src_attachment.used && dst_attachment.used) { + + /* TODO(Metal): Support depth(z) offset in blit if needed. 
*/ + src_attachment.texture->blit(blit_encoder, + src_x_offset, + src_y_offset, + 0, + src_attachment.slice, + src_attachment.mip, + dst_attachment.texture, + dst_x_offset, + dst_y_offset, + 0, + dst_attachment.slice, + dst_attachment.mip, + width, + height, + 1); + } + else { + MTL_LOG_ERROR("Failed performing Stencil blit\n"); + } + } +} + +int MTLFrameBuffer::get_width() +{ + return width_; +} +int MTLFrameBuffer::get_height() +{ + return height_; +} + +} // blender::gpu diff --git a/source/blender/gpu/metal/mtl_index_buffer.hh b/source/blender/gpu/metal/mtl_index_buffer.hh new file mode 100644 index 00000000000..fde26b16927 --- /dev/null +++ b/source/blender/gpu/metal/mtl_index_buffer.hh @@ -0,0 +1,79 @@ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include "MEM_guardedalloc.h" +#include "gpu_index_buffer_private.hh" +#include "mtl_context.hh" +#include <Cocoa/Cocoa.h> +#include <Metal/Metal.h> +#include <QuartzCore/QuartzCore.h> + +namespace blender::gpu { + +class MTLIndexBuf : public IndexBuf { + friend class MTLBatch; + friend class MTLDrawList; + + private: + /* Metal buffer resource. */ + gpu::MTLBuffer *ibo_ = nullptr; + uint64_t alloc_size_ = 0; + +#ifndef NDEBUG + /* Flags whether point index buffer has been compacted + * to remove false restart indices. */ + bool point_restarts_stripped_ = false; +#endif + + /* Optimized index buffers. + * NOTE(Metal): This optimization encodes a new index buffer following + * #TriangleList topology. Parsing of Index buffers is more optimal + * when not using restart-compatible primitive topology types. */ + GPUPrimType optimized_primitive_type_; + gpu::MTLBuffer *optimized_ibo_ = nullptr; + uint32_t emulated_v_count = 0; + void free_optimized_buffer(); + + /* Flags whether an index buffer can be optimized. + * For index buffers which are partially modified + * on the host, or by the GPU, optimization cannot be performed. */ + bool can_optimize_ = true; + + public: + ~MTLIndexBuf(); + + void bind_as_ssbo(uint32_t binding) override; + const uint32_t *read() const override; + + void upload_data() override; + void update_sub(uint32_t start, uint32_t len, const void *data) override; + + /* #get_index_buffer can conditionally return an optimized index buffer of a + * differing format, if it is concluded that optimization is preferred + * for the given inputs. + * Index buffer optimization is used to replace restart-compatible + * primitive types with non-restart-compatible ones such as #TriangleList and + * #LineList. This improves GPU execution for these types significantly, while + * only incurring a small performance penalty. + * + * This is also used to emulate unsupported topology types + * such as triangle fan. */ + id<MTLBuffer> get_index_buffer(GPUPrimType &in_out_primitive_type, uint &in_out_v_count); + void flag_can_optimize(bool can_optimize); + + static MTLIndexType gpu_index_type_to_metal(GPUIndexBufType type) + { + return (type == GPU_INDEX_U16) ? 
MTLIndexTypeUInt16 : MTLIndexTypeUInt32; + } + + private: + void strip_restart_indices() override; + + MEM_CXX_CLASS_ALLOC_FUNCS("MTLIndexBuf") +}; + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_index_buffer.mm b/source/blender/gpu/metal/mtl_index_buffer.mm new file mode 100644 index 00000000000..99795d7bbd9 --- /dev/null +++ b/source/blender/gpu/metal/mtl_index_buffer.mm @@ -0,0 +1,515 @@ + +/** \file + * \ingroup gpu + */ +#include "mtl_index_buffer.hh" +#include "mtl_context.hh" +#include "mtl_debug.hh" + +#include "BLI_span.hh" + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Core MTLIndexBuf implementation. + * \{ */ + +MTLIndexBuf::~MTLIndexBuf() +{ + if (ibo_ != nullptr && !this->is_subrange_) { + ibo_->free(); + } + this->free_optimized_buffer(); +} + +void MTLIndexBuf::free_optimized_buffer() +{ + if (optimized_ibo_) { + optimized_ibo_->free(); + optimized_ibo_ = nullptr; + } +} + +void MTLIndexBuf::bind_as_ssbo(uint32_t binding) +{ + /* Flag buffer as incompatible with optimized/patched buffers as contents + * can now have partial modifications from the GPU. */ + this->flag_can_optimize(false); + this->free_optimized_buffer(); + + /* Ensure we have a valid IBO. */ + BLI_assert(this->ibo_); + + /* TODO(Metal): Support index buffer SSBO's. Dependent on compute implementation. */ + MTL_LOG_WARNING("MTLIndexBuf::bind_as_ssbo not yet implemented!\n"); +} + +const uint32_t *MTLIndexBuf::read() const +{ + if (ibo_ != nullptr) { + + /* Return host pointer. */ + void *data = ibo_->get_host_ptr(); + return static_cast<uint32_t *>(data); + } + BLI_assert(false && "Index buffer not ready to be read."); + return nullptr; +} + +void MTLIndexBuf::upload_data() +{ + /* Handle sub-range upload. */ + if (is_subrange_) { + MTLIndexBuf *mtlsrc = static_cast<MTLIndexBuf *>(src_); + mtlsrc->upload_data(); + +#ifndef NDEBUG + BLI_assert_msg(!mtlsrc->point_restarts_stripped_, + "Cannot use sub-range on stripped point buffer."); +#endif + + /* If parent sub-range allocation has changed, + * update our index buffer. */ + if (alloc_size_ != mtlsrc->alloc_size_ || ibo_ != mtlsrc->ibo_) { + + /* Update index buffer and allocation from source. */ + alloc_size_ = mtlsrc->alloc_size_; + ibo_ = mtlsrc->ibo_; + + /* Reset any allocated patched or optimized index buffers. */ + this->free_optimized_buffer(); + } + return; + } + + /* If new data ready, and index buffer already exists, release current. */ + if ((ibo_ != nullptr) && (this->data_ != nullptr)) { + MTL_LOG_INFO("Re-creating index buffer with new data. IndexBuf %p\n", this); + ibo_->free(); + ibo_ = nullptr; + } + + /* Prepare Buffer and Upload Data. */ + if (ibo_ == nullptr && data_ != nullptr) { + alloc_size_ = this->size_get(); + if (alloc_size_ == 0) { + MTL_LOG_WARNING("[Metal] Warning! Trying to allocate index buffer with size=0 bytes\n"); + } + else { + ibo_ = MTLContext::get_global_memory_manager().allocate_with_data(alloc_size_, true, data_); + BLI_assert(ibo_); + ibo_->set_label(@"Index Buffer"); + } + + /* No need to keep copy of data_ in system memory. */ + MEM_SAFE_FREE(data_); + } +} + +void MTLIndexBuf::update_sub(uint32_t start, uint32_t len, const void *data) +{ + BLI_assert(!is_subrange_); + + /* If host-side data still exists, modify and upload as normal */ + if (data_ != nullptr) { + + /* Free index buffer if one exists. 
*/ + if (ibo_ != nullptr && !this->is_subrange_) { + ibo_->free(); + ibo_ = nullptr; + } + + BLI_assert(start + len < this->size_get()); + + /* Apply start byte offset to data pointer. */ + void *modified_base_ptr = data_; + uint8_t *ptr = static_cast<uint8_t *>(modified_base_ptr); + ptr += start; + modified_base_ptr = static_cast<void *>(ptr); + + /* Modify host-side data. */ + memcpy(modified_base_ptr, data, len); + return; + } + + /* Verify buffer. */ + BLI_assert(ibo_ != nullptr); + + /* Otherwise, we will inject a data update, using staged data, into the command stream. + * Stage update contents in temporary buffer*/ + MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(ctx); + MTLTemporaryBuffer range = ctx->get_scratchbuffer_manager().scratch_buffer_allocate_range(len); + memcpy(range.data, data, len); + + /* Copy updated contents into primary buffer. + * These changes need to be uploaded via blit to ensure the data copies happen in-order. */ + id<MTLBuffer> dest_buffer = ibo_->get_metal_buffer(); + BLI_assert(dest_buffer != nil); + + id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder(); + [enc copyFromBuffer:range.metal_buffer + sourceOffset:(uint32_t)range.buffer_offset + toBuffer:dest_buffer + destinationOffset:start + size:len]; + + /* Synchronize changes back to host to ensure CPU-side data is up-to-date for non + * Shared buffers. */ + if (dest_buffer.storageMode == MTLStorageModeManaged) { + [enc synchronizeResource:dest_buffer]; + } + + /* Invalidate patched/optimized buffers. */ + this->free_optimized_buffer(); + + /* Flag buffer as incompatible with optimized/patched buffers as contents + * have partial modifications. */ + this->flag_can_optimize(false); + + BLI_assert(false); +} + +void MTLIndexBuf::flag_can_optimize(bool can_optimize) +{ + can_optimize_ = can_optimize; +} + +/** \} */ + +/** \name Index buffer optimization and topology emulation + * + * Index buffer optimization and emulation. Optimize index buffers by + * eliminating restart-indices. + * Emulate unsupported index types e.g. Triangle Fan and Line Loop. + * \{ */ + +/* Returns total vertices in new buffer. */ +template<typename T> +static uint32_t populate_optimized_tri_strip_buf(Span<T> original_data, + MutableSpan<T> output_data, + uint32_t input_index_len) +{ + /* Generate #TriangleList from #TriangleStrip. */ + uint32_t current_vert_len = 0; + uint32_t current_output_ind = 0; + T indices[3]; + + for (int c_index = 0; c_index < input_index_len; c_index++) { + T current_index = original_data[c_index]; + if (current_index == T(-1)) { + /* Stop current primitive. Move onto next. */ + current_vert_len = 0; + } + else { + if (current_vert_len < 3) { + /* Prepare first triangle. + * Cache indices before generating a triangle, in case we have bad primitive-restarts. */ + indices[current_vert_len] = current_index; + } + + /* Emit triangle once we reach 3 input verts in current strip. */ + if (current_vert_len == 3) { + /* First triangle in strip. */ + output_data[current_output_ind++] = indices[0]; + output_data[current_output_ind++] = indices[1]; + output_data[current_output_ind++] = indices[2]; + } + else if (current_vert_len > 3) { + /* All other triangles in strip. + * These triangles are populated using data from previous 2 vertices + * and the latest index. 
*/ + uint32_t tri_id = current_vert_len - 3; + uint32_t base_output_ind = current_output_ind; + if ((tri_id % 2) == 0) { + output_data[base_output_ind + 0] = output_data[base_output_ind - 2]; + output_data[base_output_ind + 1] = current_index; + output_data[base_output_ind + 2] = output_data[base_output_ind - 1]; + } + else { + output_data[base_output_ind + 0] = output_data[base_output_ind - 1]; + output_data[base_output_ind + 1] = output_data[base_output_ind - 2]; + output_data[base_output_ind + 2] = current_index; + } + current_output_ind += 3; + } + + /* Increment relative vertex index. */ + current_vert_len++; + } + } + return current_output_ind; +} + +/* Returns total vertices in new buffer. */ +template<typename T> +static uint32_t populate_emulated_tri_fan_buf(Span<T> original_data, + MutableSpan<T> output_data, + uint32_t input_index_len) +{ + /* Generate #TriangleList from #TriangleFan. */ + T base_prim_ind_val = 0; + uint32_t current_vert_len = 0; + uint32_t current_output_ind = 0; + T indices[3]; + + for (int c_index = 0; c_index < input_index_len; c_index++) { + T current_index = original_data[c_index]; + if (current_index == T(-1)) { + /* Stop current primitive. Move onto next. */ + current_vert_len = 0; + } + else { + if (current_vert_len < 3) { + /* Prepare first triangle. + * Cache indices before generating a triangle, in case we have bad primitive-restarts. */ + indices[current_vert_len] = current_index; + } + + /* emit triangle once we reach 3 input verts in current strip. */ + if (current_vert_len == 3) { + /* First triangle in strip. */ + output_data[current_output_ind++] = indices[0]; + output_data[current_output_ind++] = indices[1]; + output_data[current_output_ind++] = indices[2]; + base_prim_ind_val = indices[0]; + } + else if (current_vert_len > 3) { + /* All other triangles in strip. + * These triangles are populated using data from previous 2 vertices + * and the latest index. */ + uint32_t base_output_ind = current_output_ind; + + output_data[base_output_ind + 0] = base_prim_ind_val; + output_data[base_output_ind + 1] = output_data[base_output_ind - 1]; + output_data[base_output_ind + 2] = current_index; + current_output_ind += 3; + } + + /* Increment relative vertex index. */ + current_vert_len++; + } + } + return current_output_ind; +} + +id<MTLBuffer> MTLIndexBuf::get_index_buffer(GPUPrimType &in_out_primitive_type, + uint32_t &in_out_v_count) +{ + /* Determine whether to return the original index buffer, or whether we + * should emulate an unsupported primitive type, or optimize a restart- + * compatible type for faster performance. */ + bool should_optimize_or_emulate = (in_out_primitive_type == GPU_PRIM_TRI_FAN) || + (in_out_primitive_type == GPU_PRIM_TRI_STRIP); + if (!should_optimize_or_emulate || is_subrange_ || !can_optimize_) { + /* Ensure we are not optimized. */ + BLI_assert(this->optimized_ibo_ == nullptr); + + /* Return regular index buffer. */ + BLI_assert(this->ibo_ && this->ibo_->get_metal_buffer()); + return this->ibo_->get_metal_buffer(); + } + + /* Perform optimization on type. */ + GPUPrimType input_prim_type = in_out_primitive_type; + this->upload_data(); + if (!ibo_ && optimized_ibo_ == nullptr) { + /* Cannot optimize buffer if no source IBO exists. */ + return nil; + } + + /* Verify whether existing index buffer is valid. 
*/ + if (optimized_ibo_ != nullptr && optimized_primitive_type_ != input_prim_type) { + BLI_assert_msg(false, + "Cannot change the optimized primitive format after generation, as source " + "index buffer data is discarded."); + return nil; + } + + /* Generate optimized index buffer. */ + if (optimized_ibo_ == nullptr) { + + /* Generate unwrapped index buffer. */ + switch (input_prim_type) { + case GPU_PRIM_TRI_FAN: { + + /* Calculate maximum size. */ + uint32_t max_possible_verts = (this->index_len_ - 2) * 3; + BLI_assert(max_possible_verts > 0); + + /* Allocate new buffer. */ + optimized_ibo_ = MTLContext::get_global_memory_manager().allocate( + max_possible_verts * + ((index_type_ == GPU_INDEX_U16) ? sizeof(uint16_t) : sizeof(uint32_t)), + true); + + /* Populate new index buffer. */ + if (index_type_ == GPU_INDEX_U16) { + Span<uint16_t> orig_data(static_cast<const uint16_t *>(ibo_->get_host_ptr()), + this->index_len_); + MutableSpan<uint16_t> output_data( + static_cast<uint16_t *>(optimized_ibo_->get_host_ptr()), this->index_len_); + emulated_v_count = populate_emulated_tri_fan_buf<uint16_t>( + orig_data, output_data, this->index_len_); + } + else { + Span<uint32_t> orig_data(static_cast<const uint32_t *>(ibo_->get_host_ptr()), + this->index_len_); + MutableSpan<uint32_t> output_data( + static_cast<uint32_t *>(optimized_ibo_->get_host_ptr()), this->index_len_); + emulated_v_count = populate_emulated_tri_fan_buf<uint32_t>( + orig_data, output_data, this->index_len_); + } + + BLI_assert(emulated_v_count <= max_possible_verts); + + /* Flush buffer and output. */ + optimized_ibo_->flush(); + optimized_primitive_type_ = input_prim_type; + in_out_v_count = emulated_v_count; + in_out_primitive_type = GPU_PRIM_TRIS; + } + + case GPU_PRIM_TRI_STRIP: { + + /* Calculate maximum size. */ + uint32_t max_possible_verts = (this->index_len_ - 2) * 3; + BLI_assert(max_possible_verts > 0); + + /* Allocate new buffer. */ + optimized_ibo_ = MTLContext::get_global_memory_manager().allocate( + max_possible_verts * + ((index_type_ == GPU_INDEX_U16) ? sizeof(uint16_t) : sizeof(uint32_t)), + true); + + /* Populate new index buffer. */ + if (index_type_ == GPU_INDEX_U16) { + Span<uint16_t> orig_data(static_cast<const uint16_t *>(ibo_->get_host_ptr()), + this->index_len_); + MutableSpan<uint16_t> output_data( + static_cast<uint16_t *>(optimized_ibo_->get_host_ptr()), this->index_len_); + emulated_v_count = populate_optimized_tri_strip_buf<uint16_t>( + orig_data, output_data, this->index_len_); + } + else { + Span<uint32_t> orig_data(static_cast<const uint32_t *>(ibo_->get_host_ptr()), + this->index_len_); + MutableSpan<uint32_t> output_data( + static_cast<uint32_t *>(optimized_ibo_->get_host_ptr()), this->index_len_); + emulated_v_count = populate_optimized_tri_strip_buf<uint32_t>( + orig_data, output_data, this->index_len_); + } + + BLI_assert(emulated_v_count <= max_possible_verts); + + /* Flush buffer and output. */ + optimized_ibo_->flush(); + optimized_primitive_type_ = input_prim_type; + in_out_v_count = emulated_v_count; + in_out_primitive_type = GPU_PRIM_TRIS; + } break; + + case GPU_PRIM_LINE_STRIP: { + /* TODO(Metal): Line strip topology types would benefit from optimization to remove + * primitive restarts, however, these do not occur frequently, nor with + * significant geometry counts. 
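A hypothetical caller sketch for the optimization path above (variable names are illustrative, not from the commit): a draw routine holding an MTLIndexBuf for a restart-dependent primitive type would query the buffer before encoding, then use the rewritten type and count for the draw call.

    GPUPrimType prim_type = GPU_PRIM_TRI_FAN;
    uint32_t v_count = original_index_len;
    id<MTLBuffer> index_buffer = mtl_index_buf->get_index_buffer(prim_type, v_count);
    /* On return, prim_type may be GPU_PRIM_TRIS and v_count the emulated index count.
     * The worst-case output size is (index_len_ - 2) * 3 indices, e.g. a 6-index fan
     * expands to at most 12 indices (4 triangles). */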
*/ + MTL_LOG_INFO("TODO: Primitive topology: Optimize line strip topology types\n"); + } break; + + case GPU_PRIM_LINE_LOOP: { + /* TODO(Metal): Line Loop primitive type requires use of optimized index buffer for + * emulation, if used with indexed rendering. This path is currently not hit as #LineLoop + * does not currently appear to be used alongside an index buffer. */ + MTL_LOG_WARNING( + "TODO: Primitive topology: Line Loop Index buffer optimization required for " + "emulation.\n"); + } break; + + case GPU_PRIM_TRIS: + case GPU_PRIM_LINES: + case GPU_PRIM_POINTS: { + /* Should not get here - TRIS/LINES/POINTS do not require emulation or optimization. */ + BLI_assert_unreachable(); + return nil; + } + + default: + /* Should not get here - Invalid primitive type. */ + BLI_assert_unreachable(); + break; + } + } + + /* Return optimized buffer. */ + if (optimized_ibo_ != nullptr) { + + /* Delete original buffer if one still exists, as we do no need it. */ + if (ibo_ != nullptr) { + ibo_->free(); + ibo_ = nullptr; + } + + /* Output params. */ + in_out_v_count = emulated_v_count; + in_out_primitive_type = GPU_PRIM_TRIS; + return optimized_ibo_->get_metal_buffer(); + } + return nil; +} + +void MTLIndexBuf::strip_restart_indices() +{ + /* We remove point buffer primitive restart indices by swapping restart indices + * with the first valid index at the end of the index buffer and reducing the + * length. Primitive restarts are invalid in Metal for non-restart-compatible + * primitive types. We also cannot just use zero unlike for Lines and Triangles, + * as we cannot create de-generative point primitives to hide geometry, as each + * point is independent. + * Instead, we must remove these hidden indices from the index buffer. + * NOTE: This happens prior to index squeezing so operate on 32-bit indices. */ + MutableSpan<uint32_t> uint_idx(static_cast<uint32_t *>(data_), index_len_); + for (uint i = 0; i < index_len_; i++) { + if (uint_idx[i] == 0xFFFFFFFFu) { + + /* Find swap index at end of index buffer. */ + int swap_index = -1; + for (uint j = index_len_ - 1; j >= i; j--) { + /* If end index is restart, just reduce length. */ + if (uint_idx[j] == 0xFFFFFFFFu) { + index_len_--; + continue; + } + /* Otherwise assign swap index. */ + swap_index = j; + break; + } + + /* If swap index is not valid, then there were no valid non-restart indices + * to swap with. However, the above loop will have removed these indices by + * reducing the length of indices. Debug assertions verify that the restart + * index is no longer included. */ + if (swap_index == -1) { + BLI_assert(index_len_ <= i); + } + else { + /* If we have found an index we can swap with, flip the values. + * We also reduce the length. As per above loop, swap_index should + * now be outside the index length range. */ + uint32_t swap_index_value = uint_idx[swap_index]; + uint_idx[i] = swap_index_value; + uint_idx[swap_index] = 0xFFFFFFFFu; + index_len_--; + BLI_assert(index_len_ <= swap_index); + } + } + } + +#ifndef NDEBUG + /* Flag as having been stripped to ensure invalid usage is tracked. 
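As a small worked example of the swap-from-the-end strategy in strip_restart_indices() above (R denoting the 0xFFFFFFFF restart value, values illustrative): given [2, R, 7, 9] with index_len_ = 4, the restart at i = 1 is swapped with the 9 found at the tail, giving [2, 9, 7, R] and index_len_ = 3, so the restart now lies outside the used range. The reordering is harmless because each point primitive is independent.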
*/ + point_restarts_stripped_ = true; +#endif +} + +/** \} */ + +} // blender::gpu diff --git a/source/blender/gpu/metal/mtl_memory.hh b/source/blender/gpu/metal/mtl_memory.hh new file mode 100644 index 00000000000..df80df6543f --- /dev/null +++ b/source/blender/gpu/metal/mtl_memory.hh @@ -0,0 +1,482 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#pragma once + +#include <atomic> +#include <functional> +#include <map> +#include <mutex> +#include <set> +#include <unordered_map> + +#include "mtl_common.hh" + +#include <Cocoa/Cocoa.h> +#include <Metal/Metal.h> +#include <QuartzCore/QuartzCore.h> + +@class CAMetalLayer; +@class MTLCommandQueue; +@class MTLRenderPipelineState; + +/* Metal Memory Manager Overview. */ +/* + * The Metal Backend Memory manager is designed to provide an interface + * for all other MTL_* modules where memory allocation is required. + * + * Different allocation strategies and data-structures are used depending + * on how the data is used by the backend. These aim to optimally handle + * system memory and abstract away any complexity from the MTL_* modules + * themselves. + * + * There are two primary allocation modes which can be used: + * + * ** MTLScratchBufferManager ** + * + * Each MTLContext owns a ScratchBufferManager which is implemented + * as a pool of circular buffers, designed to handle temporary + * memory allocations which occur on a per-frame basis. The scratch + * buffers allow flushing of host memory to the GPU to be batched. + * + * Each frame, the next scratch buffer is reset, then later flushed upon + * command buffer submission. + * + * NOTE: This is allocated per-context due to allocations being tied + * to workload submissions and context-specific submissions. + * + * Examples of scratch buffer usage are: + * - Immediate-mode temporary vertex buffers. + * - Shader uniform data updates + * - Staging of data for resource copies, or, data reads/writes. + * + * Usage: + * + * MTLContext::get_scratchbuffer_manager() - to fetch active manager. + * + * MTLTemporaryBuffer scratch_buffer_allocate_range(size) + * MTLTemporaryBuffer scratch_buffer_allocate_range_aligned(size, align) + * + * --------------------------------------------------------------------------------- + * ** MTLBufferPool ** + * + * For static and longer-lasting memory allocations, such as those for UBOs, + * Vertex buffers, index buffers, etc; We want an optimal abstraction for + * fetching a MTLBuffer of the desired size and resource options. + * + * Memory allocations can be expensive so the MTLBufferPool provides + * functionality to track usage of these buffers and once a buffer + * is no longer in use, it is returned to the buffer pool for use + * by another backend resource. + * + * The MTLBufferPool provides functionality for safe tracking of resources, + * as buffers freed on the host side must have their usage by the GPU tracked, + * to ensure they are not prematurely re-used before they have finished being + * used by the GPU. + * + * NOTE: The MTLBufferPool is a global construct which can be fetched from anywhere. + * + * Usage: + * MTLContext::get_global_memory_manager(); - static routine to fetch global memory manager. 
+ * + * gpu::MTLBuffer *allocate(size, is_cpu_visibile) + * gpu::MTLBuffer *allocate_aligned(size, alignment, is_cpu_visibile) + * gpu::MTLBuffer *allocate_with_data(size, is_cpu_visibile, data_ptr) + * gpu::MTLBuffer *allocate_aligned_with_data(size, alignment, is_cpu_visibile, data_ptr) + */ + +/* Debug memory statistics: Disabled by Macro rather than guarded for + * performance considerations. */ +#define MTL_DEBUG_MEMORY_STATISTICS 0 + +/* Allows a scratch buffer to temporarily grow beyond its maximum, which allows submission + * of one-time-use data packets which are too large. */ +#define MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION 1 + +namespace blender::gpu { + +/* Forward Declarations. */ +class MTLContext; +class MTLCommandBufferManager; +class MTLUniformBuf; + +/* -------------------------------------------------------------------- */ +/** \name Memory Management. + * \{ */ + +/* MTLBuffer allocation wrapper. */ +class MTLBuffer { + + private: + /* Metal resource. */ + id<MTLBuffer> metal_buffer_; + + /* Host-visible mapped-memory pointer. Behavior depends on buffer type: + * - Shared buffers: pointer represents base address of #MTLBuffer whose data + * access has shared access by both the CPU and GPU on + * Unified Memory Architectures (UMA). + * - Managed buffer: Host-side mapped buffer region for CPU (Host) access. Managed buffers + * must be manually flushed to transfer data to GPU-resident buffer. + * - Private buffer: Host access is invalid, `data` will be nullptr. */ + void *data_; + + /* Whether buffer is allocated from an external source. */ + bool is_external_ = false; + + /* Allocation info. */ + MTLResourceOptions options_; + id<MTLDevice> device_; + uint64_t alignment_; + uint64_t size_; + + /* Allocated size may be larger than actual size. */ + uint64_t usage_size_; + + /* Lifetime info - whether the current buffer is actively in use. A buffer + * should be in use after it has been allocated. De-allocating the buffer, and + * returning it to the free buffer pool will set in_use to false. Using a buffer + * while it is not in-use should not be allowed and result in an error. */ + std::atomic<bool> in_use_; + + public: + MTLBuffer(id<MTLDevice> device, uint64_t size, MTLResourceOptions options, uint alignment = 1); + MTLBuffer(id<MTLBuffer> external_buffer); + ~MTLBuffer(); + + /* Fetch information about backing MTLBuffer. */ + id<MTLBuffer> get_metal_buffer() const; + void *get_host_ptr() const; + uint64_t get_size_used() const; + uint64_t get_size() const; + + /* Flush data to GPU. */ + void flush(); + void flush_range(uint64_t offset, uint64_t length); + bool requires_flush(); + + /* Buffer usage tracking. */ + void flag_in_use(bool used); + bool get_in_use(); + void set_usage_size(uint64_t size_used); + + /* Debug. */ + void set_label(NSString *str); + + /* Read properties. */ + MTLResourceOptions get_resource_options(); + uint64_t get_alignment(); + + /* Resource-local free: For buffers allocated via memory manager, + * this will call the context `free_buffer` method to return the buffer to the context memory + * pool. + * + * Otherwise, free will release the associated metal resource. + * As a note, calling the destructor will also destroy the buffer and associated metal + * resource. */ + void free(); + + /* Safety check to ensure buffers are not used after free. */ + void debug_ensure_used(); +}; + +/* View into part of an MTLBuffer. 
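Given the pool interface listed in the overview and the gpu::MTLBuffer wrapper declared above, a hedged usage sketch (the call site and the vertex-data scenario are assumptions; the method names are the ones declared here):

  /* Stage a small amount of host data in a pooled, CPU-visible buffer. */
  const float verts[8] = {0.0f, 0.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f};
  gpu::MTLBuffer *buf = MTLContext::get_global_memory_manager().allocate_with_data(
      sizeof(verts), /* cpu_visible */ true, verts);

  /* ... bind buf->get_metal_buffer() on an encoder and submit work ... */

  /* Hand the buffer back; it is queued in the active MTLSafeFreeList and only
   * re-enters the pool once in-flight command buffers complete. */
  buf->free();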
*/ +struct MTLBufferRange { + id<MTLBuffer> metal_buffer; + void *data; + uint64_t buffer_offset; + uint64_t size; + MTLResourceOptions options; + + void flush(); + bool requires_flush(); +}; + +/* Circular scratch buffer allocations should be seen as temporary and only used within the + * lifetime of the frame. */ +using MTLTemporaryBuffer = MTLBufferRange; + +/* Round-Robin Circular-buffer. */ +class MTLCircularBuffer { + friend class MTLScratchBufferManager; + + private: + MTLContext &own_context_; + + /* Wrapped MTLBuffer allocation handled. */ + gpu::MTLBuffer *cbuffer_; + + /* Current offset where next allocation will begin. */ + uint64_t current_offset_; + + /* Whether the Circular Buffer can grow during re-allocation if + * the size is exceeded. */ + bool can_resize_; + + /* Usage information. */ + uint64_t used_frame_index_; + uint64_t last_flush_base_offset_; + + public: + MTLCircularBuffer(MTLContext &ctx, uint64_t initial_size, bool allow_grow); + ~MTLCircularBuffer(); + MTLTemporaryBuffer allocate_range(uint64_t alloc_size); + MTLTemporaryBuffer allocate_range_aligned(uint64_t alloc_size, uint alignment); + void flush(); + + /* Reset pointer back to start of circular buffer. */ + void reset(); +}; + +/* Wrapper struct used by Memory Manager to sort and compare gpu::MTLBuffer resources inside the + * memory pools. */ +struct MTLBufferHandle { + gpu::MTLBuffer *buffer; + uint64_t buffer_size; + + inline MTLBufferHandle(gpu::MTLBuffer *buf) + { + this->buffer = buf; + this->buffer_size = this->buffer->get_size(); + } + + inline MTLBufferHandle(uint64_t compare_size) + { + this->buffer = nullptr; + this->buffer_size = compare_size; + } +}; + +struct CompareMTLBuffer { + bool operator()(const MTLBufferHandle &lhs, const MTLBufferHandle &rhs) const + { + return lhs.buffer_size < rhs.buffer_size; + } +}; + +/* An MTLSafeFreeList is a temporary list of gpu::MTLBuffers which have + * been freed by the high level backend, but are pending GPU work execution before + * the gpu::MTLBuffers can be returned to the Memory manager pools. + * This list is implemented as a chunked linked-list. + * + * Only a single MTLSafeFreeList is active at one time and is associated with current command + * buffer submissions. If an MTLBuffer is freed during the lifetime of a command buffer, it could + * still possibly be in-use and as such, the MTLSafeFreeList will increment its reference count for + * each command buffer submitted while the current pool is active. + * + * -- Reference count is incremented upon MTLCommandBuffer commit. + * -- Reference count is decremented in the MTLCommandBuffer completion callback handler. + * + * A new MTLSafeFreeList will begin each render step (frame). This pooling of buffers, rather than + * individual buffer resource tracking reduces performance overhead. + * + * * The reference count starts at 1 to ensure that the reference count cannot prematurely reach + * zero until any command buffers have been submitted. This additional decrement happens + * when the next MTLSafeFreeList is created, to allow the existing pool to be released once + * the reference count hits zero after submitted command buffers complete. + * + * NOTE: the Metal API independently tracks resources used by command buffers for the purpose of + * keeping resources alive while in-use by the driver and CPU, however, this differs from the + * MTLSafeFreeList mechanism in the Metal backend, which exists for the purpose of allowing + * previously allocated MTLBuffer resources to be re-used. 
This allows us to save on the expensive + * cost of memory allocation. + */ +class MTLSafeFreeList { + friend class MTLBufferPool; + + private: + std::atomic<int> reference_count_; + std::atomic<bool> in_free_queue_; + std::recursive_mutex lock_; + + /* Linked list of next MTLSafeFreeList chunk if current chunk is full. */ + std::atomic<int> has_next_pool_; + std::atomic<MTLSafeFreeList *> next_; + + /* Lockless list. MAX_NUM_BUFFERS_ within a chunk based on considerations + * for performance and memory. */ + static const int MAX_NUM_BUFFERS_ = 1024; + std::atomic<int> current_list_index_; + gpu::MTLBuffer *safe_free_pool_[MAX_NUM_BUFFERS_]; + + public: + MTLSafeFreeList(); + + /* Add buffer to Safe Free List, can be called from secondary threads. + * Performs a lockless list insert. */ + void insert_buffer(gpu::MTLBuffer *buffer); + + /* Increments command buffer reference count. */ + void increment_reference(); + + /* Decrement and return of buffers to pool occur on MTLCommandBuffer completion callback thread. + */ + void decrement_reference(); + + void flag_in_queue() + { + in_free_queue_ = true; + if (has_next_pool_) { + MTLSafeFreeList *next_pool = next_.load(); + BLI_assert(next_pool != nullptr); + next_pool->flag_in_queue(); + } + } +}; + +/* MTLBuffer pools. */ +/* Allocating Metal buffers is expensive, so we cache all allocated buffers, + * and when requesting a new buffer, find one which fits the required dimensions + * from an existing pool of buffers. + * + * When freeing MTLBuffers, we insert them into the current MTLSafeFreeList, which defers + * release of the buffer until the associated command buffers have finished executing. + * This prevents a buffer from being re-used while it is still in-use by the GPU. + * + * * Once command buffers complete, MTLSafeFreeList's associated with the current + * command buffer submission are added to the `completed_safelist_queue_`. + * + * * At a set point in time, all MTLSafeFreeList's in `completed_safelist_queue_` have their + * MTLBuffers re-inserted into the Memory Manager's pools. */ +class MTLBufferPool { + + private: + /* Memory statistics. */ + long long int total_allocation_bytes_ = 0; + +#if MTL_DEBUG_MEMORY_STATISTICS == 1 + /* Debug statistics. */ + std::atomic<int> per_frame_allocation_count_; + std::atomic<long long int> allocations_in_pool_; + std::atomic<long long int> buffers_in_pool_; +#endif + + /* Metal resources. */ + bool ensure_initialised_ = false; + id<MTLDevice> device_ = nil; + + /* The buffer selection aims to pick a buffer which meets the minimum size requirements. + * To do this, we keep an ordered set of all available buffers. If the buffer is larger than the + * desired allocation size, we check it against `mtl_buffer_size_threshold_factor_`, + * which defines what % larger than the original allocation the buffer can be. + * - A higher value results in greater re-use of previously allocated buffers of similar sizes. + * - A lower value may result in more dynamic allocations, but minimized memory usage for a given + * scenario. + * The current value of 1.26 is calibrated for optimal performance and memory utilization. */ + static constexpr float mtl_buffer_size_threshold_factor_ = 1.26; + + /* Buffer pools using MTLResourceOptions as key for allocation type. + * Aliased as 'uint64_t' for map type compatibility. + * - A size-ordered list (MultiSet) of allocated buffers is kept per MTLResourceOptions + * permutation. This allows efficient lookup for buffers of a given requested size. 
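Putting the reference-counting rules above together, a hedged sketch of the intended call order (the commit/completion hook placement is an assumption about the surrounding command-buffer code, `buffer` stands for a previously pool-allocated gpu::MTLBuffer, and get_current_safe_list() / update_memory_pools() are the pool methods declared just below):

  MTLSafeFreeList *list = MTLContext::get_global_memory_manager().get_current_safe_list();

  list->increment_reference();  /* On MTLCommandBuffer commit. */
  buffer->free();               /* Host-side free: the buffer is only queued in `list`. */
  list->decrement_reference();  /* In the completion handler. Once the count reaches zero
                                 * (the initial reference is dropped when the next list
                                 * begins), the list lands in completed_safelist_queue_. */
  MTLContext::get_global_memory_manager().update_memory_pools(); /* Buffers become reusable. */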
+ * - MTLBufferHandle wraps a gpu::MTLBuffer pointer to achieve easy size-based sorting + * via CompareMTLBuffer. */ + using MTLBufferPoolOrderedList = std::multiset<MTLBufferHandle, CompareMTLBuffer>; + using MTLBufferResourceOptions = uint64_t; + + blender::Map<MTLBufferResourceOptions, MTLBufferPoolOrderedList *> buffer_pools_; + blender::Vector<gpu::MTLBuffer *> allocations_; + + /* Maintain a queue of all MTLSafeFreeList's that have been released + * by the GPU and are ready to have their buffers re-inserted into the + * MemoryManager pools. + * Access to this queue is made thread-safe through safelist_lock_. */ + std::mutex safelist_lock_; + blender::Vector<MTLSafeFreeList *> completed_safelist_queue_; + + /* Current free list, associated with active MTLCommandBuffer submission. */ + /* MTLBuffer::free() can be called from separate threads, due to usage within animation + * system/worker threads. */ + std::atomic<MTLSafeFreeList *> current_free_list_; + + public: + void init(id<MTLDevice> device); + ~MTLBufferPool(); + + gpu::MTLBuffer *allocate(uint64_t size, bool cpu_visible); + gpu::MTLBuffer *allocate_aligned(uint64_t size, uint alignment, bool cpu_visible); + gpu::MTLBuffer *allocate_with_data(uint64_t size, bool cpu_visible, const void *data = nullptr); + gpu::MTLBuffer *allocate_aligned_with_data(uint64_t size, + uint alignment, + bool cpu_visible, + const void *data = nullptr); + bool free_buffer(gpu::MTLBuffer *buffer); + + /* Flush MTLSafeFreeList buffers, for completed lists in `completed_safelist_queue_`, + * back to memory pools. */ + void update_memory_pools(); + + /* Access and control over active MTLSafeFreeList. */ + MTLSafeFreeList *get_current_safe_list(); + void begin_new_safe_list(); + + /* Add a completed MTLSafeFreeList to completed_safelist_queue_. */ + void push_completed_safe_list(MTLSafeFreeList *list); + + private: + void ensure_buffer_pool(MTLResourceOptions options); + void insert_buffer_into_pool(MTLResourceOptions options, gpu::MTLBuffer *buffer); + void free(); +}; + +/* Scratch buffers are circular-buffers used for temporary data within the current frame. + * In order to preserve integrity of contents when having multiple-frames-in-flight, + * we cycle through a collection of scratch buffers which are reset upon next use. + * + * Below are a series of properties, declared to manage scratch buffers. If a scratch buffer + * overflows, then the original buffer will be flushed and submitted, with retained references + * by usage within the command buffer, and a new buffer will be created. + * - The new buffer will grow in size to account for increased demand in temporary memory. + */ +class MTLScratchBufferManager { + + private: + /* Maximum number of scratch buffers to allocate. This should be the maximum number of + * simultaneous frames in flight. */ + static constexpr uint mtl_max_scratch_buffers_ = MTL_NUM_SAFE_FRAMES; + + public: + /* Maximum size of single scratch buffer allocation. When re-sizing, this is the maximum size the + * newly allocated buffers will grow to. Larger allocations are possible if + * `MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION` is enabled, but these will instead allocate new + * buffers from the memory pools on the fly. */ + static constexpr uint mtl_scratch_buffer_max_size_ = 128 * 1024 * 1024; + + /* Initial size of circular scratch buffers prior to growth. */ + static constexpr uint mtl_scratch_buffer_initial_size_ = 16 * 1024 * 1024; + + private: + /* Parent MTLContext. 
*/ + MTLContext &context_; + bool initialised_ = false; + + /* Scratch buffer currently in-use. */ + uint current_scratch_buffer_ = 0; + + /* Scratch buffer pool. */ + MTLCircularBuffer *scratch_buffers_[mtl_max_scratch_buffers_]; + + public: + MTLScratchBufferManager(MTLContext &context) : context_(context){}; + ~MTLScratchBufferManager(); + + /* Explicit initialization and freeing of resources. + * Initialization must occur after device creation. */ + void init(); + void free(); + + /* Allocation functions for creating temporary allocations from active circular buffer. */ + MTLTemporaryBuffer scratch_buffer_allocate_range(uint64_t alloc_size); + MTLTemporaryBuffer scratch_buffer_allocate_range_aligned(uint64_t alloc_size, uint alignment); + + /* Ensure a new scratch buffer is started if we move onto a new frame. + * Called when a new command buffer begins. */ + void ensure_increment_scratch_buffer(); + + /* Flush memory for active scratch buffer to GPU. + * This call will perform a partial flush of the buffer starting from + * the last offset the data was flushed from, to the current offset. */ + void flush_active_scratch_buffer(); +}; + +/** \} */ + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_memory.mm b/source/blender/gpu/metal/mtl_memory.mm new file mode 100644 index 00000000000..788736bdfad --- /dev/null +++ b/source/blender/gpu/metal/mtl_memory.mm @@ -0,0 +1,898 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "BKE_global.h" + +#include "DNA_userdef_types.h" + +#include "mtl_context.hh" +#include "mtl_debug.hh" +#include "mtl_memory.hh" + +using namespace blender; +using namespace blender::gpu; + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Memory Management - MTLBufferPool and MTLSafeFreeList implementations. */ + +void MTLBufferPool::init(id<MTLDevice> mtl_device) +{ + if (!ensure_initialised_) { + BLI_assert(mtl_device); + ensure_initialised_ = true; + device_ = mtl_device; + +#if MTL_DEBUG_MEMORY_STATISTICS == 1 + /* Debug statistics. */ + per_frame_allocation_count_ = 0; + allocations_in_pool_ = 0; + buffers_in_pool_ = 0; +#endif + + /* Free pools -- Create initial safe free pool */ + BLI_assert(current_free_list_ == nullptr); + this->begin_new_safe_list(); + } +} + +MTLBufferPool::~MTLBufferPool() +{ + this->free(); +} + +void MTLBufferPool::free() +{ + + for (auto buffer : allocations_) { + BLI_assert(buffer); + delete buffer; + } + allocations_.clear(); + + for (std::multiset<blender::gpu::MTLBufferHandle, blender::gpu::CompareMTLBuffer> *buffer_pool : + buffer_pools_.values()) { + delete buffer_pool; + } + buffer_pools_.clear(); +} + +gpu::MTLBuffer *MTLBufferPool::allocate(uint64_t size, bool cpu_visible) +{ + /* Allocate buffer with default HW-compatible alignment of 256 bytes. + * See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf for more. */ + return this->allocate_aligned(size, 256, cpu_visible); +} + +gpu::MTLBuffer *MTLBufferPool::allocate_with_data(uint64_t size, + bool cpu_visible, + const void *data) +{ + /* Allocate buffer with default HW-compatible alignment of 256 bytes. + * See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf for more. */ + return this->allocate_aligned_with_data(size, 256, cpu_visible, data); +} + +gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size, + uint32_t alignment, + bool cpu_visible) +{ + /* Check not required. Main GPU module usage considered thread-safe. 
*/ + // BLI_assert(BLI_thread_is_main()); + + /* Calculate aligned size */ + BLI_assert(alignment > 0); + uint64_t aligned_alloc_size = ceil_to_multiple_ul(size, alignment); + + /* Allocate new MTL Buffer */ + MTLResourceOptions options; + if (cpu_visible) { + options = ([device_ hasUnifiedMemory]) ? MTLResourceStorageModeShared : + MTLResourceStorageModeManaged; + } + else { + options = MTLResourceStorageModePrivate; + } + + /* Check if we have a suitable buffer */ + gpu::MTLBuffer *new_buffer = nullptr; + std::multiset<MTLBufferHandle, CompareMTLBuffer> **pool_search = buffer_pools_.lookup_ptr( + (uint64_t)options); + + if (pool_search != nullptr) { + std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool = *pool_search; + MTLBufferHandle size_compare(aligned_alloc_size); + auto result = pool->lower_bound(size_compare); + if (result != pool->end()) { + /* Potential buffer found, check if within size threshold requirements. */ + gpu::MTLBuffer *found_buffer = result->buffer; + BLI_assert(found_buffer); + BLI_assert(found_buffer->get_metal_buffer()); + + uint64_t found_size = found_buffer->get_size(); + + if (found_size >= aligned_alloc_size && + found_size <= (aligned_alloc_size * mtl_buffer_size_threshold_factor_)) { + MTL_LOG_INFO( + "[MemoryAllocator] Suitable Buffer of size %lld found, for requested size: %lld\n", + found_size, + aligned_alloc_size); + + new_buffer = found_buffer; + BLI_assert(!new_buffer->get_in_use()); + + /* Remove buffer from free set. */ + pool->erase(result); + } + else { + MTL_LOG_INFO( + "[MemoryAllocator] Buffer of size %lld found, but was incompatible with requested " + "size: " + "%lld\n", + found_size, + aligned_alloc_size); + new_buffer = nullptr; + } + } + } + + /* Allocate new buffer. */ + if (new_buffer == nullptr) { + new_buffer = new gpu::MTLBuffer(device_, size, options, alignment); + + /* Track allocation in context. */ + allocations_.append(new_buffer); + total_allocation_bytes_ += aligned_alloc_size; + } + else { + /* Re-use suitable buffer. */ + new_buffer->set_usage_size(aligned_alloc_size); + +#if MTL_DEBUG_MEMORY_STATISTICS == 1 + /* Debug. */ + allocations_in_pool_ -= new_buffer->get_size(); + buffers_in_pool_--; + BLI_assert(allocations_in_pool_ >= 0); +#endif + + /* Ensure buffer memory is correctly backed. */ + BLI_assert(new_buffer->get_metal_buffer()); + } + /* Flag buffer as actively in-use. */ + new_buffer->flag_in_use(true); + +#if MTL_DEBUG_MEMORY_STATISTICS == 1 + this->per_frame_allocation_count++; +#endif + + return new_buffer; +} + +gpu::MTLBuffer *MTLBufferPool::allocate_aligned_with_data(uint64_t size, + uint32_t alignment, + bool cpu_visible, + const void *data) +{ + gpu::MTLBuffer *buf = this->allocate_aligned(size, 256, cpu_visible); + + /* Upload initial data. */ + BLI_assert(data != nullptr); + BLI_assert(!(buf->get_resource_options() & MTLResourceStorageModePrivate)); + BLI_assert(size <= buf->get_size()); + BLI_assert(size <= [buf->get_metal_buffer() length]); + memcpy(buf->get_host_ptr(), data, size); + buf->flush_range(0, size); + return buf; +} + +bool MTLBufferPool::free_buffer(gpu::MTLBuffer *buffer) +{ + /* Ensure buffer is flagged as in-use. I.e. has not already been returned to memory pools. */ + bool buffer_in_use = buffer->get_in_use(); + BLI_assert(buffer_in_use); + if (buffer_in_use) { + + /* Fetch active safe pool from atomic ptr. */ + MTLSafeFreeList *current_pool = this->get_current_safe_list(); + + /* Place buffer in safe_free_pool before returning to MemoryManager buffer pools. 
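A quick worked example of the reuse window that the 1.26 threshold factor gives allocate_aligned() above (numbers illustrative):

  /* Requested: 1 MiB, already 256-byte aligned. */
  const uint64_t requested = 1024 * 1024;             /* 1,048,576 bytes. */
  const uint64_t upper = uint64_t(requested * 1.26);  /* 1,321,205 bytes. */
  /* lower_bound() returns the smallest pooled buffer >= requested:
   * a 1,200,000-byte buffer falls inside [requested, upper] and is re-used;
   * a 2 MiB buffer exceeds the window, so a fresh MTLBuffer is allocated instead. */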
*/ + BLI_assert(current_pool); + current_pool->insert_buffer(buffer); + buffer->flag_in_use(false); + + return true; + } + return false; +} + +void MTLBufferPool::update_memory_pools() +{ + /* Ensure thread-safe access to `completed_safelist_queue_`, which contains + * the list of MTLSafeFreeList's whose buffers are ready to be + * re-inserted into the Memory Manager pools. */ + safelist_lock_.lock(); + +#if MTL_DEBUG_MEMORY_STATISTICS == 1 + int num_buffers_added = 0; +#endif + + /* Always free oldest MTLSafeFreeList first. */ + for (int safe_pool_free_index = 0; safe_pool_free_index < completed_safelist_queue_.size(); + safe_pool_free_index++) { + MTLSafeFreeList *current_pool = completed_safelist_queue_[safe_pool_free_index]; + + /* Iterate through all MTLSafeFreeList linked-chunks. */ + while (current_pool != nullptr) { + current_pool->lock_.lock(); + BLI_assert(current_pool); + BLI_assert(current_pool->in_free_queue_); + int counter = 0; + int size = min_ii(current_pool->current_list_index_, MTLSafeFreeList::MAX_NUM_BUFFERS_); + + /* Re-add all buffers within frame index to MemoryManager pools. */ + while (counter < size) { + + gpu::MTLBuffer *buf = current_pool->safe_free_pool_[counter]; + + /* Insert buffer back into open pools. */ + BLI_assert(buf->get_in_use() == false); + this->insert_buffer_into_pool(buf->get_resource_options(), buf); + counter++; + +#if MTL_DEBUG_MEMORY_STATISTICS == 1 + num_buffers_added++; +#endif + } + + /* Fetch next MTLSafeFreeList chunk, if any. */ + MTLSafeFreeList *next_list = nullptr; + if (current_pool->has_next_pool_ > 0) { + next_list = current_pool->next_.load(); + } + + /* Delete current MTLSafeFreeList */ + current_pool->lock_.unlock(); + delete current_pool; + current_pool = nullptr; + + /* Move onto next chunk. 
*/ + if (next_list != nullptr) { + current_pool = next_list; + } + } + } + +#if MTL_DEBUG_MEMORY_STATISTICS == 1 + printf("--- Allocation Stats ---\n"); + printf(" Num buffers processed in pool (this frame): %u\n", num_buffers_added); + + uint framealloc = (uint)this->per_frame_allocation_count; + printf(" Allocations in frame: %u\n", framealloc); + printf(" Total Buffers allocated: %u\n", (uint)allocations_.size()); + printf(" Total Memory allocated: %u MB\n", (uint)total_allocation_bytes_ / (1024 * 1024)); + + uint allocs = (uint)(allocations_in_pool_) / 1024 / 2024; + printf(" Free memory in pools: %u MB\n", allocs); + + uint buffs = (uint)buffers_in_pool_; + printf(" Buffers in pools: %u\n", buffs); + + printf(" Pools %u:\n", (uint)buffer_pools_.size()); + auto key_iterator = buffer_pools_.keys().begin(); + auto value_iterator = buffer_pools_.values().begin(); + while (key_iterator != buffer_pools_.keys().end()) { + uint64_t mem_in_pool = 0; + uint64_t iters = 0; + for (auto it = (*value_iterator)->begin(); it != (*value_iterator)->end(); it++) { + mem_in_pool += it->buffer_size; + iters++; + } + + printf(" Buffers in pool (%u)(%llu): %u (%u MB)\n", + (uint)*key_iterator, + iters, + (uint)((*value_iterator)->size()), + (uint)mem_in_pool / 1024 / 1024); + ++key_iterator; + ++value_iterator; + } + + this->per_frame_allocation_count = 0; +#endif + + /* Clear safe pools list */ + completed_safelist_queue_.clear(); + safelist_lock_.unlock(); +} + +void MTLBufferPool::push_completed_safe_list(MTLSafeFreeList *safe_list) +{ + /* When an MTLSafeFreeList has been released by the GPU, and buffers are ready to + * be re-inserted into the MemoryManager pools for future use, add the MTLSafeFreeList + * to the `completed_safelist_queue_` for flushing at a controlled point in time. */ + safe_list->lock_.lock(); + BLI_assert(safe_list); + BLI_assert(safe_list->reference_count_ == 0 && + "Pool must be fully dereferenced by all in-use cmd buffers before returning.\n"); + BLI_assert(safe_list->in_free_queue_ == false && "Pool must not already be in queue"); + + /* Flag MTLSafeFreeList as having been added, and insert into SafeFreePool queue. */ + safe_list->flag_in_queue(); + safelist_lock_.lock(); + completed_safelist_queue_.append(safe_list); + safelist_lock_.unlock(); + safe_list->lock_.unlock(); +} + +MTLSafeFreeList *MTLBufferPool::get_current_safe_list() +{ + /* Thread-safe access via atomic ptr. */ + return current_free_list_; +} + +void MTLBufferPool::begin_new_safe_list() +{ + safelist_lock_.lock(); + current_free_list_ = new MTLSafeFreeList(); + safelist_lock_.unlock(); +} + +void MTLBufferPool::ensure_buffer_pool(MTLResourceOptions options) +{ + std::multiset<MTLBufferHandle, CompareMTLBuffer> **pool_search = buffer_pools_.lookup_ptr( + (uint64_t)options); + if (pool_search == nullptr) { + std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool = + new std::multiset<MTLBufferHandle, CompareMTLBuffer>(); + buffer_pools_.add_new((uint64_t)options, pool); + } +} + +void MTLBufferPool::insert_buffer_into_pool(MTLResourceOptions options, gpu::MTLBuffer *buffer) +{ + /* Ensure `safelist_lock_` is locked in calling code before modifying. */ + BLI_assert(buffer); + + /* Reset usage size to actual size of allocation. */ + buffer->set_usage_size(buffer->get_size()); + + /* Ensure pool exists. */ + this->ensure_buffer_pool(options); + + /* TODO(Metal): Support purgeability - Allow buffer in pool to have its memory taken back by the + * OS if needed. 
As we keep allocations around, they may not actually be in use, but we can + * ensure they do not block other apps from using memory. Upon a buffer being needed again, we + * can reset this state. + * TODO(Metal): Purgeability state does not update instantly, so this requires a deferral. */ + BLI_assert(buffer->get_metal_buffer()); + /* buffer->metal_buffer); [buffer->metal_buffer setPurgeableState:MTLPurgeableStateVolatile]; */ + + std::multiset<MTLBufferHandle, CompareMTLBuffer> *pool = buffer_pools_.lookup(options); + pool->insert(MTLBufferHandle(buffer)); + +#if MTL_DEBUG_MEMORY_STATISTICS == 1 + /* Debug statistics. */ + allocations_in_pool_ += buffer->get_size(); + buffers_in_pool_++; +#endif +} + +MTLSafeFreeList::MTLSafeFreeList() +{ + reference_count_ = 1; + in_free_queue_ = false; + current_list_index_ = 0; + next_ = nullptr; + has_next_pool_ = 0; +} + +void MTLSafeFreeList::insert_buffer(gpu::MTLBuffer *buffer) +{ + BLI_assert(in_free_queue_ == false); + + /* Lockless list insert. */ + uint insert_index = current_list_index_++; + + /* If the current MTLSafeFreeList size is exceeded, we ripple down the linked-list chain and + * insert the buffer into the next available chunk. */ + if (insert_index >= MTLSafeFreeList::MAX_NUM_BUFFERS_) { + + /* Check if first caller to generate next pool. */ + int has_next = has_next_pool_++; + if (has_next == 0) { + next_ = new MTLSafeFreeList(); + } + MTLSafeFreeList *next_list = next_.load(); + BLI_assert(next_list); + next_list->insert_buffer(buffer); + + /* Clamp index to chunk limit if overflowing. */ + current_list_index_ = MTLSafeFreeList::MAX_NUM_BUFFERS_; + return; + } + + safe_free_pool_[insert_index] = buffer; +} + +/* Increments from active GPUContext thread. */ +void MTLSafeFreeList::increment_reference() +{ + lock_.lock(); + BLI_assert(in_free_queue_ == false); + reference_count_++; + lock_.unlock(); +} + +/* Reference decrements and addition to completed list queue can occur from MTLCommandBuffer + * completion callback thread. */ +void MTLSafeFreeList::decrement_reference() +{ + lock_.lock(); + BLI_assert(in_free_queue_ == false); + int ref_count = --reference_count_; + + if (ref_count == 0) { + MTLContext::get_global_memory_manager().push_completed_safe_list(this); + } + lock_.unlock(); +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name MTLBuffer wrapper class implementation. + * \{ */ + +/* Construct a gpu::MTLBuffer wrapper around a newly created metal::MTLBuffer. */ +MTLBuffer::MTLBuffer(id<MTLDevice> mtl_device, + uint64_t size, + MTLResourceOptions options, + uint alignment) +{ + /* Calculate aligned allocation size. */ + BLI_assert(alignment > 0); + uint64_t aligned_alloc_size = ceil_to_multiple_ul(size, alignment); + + alignment_ = alignment; + device_ = mtl_device; + is_external_ = false; + + options_ = options; + this->flag_in_use(false); + + metal_buffer_ = [device_ newBufferWithLength:aligned_alloc_size options:options]; + BLI_assert(metal_buffer_); + [metal_buffer_ retain]; + + size_ = aligned_alloc_size; + this->set_usage_size(size_); + if (!(options_ & MTLResourceStorageModePrivate)) { + data_ = [metal_buffer_ contents]; + } + else { + data_ = nullptr; + } +} + +MTLBuffer::MTLBuffer(id<MTLBuffer> external_buffer) +{ + BLI_assert(external_buffer != nil); + + /* Ensure external_buffer remains referenced while in-use. */ + metal_buffer_ = external_buffer; + [metal_buffer_ retain]; + + /* Extract properties. 
*/ + is_external_ = true; + device_ = nil; + alignment_ = 1; + options_ = [metal_buffer_ resourceOptions]; + size_ = [metal_buffer_ allocatedSize]; + this->set_usage_size(size_); + data_ = [metal_buffer_ contents]; + in_use_ = true; +} + +gpu::MTLBuffer::~MTLBuffer() +{ + if (metal_buffer_ != nil) { + [metal_buffer_ release]; + metal_buffer_ = nil; + } +} + +void gpu::MTLBuffer::free() +{ + if (!is_external_) { + MTLContext::get_global_memory_manager().free_buffer(this); + } + else { + if (metal_buffer_ != nil) { + [metal_buffer_ release]; + metal_buffer_ = nil; + } + } +} + +id<MTLBuffer> gpu::MTLBuffer::get_metal_buffer() const +{ + return metal_buffer_; +} + +void *gpu::MTLBuffer::get_host_ptr() const +{ + BLI_assert(!(options_ & MTLResourceStorageModePrivate)); + BLI_assert(data_); + return data_; +} + +uint64_t gpu::MTLBuffer::get_size() const +{ + return size_; +} + +uint64_t gpu::MTLBuffer::get_size_used() const +{ + return usage_size_; +} + +bool gpu::MTLBuffer::requires_flush() +{ + /* We do not need to flush shared memory, as addressable buffer is shared. */ + return options_ & MTLResourceStorageModeManaged; +} + +void gpu::MTLBuffer::set_label(NSString *str) +{ + metal_buffer_.label = str; +} + +void gpu::MTLBuffer::debug_ensure_used() +{ + /* Debug: If buffer is not flagged as in-use, this is a problem. */ + BLI_assert_msg( + in_use_, + "Buffer should be marked as 'in-use' if being actively used by an instance. Buffer " + "has likely already been freed."); +} + +void gpu::MTLBuffer::flush() +{ + this->debug_ensure_used(); + if (this->requires_flush()) { + [metal_buffer_ didModifyRange:NSMakeRange(0, size_)]; + } +} + +void gpu::MTLBuffer::flush_range(uint64_t offset, uint64_t length) +{ + this->debug_ensure_used(); + if (this->requires_flush()) { + BLI_assert((offset + length) <= size_); + [metal_buffer_ didModifyRange:NSMakeRange(offset, length)]; + } +} + +void gpu::MTLBuffer::flag_in_use(bool used) +{ + in_use_ = used; +} + +bool gpu::MTLBuffer::get_in_use() +{ + return in_use_; +} + +void gpu::MTLBuffer::set_usage_size(uint64_t size_used) +{ + BLI_assert(size_used > 0 && size_used <= size_); + usage_size_ = size_used; +} + +MTLResourceOptions gpu::MTLBuffer::get_resource_options() +{ + return options_; +} + +uint64_t gpu::MTLBuffer::get_alignment() +{ + return alignment_; +} + +bool MTLBufferRange::requires_flush() +{ + /* We do not need to flush shared memory. */ + return this->options & MTLResourceStorageModeManaged; +} + +void MTLBufferRange::flush() +{ + if (this->requires_flush()) { + BLI_assert(this->metal_buffer); + BLI_assert((this->buffer_offset + this->size) <= [this->metal_buffer length]); + BLI_assert(this->buffer_offset >= 0); + [this->metal_buffer + didModifyRange:NSMakeRange(this->buffer_offset, this->size - this->buffer_offset)]; + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name MTLScratchBufferManager and MTLCircularBuffer implementation. + * \{ */ + +MTLScratchBufferManager::~MTLScratchBufferManager() +{ + this->free(); +} + +void MTLScratchBufferManager::init() +{ + + if (!this->initialised_) { + BLI_assert(context_.device); + + /* Initialize Scratch buffers. 
*/ + for (int sb = 0; sb < mtl_max_scratch_buffers_; sb++) { + scratch_buffers_[sb] = new MTLCircularBuffer( + context_, mtl_scratch_buffer_initial_size_, true); + BLI_assert(scratch_buffers_[sb]); + BLI_assert(&(scratch_buffers_[sb]->own_context_) == &context_); + } + current_scratch_buffer_ = 0; + initialised_ = true; + } +} + +void MTLScratchBufferManager::free() +{ + initialised_ = false; + + /* Release Scratch buffers */ + for (int sb = 0; sb < mtl_max_scratch_buffers_; sb++) { + delete scratch_buffers_[sb]; + scratch_buffers_[sb] = nullptr; + } + current_scratch_buffer_ = 0; +} + +MTLTemporaryBuffer MTLScratchBufferManager::scratch_buffer_allocate_range(uint64_t alloc_size) +{ + return this->scratch_buffer_allocate_range_aligned(alloc_size, 1); +} + +MTLTemporaryBuffer MTLScratchBufferManager::scratch_buffer_allocate_range_aligned( + uint64_t alloc_size, uint alignment) +{ + /* Ensure scratch buffer allocation alignment adheres to offset alignment requirements. */ + alignment = max_uu(alignment, 256); + + BLI_assert_msg(current_scratch_buffer_ >= 0, "Scratch Buffer index not set"); + MTLCircularBuffer *current_scratch_buff = this->scratch_buffers_[current_scratch_buffer_]; + BLI_assert_msg(current_scratch_buff != nullptr, "Scratch Buffer does not exist"); + MTLTemporaryBuffer allocated_range = current_scratch_buff->allocate_range_aligned(alloc_size, + alignment); + BLI_assert(allocated_range.size >= alloc_size && allocated_range.size <= alloc_size + alignment); + BLI_assert(allocated_range.metal_buffer != nil); + return allocated_range; +} + +void MTLScratchBufferManager::ensure_increment_scratch_buffer() +{ + /* Fetch active scratch buffer. */ + MTLCircularBuffer *active_scratch_buf = scratch_buffers_[current_scratch_buffer_]; + BLI_assert(&active_scratch_buf->own_context_ == &context_); + + /* Ensure existing scratch buffer is no longer in use. MTL_MAX_SCRATCH_BUFFERS specifies + * the number of allocated scratch buffers. This value should be equal to the number of + * simultaneous frames in-flight. I.e. the maximal number of scratch buffers which are + * simultaneously in-use. */ + if (active_scratch_buf->used_frame_index_ < context_.get_current_frame_index()) { + current_scratch_buffer_ = (current_scratch_buffer_ + 1) % mtl_max_scratch_buffers_; + active_scratch_buf = scratch_buffers_[current_scratch_buffer_]; + active_scratch_buf->reset(); + BLI_assert(&active_scratch_buf->own_context_ == &context_); + MTL_LOG_INFO("Scratch buffer %d reset - (ctx %p)(Frame index: %d)\n", + current_scratch_buffer_, + &context_, + context_.get_current_frame_index()); + } +} + +void MTLScratchBufferManager::flush_active_scratch_buffer() +{ + /* Fetch active scratch buffer and verify context. */ + MTLCircularBuffer *active_scratch_buf = scratch_buffers_[current_scratch_buffer_]; + BLI_assert(&active_scratch_buf->own_context_ == &context_); + active_scratch_buf->flush(); +} + +/* MTLCircularBuffer implementation. */ +MTLCircularBuffer::MTLCircularBuffer(MTLContext &ctx, uint64_t initial_size, bool allow_grow) + : own_context_(ctx) +{ + BLI_assert(this); + MTLResourceOptions options = ([own_context_.device hasUnifiedMemory]) ? + MTLResourceStorageModeShared : + MTLResourceStorageModeManaged; + cbuffer_ = new gpu::MTLBuffer(own_context_.device, initial_size, options, 256); + current_offset_ = 0; + can_resize_ = allow_grow; + cbuffer_->flag_in_use(true); + + used_frame_index_ = ctx.get_current_frame_index(); + last_flush_base_offset_ = 0; + + /* Debug label. 
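To illustrate the temporary-allocation path above, a hedged sketch of a per-draw upload through the scratch manager (`ctx` is a hypothetical active MTLContext pointer, the accessor is assumed to return the manager by reference as the usage note in the header suggests, and the uniform-data scenario and encoder binding are assumptions; the range API is the one declared in this file):

  MTLScratchBufferManager &scratch = ctx->get_scratchbuffer_manager();
  const float uniforms[4] = {1.0f, 0.0f, 0.0f, 1.0f};
  MTLTemporaryBuffer range = scratch.scratch_buffer_allocate_range_aligned(sizeof(uniforms), 256);
  memcpy(range.data, uniforms, sizeof(uniforms));
  range.flush(); /* No-op for shared storage; didModifyRange for managed buffers. */

  /* Bind at the returned offset, e.g.
   * [encoder setVertexBuffer:range.metal_buffer offset:range.buffer_offset atIndex:1]; */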
*/ + if (G.debug & G_DEBUG_GPU) { + cbuffer_->set_label(@"Circular Scratch Buffer"); + } +} + +MTLCircularBuffer::~MTLCircularBuffer() +{ + delete cbuffer_; +} + +MTLTemporaryBuffer MTLCircularBuffer::allocate_range(uint64_t alloc_size) +{ + return this->allocate_range_aligned(alloc_size, 1); +} + +MTLTemporaryBuffer MTLCircularBuffer::allocate_range_aligned(uint64_t alloc_size, uint alignment) +{ + BLI_assert(this); + + /* Ensure alignment of an allocation is aligned to compatible offset boundaries. */ + BLI_assert(alignment > 0); + alignment = max_ulul(alignment, 256); + + /* Align current offset and allocation size to desired alignment */ + uint64_t aligned_current_offset = ceil_to_multiple_ul(current_offset_, alignment); + uint64_t aligned_alloc_size = ceil_to_multiple_ul(alloc_size, alignment); + bool can_allocate = (aligned_current_offset + aligned_alloc_size) < cbuffer_->get_size(); + + BLI_assert(aligned_current_offset >= current_offset_); + BLI_assert(aligned_alloc_size >= alloc_size); + + BLI_assert(aligned_current_offset % alignment == 0); + BLI_assert(aligned_alloc_size % alignment == 0); + + /* Recreate Buffer */ + if (!can_allocate) { + uint64_t new_size = cbuffer_->get_size(); + if (can_resize_) { + /* Resize to the maximum of basic resize heuristic OR the size of the current offset + + * requested allocation -- we want the buffer to grow to a large enough size such that it + * does not need to resize mid-frame. */ + new_size = max_ulul( + min_ulul(MTLScratchBufferManager::mtl_scratch_buffer_max_size_, new_size * 1.2), + aligned_current_offset + aligned_alloc_size); + +#if MTL_SCRATCH_BUFFER_ALLOW_TEMPORARY_EXPANSION == 1 + /* IF a requested allocation EXCEEDS the maximum supported size, temporarily allocate up to + * this, but shrink down ASAP. */ + if (new_size > MTLScratchBufferManager::mtl_scratch_buffer_max_size_) { + + /* If new requested allocation is bigger than maximum allowed size, temporarily resize to + * maximum allocation size -- Otherwise, clamp the buffer size back down to the defined + * maximum */ + if (aligned_alloc_size > MTLScratchBufferManager::mtl_scratch_buffer_max_size_) { + new_size = aligned_alloc_size; + MTL_LOG_INFO("Temporarily growing Scratch buffer to %d MB\n", + (int)new_size / 1024 / 1024); + } + else { + new_size = MTLScratchBufferManager::mtl_scratch_buffer_max_size_; + MTL_LOG_INFO("Shrinking Scratch buffer back to %d MB\n", (int)new_size / 1024 / 1024); + } + } + BLI_assert(aligned_alloc_size <= new_size); +#else + new_size = min_ulul(MTLScratchBufferManager::mtl_scratch_buffer_max_size_, new_size); + + if (aligned_alloc_size > new_size) { + BLI_assert(false); + + /* Cannot allocate */ + MTLTemporaryBuffer alloc_range; + alloc_range.metal_buffer = nil; + alloc_range.data = nullptr; + alloc_range.buffer_offset = 0; + alloc_range.size = 0; + alloc_range.options = cbuffer_->options; + } +#endif + } + else { + MTL_LOG_WARNING( + "Performance Warning: Reached the end of circular buffer of size: %llu, but cannot " + "resize. Starting new buffer\n", + cbuffer_->get_size()); + BLI_assert(aligned_alloc_size <= new_size); + + /* Cannot allocate. */ + MTLTemporaryBuffer alloc_range; + alloc_range.metal_buffer = nil; + alloc_range.data = nullptr; + alloc_range.buffer_offset = 0; + alloc_range.size = 0; + alloc_range.options = cbuffer_->get_resource_options(); + } + + /* Flush current buffer to ensure changes are visible on the GPU. 
*/ + this->flush(); + + /* Discard old buffer and create a new one - Relying on Metal reference counting to track + * in-use buffers */ + MTLResourceOptions prev_options = cbuffer_->get_resource_options(); + uint prev_alignment = cbuffer_->get_alignment(); + delete cbuffer_; + cbuffer_ = new gpu::MTLBuffer(own_context_.device, new_size, prev_options, prev_alignment); + cbuffer_->flag_in_use(true); + current_offset_ = 0; + last_flush_base_offset_ = 0; + + /* Debug label. */ + if (G.debug & G_DEBUG_GPU) { + cbuffer_->set_label(@"Circular Scratch Buffer"); + } + MTL_LOG_INFO("Resized Metal circular buffer to %llu bytes\n", new_size); + + /* Reset allocation Status. */ + aligned_current_offset = 0; + BLI_assert((aligned_current_offset + aligned_alloc_size) <= cbuffer_->get_size()); + } + + /* Allocate chunk. */ + MTLTemporaryBuffer alloc_range; + alloc_range.metal_buffer = cbuffer_->get_metal_buffer(); + alloc_range.data = (void *)((uint8_t *)([alloc_range.metal_buffer contents]) + + aligned_current_offset); + alloc_range.buffer_offset = aligned_current_offset; + alloc_range.size = aligned_alloc_size; + alloc_range.options = cbuffer_->get_resource_options(); + BLI_assert(alloc_range.data); + + /* Shift offset to match alignment. */ + current_offset_ = aligned_current_offset + aligned_alloc_size; + BLI_assert(current_offset_ <= cbuffer_->get_size()); + return alloc_range; +} + +void MTLCircularBuffer::flush() +{ + BLI_assert(this); + + uint64_t len = current_offset_ - last_flush_base_offset_; + if (len > 0) { + cbuffer_->flush_range(last_flush_base_offset_, len); + last_flush_base_offset_ = current_offset_; + } +} + +void MTLCircularBuffer::reset() +{ + BLI_assert(this); + + /* If circular buffer has data written to it, offset will be greater than zero. */ + if (current_offset_ > 0) { + + /* Ensure the circular buffer is no longer being used by an in-flight frame. */ + BLI_assert((own_context_.get_current_frame_index() >= + (used_frame_index_ + MTL_NUM_SAFE_FRAMES - 1)) && + "Trying to reset Circular scratch buffer's while its data is still being used by " + "an in-flight frame"); + + current_offset_ = 0; + last_flush_base_offset_ = 0; + } + + /* Update used frame index to current. */ + used_frame_index_ = own_context_.get_current_frame_index(); +} + +/** \} */ + +} // blender::gpu diff --git a/source/blender/gpu/metal/mtl_primitive.hh b/source/blender/gpu/metal/mtl_primitive.hh new file mode 100644 index 00000000000..5aa7a533b95 --- /dev/null +++ b/source/blender/gpu/metal/mtl_primitive.hh @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * Encapsulation of Frame-buffer states (attached textures, viewport, scissors). 
+ */ + +#pragma once + +#include "BLI_assert.h" + +#include "GPU_primitive.h" + +#include <Metal/Metal.h> + +namespace blender::gpu { + +/** Utility functions **/ +static inline MTLPrimitiveTopologyClass mtl_prim_type_to_topology_class(MTLPrimitiveType prim_type) +{ + switch (prim_type) { + case MTLPrimitiveTypePoint: + return MTLPrimitiveTopologyClassPoint; + case MTLPrimitiveTypeLine: + case MTLPrimitiveTypeLineStrip: + return MTLPrimitiveTopologyClassLine; + case MTLPrimitiveTypeTriangle: + case MTLPrimitiveTypeTriangleStrip: + return MTLPrimitiveTopologyClassTriangle; + } + return MTLPrimitiveTopologyClassUnspecified; +} + +static inline MTLPrimitiveType gpu_prim_type_to_metal(GPUPrimType prim_type) +{ + switch (prim_type) { + case GPU_PRIM_POINTS: + return MTLPrimitiveTypePoint; + case GPU_PRIM_LINES: + case GPU_PRIM_LINES_ADJ: + case GPU_PRIM_LINE_LOOP: + return MTLPrimitiveTypeLine; + case GPU_PRIM_LINE_STRIP: + case GPU_PRIM_LINE_STRIP_ADJ: + return MTLPrimitiveTypeLineStrip; + case GPU_PRIM_TRIS: + case GPU_PRIM_TRI_FAN: + case GPU_PRIM_TRIS_ADJ: + return MTLPrimitiveTypeTriangle; + case GPU_PRIM_TRI_STRIP: + return MTLPrimitiveTypeTriangleStrip; + case GPU_PRIM_NONE: + return MTLPrimitiveTypePoint; + }; +} + +/* Certain primitive types are not supported in Metal, and require emulation. + * `GPU_PRIM_LINE_LOOP` and `GPU_PRIM_TRI_FAN` required index buffer patching. + * Adjacency types do not need emulation as the input structure is the same, + * and access is controlled from the vertex shader through SSBO vertex fetch. + * -- These Adj cases are only used in geometry shaders in OpenGL. */ +static inline bool mtl_needs_topology_emulation(GPUPrimType prim_type) +{ + + BLI_assert(prim_type != GPU_PRIM_NONE); + switch (prim_type) { + case GPU_PRIM_LINE_LOOP: + case GPU_PRIM_TRI_FAN: + return true; + default: + return false; + } + return false; +} + +static inline bool mtl_vertex_count_fits_primitive_type(uint32_t vertex_count, + MTLPrimitiveType prim_type) +{ + if (vertex_count == 0) { + return false; + } + + switch (prim_type) { + case MTLPrimitiveTypeLineStrip: + return (vertex_count > 1); + case MTLPrimitiveTypeLine: + return (vertex_count % 2 == 0); + case MTLPrimitiveTypePoint: + return (vertex_count > 0); + case MTLPrimitiveTypeTriangle: + return (vertex_count % 3 == 0); + case MTLPrimitiveTypeTriangleStrip: + return (vertex_count > 2); + } + BLI_assert(false); + return false; +} + +} // namespace blender::gpu
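A short sketch of how the helpers above compose at a draw call site (hedged; the call site itself is an assumption, the helper names and return values are those defined in this header):

  GPUPrimType prim = GPU_PRIM_TRI_FAN;
  if (mtl_needs_topology_emulation(prim)) {
    /* TRI_FAN / LINE_LOOP: the index buffer is rewritten into an emulated list,
     * see MTLIndexBuf::get_index_buffer() earlier in this patch. */
  }
  MTLPrimitiveType mtl_prim = gpu_prim_type_to_metal(prim); /* MTLPrimitiveTypeTriangle. */
  MTLPrimitiveTopologyClass cls = mtl_prim_type_to_topology_class(mtl_prim); /* Triangle class. */

  uint32_t vertex_count = 6;
  if (!mtl_vertex_count_fits_primitive_type(vertex_count, mtl_prim)) {
    /* A triangle submission whose count is not a multiple of 3 would be rejected here. */
  }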
\ No newline at end of file diff --git a/source/blender/gpu/metal/mtl_pso_descriptor_state.hh b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh new file mode 100644 index 00000000000..1906350679a --- /dev/null +++ b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh @@ -0,0 +1,250 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ +#pragma once + +#include "GPU_vertex_format.h" + +#include <Metal/Metal.h> + +namespace blender::gpu { + +/** Vertex attribute and buffer descriptor wrappers + * for use in PSO construction and caching. */ +struct MTLVertexAttributeDescriptorPSO { + MTLVertexFormat format; + int offset; + int buffer_index; + GPUVertFetchMode format_conversion_mode; + + bool operator==(const MTLVertexAttributeDescriptorPSO &other) const + { + return (format == other.format) && (offset == other.offset) && + (buffer_index == other.buffer_index) && + (format_conversion_mode == other.format_conversion_mode); + } + + uint64_t hash() const + { + return (uint64_t)((uint64_t)this->format ^ (this->offset << 4) ^ (this->buffer_index << 8) ^ + (this->format_conversion_mode << 12)); + } +}; + +struct MTLVertexBufferLayoutDescriptorPSO { + MTLVertexStepFunction step_function; + int step_rate; + int stride; + + bool operator==(const MTLVertexBufferLayoutDescriptorPSO &other) const + { + return (step_function == other.step_function) && (step_rate == other.step_rate) && + (stride == other.stride); + } + + uint64_t hash() const + { + return (uint64_t)((uint64_t)this->step_function ^ (this->step_rate << 4) ^ + (this->stride << 8)); + } +}; + +/* SSBO attribute state caching. */ +struct MTLSSBOAttribute { + + int mtl_attribute_index; + int vbo_id; + int attribute_offset; + int per_vertex_stride; + int attribute_format; + bool is_instance; + + MTLSSBOAttribute(){}; + MTLSSBOAttribute( + int attribute_ind, int vertexbuffer_ind, int offset, int stride, int format, bool instanced) + : mtl_attribute_index(attribute_ind), + vbo_id(vertexbuffer_ind), + attribute_offset(offset), + per_vertex_stride(stride), + attribute_format(format), + is_instance(instanced) + { + } + + bool operator==(const MTLSSBOAttribute &other) const + { + return (memcmp(this, &other, sizeof(MTLSSBOAttribute)) == 0); + } +}; + +struct MTLVertexDescriptor { + + /* Core Vertex Attributes. */ + MTLVertexAttributeDescriptorPSO attributes[GPU_VERT_ATTR_MAX_LEN]; + MTLVertexBufferLayoutDescriptorPSO + buffer_layouts[GPU_BATCH_VBO_MAX_LEN + GPU_BATCH_INST_VBO_MAX_LEN]; + int num_attributes; + int num_vert_buffers; + MTLPrimitiveTopologyClass prim_topology_class; + + /* WORKAROUND: SSBO Vertex-fetch attributes -- These follow the same structure + * but have slightly different binding rules, passed in via uniform + * push constant data block. 
*/ + bool uses_ssbo_vertex_fetch; + MTLSSBOAttribute ssbo_attributes[GPU_VERT_ATTR_MAX_LEN]; + int num_ssbo_attributes; + + bool operator==(const MTLVertexDescriptor &other) const + { + if ((this->num_attributes != other.num_attributes) || + (this->num_vert_buffers != other.num_vert_buffers)) { + return false; + } + if (this->prim_topology_class != other.prim_topology_class) { + return false; + }; + + for (const int a : IndexRange(this->num_attributes)) { + if (!(this->attributes[a] == other.attributes[a])) { + return false; + } + } + + for (const int b : IndexRange(this->num_vert_buffers)) { + if (!(this->buffer_layouts[b] == other.buffer_layouts[b])) { + return false; + } + } + + /* NOTE: No need to compare SSBO attributes, as these will match attribute bindings for the + * given shader. These are simply extra pre-resolved properties we want to include in the + * cache. */ + return true; + } + + uint64_t hash() const + { + uint64_t hash = (uint64_t)(this->num_attributes ^ this->num_vert_buffers); + for (const int a : IndexRange(this->num_attributes)) { + hash ^= this->attributes[a].hash() << a; + } + + for (const int b : IndexRange(this->num_vert_buffers)) { + hash ^= this->buffer_layouts[b].hash() << (b + 10); + } + + /* NOTE: SSBO vertex fetch members not hashed as these will match attribute bindings. */ + return hash; + } +}; + +/* Metal Render Pipeline State Descriptor -- All unique information which feeds PSO creation. */ +struct MTLRenderPipelineStateDescriptor { + /* This state descriptor will contain ALL parameters which generate a unique PSO. + * We will then use this state-object to efficiently look-up or create a + * new PSO for the current shader. + * + * Unlike the 'MTLContextGlobalShaderPipelineState', this struct contains a subset of + * parameters used to distinguish between unique PSOs. This struct is hash-able and only contains + * those parameters which are required by PSO generation. Non-unique state such as bound + * resources is not tracked here, as it does not require a unique PSO permutation if changed. */ + + /* Input Vertex Descriptor. */ + MTLVertexDescriptor vertex_descriptor; + + /* Render Target attachment state. + * Assign to #MTLPixelFormatInvalid if not used. */ + int num_color_attachments; + MTLPixelFormat color_attachment_format[GPU_FB_MAX_COLOR_ATTACHMENT]; + MTLPixelFormat depth_attachment_format; + MTLPixelFormat stencil_attachment_format; + + /* Render Pipeline State affecting PSO creation. */ + bool blending_enabled; + MTLBlendOperation alpha_blend_op; + MTLBlendOperation rgb_blend_op; + MTLBlendFactor dest_alpha_blend_factor; + MTLBlendFactor dest_rgb_blend_factor; + MTLBlendFactor src_alpha_blend_factor; + MTLBlendFactor src_rgb_blend_factor; + + /* Global color write mask as this cannot be specified per attachment. */ + MTLColorWriteMask color_write_mask; + + /* Point size required by point primitives. */ + float point_size = 0.0f; + + /* Comparison Operator for caching. 
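Since the descriptor is intended as a look-up key for PSO creation and gains an equality operator and hash() just below, a cache keyed directly on it could look roughly like this (a hedged sketch only: the std::unordered_map container, the hasher wrapper, compile_new_pso() and `desc` are assumptions rather than this patch's implementation, and <unordered_map> would need to be included):

  struct PSODescriptorHasher {
    size_t operator()(const MTLRenderPipelineStateDescriptor &desc) const
    {
      return size_t(desc.hash());
    }
  };

  std::unordered_map<MTLRenderPipelineStateDescriptor, id<MTLRenderPipelineState>, PSODescriptorHasher>
      pso_cache;

  auto it = pso_cache.find(desc);
  if (it == pso_cache.end()) {
    it = pso_cache.emplace(desc, compile_new_pso(desc)).first; /* compile_new_pso: hypothetical. */
  }
  id<MTLRenderPipelineState> pso = it->second;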
*/ + bool operator==(const MTLRenderPipelineStateDescriptor &other) const + { + if (!(vertex_descriptor == other.vertex_descriptor)) { + return false; + } + + if ((num_color_attachments != other.num_color_attachments) || + (depth_attachment_format != other.depth_attachment_format) || + (stencil_attachment_format != other.stencil_attachment_format) || + (color_write_mask != other.color_write_mask) || + (blending_enabled != other.blending_enabled) || (alpha_blend_op != other.alpha_blend_op) || + (rgb_blend_op != other.rgb_blend_op) || + (dest_alpha_blend_factor != other.dest_alpha_blend_factor) || + (dest_rgb_blend_factor != other.dest_rgb_blend_factor) || + (src_alpha_blend_factor != other.src_alpha_blend_factor) || + (src_rgb_blend_factor != other.src_rgb_blend_factor) || + (vertex_descriptor.prim_topology_class != other.vertex_descriptor.prim_topology_class) || + (point_size != other.point_size)) { + return false; + } + + /* Attachments can be skipped, so num_color_attachments will not define the range. */ + for (const int c : IndexRange(GPU_FB_MAX_COLOR_ATTACHMENT)) { + if (color_attachment_format[c] != other.color_attachment_format[c]) { + return false; + } + } + + return true; + } + + uint64_t hash() const + { + /* NOTE(Metal): Current setup aims to minimize overlap of parameters + * which are more likely to be different, to ensure earlier hash + * differences without having to fallback to comparisons. + * Though this could likely be further improved to remove + * has collisions. */ + + uint64_t hash = this->vertex_descriptor.hash(); + hash ^= (uint64_t)this->num_color_attachments << 16; /* up to 6 (3 bits). */ + hash ^= (uint64_t)this->depth_attachment_format << 18; /* up to 555 (9 bits). */ + hash ^= (uint64_t)this->stencil_attachment_format << 20; /* up to 555 (9 bits). */ + hash ^= (uint64_t)(*( + (uint64_t *)&this->vertex_descriptor.prim_topology_class)); /* Up to 3 (2 bits). */ + + /* Only include elements in Hash if they are needed - avoids variable null assignments + * influencing hash. */ + if (this->num_color_attachments > 0) { + hash ^= (uint64_t)this->color_write_mask << 22; /* 4 bit bit-mask. */ + hash ^= (uint64_t)this->alpha_blend_op << 26; /* Up to 4 (3 bits). */ + hash ^= (uint64_t)this->rgb_blend_op << 29; /* Up to 4 (3 bits). */ + hash ^= (uint64_t)this->dest_alpha_blend_factor << 32; /* Up to 18 (5 bits). */ + hash ^= (uint64_t)this->dest_rgb_blend_factor << 37; /* Up to 18 (5 bits). */ + hash ^= (uint64_t)this->src_alpha_blend_factor << 42; /* Up to 18 (5 bits). */ + hash ^= (uint64_t)this->src_rgb_blend_factor << 47; /* Up to 18 (5 bits). */ + } + + for (const uint c : IndexRange(GPU_FB_MAX_COLOR_ATTACHMENT)) { + hash ^= (uint64_t)this->color_attachment_format[c] << (c + 52); // up to 555 (9 bits) + } + + hash |= (uint64_t)((this->blending_enabled && (this->num_color_attachments > 0)) ? 1 : 0) + << 62; + hash ^= (uint64_t)this->point_size; + + return hash; + } +}; + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_query.hh b/source/blender/gpu/metal/mtl_query.hh new file mode 100644 index 00000000000..03436fcd67d --- /dev/null +++ b/source/blender/gpu/metal/mtl_query.hh @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include "BLI_vector.hh" + +#include "gpu_query.hh" +#include "mtl_context.hh" + +namespace blender::gpu { + +class MTLQueryPool : public QueryPool { + private: + /** Number of queries that have been issued since last initialization. 
+ * Should be equal to query_ids_.size(). */ + uint32_t query_issued_; + /** Type of this query pool. */ + GPUQueryType type_; + /** Can only be initialized once. */ + bool initialized_ = false; + MTLVisibilityResultMode mtl_type_; + Vector<gpu::MTLBuffer *> buffer_; + + void allocate(); + + public: + MTLQueryPool(); + ~MTLQueryPool(); + + void init(GPUQueryType type) override; + + void begin_query() override; + void end_query() override; + + void get_occlusion_result(MutableSpan<uint32_t> r_values) override; +}; +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_query.mm b/source/blender/gpu/metal/mtl_query.mm new file mode 100644 index 00000000000..f4bd5754b77 --- /dev/null +++ b/source/blender/gpu/metal/mtl_query.mm @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#include "mtl_query.hh" + +namespace blender::gpu { + +static const size_t VISIBILITY_COUNT_PER_BUFFER = 512; +/* Defined in the documentation but can't be queried programmatically: + * https://developer.apple.com/documentation/metal/mtlvisibilityresultmode/mtlvisibilityresultmodeboolean?language=objc + */ +static const size_t VISIBILITY_RESULT_SIZE_IN_BYTES = 8; + +MTLQueryPool::MTLQueryPool() +{ + allocate(); +} +MTLQueryPool::~MTLQueryPool() +{ + for (gpu::MTLBuffer *buf : buffer_) { + BLI_assert(buf); + buf->free(); + } +} + +void MTLQueryPool::allocate() +{ + /* Allocate Metal buffer for visibility results. */ + size_t buffer_size_in_bytes = VISIBILITY_COUNT_PER_BUFFER * VISIBILITY_RESULT_SIZE_IN_BYTES; + gpu::MTLBuffer *buffer = MTLContext::get_global_memory_manager().allocate(buffer_size_in_bytes, + true); + BLI_assert(buffer); + buffer_.append(buffer); +} + +static inline MTLVisibilityResultMode to_mtl_type(GPUQueryType type) +{ + if (type == GPU_QUERY_OCCLUSION) { + return MTLVisibilityResultModeBoolean; + } + BLI_assert(0); + return MTLVisibilityResultModeBoolean; +} + +void MTLQueryPool::init(GPUQueryType type) +{ + BLI_assert(initialized_ == false); + initialized_ = true; + type_ = type; + mtl_type_ = to_mtl_type(type); + query_issued_ = 0; +} + +void MTLQueryPool::begin_query() +{ + MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get()); + + /* Ensure our allocated buffer pool has enough space for the current queries. */ + int query_id = query_issued_; + int requested_buffer = query_id / VISIBILITY_COUNT_PER_BUFFER; + if (requested_buffer >= buffer_.size()) { + allocate(); + } + + BLI_assert(requested_buffer < buffer_.size()); + gpu::MTLBuffer *buffer = buffer_[requested_buffer]; + + /* Ensure visibility buffer is set on the context. If visibility buffer changes, + * we need to begin a new render pass with an updated reference in the + * MTLRenderPassDescriptor. 
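 *
 * Worked example using the constants above: with VISIBILITY_COUNT_PER_BUFFER = 512 and
 * VISIBILITY_RESULT_SIZE_IN_BYTES = 8, each pool buffer holds 512 * 8 = 4096 bytes of results.
 * Query id 600 therefore resolves to requested_buffer = 600 / 512 = 1 (a second buffer is
 * allocated on demand) and is written at byte offset (600 % 512) * 8 = 704 within that buffer.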
*/ + ctx->set_visibility_buffer(buffer); + + ctx->ensure_begin_render_pass(); + id<MTLRenderCommandEncoder> rec = ctx->main_command_buffer.get_active_render_command_encoder(); + [rec setVisibilityResultMode:mtl_type_ + offset:(query_id % VISIBILITY_COUNT_PER_BUFFER) * + VISIBILITY_RESULT_SIZE_IN_BYTES]; + query_issued_ += 1; +} + +void MTLQueryPool::end_query() +{ + MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get()); + + id<MTLRenderCommandEncoder> rec = ctx->main_command_buffer.get_active_render_command_encoder(); + [rec setVisibilityResultMode:MTLVisibilityResultModeDisabled offset:0]; +} + +void MTLQueryPool::get_occlusion_result(MutableSpan<uint32_t> r_values) +{ + MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get()); + + /* Create a blit encoder to synchronize the query buffer results between + * GPU and CPU when not using shared-memory. */ + if ([ctx->device hasUnifiedMemory] == false) { + id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder(); + BLI_assert(blit_encoder); + for (gpu::MTLBuffer *buf : buffer_) { + [blit_encoder synchronizeResource:buf->get_metal_buffer()]; + } + BLI_assert(ctx->get_inside_frame()); + } + + /* Wait for GPU operations to complete and for query buffer contents + * to be synchronized back to host memory. */ + GPU_finish(); + + /* Iterate through all possible visibility buffers and copy results into provided + * container. */ + for (const int i : IndexRange(query_issued_)) { + int requested_buffer = i / VISIBILITY_COUNT_PER_BUFFER; + const uint64_t *queries = static_cast<const uint64_t *>( + buffer_[requested_buffer]->get_host_ptr()); + r_values[i] = static_cast<uint32_t>(queries[i % VISIBILITY_COUNT_PER_BUFFER]); + } + ctx->set_visibility_buffer(nullptr); +} + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_shader.hh b/source/blender/gpu/metal/mtl_shader.hh new file mode 100644 index 00000000000..64d9d1cf849 --- /dev/null +++ b/source/blender/gpu/metal/mtl_shader.hh @@ -0,0 +1,1165 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include "MEM_guardedalloc.h" + +#include "GPU_batch.h" +#include "GPU_capabilities.h" +#include "GPU_shader.h" +#include "GPU_vertex_format.h" + +#include <Metal/Metal.h> +#include <QuartzCore/QuartzCore.h> +#include <functional> +#include <unordered_map> + +#include <mutex> +#include <thread> + +#include "mtl_framebuffer.hh" +#include "mtl_shader_interface.hh" +#include "mtl_shader_shared.h" +#include "mtl_state.hh" +#include "mtl_texture.hh" + +#include "gpu_shader_create_info.hh" +#include "gpu_shader_private.hh" + +namespace blender::gpu { + +class MTLShaderInterface; +class MTLContext; + +/* Debug control. */ +#define MTL_SHADER_DEBUG_EXPORT_SOURCE 1 +#define MTL_SHADER_TRANSLATION_DEBUG_OUTPUT 0 + +/* Separate print used only during development and debugging. */ +#if MTL_SHADER_TRANSLATION_DEBUG_OUTPUT +# define shader_debug_printf printf +#else +# define shader_debug_printf(...) /* Null print. */ +#endif + +/* Desired reflection data for a buffer binding. */ +struct MTLBufferArgumentData { + uint32_t index; + uint32_t size; + uint32_t alignment; + bool active; +}; + +/* Metal Render Pipeline State Instance. */ +struct MTLRenderPipelineStateInstance { + /* Function instances with specialization. + * Required for argument encoder construction. */ + id<MTLFunction> vert; + id<MTLFunction> frag; + + /* PSO handle. 
*/ + id<MTLRenderPipelineState> pso; + + /** Derived information. */ + /* Unique index for PSO variant. */ + uint32_t shader_pso_index; + /* Base bind index for binding uniform buffers, offset based on other + * bound buffers such as vertex buffers, as the count can vary. */ + int base_uniform_buffer_index; + /* buffer bind slot used for null attributes (-1 if not needed). */ + int null_attribute_buffer_index; + /* buffer bind used for transform feedback output buffer. */ + int transform_feedback_buffer_index; + + /** Reflection Data. + * Currently used to verify whether uniform buffers of incorrect sizes being bound, due to left + * over bindings being used for slots that did not need updating for a particular draw. Metal + * Back-end over-generates bindings due to detecting their presence, though in many cases, the + * bindings in the source are not all used for a given shader. + * This information can also be used to eliminate redundant/unused bindings. */ + bool reflection_data_available; + blender::Vector<MTLBufferArgumentData> buffer_bindings_reflection_data_vert; + blender::Vector<MTLBufferArgumentData> buffer_bindings_reflection_data_frag; +}; + +/* #MTLShaderBuilder source wrapper used during initial compilation. */ +struct MTLShaderBuilder { + NSString *msl_source_vert_ = @""; + NSString *msl_source_frag_ = @""; + + /* Generated GLSL source used during compilation. */ + std::string glsl_vertex_source_ = ""; + std::string glsl_fragment_source_ = ""; + + /* Indicates whether source code has been provided via MSL directly. */ + bool source_from_msl_ = false; +}; + +/** + * #MTLShader implements shader compilation, Pipeline State Object (PSO) + * creation for rendering and uniform data binding. + * Shaders can either be created from native MSL, or generated + * from a GLSL source shader using #GPUShaderCreateInfo. + * + * Shader creation process: + * - Create #MTLShader: + * - Convert GLSL to MSL source if required. + * - set MSL source. + * - set Vertex/Fragment function names. + * - Create and populate #MTLShaderInterface. + **/ +class MTLShader : public Shader { + friend shader::ShaderCreateInfo; + friend shader::StageInterfaceInfo; + + public: + /* Cached SSBO vertex fetch attribute uniform locations. */ + int uni_ssbo_input_prim_type_loc = -1; + int uni_ssbo_input_vert_count_loc = -1; + int uni_ssbo_uses_indexed_rendering = -1; + int uni_ssbo_uses_index_mode_u16 = -1; + + private: + /* Context Handle. */ + MTLContext *context_ = nullptr; + + /** Transform Feedback. */ + /* Transform feedback mode. */ + eGPUShaderTFBType transform_feedback_type_ = GPU_SHADER_TFB_NONE; + /* Transform feedback outputs written to TFB buffer. */ + blender::Vector<std::string> tf_output_name_list_; + /* Whether transform feedback is currently active. */ + bool transform_feedback_active_ = false; + /* Vertex buffer to write transform feedback data into. */ + GPUVertBuf *transform_feedback_vertbuf_ = nullptr; + + /** Shader source code. */ + MTLShaderBuilder *shd_builder_ = nullptr; + NSString *vertex_function_name_ = @""; + NSString *fragment_function_name_ = @""; + + /** Compiled shader resources. */ + id<MTLLibrary> shader_library_vert_ = nil; + id<MTLLibrary> shader_library_frag_ = nil; + bool valid_ = false; + + /** Render pipeline state and PSO caching. */ + /* Metal API Descriptor used for creation of unique PSOs based on rendering state. 
*/ + MTLRenderPipelineDescriptor *pso_descriptor_ = nil; + /* Metal backend struct containing all high-level pipeline state parameters + * which contribute to instantiation of a unique PSO. */ + MTLRenderPipelineStateDescriptor current_pipeline_state_; + /* Cache of compiled PipelineStateObjects. */ + blender::Map<MTLRenderPipelineStateDescriptor, MTLRenderPipelineStateInstance *> pso_cache_; + + /* True to enable multi-layered rendering support. */ + bool uses_mtl_array_index_ = false; + + /** SSBO Vertex fetch pragma options. */ + /* Indicates whether to pass in VertexBuffer's as regular buffer bindings + * and perform vertex assembly manually, rather than using Stage-in. + * This is used to give a vertex shader full access to all of the + * vertex data. + * This is primarily used for optimization techniques and + * alternative solutions for Geometry-shaders which are unsupported + * by Metal. */ + bool use_ssbo_vertex_fetch_mode_ = false; + /* Output primitive type when rendering sing ssbo_vertex_fetch. */ + MTLPrimitiveType ssbo_vertex_fetch_output_prim_type_; + + /* Output vertices per original vertex shader instance. + * This number will be multiplied by the number of input primitives + * from the source draw call. */ + uint32_t ssbo_vertex_fetch_output_num_verts_ = 0; + + bool ssbo_vertex_attribute_bind_active_ = false; + int ssbo_vertex_attribute_bind_mask_ = 0; + bool ssbo_vbo_slot_used_[MTL_SSBO_VERTEX_FETCH_MAX_VBOS]; + + struct ShaderSSBOAttributeBinding { + int attribute_index = -1; + int uniform_stride; + int uniform_offset; + int uniform_fetchmode; + int uniform_vbo_id; + int uniform_attr_type; + }; + ShaderSSBOAttributeBinding cached_ssbo_attribute_bindings_[MTL_MAX_VERTEX_INPUT_ATTRIBUTES] = {}; + + /* Metal Shader Uniform data store. + * This blocks is used to store current shader push_constant + * data before it is submitted to the GPU. This is currently + * stored per shader instance, though depending on GPU module + * functionality, this could potentially be a global data store. + * This data is associated with the PushConstantBlock, which is + * always at index zero in the UBO list. */ + void *push_constant_data_ = nullptr; + bool push_constant_modified_ = false; + + public: + MTLShader(MTLContext *ctx, const char *name); + MTLShader(MTLContext *ctx, + MTLShaderInterface *interface, + const char *name, + NSString *input_vertex_source, + NSString *input_fragment_source, + NSString *vertex_function_name_, + NSString *fragment_function_name_); + ~MTLShader(); + + /* Assign GLSL source. */ + void vertex_shader_from_glsl(MutableSpan<const char *> sources) override; + void geometry_shader_from_glsl(MutableSpan<const char *> sources) override; + void fragment_shader_from_glsl(MutableSpan<const char *> sources) override; + void compute_shader_from_glsl(MutableSpan<const char *> sources) override; + + /* Compile and build - Return true if successful. */ + bool finalize(const shader::ShaderCreateInfo *info = nullptr) override; + + /* Utility. */ + bool is_valid() + { + return valid_; + } + MTLRenderPipelineStateDescriptor &get_current_pipeline_state() + { + return current_pipeline_state_; + } + MTLShaderInterface *get_interface() + { + return static_cast<MTLShaderInterface *>(this->interface); + } + void *get_push_constant_data() + { + return push_constant_data_; + } + + /* Shader source generators from create-info. + * These aren't all used by Metal, as certain parts of source code generation + * for shader entry-points and resource mapping occur during `finalize`. 
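 *
 * For comparison, a shader supplied as native MSL bypasses these generators entirely. A minimal
 * creation sketch using the MSL constructor declared above (entry-point names and source
 * variables here are hypothetical placeholders):
 *
 *   MTLShader *shader = new MTLShader(
 *       ctx, interface, "example_shader", vert_msl, frag_msl, @"vertex_main", @"fragment_main");
 *
 * where vert_msl/frag_msl are NSString MSL sources; this constructor assigns the sources and
 * entry-point names, sets the interface, then calls finalize() itself.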
*/ + std::string resources_declare(const shader::ShaderCreateInfo &info) const override; + std::string vertex_interface_declare(const shader::ShaderCreateInfo &info) const override; + std::string fragment_interface_declare(const shader::ShaderCreateInfo &info) const override; + std::string geometry_interface_declare(const shader::ShaderCreateInfo &info) const override; + std::string geometry_layout_declare(const shader::ShaderCreateInfo &info) const override; + std::string compute_layout_declare(const shader::ShaderCreateInfo &info) const override; + + void transform_feedback_names_set(Span<const char *> name_list, + const eGPUShaderTFBType geom_type) override; + bool transform_feedback_enable(GPUVertBuf *buf) override; + void transform_feedback_disable() override; + + void bind() override; + void unbind() override; + + void uniform_float(int location, int comp_len, int array_size, const float *data) override; + void uniform_int(int location, int comp_len, int array_size, const int *data) override; + bool get_push_constant_is_dirty(); + void push_constant_bindstate_mark_dirty(bool is_dirty); + + void vertformat_from_shader(GPUVertFormat *format) const override; + + /* DEPRECATED: Kept only because of BGL API. (Returning -1 in METAL). */ + int program_handle_get() const override + { + return -1; + } + + bool get_uses_ssbo_vertex_fetch() + { + return use_ssbo_vertex_fetch_mode_; + } + MTLPrimitiveType get_ssbo_vertex_fetch_output_prim_type() + { + return ssbo_vertex_fetch_output_prim_type_; + } + uint32_t get_ssbo_vertex_fetch_output_num_verts() + { + return ssbo_vertex_fetch_output_num_verts_; + } + static int ssbo_vertex_type_to_attr_type(MTLVertexFormat attribute_type); + void prepare_ssbo_vertex_fetch_metadata(); + + /* SSBO Vertex Bindings Utility functions. */ + void ssbo_vertex_fetch_bind_attributes_begin(); + void ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_attr); + void ssbo_vertex_fetch_bind_attributes_end(id<MTLRenderCommandEncoder> active_encoder); + + /* Metal shader properties and source mapping. */ + void set_vertex_function_name(NSString *vetex_function_name); + void set_fragment_function_name(NSString *fragment_function_name_); + void shader_source_from_msl(NSString *input_vertex_source, NSString *input_fragment_source); + void set_interface(MTLShaderInterface *interface); + MTLRenderPipelineStateInstance *bake_current_pipeline_state(MTLContext *ctx, + MTLPrimitiveTopologyClass prim_type); + + /* Transform Feedback. */ + GPUVertBuf *get_transform_feedback_active_buffer(); + bool has_transform_feedback_varying(std::string str); + + private: + /* Generate MSL shader from GLSL source. */ + bool generate_msl_from_glsl(const shader::ShaderCreateInfo *info); + + MEM_CXX_CLASS_ALLOC_FUNCS("MTLShader"); +}; + +/* Vertex format conversion. + * Determines whether it is possible to resize a vertex attribute type + * during input assembly. A conversion is implied by the difference + * between the input vertex descriptor (from MTLBatch/MTLImmediate) + * and the type specified in the shader source. + * + * e.g. vec3 to vec4 expansion, or vec4 to vec2 truncation. + * NOTE: Vector expansion will replace empty elements with the values + * (0,0,0,1). + * + * If implicit format resize is not possible, this function + * returns false. 
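 * For example, a shader-side float4 attribute (MTLVertexFormatFloat4) fed by a vertex buffer
 * that only supplies two float components resolves to MTLVertexFormatFloat2, and Metal expands
 * the missing elements as described above.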
+ * + * Implicitly supported conversions in Metal are described here: + * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc + */ +inline bool mtl_vertex_format_resize(MTLVertexFormat mtl_format, + uint32_t components, + MTLVertexFormat *r_convertedFormat) +{ + MTLVertexFormat out_vert_format = MTLVertexFormatInvalid; + switch (mtl_format) { + /* Char. */ + case MTLVertexFormatChar: + case MTLVertexFormatChar2: + case MTLVertexFormatChar3: + case MTLVertexFormatChar4: + switch (components) { + case 1: + out_vert_format = MTLVertexFormatChar; + break; + case 2: + out_vert_format = MTLVertexFormatChar2; + break; + case 3: + out_vert_format = MTLVertexFormatChar3; + break; + case 4: + out_vert_format = MTLVertexFormatChar4; + break; + } + break; + + /* Normalized Char. */ + case MTLVertexFormatCharNormalized: + case MTLVertexFormatChar2Normalized: + case MTLVertexFormatChar3Normalized: + case MTLVertexFormatChar4Normalized: + switch (components) { + case 1: + out_vert_format = MTLVertexFormatCharNormalized; + break; + case 2: + out_vert_format = MTLVertexFormatChar2Normalized; + break; + case 3: + out_vert_format = MTLVertexFormatChar3Normalized; + break; + case 4: + out_vert_format = MTLVertexFormatChar4Normalized; + break; + } + break; + + /* Unsigned Char. */ + case MTLVertexFormatUChar: + case MTLVertexFormatUChar2: + case MTLVertexFormatUChar3: + case MTLVertexFormatUChar4: + switch (components) { + case 1: + out_vert_format = MTLVertexFormatUChar; + break; + case 2: + out_vert_format = MTLVertexFormatUChar2; + break; + case 3: + out_vert_format = MTLVertexFormatUChar3; + break; + case 4: + out_vert_format = MTLVertexFormatUChar4; + break; + } + break; + + /* Normalized Unsigned char */ + case MTLVertexFormatUCharNormalized: + case MTLVertexFormatUChar2Normalized: + case MTLVertexFormatUChar3Normalized: + case MTLVertexFormatUChar4Normalized: + switch (components) { + case 1: + out_vert_format = MTLVertexFormatUCharNormalized; + break; + case 2: + out_vert_format = MTLVertexFormatUChar2Normalized; + break; + case 3: + out_vert_format = MTLVertexFormatUChar3Normalized; + break; + case 4: + out_vert_format = MTLVertexFormatUChar4Normalized; + break; + } + break; + + /* Short. */ + case MTLVertexFormatShort: + case MTLVertexFormatShort2: + case MTLVertexFormatShort3: + case MTLVertexFormatShort4: + switch (components) { + case 1: + out_vert_format = MTLVertexFormatShort; + break; + case 2: + out_vert_format = MTLVertexFormatShort2; + break; + case 3: + out_vert_format = MTLVertexFormatShort3; + break; + case 4: + out_vert_format = MTLVertexFormatShort4; + break; + } + break; + + /* Normalized Short. */ + case MTLVertexFormatShortNormalized: + case MTLVertexFormatShort2Normalized: + case MTLVertexFormatShort3Normalized: + case MTLVertexFormatShort4Normalized: + switch (components) { + case 1: + out_vert_format = MTLVertexFormatShortNormalized; + break; + case 2: + out_vert_format = MTLVertexFormatShort2Normalized; + break; + case 3: + out_vert_format = MTLVertexFormatShort3Normalized; + break; + case 4: + out_vert_format = MTLVertexFormatShort4Normalized; + break; + } + break; + + /* Unsigned Short. 
*/ + case MTLVertexFormatUShort: + case MTLVertexFormatUShort2: + case MTLVertexFormatUShort3: + case MTLVertexFormatUShort4: + switch (components) { + case 1: + out_vert_format = MTLVertexFormatUShort; + break; + case 2: + out_vert_format = MTLVertexFormatUShort2; + break; + case 3: + out_vert_format = MTLVertexFormatUShort3; + break; + case 4: + out_vert_format = MTLVertexFormatUShort4; + break; + } + break; + + /* Normalized Unsigned Short. */ + case MTLVertexFormatUShortNormalized: + case MTLVertexFormatUShort2Normalized: + case MTLVertexFormatUShort3Normalized: + case MTLVertexFormatUShort4Normalized: + switch (components) { + case 1: + out_vert_format = MTLVertexFormatUShortNormalized; + break; + case 2: + out_vert_format = MTLVertexFormatUShort2Normalized; + break; + case 3: + out_vert_format = MTLVertexFormatUShort3Normalized; + break; + case 4: + out_vert_format = MTLVertexFormatUShort4Normalized; + break; + } + break; + + /* Integer. */ + case MTLVertexFormatInt: + case MTLVertexFormatInt2: + case MTLVertexFormatInt3: + case MTLVertexFormatInt4: + switch (components) { + case 1: + out_vert_format = MTLVertexFormatInt; + break; + case 2: + out_vert_format = MTLVertexFormatInt2; + break; + case 3: + out_vert_format = MTLVertexFormatInt3; + break; + case 4: + out_vert_format = MTLVertexFormatInt4; + break; + } + break; + + /* Unsigned Integer. */ + case MTLVertexFormatUInt: + case MTLVertexFormatUInt2: + case MTLVertexFormatUInt3: + case MTLVertexFormatUInt4: + switch (components) { + case 1: + out_vert_format = MTLVertexFormatUInt; + break; + case 2: + out_vert_format = MTLVertexFormatUInt2; + break; + case 3: + out_vert_format = MTLVertexFormatUInt3; + break; + case 4: + out_vert_format = MTLVertexFormatUInt4; + break; + } + break; + + /* Half. */ + case MTLVertexFormatHalf: + case MTLVertexFormatHalf2: + case MTLVertexFormatHalf3: + case MTLVertexFormatHalf4: + switch (components) { + case 1: + out_vert_format = MTLVertexFormatHalf; + break; + case 2: + out_vert_format = MTLVertexFormatHalf2; + break; + case 3: + out_vert_format = MTLVertexFormatHalf3; + break; + case 4: + out_vert_format = MTLVertexFormatHalf4; + break; + } + break; + + /* Float. */ + case MTLVertexFormatFloat: + case MTLVertexFormatFloat2: + case MTLVertexFormatFloat3: + case MTLVertexFormatFloat4: + switch (components) { + case 1: + out_vert_format = MTLVertexFormatFloat; + break; + case 2: + out_vert_format = MTLVertexFormatFloat2; + break; + case 3: + out_vert_format = MTLVertexFormatFloat3; + break; + case 4: + out_vert_format = MTLVertexFormatFloat4; + break; + } + break; + + /* Other formats */ + default: + out_vert_format = mtl_format; + break; + } + *r_convertedFormat = out_vert_format; + return out_vert_format != MTLVertexFormatInvalid; +} + +/** + * Returns whether the METAL API can internally convert between the input type of data in the + * incoming vertex buffer and the format used by the vertex attribute inside the shader. + * + * - Returns TRUE if the type can be converted internally, along with returning the appropriate + * type to be passed into the #MTLVertexAttributeDescriptorPSO. + * + * - Returns FALSE if the type cannot be converted internally e.g. casting Int4 to Float4. + * + * If implicit conversion is not possible, then we can fallback to performing manual attribute + * conversion using the special attribute read function specializations in the shader. + * These functions selectively convert between types based on the specified vertex + * attribute `GPUVertFetchMode fetch_mode` e.g. 
`GPU_FETCH_INT`. + */ +inline bool mtl_convert_vertex_format(MTLVertexFormat shader_attrib_format, + GPUVertCompType component_type, + uint32_t component_length, + GPUVertFetchMode fetch_mode, + MTLVertexFormat *r_convertedFormat) +{ + bool normalized = (fetch_mode == GPU_FETCH_INT_TO_FLOAT_UNIT); + MTLVertexFormat out_vert_format = MTLVertexFormatInvalid; + + switch (component_type) { + + case GPU_COMP_I8: + switch (fetch_mode) { + case GPU_FETCH_INT: + if (shader_attrib_format == MTLVertexFormatChar || + shader_attrib_format == MTLVertexFormatChar2 || + shader_attrib_format == MTLVertexFormatChar3 || + shader_attrib_format == MTLVertexFormatChar4) { + + /* No conversion Needed (as type matches) - Just a vector resize if needed. */ + bool can_convert = mtl_vertex_format_resize( + shader_attrib_format, component_type, &out_vert_format); + + /* Ensure format resize successful. */ + BLI_assert(can_convert); + UNUSED_VARS_NDEBUG(can_convert); + } + else if (shader_attrib_format == MTLVertexFormatInt4 && component_length == 4) { + /* Allow type expansion - Shader expects MTLVertexFormatInt4, we can supply a type + * with fewer bytes if component count is the same. Sign must also match original type + * -- which is not a problem in this case. */ + out_vert_format = MTLVertexFormatChar4; + } + else if (shader_attrib_format == MTLVertexFormatInt3 && component_length == 3) { + /* Same as above case for matching length and signage (Len=3)*/ + out_vert_format = MTLVertexFormatChar3; + } + else if (shader_attrib_format == MTLVertexFormatInt2 && component_length == 2) { + /* Same as above case for matching length and signage (Len=2)*/ + out_vert_format = MTLVertexFormatChar2; + } + else if (shader_attrib_format == MTLVertexFormatInt && component_length == 1) { + /* Same as above case for matching length and signage (Len=1)*/ + out_vert_format = MTLVertexFormatChar; + } + else if (shader_attrib_format == MTLVertexFormatInt && component_length == 4) { + /* Special case here, format has been specified as GPU_COMP_U8 with 4 components, which + * is equivalent to an Int -- so data will be compatible with the shader interface. */ + out_vert_format = MTLVertexFormatInt; + } + else { + BLI_assert_msg(false, + "Source vertex data format is either Char, Char2, Char3, Char4 but " + "format in shader interface is NOT compatible.\n"); + out_vert_format = MTLVertexFormatInvalid; + } + break; + + /* Source vertex data is integer type, but shader interface type is floating point. + * If the input attribute is specified as normalized, we can convert. */ + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + case GPU_FETCH_INT_TO_FLOAT_UNIT: + if (normalized) { + switch (component_length) { + case 1: + out_vert_format = MTLVertexFormatCharNormalized; + break; + case 2: + out_vert_format = MTLVertexFormatChar2Normalized; + break; + case 3: + out_vert_format = MTLVertexFormatChar3Normalized; + break; + case 4: + out_vert_format = MTLVertexFormatChar4Normalized; + break; + default: + BLI_assert_msg(false, "invalid vertex format"); + out_vert_format = MTLVertexFormatInvalid; + } + } + else { + /* Cannot convert. */ + out_vert_format = MTLVertexFormatInvalid; + } + break; + } + break; + + case GPU_COMP_U8: + switch (fetch_mode) { + /* Fetching INT: Check backing shader format matches source input. 
*/ + case GPU_FETCH_INT: + if (shader_attrib_format == MTLVertexFormatUChar || + shader_attrib_format == MTLVertexFormatUChar2 || + shader_attrib_format == MTLVertexFormatUChar3 || + shader_attrib_format == MTLVertexFormatUChar4) { + + /* No conversion Needed (as type matches) - Just a vector resize if needed. */ + bool can_convert = mtl_vertex_format_resize( + shader_attrib_format, component_length, &out_vert_format); + + /* Ensure format resize successful. */ + BLI_assert(can_convert); + UNUSED_VARS_NDEBUG(can_convert); + /* TODO(Metal): Add other format conversions if needed. Currently no attributes hit + * this path. */ + } + else if (shader_attrib_format == MTLVertexFormatUInt4 && component_length == 4) { + /* Allow type expansion - Shader expects MTLVertexFormatUInt4, we can supply a type + * with fewer bytes if component count is the same. */ + out_vert_format = MTLVertexFormatUChar4; + } + else if (shader_attrib_format == MTLVertexFormatUInt3 && component_length == 3) { + /* Same as above case for matching length and signage (Len=3)*/ + out_vert_format = MTLVertexFormatUChar3; + } + else if (shader_attrib_format == MTLVertexFormatUInt2 && component_length == 2) { + /* Same as above case for matching length and signage (Len=2)*/ + out_vert_format = MTLVertexFormatUChar2; + } + else if (shader_attrib_format == MTLVertexFormatUInt && component_length == 1) { + /* Same as above case for matching length and signage (Len=1)*/ + out_vert_format = MTLVertexFormatUChar; + } + else if (shader_attrib_format == MTLVertexFormatInt && component_length == 4) { + /* Special case here, format has been specified as GPU_COMP_U8 with 4 components, which + * is equivalent to an Int-- so data will be compatible with shader interface. */ + out_vert_format = MTLVertexFormatInt; + } + else if (shader_attrib_format == MTLVertexFormatUInt && component_length == 4) { + /* Special case here, format has been specified as GPU_COMP_U8 with 4 components, which + *is equivalent to a UInt-- so data will be compatible with shader interface. */ + out_vert_format = MTLVertexFormatUInt; + } + else { + BLI_assert_msg(false, + "Source vertex data format is either UChar, UChar2, UChar3, UChar4 but " + "format in shader interface is NOT compatible.\n"); + out_vert_format = MTLVertexFormatInvalid; + } + break; + + /* Source vertex data is integral type, but shader interface type is floating point. + * If the input attribute is specified as normalized, we can convert. */ + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + case GPU_FETCH_INT_TO_FLOAT_UNIT: + if (normalized) { + switch (component_length) { + case 1: + out_vert_format = MTLVertexFormatUCharNormalized; + break; + case 2: + out_vert_format = MTLVertexFormatUChar2Normalized; + break; + case 3: + out_vert_format = MTLVertexFormatUChar3Normalized; + break; + case 4: + out_vert_format = MTLVertexFormatUChar4Normalized; + break; + default: + BLI_assert_msg(false, "invalid vertex format"); + out_vert_format = MTLVertexFormatInvalid; + } + } + else { + /* Cannot convert. */ + out_vert_format = MTLVertexFormatInvalid; + } + break; + } + break; + + case GPU_COMP_I16: + switch (fetch_mode) { + case GPU_FETCH_INT: + if (shader_attrib_format == MTLVertexFormatShort || + shader_attrib_format == MTLVertexFormatShort2 || + shader_attrib_format == MTLVertexFormatShort3 || + shader_attrib_format == MTLVertexFormatShort4) { + /* No conversion Needed (as type matches) - Just a vector resize if needed. 
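 * For example (illustrative): two GPU_COMP_U8 components bound to a shader attribute declared
 * as MTLVertexFormatUChar4 resize to MTLVertexFormatUChar2, since only the component count
 * differs and the sign/width already match.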
*/ + bool can_convert = mtl_vertex_format_resize( + shader_attrib_format, component_length, &out_vert_format); + + /* Ensure conversion successful. */ + BLI_assert(can_convert); + UNUSED_VARS_NDEBUG(can_convert); + } + else { + BLI_assert_msg(false, + "Source vertex data format is either Short, Short2, Short3, Short4 but " + "format in shader interface is NOT compatible.\n"); + out_vert_format = MTLVertexFormatInvalid; + } + break; + + /* Source vertex data is integral type, but shader interface type is floating point. + * If the input attribute is specified as normalized, we can convert. */ + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + case GPU_FETCH_INT_TO_FLOAT_UNIT: + if (normalized) { + switch (component_length) { + case 1: + out_vert_format = MTLVertexFormatShortNormalized; + break; + case 2: + out_vert_format = MTLVertexFormatShort2Normalized; + break; + case 3: + out_vert_format = MTLVertexFormatShort3Normalized; + break; + case 4: + out_vert_format = MTLVertexFormatShort4Normalized; + break; + default: + BLI_assert_msg(false, "invalid vertex format"); + out_vert_format = MTLVertexFormatInvalid; + } + } + else { + /* Cannot convert. */ + out_vert_format = MTLVertexFormatInvalid; + } + break; + } + break; + + case GPU_COMP_U16: + switch (fetch_mode) { + case GPU_FETCH_INT: + if (shader_attrib_format == MTLVertexFormatUShort || + shader_attrib_format == MTLVertexFormatUShort2 || + shader_attrib_format == MTLVertexFormatUShort3 || + shader_attrib_format == MTLVertexFormatUShort4) { + /* No conversion Needed (as type matches) - Just a vector resize if needed. */ + bool can_convert = mtl_vertex_format_resize( + shader_attrib_format, component_length, &out_vert_format); + + /* Ensure format resize successful. */ + BLI_assert(can_convert); + UNUSED_VARS_NDEBUG(can_convert); + } + else { + BLI_assert_msg(false, + "Source vertex data format is either UShort, UShort2, UShort3, UShort4 " + "but format in shader interface is NOT compatible.\n"); + out_vert_format = MTLVertexFormatInvalid; + } + break; + + /* Source vertex data is integral type, but shader interface type is floating point. + * If the input attribute is specified as normalized, we can convert. */ + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + case GPU_FETCH_INT_TO_FLOAT_UNIT: + if (normalized) { + switch (component_length) { + case 1: + out_vert_format = MTLVertexFormatUShortNormalized; + break; + case 2: + out_vert_format = MTLVertexFormatUShort2Normalized; + break; + case 3: + out_vert_format = MTLVertexFormatUShort3Normalized; + break; + case 4: + out_vert_format = MTLVertexFormatUShort4Normalized; + break; + default: + BLI_assert_msg(false, "invalid vertex format"); + out_vert_format = MTLVertexFormatInvalid; + } + } + else { + /* Cannot convert. */ + out_vert_format = MTLVertexFormatInvalid; + } + break; + } + break; + + case GPU_COMP_I32: + switch (fetch_mode) { + case GPU_FETCH_INT: + if (shader_attrib_format == MTLVertexFormatInt || + shader_attrib_format == MTLVertexFormatInt2 || + shader_attrib_format == MTLVertexFormatInt3 || + shader_attrib_format == MTLVertexFormatInt4) { + /* No conversion Needed (as type matches) - Just a vector resize if needed. */ + bool can_convert = mtl_vertex_format_resize( + shader_attrib_format, component_length, &out_vert_format); + + /* Verify conversion successful. 
*/ + BLI_assert(can_convert); + UNUSED_VARS_NDEBUG(can_convert); + } + else { + BLI_assert_msg(false, + "Source vertex data format is either Int, Int2, Int3, Int4 but format " + "in shader interface is NOT compatible.\n"); + out_vert_format = MTLVertexFormatInvalid; + } + break; + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + case GPU_FETCH_INT_TO_FLOAT_UNIT: + /* Unfortunately we cannot implicitly convert between Int and Float in METAL. */ + out_vert_format = MTLVertexFormatInvalid; + break; + } + break; + + case GPU_COMP_U32: + switch (fetch_mode) { + case GPU_FETCH_INT: + if (shader_attrib_format == MTLVertexFormatUInt || + shader_attrib_format == MTLVertexFormatUInt2 || + shader_attrib_format == MTLVertexFormatUInt3 || + shader_attrib_format == MTLVertexFormatUInt4) { + /* No conversion Needed (as type matches) - Just a vector resize if needed. */ + bool can_convert = mtl_vertex_format_resize( + shader_attrib_format, component_length, &out_vert_format); + + /* Verify conversion successful. */ + BLI_assert(can_convert); + UNUSED_VARS_NDEBUG(can_convert); + } + else { + BLI_assert_msg(false, + "Source vertex data format is either UInt, UInt2, UInt3, UInt4 but " + "format in shader interface is NOT compatible.\n"); + out_vert_format = MTLVertexFormatInvalid; + } + break; + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + case GPU_FETCH_INT_TO_FLOAT_UNIT: + /* Unfortunately we cannot convert between UInt and Float in METAL */ + out_vert_format = MTLVertexFormatInvalid; + break; + } + break; + + case GPU_COMP_F32: + switch (fetch_mode) { + + /* Source data is float. This will be compatible + * if type specified in shader is also float. */ + case GPU_FETCH_FLOAT: + case GPU_FETCH_INT_TO_FLOAT: + case GPU_FETCH_INT_TO_FLOAT_UNIT: + if (shader_attrib_format == MTLVertexFormatFloat || + shader_attrib_format == MTLVertexFormatFloat2 || + shader_attrib_format == MTLVertexFormatFloat3 || + shader_attrib_format == MTLVertexFormatFloat4) { + /* No conversion Needed (as type matches) - Just a vector resize, if needed. */ + bool can_convert = mtl_vertex_format_resize( + shader_attrib_format, component_length, &out_vert_format); + + /* Verify conversion successful. */ + BLI_assert(can_convert); + UNUSED_VARS_NDEBUG(can_convert); + } + else { + BLI_assert_msg(false, + "Source vertex data format is either Float, Float2, Float3, Float4 but " + "format in shader interface is NOT compatible.\n"); + out_vert_format = MTLVertexFormatInvalid; + } + break; + + case GPU_FETCH_INT: + /* Unfortunately we cannot convert between Float and Int implicitly in METAL. 
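 * For example, GPU_COMP_F32 data fetched with GPU_FETCH_INT, or GPU_COMP_I32/GPU_COMP_U32 data
 * fetched as float, all resolve to MTLVertexFormatInvalid here, and the caller must fall back
 * to the manual attribute-read specializations in the generated shader instead.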
*/ + out_vert_format = MTLVertexFormatInvalid; + break; + } + break; + + case GPU_COMP_I10: + out_vert_format = MTLVertexFormatInt1010102Normalized; + break; + } + *r_convertedFormat = out_vert_format; + return (out_vert_format != MTLVertexFormatInvalid); +} + +inline uint comp_count_from_vert_format(MTLVertexFormat vert_format) +{ + switch (vert_format) { + case MTLVertexFormatFloat: + case MTLVertexFormatInt: + case MTLVertexFormatUInt: + case MTLVertexFormatShort: + case MTLVertexFormatUChar: + case MTLVertexFormatUCharNormalized: + return 1; + case MTLVertexFormatUChar2: + case MTLVertexFormatUInt2: + case MTLVertexFormatFloat2: + case MTLVertexFormatInt2: + case MTLVertexFormatUChar2Normalized: + return 2; + case MTLVertexFormatUChar3: + case MTLVertexFormatUInt3: + case MTLVertexFormatFloat3: + case MTLVertexFormatInt3: + case MTLVertexFormatShort3Normalized: + case MTLVertexFormatUChar3Normalized: + return 3; + case MTLVertexFormatUChar4: + case MTLVertexFormatFloat4: + case MTLVertexFormatUInt4: + case MTLVertexFormatInt4: + case MTLVertexFormatUChar4Normalized: + case MTLVertexFormatInt1010102Normalized: + + default: + BLI_assert_msg(false, "Unrecognized attribute type. Add types to switch as needed."); + return 0; + } +} + +inline GPUVertFetchMode fetchmode_from_vert_format(MTLVertexFormat vert_format) +{ + switch (vert_format) { + case MTLVertexFormatFloat: + case MTLVertexFormatFloat2: + case MTLVertexFormatFloat3: + case MTLVertexFormatFloat4: + return GPU_FETCH_FLOAT; + + case MTLVertexFormatUChar: + case MTLVertexFormatUChar2: + case MTLVertexFormatUChar3: + case MTLVertexFormatUChar4: + case MTLVertexFormatChar: + case MTLVertexFormatChar2: + case MTLVertexFormatChar3: + case MTLVertexFormatChar4: + case MTLVertexFormatUShort: + case MTLVertexFormatUShort2: + case MTLVertexFormatUShort3: + case MTLVertexFormatUShort4: + case MTLVertexFormatShort: + case MTLVertexFormatShort2: + case MTLVertexFormatShort3: + case MTLVertexFormatShort4: + case MTLVertexFormatUInt: + case MTLVertexFormatUInt2: + case MTLVertexFormatUInt3: + case MTLVertexFormatUInt4: + case MTLVertexFormatInt: + case MTLVertexFormatInt2: + case MTLVertexFormatInt3: + case MTLVertexFormatInt4: + return GPU_FETCH_INT; + + case MTLVertexFormatUCharNormalized: + case MTLVertexFormatUChar2Normalized: + case MTLVertexFormatUChar3Normalized: + case MTLVertexFormatUChar4Normalized: + case MTLVertexFormatCharNormalized: + case MTLVertexFormatChar2Normalized: + case MTLVertexFormatChar3Normalized: + case MTLVertexFormatChar4Normalized: + case MTLVertexFormatUShortNormalized: + case MTLVertexFormatUShort2Normalized: + case MTLVertexFormatUShort3Normalized: + case MTLVertexFormatUShort4Normalized: + case MTLVertexFormatShortNormalized: + case MTLVertexFormatShort2Normalized: + case MTLVertexFormatShort3Normalized: + case MTLVertexFormatShort4Normalized: + case MTLVertexFormatInt1010102Normalized: + return GPU_FETCH_INT_TO_FLOAT_UNIT; + + default: + BLI_assert_msg(false, "Unrecognized attribute type. 
Add types to switch as needed."); + return GPU_FETCH_FLOAT; + } +} + +inline GPUVertCompType comp_type_from_vert_format(MTLVertexFormat vert_format) +{ + switch (vert_format) { + case MTLVertexFormatUChar: + case MTLVertexFormatUChar2: + case MTLVertexFormatUChar3: + case MTLVertexFormatUChar4: + case MTLVertexFormatUCharNormalized: + case MTLVertexFormatUChar2Normalized: + case MTLVertexFormatUChar3Normalized: + case MTLVertexFormatUChar4Normalized: + return GPU_COMP_U8; + + case MTLVertexFormatChar: + case MTLVertexFormatChar2: + case MTLVertexFormatChar3: + case MTLVertexFormatChar4: + case MTLVertexFormatCharNormalized: + case MTLVertexFormatChar2Normalized: + case MTLVertexFormatChar3Normalized: + case MTLVertexFormatChar4Normalized: + return GPU_COMP_I8; + + case MTLVertexFormatShort: + case MTLVertexFormatShort2: + case MTLVertexFormatShort3: + case MTLVertexFormatShort4: + case MTLVertexFormatShortNormalized: + case MTLVertexFormatShort2Normalized: + case MTLVertexFormatShort3Normalized: + case MTLVertexFormatShort4Normalized: + return GPU_COMP_I16; + + case MTLVertexFormatUShort: + case MTLVertexFormatUShort2: + case MTLVertexFormatUShort3: + case MTLVertexFormatUShort4: + case MTLVertexFormatUShortNormalized: + case MTLVertexFormatUShort2Normalized: + case MTLVertexFormatUShort3Normalized: + case MTLVertexFormatUShort4Normalized: + return GPU_COMP_U16; + + case MTLVertexFormatInt: + case MTLVertexFormatInt2: + case MTLVertexFormatInt3: + case MTLVertexFormatInt4: + return GPU_COMP_I32; + + case MTLVertexFormatUInt: + case MTLVertexFormatUInt2: + case MTLVertexFormatUInt3: + case MTLVertexFormatUInt4: + return GPU_COMP_U32; + + case MTLVertexFormatFloat: + case MTLVertexFormatFloat2: + case MTLVertexFormatFloat3: + case MTLVertexFormatFloat4: + return GPU_COMP_F32; + + case MTLVertexFormatInt1010102Normalized: + return GPU_COMP_I10; + + default: + BLI_assert_msg(false, "Unrecognized attribute type. Add types to switch as needed."); + return GPU_COMP_F32; + } +} + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_shader.mm b/source/blender/gpu/metal/mtl_shader.mm new file mode 100644 index 00000000000..23097f312f0 --- /dev/null +++ b/source/blender/gpu/metal/mtl_shader.mm @@ -0,0 +1,1266 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#include "BKE_global.h" + +#include "BLI_string.h" +#include <algorithm> +#include <fstream> +#include <iostream> +#include <map> +#include <mutex> +#include <regex> +#include <sstream> +#include <string> + +#include <cstring> + +#include "GPU_platform.h" +#include "GPU_vertex_format.h" + +#include "mtl_common.hh" +#include "mtl_context.hh" +#include "mtl_debug.hh" +#include "mtl_pso_descriptor_state.hh" +#include "mtl_shader.hh" +#include "mtl_shader_generator.hh" +#include "mtl_shader_interface.hh" +#include "mtl_texture.hh" + +extern char datatoc_mtl_shader_common_msl[]; + +using namespace blender; +using namespace blender::gpu; +using namespace blender::gpu::shader; + +namespace blender::gpu { + +/* -------------------------------------------------------------------- */ +/** \name Creation / Destruction. + * \{ */ + +/* Create empty shader to be populated later. */ +MTLShader::MTLShader(MTLContext *ctx, const char *name) : Shader(name) +{ + context_ = ctx; + + /* Create SHD builder to hold temporary resources until compilation is complete. 
*/ + shd_builder_ = new MTLShaderBuilder(); + +#ifndef NDEBUG + /* Remove invalid symbols from shader name to ensure debug entry-point function name is valid. */ + for (uint i : IndexRange(strlen(this->name))) { + char c = this->name[i]; + if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) { + } + else { + this->name[i] = '_'; + } + } +#endif +} + +/* Create shader from MSL source. */ +MTLShader::MTLShader(MTLContext *ctx, + MTLShaderInterface *interface, + const char *name, + NSString *input_vertex_source, + NSString *input_fragment_source, + NSString *vert_function_name, + NSString *frag_function_name) + : MTLShader(ctx, name) +{ + BLI_assert([vert_function_name length]); + BLI_assert([frag_function_name length]); + + this->set_vertex_function_name(vert_function_name); + this->set_fragment_function_name(frag_function_name); + this->shader_source_from_msl(input_vertex_source, input_fragment_source); + this->set_interface(interface); + this->finalize(nullptr); +} + +MTLShader::~MTLShader() +{ + if (this->is_valid()) { + + /* Free uniform data block. */ + if (push_constant_data_ != nullptr) { + MEM_freeN(push_constant_data_); + push_constant_data_ = nullptr; + } + + /* Free Metal resources. */ + if (shader_library_vert_ != nil) { + [shader_library_vert_ release]; + shader_library_vert_ = nil; + } + if (shader_library_frag_ != nil) { + [shader_library_frag_ release]; + shader_library_frag_ = nil; + } + + if (pso_descriptor_ != nil) { + [pso_descriptor_ release]; + pso_descriptor_ = nil; + } + + /* Free Pipeline Cache. */ + for (const MTLRenderPipelineStateInstance *pso_inst : pso_cache_.values()) { + if (pso_inst->vert) { + [pso_inst->vert release]; + } + if (pso_inst->frag) { + [pso_inst->frag release]; + } + if (pso_inst->pso) { + [pso_inst->pso release]; + } + delete pso_inst; + } + pso_cache_.clear(); + + /* NOTE(Metal): #ShaderInterface deletion is handled in the super destructor `~Shader()`. */ + } + valid_ = false; + + if (shd_builder_ != nullptr) { + delete shd_builder_; + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Shader stage creation. + * \{ */ + +void MTLShader::vertex_shader_from_glsl(MutableSpan<const char *> sources) +{ + /* Flag source as not being compiled from native MSL. */ + BLI_assert(shd_builder_ != nullptr); + shd_builder_->source_from_msl_ = false; + + /* Remove #version tag entry. */ + sources[0] = ""; + + /* Consolidate GLSL vertex sources. */ + std::stringstream ss; + for (int i = 0; i < sources.size(); i++) { + ss << sources[i] << std::endl; + } + shd_builder_->glsl_vertex_source_ = ss.str(); +} + +void MTLShader::geometry_shader_from_glsl(MutableSpan<const char *> sources) +{ + MTL_LOG_ERROR("MTLShader::geometry_shader_from_glsl - Geometry shaders unsupported!\n"); +} + +void MTLShader::fragment_shader_from_glsl(MutableSpan<const char *> sources) +{ + /* Flag source as not being compiled from native MSL. */ + BLI_assert(shd_builder_ != nullptr); + shd_builder_->source_from_msl_ = false; + + /* Remove #version tag entry. */ + sources[0] = ""; + + /* Consolidate GLSL fragment sources. */ + std::stringstream ss; + for (int i = 0; i < sources.size(); i++) { + ss << sources[i] << std::endl; + } + shd_builder_->glsl_fragment_source_ = ss.str(); +} + +void MTLShader::compute_shader_from_glsl(MutableSpan<const char *> sources) +{ + /* Remove #version tag entry. */ + sources[0] = ""; + + /* TODO(Metal): Support compute shaders in Metal. 
*/ + MTL_LOG_WARNING( + "MTLShader::compute_shader_from_glsl - Compute shaders currently unsupported!\n"); +} + +bool MTLShader::finalize(const shader::ShaderCreateInfo *info) +{ + /* Check if Shader has already been finalized. */ + if (this->is_valid()) { + MTL_LOG_ERROR("Shader (%p) '%s' has already been finalized!\n", this, this->name_get()); + } + + /* Perform GLSL to MSL source translation. */ + BLI_assert(shd_builder_ != nullptr); + if (!shd_builder_->source_from_msl_) { + bool success = generate_msl_from_glsl(info); + if (!success) { + /* GLSL to MSL translation has failed, or is unsupported for this shader. */ + valid_ = false; + BLI_assert_msg(false, "Shader translation from GLSL to MSL has failed. \n"); + + /* Create empty interface to allow shader to be silently used. */ + MTLShaderInterface *mtl_interface = new MTLShaderInterface(this->name_get()); + this->set_interface(mtl_interface); + + /* Release temporary compilation resources. */ + delete shd_builder_; + return false; + } + } + + /* Ensure we have a valid shader interface. */ + MTLShaderInterface *mtl_interface = this->get_interface(); + BLI_assert(mtl_interface != nullptr); + + /* Verify Context handle, fetch device and compile shader. */ + BLI_assert(context_); + id<MTLDevice> device = context_->device; + BLI_assert(device != nil); + + /* Ensure source and stage entry-point names are set. */ + BLI_assert([vertex_function_name_ length] > 0); + if (transform_feedback_type_ == GPU_SHADER_TFB_NONE) { + BLI_assert([fragment_function_name_ length] > 0); + } + BLI_assert(shd_builder_ != nullptr); + BLI_assert([shd_builder_->msl_source_vert_ length] > 0); + + @autoreleasepool { + MTLCompileOptions *options = [[[MTLCompileOptions alloc] init] autorelease]; + options.languageVersion = MTLLanguageVersion2_2; + options.fastMathEnabled = YES; + + NSString *source_to_compile = shd_builder_->msl_source_vert_; + for (int src_stage = 0; src_stage <= 1; src_stage++) { + + source_to_compile = (src_stage == 0) ? shd_builder_->msl_source_vert_ : + shd_builder_->msl_source_frag_; + + /* Transform feedback, skip compilation. */ + if (src_stage == 1 && (transform_feedback_type_ != GPU_SHADER_TFB_NONE)) { + shader_library_frag_ = nil; + break; + } + + /* Concatenate common source. */ + NSString *str = [NSString stringWithUTF8String:datatoc_mtl_shader_common_msl]; + NSString *source_with_header_a = [str stringByAppendingString:source_to_compile]; + + /* Inject unique context ID to avoid cross-context shader cache collisions. + * Required on macOS 11.0. */ + NSString *source_with_header = source_with_header_a; + if (@available(macos 11.0, *)) { + /* Pass-through. Availability syntax requirement, expression cannot be negated. */ + } + else { + source_with_header = [source_with_header_a + stringByAppendingString:[NSString stringWithFormat:@"\n\n#define MTL_CONTEXT_IND %d\n", + context_->context_id]]; + } + [source_with_header retain]; + + /* Prepare Shader Library. */ + NSError *error = nullptr; + id<MTLLibrary> library = [device newLibraryWithSource:source_with_header + options:options + error:&error]; + if (error) { + /* Only exit out if genuine error and not warning. */ + if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location == + NSNotFound) { + NSLog( + @"Compile Error - Metal Shader Library (Stage: %d), error %@ \n", src_stage, error); + BLI_assert(false); + + /* Release temporary compilation resources. 
*/ + delete shd_builder_; + return false; + } + } + + MTL_LOG_INFO("Successfully compiled Metal Shader Library (Stage: %d) for shader; %s\n", + src_stage, + name); + BLI_assert(library != nil); + if (src_stage == 0) { + /* Retain generated library and assign debug name. */ + shader_library_vert_ = library; + [shader_library_vert_ retain]; + shader_library_vert_.label = [NSString stringWithUTF8String:this->name]; + } + else { + /* Retain generated library for fragment shader and assign debug name. */ + shader_library_frag_ = library; + [shader_library_frag_ retain]; + shader_library_frag_.label = [NSString stringWithUTF8String:this->name]; + } + + [source_with_header autorelease]; + } + pso_descriptor_.label = [NSString stringWithUTF8String:this->name]; + + /* Prepare descriptor. */ + pso_descriptor_ = [[MTLRenderPipelineDescriptor alloc] init]; + [pso_descriptor_ retain]; + + /* Shader has successfully been created. */ + valid_ = true; + + /* Prepare backing data storage for local uniforms. */ + const MTLShaderUniformBlock &push_constant_block = mtl_interface->get_push_constant_block(); + if (push_constant_block.size > 0) { + push_constant_data_ = MEM_callocN(push_constant_block.size, __func__); + this->push_constant_bindstate_mark_dirty(true); + } + else { + push_constant_data_ = nullptr; + } + } + + /* Release temporary compilation resources. */ + delete shd_builder_; + return true; +} + +void MTLShader::transform_feedback_names_set(Span<const char *> name_list, + const eGPUShaderTFBType geom_type) +{ + tf_output_name_list_.clear(); + for (int i = 0; i < name_list.size(); i++) { + tf_output_name_list_.append(std::string(name_list[i])); + } + transform_feedback_type_ = geom_type; +} + +bool MTLShader::transform_feedback_enable(GPUVertBuf *buf) +{ + BLI_assert(transform_feedback_type_ != GPU_SHADER_TFB_NONE); + BLI_assert(buf); + transform_feedback_active_ = true; + transform_feedback_vertbuf_ = buf; + /* TODO(Metal): Enable this assertion once #MTLVertBuf lands. */ + // BLI_assert(static_cast<MTLVertBuf *>(unwrap(transform_feedback_vertbuf_))->get_usage_type() == + // GPU_USAGE_DEVICE_ONLY); + return true; +} + +void MTLShader::transform_feedback_disable() +{ + transform_feedback_active_ = false; + transform_feedback_vertbuf_ = nullptr; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Shader Binding. + * \{ */ + +void MTLShader::bind() +{ + MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + if (interface == nullptr || !this->is_valid()) { + MTL_LOG_WARNING( + "MTLShader::bind - Shader '%s' has no valid implementation in Metal, draw calls will be " + "skipped.\n", + this->name_get()); + } + ctx->pipeline_state.active_shader = this; +} + +void MTLShader::unbind() +{ + MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + ctx->pipeline_state.active_shader = nullptr; +} + +void MTLShader::uniform_float(int location, int comp_len, int array_size, const float *data) +{ + BLI_assert(this); + if (!this->is_valid()) { + return; + } + MTLShaderInterface *mtl_interface = get_interface(); + if (location < 0 || location >= mtl_interface->get_total_uniforms()) { + MTL_LOG_WARNING("Uniform location %d is not valid in Shader %s\n", location, this->name_get()); + return; + } + + /* Fetch more information about uniform from interface. */ + const MTLShaderUniform &uniform = mtl_interface->get_uniform(location); + + /* Prepare to copy data into local shader push constant memory block. 
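 * Layout note (worked example): MSL pads a float3 element to 16 bytes inside the push-constant
 * block while the incoming GLSL data is packed at 12 bytes per element, so an array of four
 * vec3 uniforms copies 4 * 12 = 48 source bytes into 4 * 16 = 64 destination bytes; the
 * per-element copy below advances the destination pointer by 16 and the source by 12.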
*/ + BLI_assert(push_constant_data_ != nullptr); + uint8_t *dest_ptr = (uint8_t *)push_constant_data_; + dest_ptr += uniform.byte_offset; + uint32_t copy_size = sizeof(float) * comp_len * array_size; + + /* Test per-element size. It is valid to copy less array elements than the total, but each + * array element needs to match. */ + uint32_t source_per_element_size = sizeof(float) * comp_len; + uint32_t dest_per_element_size = uniform.size_in_bytes / uniform.array_len; + BLI_assert_msg( + source_per_element_size <= dest_per_element_size, + "source Per-array-element size must be smaller than destination storage capacity for " + "that data"); + + if (source_per_element_size < dest_per_element_size) { + switch (uniform.type) { + + /* Special case for handling 'vec3' array upload. */ + case MTL_DATATYPE_FLOAT3: { + int numvecs = uniform.array_len; + uint8_t *data_c = (uint8_t *)data; + + /* It is more efficient on the host to only modify data if it has changed. + * Data modifications are small, so memory comparison is cheap. + * If uniforms have remained unchanged, then we avoid both copying + * data into the local uniform struct, and upload of the modified uniform + * contents in the command stream. */ + bool changed = false; + for (int i = 0; i < numvecs; i++) { + changed = changed || (memcmp((void *)dest_ptr, (void *)data_c, sizeof(float) * 3) != 0); + if (changed) { + memcpy((void *)dest_ptr, (void *)data_c, sizeof(float) * 3); + } + data_c += sizeof(float) * 3; + dest_ptr += sizeof(float) * 4; + } + if (changed) { + this->push_constant_bindstate_mark_dirty(true); + } + return; + } + + /* Special case for handling 'mat3' upload. */ + case MTL_DATATYPE_FLOAT3x3: { + int numvecs = 3 * uniform.array_len; + uint8_t *data_c = (uint8_t *)data; + + /* It is more efficient on the host to only modify data if it has changed. + * Data modifications are small, so memory comparison is cheap. + * If uniforms have remained unchanged, then we avoid both copying + * data into the local uniform struct, and upload of the modified uniform + * contents in the command stream. */ + bool changed = false; + for (int i = 0; i < numvecs; i++) { + changed = changed || (memcmp((void *)dest_ptr, (void *)data_c, sizeof(float) * 3) != 0); + if (changed) { + memcpy((void *)dest_ptr, (void *)data_c, sizeof(float) * 3); + } + data_c += sizeof(float) * 3; + dest_ptr += sizeof(float) * 4; + } + if (changed) { + this->push_constant_bindstate_mark_dirty(true); + } + return; + } + default: + shader_debug_printf("INCOMPATIBLE UNIFORM TYPE: %d\n", uniform.type); + break; + } + } + + /* Debug checks. */ + BLI_assert_msg( + copy_size <= uniform.size_in_bytes, + "Size of provided uniform data is greater than size specified in Shader interface\n"); + + /* Only flag UBO as modified if data is different -- This can avoid re-binding of unmodified + * local uniform data. */ + bool data_changed = (memcmp((void *)dest_ptr, (void *)data, copy_size) != 0); + if (data_changed) { + this->push_constant_bindstate_mark_dirty(true); + memcpy((void *)dest_ptr, (void *)data, copy_size); + } +} + +void MTLShader::uniform_int(int location, int comp_len, int array_size, const int *data) +{ + BLI_assert(this); + if (!this->is_valid()) { + return; + } + + /* NOTE(Metal): Invalidation warning for uniform re-mapping of texture slots, unsupported in + * Metal, as we cannot point a texture binding at a different slot. 
*/ + MTLShaderInterface *mtl_interface = this->get_interface(); + if (location >= mtl_interface->get_total_uniforms() && + location < (mtl_interface->get_total_uniforms() + mtl_interface->get_total_textures())) { + MTL_LOG_WARNING( + "Texture uniform location re-mapping unsupported in Metal. (Possibly also bad uniform " + "location %d)\n", + location); + return; + } + + if (location < 0 || location >= mtl_interface->get_total_uniforms()) { + MTL_LOG_WARNING( + "Uniform is not valid at location %d - Shader %s\n", location, this->name_get()); + return; + } + + /* Fetch more information about uniform from interface. */ + const MTLShaderUniform &uniform = mtl_interface->get_uniform(location); + + /* Determine data location in uniform block. */ + BLI_assert(push_constant_data_ != nullptr); + uint8_t *ptr = (uint8_t *)push_constant_data_; + ptr += uniform.byte_offset; + + /* Copy data into local block. Only flag UBO as modified if data is different + * This can avoid re-binding of unmodified local uniform data, reducing + * the total number of copy operations needed and data transfers between + * CPU and GPU. */ + bool data_changed = (memcmp((void *)ptr, (void *)data, sizeof(int) * comp_len * array_size) != + 0); + if (data_changed) { + this->push_constant_bindstate_mark_dirty(true); + memcpy((void *)ptr, (void *)data, sizeof(int) * comp_len * array_size); + } +} + +bool MTLShader::get_push_constant_is_dirty() +{ + return push_constant_modified_; +} + +void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty) +{ + push_constant_modified_ = is_dirty; +} + +void MTLShader::vertformat_from_shader(GPUVertFormat *format) const +{ + GPU_vertformat_clear(format); + + const MTLShaderInterface *mtl_interface = static_cast<const MTLShaderInterface *>(interface); + for (const uint attr_id : IndexRange(mtl_interface->get_total_attributes())) { + const MTLShaderInputAttribute &attr = mtl_interface->get_attribute(attr_id); + + /* Extract type parameters from Metal type. */ + GPUVertCompType comp_type = comp_type_from_vert_format(attr.format); + uint comp_len = comp_count_from_vert_format(attr.format); + GPUVertFetchMode fetch_mode = fetchmode_from_vert_format(attr.format); + + GPU_vertformat_attr_add(format, + mtl_interface->get_name_at_offset(attr.name_offset), + comp_type, + comp_len, + fetch_mode); + } +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name METAL Custom Behavior + * \{ */ + +void MTLShader::set_vertex_function_name(NSString *vert_function_name) +{ + vertex_function_name_ = vert_function_name; +} + +void MTLShader::set_fragment_function_name(NSString *frag_function_name) +{ + fragment_function_name_ = frag_function_name; +} + +void MTLShader::shader_source_from_msl(NSString *input_vertex_source, + NSString *input_fragment_source) +{ + BLI_assert(shd_builder_ != nullptr); + shd_builder_->msl_source_vert_ = input_vertex_source; + shd_builder_->msl_source_frag_ = input_fragment_source; + shd_builder_->source_from_msl_ = true; +} + +void MTLShader::set_interface(MTLShaderInterface *interface) +{ + /* Assign gpu::Shader super-class interface. */ + Shader::interface = interface; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Bake Pipeline State Objects + * \{ */ + +/** + * Bakes or fetches a pipeline state using the current + * #MTLRenderPipelineStateDescriptor state. 
+ * + * This state contains information on shader inputs/outputs, such + * as the vertex descriptor, used to control vertex assembly for + * current vertex data, and active render target information, + * describing the output attachment pixel formats. + * + * Other rendering parameters such as global point-size, blend state, color mask + * etc; are also used. See mtl_shader.h for full #MLRenderPipelineStateDescriptor. + */ +MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state( + MTLContext *ctx, MTLPrimitiveTopologyClass prim_type) +{ + /* NOTE(Metal): PSO cache can be accessed from multiple threads, though these operations should + * be thread-safe due to organization of high-level renderer. If there are any issues, then + * access can be guarded as appropriate. */ + BLI_assert(this); + MTLShaderInterface *mtl_interface = this->get_interface(); + BLI_assert(mtl_interface); + BLI_assert(this->is_valid()); + + /* NOTE(Metal): Vertex input assembly description will have been populated externally + * via #MTLBatch or #MTLImmediate during binding or draw. */ + + /* Resolve Context Frame-buffer state. */ + MTLFrameBuffer *framebuffer = ctx->get_current_framebuffer(); + + /* Update global pipeline descriptor. */ + MTLStateManager *state_manager = static_cast<MTLStateManager *>( + MTLContext::get()->state_manager); + MTLRenderPipelineStateDescriptor &pipeline_descriptor = state_manager->get_pipeline_descriptor(); + + pipeline_descriptor.num_color_attachments = 0; + for (int attachment = 0; attachment < GPU_FB_MAX_COLOR_ATTACHMENT; attachment++) { + MTLAttachment color_attachment = framebuffer->get_color_attachment(attachment); + + if (color_attachment.used) { + /* If SRGB is disabled and format is SRGB, use color data directly with no conversions + * between linear and SRGB. */ + MTLPixelFormat mtl_format = gpu_texture_format_to_metal( + color_attachment.texture->format_get()); + if (framebuffer->get_is_srgb() && !framebuffer->get_srgb_enabled()) { + mtl_format = MTLPixelFormatRGBA8Unorm; + } + pipeline_descriptor.color_attachment_format[attachment] = mtl_format; + } + else { + pipeline_descriptor.color_attachment_format[attachment] = MTLPixelFormatInvalid; + } + + pipeline_descriptor.num_color_attachments += (color_attachment.used) ? 1 : 0; + } + MTLAttachment depth_attachment = framebuffer->get_depth_attachment(); + MTLAttachment stencil_attachment = framebuffer->get_stencil_attachment(); + pipeline_descriptor.depth_attachment_format = (depth_attachment.used) ? + gpu_texture_format_to_metal( + depth_attachment.texture->format_get()) : + MTLPixelFormatInvalid; + pipeline_descriptor.stencil_attachment_format = + (stencil_attachment.used) ? + gpu_texture_format_to_metal(stencil_attachment.texture->format_get()) : + MTLPixelFormatInvalid; + + /* Resolve Context Pipeline State (required by PSO). 
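+   *
+   * Unlike GL, blend factors, the color write mask and point size are baked into the Metal PSO
+   * rather than being dynamic state, so they are copied into the descriptor here and become
+   * part of the cache key checked just below. As an illustration only:
+   *
+   *   GPU_blend(GPU_BLEND_ALPHA);     // draw -> bakes or fetches PSO variant A
+   *   GPU_blend(GPU_BLEND_ADDITIVE);  // draw -> bakes or fetches PSO variant B
+   *
+   * i.e. each distinct state combination yields its own MTLRenderPipelineState in pso_cache_.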
*/ + pipeline_descriptor.color_write_mask = ctx->pipeline_state.color_write_mask; + pipeline_descriptor.blending_enabled = ctx->pipeline_state.blending_enabled; + pipeline_descriptor.alpha_blend_op = ctx->pipeline_state.alpha_blend_op; + pipeline_descriptor.rgb_blend_op = ctx->pipeline_state.rgb_blend_op; + pipeline_descriptor.dest_alpha_blend_factor = ctx->pipeline_state.dest_alpha_blend_factor; + pipeline_descriptor.dest_rgb_blend_factor = ctx->pipeline_state.dest_rgb_blend_factor; + pipeline_descriptor.src_alpha_blend_factor = ctx->pipeline_state.src_alpha_blend_factor; + pipeline_descriptor.src_rgb_blend_factor = ctx->pipeline_state.src_rgb_blend_factor; + pipeline_descriptor.point_size = ctx->pipeline_state.point_size; + + /* Primitive Type -- Primitive topology class needs to be specified for layered rendering. */ + bool requires_specific_topology_class = uses_mtl_array_index_ || + prim_type == MTLPrimitiveTopologyClassPoint; + pipeline_descriptor.vertex_descriptor.prim_topology_class = + (requires_specific_topology_class) ? prim_type : MTLPrimitiveTopologyClassUnspecified; + + /* Check if current PSO exists in the cache. */ + MTLRenderPipelineStateInstance **pso_lookup = pso_cache_.lookup_ptr(pipeline_descriptor); + MTLRenderPipelineStateInstance *pipeline_state = (pso_lookup) ? *pso_lookup : nullptr; + if (pipeline_state != nullptr) { + return pipeline_state; + } + + shader_debug_printf("Baking new pipeline variant for shader: %s\n", this->name); + + /* Generate new Render Pipeline State Object (PSO). */ + @autoreleasepool { + /* Prepare Render Pipeline Descriptor. */ + + /* Setup function specialization constants, used to modify and optimize + * generated code based on current render pipeline configuration. */ + MTLFunctionConstantValues *values = [[MTLFunctionConstantValues new] autorelease]; + + /* Prepare Vertex descriptor based on current pipeline vertex binding state. */ + MTLRenderPipelineStateDescriptor ¤t_state = pipeline_descriptor; + MTLRenderPipelineDescriptor *desc = pso_descriptor_; + [desc reset]; + pso_descriptor_.label = [NSString stringWithUTF8String:this->name]; + + /* Offset the bind index for Uniform buffers such that they begin after the VBO + * buffer bind slots. `MTL_uniform_buffer_base_index` is passed as a function + * specialization constant, customized per unique pipeline state permutation. + * + * NOTE: For binding point compaction, we could use the number of VBOs present + * in the current PSO configuration `current_state.vertex_descriptor.num_vert_buffers`). + * However, it is more efficient to simply offset the uniform buffer base index to the + * maximal number of VBO bind-points, as then UBO bind-points for similar draw calls + * will align and avoid the requirement for additional binding. */ + int MTL_uniform_buffer_base_index = GPU_BATCH_VBO_MAX_LEN; + + /* Null buffer index is used if an attribute is not found in the + * bound VBOs #VertexFormat. */ + int null_buffer_index = current_state.vertex_descriptor.num_vert_buffers; + bool using_null_buffer = false; + + if (this->get_uses_ssbo_vertex_fetch()) { + /* If using SSBO Vertex fetch mode, no vertex descriptor is required + * as we wont be using stage-in. */ + desc.vertexDescriptor = nil; + desc.inputPrimitiveTopology = MTLPrimitiveTopologyClassUnspecified; + + /* We want to offset the uniform buffer base to allow for sufficient VBO binding slots - We + * also require +1 slot for the Index buffer. 
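+       *
+       * Sketch of the assumed buffer argument layout that this offset relies on (exact slot
+       * values come from the MTL_SSBO_VERTEX_FETCH_* defines):
+       *
+       *   vertex buffer bindings ........... up to MTL_SSBO_VERTEX_FETCH_MAX_VBOS slots
+       *   MTL_SSBO_VERTEX_FETCH_IBO_INDEX .. the index buffer, for indexed draws
+       *   MTL_uniform_buffer_base_index .... uniform buffers, starting at the IBO slot + 1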
*/ + MTL_uniform_buffer_base_index = MTL_SSBO_VERTEX_FETCH_IBO_INDEX + 1; + } + else { + for (const uint i : IndexRange(current_state.vertex_descriptor.num_attributes)) { + + /* Metal back-end attribute descriptor state. */ + MTLVertexAttributeDescriptorPSO &attribute_desc = + current_state.vertex_descriptor.attributes[i]; + + /* Flag format conversion */ + /* In some cases, Metal cannot implicitly convert between data types. + * In these instances, the fetch mode #GPUVertFetchMode as provided in the vertex format + * is passed in, and used to populate function constants named: MTL_AttributeConvert0..15. + * + * It is then the responsibility of the vertex shader to perform any necessary type + * casting. + * + * See `mtl_shader.hh` for more information. Relevant Metal API documentation: + * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc + */ + if (attribute_desc.format == MTLVertexFormatInvalid) { + MTL_LOG_WARNING( + "MTLShader: baking pipeline state for '%s'- expected input attribute at " + "index '%d' but none was specified in the current vertex state\n", + mtl_interface->get_name(), + i); + + /* Write out null conversion constant if attribute unused. */ + int MTL_attribute_conversion_mode = 0; + [values setConstantValue:&MTL_attribute_conversion_mode + type:MTLDataTypeInt + withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]]; + continue; + } + + int MTL_attribute_conversion_mode = (int)attribute_desc.format_conversion_mode; + [values setConstantValue:&MTL_attribute_conversion_mode + type:MTLDataTypeInt + withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]]; + if (MTL_attribute_conversion_mode == GPU_FETCH_INT_TO_FLOAT_UNIT || + MTL_attribute_conversion_mode == GPU_FETCH_INT_TO_FLOAT) { + shader_debug_printf( + "TODO(Metal): Shader %s needs to support internal format conversion\n", + mtl_interface->name); + } + + /* Copy metal back-end attribute descriptor state into PSO descriptor. + * NOTE: need to copy each element due to direct assignment restrictions. + * Also note */ + MTLVertexAttributeDescriptor *mtl_attribute = desc.vertexDescriptor.attributes[i]; + + mtl_attribute.format = attribute_desc.format; + mtl_attribute.offset = attribute_desc.offset; + mtl_attribute.bufferIndex = attribute_desc.buffer_index; + } + + for (const uint i : IndexRange(current_state.vertex_descriptor.num_vert_buffers)) { + /* Metal back-end state buffer layout. */ + const MTLVertexBufferLayoutDescriptorPSO &buf_layout = + current_state.vertex_descriptor.buffer_layouts[i]; + /* Copy metal back-end buffer layout state into PSO descriptor. + * NOTE: need to copy each element due to copying from internal + * back-end descriptor to Metal API descriptor. */ + MTLVertexBufferLayoutDescriptor *mtl_buf_layout = desc.vertexDescriptor.layouts[i]; + + mtl_buf_layout.stepFunction = buf_layout.step_function; + mtl_buf_layout.stepRate = buf_layout.step_rate; + mtl_buf_layout.stride = buf_layout.stride; + } + + /* Mark empty attribute conversion. */ + for (int i = current_state.vertex_descriptor.num_attributes; i < GPU_VERT_ATTR_MAX_LEN; + i++) { + int MTL_attribute_conversion_mode = 0; + [values setConstantValue:&MTL_attribute_conversion_mode + type:MTLDataTypeInt + withName:[NSString stringWithFormat:@"MTL_AttributeConvert%d", i]]; + } + + /* DEBUG: Missing/empty attributes. 
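+       * The fallback below points any shader attribute that the bound vertex buffers do not
+       * provide at a dedicated "null" buffer binding. That layout uses
+       * MTLVertexStepFunctionConstant with a step rate of 0, so every vertex reads the same
+       * dummy element and the buffer only needs to be large enough for a single attribute --
+       * a cheap way to keep the PSO valid without faulting on unbound data.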
*/ + /* Attributes are normally mapped as part of the state setting based on the used + * #GPUVertFormat, however, if attributes have not been set, we can sort them out here. */ + for (const uint i : IndexRange(mtl_interface->get_total_attributes())) { + const MTLShaderInputAttribute &attribute = mtl_interface->get_attribute(i); + MTLVertexAttributeDescriptor *current_attribute = desc.vertexDescriptor.attributes[i]; + + if (current_attribute.format == MTLVertexFormatInvalid) { +#if MTL_DEBUG_SHADER_ATTRIBUTES == 1 + MTL_LOG_INFO("-> Filling in unbound attribute '%s' for shader PSO '%s' \n", + attribute.name, + mtl_interface->name); +#endif + current_attribute.format = attribute.format; + current_attribute.offset = 0; + current_attribute.bufferIndex = null_buffer_index; + + /* Add Null vert buffer binding for invalid attributes. */ + if (!using_null_buffer) { + MTLVertexBufferLayoutDescriptor *null_buf_layout = + desc.vertexDescriptor.layouts[null_buffer_index]; + + /* Use constant step function such that null buffer can + * contain just a singular dummy attribute. */ + null_buf_layout.stepFunction = MTLVertexStepFunctionConstant; + null_buf_layout.stepRate = 0; + null_buf_layout.stride = max_ii(null_buf_layout.stride, attribute.size); + + /* If we are using the maximum number of vertex buffers, or tight binding indices, + * MTL_uniform_buffer_base_index needs shifting to the bind slot after the null buffer + * index. */ + if (null_buffer_index >= MTL_uniform_buffer_base_index) { + MTL_uniform_buffer_base_index = null_buffer_index + 1; + } + using_null_buffer = true; +#if MTL_DEBUG_SHADER_ATTRIBUTES == 1 + MTL_LOG_INFO("Setting up buffer binding for null attribute with buffer index %d\n", + null_buffer_index); +#endif + } + } + } + + /* Primitive Topology */ + desc.inputPrimitiveTopology = pipeline_descriptor.vertex_descriptor.prim_topology_class; + } + + /* Update constant value for 'MTL_uniform_buffer_base_index' */ + [values setConstantValue:&MTL_uniform_buffer_base_index + type:MTLDataTypeInt + withName:@"MTL_uniform_buffer_base_index"]; + + /* Transform feedback constant */ + int MTL_transform_feedback_buffer_index = (this->transform_feedback_type_ != + GPU_SHADER_TFB_NONE) ? + MTL_uniform_buffer_base_index + + mtl_interface->get_total_uniform_blocks() : + -1; + if (this->transform_feedback_type_ != GPU_SHADER_TFB_NONE) { + [values setConstantValue:&MTL_transform_feedback_buffer_index + type:MTLDataTypeInt + withName:@"MTL_transform_feedback_buffer_index"]; + } + + /* gl_PointSize constant */ + bool null_pointsize = true; + float MTL_pointsize = pipeline_descriptor.point_size; + if (pipeline_descriptor.vertex_descriptor.prim_topology_class == + MTLPrimitiveTopologyClassPoint) { + /* `if pointsize is > 0.0`, PROGRAM_POINT_SIZE is enabled, and `gl_PointSize` shader keyword + * overrides the value. Otherwise, if < 0.0, use global constant point size. 
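+         * Concretely (values illustrative): a point_size of -5.0f stored in the pipeline state
+         * results in the `MTL_global_pointsize` function constant being set to 5.0f below,
+         * while a positive point_size leaves the constant at 0.0f so the value written to
+         * gl_PointSize by the shader takes effect instead.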
*/ + if (MTL_pointsize < 0.0) { + MTL_pointsize = fabsf(MTL_pointsize); + [values setConstantValue:&MTL_pointsize + type:MTLDataTypeFloat + withName:@"MTL_global_pointsize"]; + null_pointsize = false; + } + } + + if (null_pointsize) { + MTL_pointsize = 0.0f; + [values setConstantValue:&MTL_pointsize + type:MTLDataTypeFloat + withName:@"MTL_global_pointsize"]; + } + + /* Compile functions */ + NSError *error = nullptr; + desc.vertexFunction = [shader_library_vert_ newFunctionWithName:vertex_function_name_ + constantValues:values + error:&error]; + if (error) { + NSLog(@"Compile Error - Metal Shader vertex function, error %@", error); + + /* Only exit out if genuine error and not warning */ + if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location == + NSNotFound) { + BLI_assert(false); + return nullptr; + } + } + + /* If transform feedback is used, Vertex-only stage */ + if (transform_feedback_type_ == GPU_SHADER_TFB_NONE) { + desc.fragmentFunction = [shader_library_frag_ newFunctionWithName:fragment_function_name_ + constantValues:values + error:&error]; + if (error) { + NSLog(@"Compile Error - Metal Shader fragment function, error %@", error); + + /* Only exit out if genuine error and not warning */ + if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location == + NSNotFound) { + BLI_assert(false); + return nullptr; + } + } + } + else { + desc.fragmentFunction = nil; + desc.rasterizationEnabled = false; + } + + /* Setup pixel format state */ + for (int color_attachment = 0; color_attachment < GPU_FB_MAX_COLOR_ATTACHMENT; + color_attachment++) { + /* Fetch color attachment pixel format in back-end pipeline state. */ + MTLPixelFormat pixel_format = current_state.color_attachment_format[color_attachment]; + /* Populate MTL API PSO attachment descriptor. 
*/ + MTLRenderPipelineColorAttachmentDescriptor *col_attachment = + desc.colorAttachments[color_attachment]; + + col_attachment.pixelFormat = pixel_format; + if (pixel_format != MTLPixelFormatInvalid) { + bool format_supports_blending = mtl_format_supports_blending(pixel_format); + + col_attachment.writeMask = current_state.color_write_mask; + col_attachment.blendingEnabled = current_state.blending_enabled && + format_supports_blending; + if (format_supports_blending && current_state.blending_enabled) { + col_attachment.alphaBlendOperation = current_state.alpha_blend_op; + col_attachment.rgbBlendOperation = current_state.rgb_blend_op; + col_attachment.destinationAlphaBlendFactor = current_state.dest_alpha_blend_factor; + col_attachment.destinationRGBBlendFactor = current_state.dest_rgb_blend_factor; + col_attachment.sourceAlphaBlendFactor = current_state.src_alpha_blend_factor; + col_attachment.sourceRGBBlendFactor = current_state.src_rgb_blend_factor; + } + else { + if (current_state.blending_enabled && !format_supports_blending) { + shader_debug_printf( + "[Warning] Attempting to Bake PSO, but MTLPixelFormat %d does not support " + "blending\n", + *((int *)&pixel_format)); + } + } + } + } + desc.depthAttachmentPixelFormat = current_state.depth_attachment_format; + desc.stencilAttachmentPixelFormat = current_state.stencil_attachment_format; + + /* Compile PSO */ + + MTLAutoreleasedRenderPipelineReflection reflection_data; + id<MTLRenderPipelineState> pso = [ctx->device + newRenderPipelineStateWithDescriptor:desc + options:MTLPipelineOptionBufferTypeInfo + reflection:&reflection_data + error:&error]; + if (error) { + NSLog(@"Failed to create PSO for shader: %s error %@\n", this->name, error); + BLI_assert(false); + return nullptr; + } + else if (!pso) { + NSLog(@"Failed to create PSO for shader: %s, but no error was provided!\n", this->name); + BLI_assert(false); + return nullptr; + } + else { + NSLog(@"Successfully compiled PSO for shader: %s (Metal Context: %p)\n", this->name, ctx); + } + + /* Prepare pipeline state instance. */ + MTLRenderPipelineStateInstance *pso_inst = new MTLRenderPipelineStateInstance(); + pso_inst->vert = desc.vertexFunction; + pso_inst->frag = desc.fragmentFunction; + pso_inst->pso = pso; + pso_inst->base_uniform_buffer_index = MTL_uniform_buffer_base_index; + pso_inst->null_attribute_buffer_index = (using_null_buffer) ? null_buffer_index : -1; + pso_inst->transform_feedback_buffer_index = MTL_transform_feedback_buffer_index; + pso_inst->shader_pso_index = pso_cache_.size(); + + pso_inst->reflection_data_available = (reflection_data != nil); + if (reflection_data != nil) { + + /* Extract shader reflection data for buffer bindings. + * This reflection data is used to contrast the binding information + * we know about in the interface against the bindings in the finalized + * PSO. This accounts for bindings which have been stripped out during + * optimization, and allows us to both avoid over-binding and also + * allows us to verify size-correctness for bindings, to ensure + * that buffers bound are not smaller than the size of expected data. 
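+       *
+       * As a sketch of how a consumer might use this at bind time (type and field names here
+       * are illustrative, not the ones defined in the back-end headers):
+       *
+       *   const auto &arg = pso_inst->buffer_bindings_reflection_data_vert[slot];
+       *   if (arg.active && bound_buffer_size < arg.size) {
+       *     // Bound UBO is smaller than what the compiled vertex function expects.
+       *   }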
*/ + NSArray<MTLArgument *> *vert_args = [reflection_data vertexArguments]; + + pso_inst->buffer_bindings_reflection_data_vert.clear(); + int buffer_binding_max_ind = 0; + + for (int i = 0; i < [vert_args count]; i++) { + MTLArgument *arg = [vert_args objectAtIndex:i]; + if ([arg type] == MTLArgumentTypeBuffer) { + int buf_index = [arg index] - MTL_uniform_buffer_base_index; + if (buf_index >= 0) { + buffer_binding_max_ind = max_ii(buffer_binding_max_ind, buf_index); + } + } + } + pso_inst->buffer_bindings_reflection_data_vert.resize(buffer_binding_max_ind + 1); + for (int i = 0; i < buffer_binding_max_ind + 1; i++) { + pso_inst->buffer_bindings_reflection_data_vert[i] = {0, 0, 0, false}; + } + + for (int i = 0; i < [vert_args count]; i++) { + MTLArgument *arg = [vert_args objectAtIndex:i]; + if ([arg type] == MTLArgumentTypeBuffer) { + int buf_index = [arg index] - MTL_uniform_buffer_base_index; + + if (buf_index >= 0) { + pso_inst->buffer_bindings_reflection_data_vert[buf_index] = { + (uint32_t)([arg index]), + (uint32_t)([arg bufferDataSize]), + (uint32_t)([arg bufferAlignment]), + ([arg isActive] == YES) ? true : false}; + } + } + } + + NSArray<MTLArgument *> *frag_args = [reflection_data fragmentArguments]; + + pso_inst->buffer_bindings_reflection_data_frag.clear(); + buffer_binding_max_ind = 0; + + for (int i = 0; i < [frag_args count]; i++) { + MTLArgument *arg = [frag_args objectAtIndex:i]; + if ([arg type] == MTLArgumentTypeBuffer) { + int buf_index = [arg index] - MTL_uniform_buffer_base_index; + if (buf_index >= 0) { + buffer_binding_max_ind = max_ii(buffer_binding_max_ind, buf_index); + } + } + } + pso_inst->buffer_bindings_reflection_data_frag.resize(buffer_binding_max_ind + 1); + for (int i = 0; i < buffer_binding_max_ind + 1; i++) { + pso_inst->buffer_bindings_reflection_data_frag[i] = {0, 0, 0, false}; + } + + for (int i = 0; i < [frag_args count]; i++) { + MTLArgument *arg = [frag_args objectAtIndex:i]; + if ([arg type] == MTLArgumentTypeBuffer) { + int buf_index = [arg index] - MTL_uniform_buffer_base_index; + shader_debug_printf(" BUF IND: %d (arg name: %s)\n", buf_index, [[arg name] UTF8String]); + if (buf_index >= 0) { + pso_inst->buffer_bindings_reflection_data_frag[buf_index] = { + (uint32_t)([arg index]), + (uint32_t)([arg bufferDataSize]), + (uint32_t)([arg bufferAlignment]), + ([arg isActive] == YES) ? true : false}; + } + } + } + } + + [pso_inst->vert retain]; + [pso_inst->frag retain]; + [pso_inst->pso retain]; + + /* Insert into pso cache. */ + pso_cache_.add(pipeline_descriptor, pso_inst); + shader_debug_printf("PSO CACHE: Stored new variant in PSO cache for shader '%s'\n", + this->name); + return pso_inst; + } +} +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name SSBO-vertex-fetch-mode attribute control. 
+ * \{ */ + +int MTLShader::ssbo_vertex_type_to_attr_type(MTLVertexFormat attribute_type) +{ + switch (attribute_type) { + case MTLVertexFormatFloat: + return GPU_SHADER_ATTR_TYPE_FLOAT; + case MTLVertexFormatInt: + return GPU_SHADER_ATTR_TYPE_INT; + case MTLVertexFormatUInt: + return GPU_SHADER_ATTR_TYPE_UINT; + case MTLVertexFormatShort: + return GPU_SHADER_ATTR_TYPE_SHORT; + case MTLVertexFormatUChar: + return GPU_SHADER_ATTR_TYPE_CHAR; + case MTLVertexFormatUChar2: + return GPU_SHADER_ATTR_TYPE_CHAR2; + case MTLVertexFormatUChar3: + return GPU_SHADER_ATTR_TYPE_CHAR3; + case MTLVertexFormatUChar4: + return GPU_SHADER_ATTR_TYPE_CHAR4; + case MTLVertexFormatFloat2: + return GPU_SHADER_ATTR_TYPE_VEC2; + case MTLVertexFormatFloat3: + return GPU_SHADER_ATTR_TYPE_VEC3; + case MTLVertexFormatFloat4: + return GPU_SHADER_ATTR_TYPE_VEC4; + case MTLVertexFormatUInt2: + return GPU_SHADER_ATTR_TYPE_UVEC2; + case MTLVertexFormatUInt3: + return GPU_SHADER_ATTR_TYPE_UVEC3; + case MTLVertexFormatUInt4: + return GPU_SHADER_ATTR_TYPE_UVEC4; + case MTLVertexFormatInt2: + return GPU_SHADER_ATTR_TYPE_IVEC2; + case MTLVertexFormatInt3: + return GPU_SHADER_ATTR_TYPE_IVEC3; + case MTLVertexFormatInt4: + return GPU_SHADER_ATTR_TYPE_IVEC4; + case MTLVertexFormatUCharNormalized: + return GPU_SHADER_ATTR_TYPE_UCHAR_NORM; + case MTLVertexFormatUChar2Normalized: + return GPU_SHADER_ATTR_TYPE_UCHAR2_NORM; + case MTLVertexFormatUChar3Normalized: + return GPU_SHADER_ATTR_TYPE_UCHAR3_NORM; + case MTLVertexFormatUChar4Normalized: + return GPU_SHADER_ATTR_TYPE_UCHAR4_NORM; + case MTLVertexFormatInt1010102Normalized: + return GPU_SHADER_ATTR_TYPE_INT1010102_NORM; + case MTLVertexFormatShort3Normalized: + return GPU_SHADER_ATTR_TYPE_SHORT3_NORM; + default: + BLI_assert_msg(false, + "Not yet supported attribute type for SSBO vertex fetch -- Add entry " + "GPU_SHADER_ATTR_TYPE_** to shader defines, and in this table"); + return -1; + } + return -1; +} + +void MTLShader::ssbo_vertex_fetch_bind_attributes_begin() +{ + MTLShaderInterface *mtl_interface = this->get_interface(); + ssbo_vertex_attribute_bind_active_ = true; + ssbo_vertex_attribute_bind_mask_ = (1 << mtl_interface->get_total_attributes()) - 1; + + /* Reset tracking of actively used VBO bind slots for SSBO vertex fetch mode. */ + for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) { + ssbo_vbo_slot_used_[i] = false; + } +} + +void MTLShader::ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_attr) +{ + /* Fetch attribute. */ + MTLShaderInterface *mtl_interface = this->get_interface(); + BLI_assert(ssbo_attr.mtl_attribute_index >= 0 && + ssbo_attr.mtl_attribute_index < mtl_interface->get_total_attributes()); + + /* Update bind-mask to verify this attribute has been used. */ + BLI_assert((ssbo_vertex_attribute_bind_mask_ & (1 << ssbo_attr.mtl_attribute_index)) == + (1 << ssbo_attr.mtl_attribute_index) && + "Attribute has already been bound"); + ssbo_vertex_attribute_bind_mask_ &= ~(1 << ssbo_attr.mtl_attribute_index); + + /* Fetch attribute uniform addresses from cache. */ + ShaderSSBOAttributeBinding &cached_ssbo_attribute = + cached_ssbo_attribute_bindings_[ssbo_attr.mtl_attribute_index]; + BLI_assert(cached_ssbo_attribute.attribute_index >= 0); + + /* Write attribute descriptor properties to shader uniforms. */ + this->uniform_int(cached_ssbo_attribute.uniform_offset, 1, 1, &ssbo_attr.attribute_offset); + this->uniform_int(cached_ssbo_attribute.uniform_stride, 1, 1, &ssbo_attr.per_vertex_stride); + int inst_val = (ssbo_attr.is_instance ? 
1 : 0); + this->uniform_int(cached_ssbo_attribute.uniform_fetchmode, 1, 1, &inst_val); + this->uniform_int(cached_ssbo_attribute.uniform_vbo_id, 1, 1, &ssbo_attr.vbo_id); + BLI_assert(ssbo_attr.attribute_format >= 0); + this->uniform_int(cached_ssbo_attribute.uniform_attr_type, 1, 1, &ssbo_attr.attribute_format); + ssbo_vbo_slot_used_[ssbo_attr.vbo_id] = true; +} + +void MTLShader::ssbo_vertex_fetch_bind_attributes_end(id<MTLRenderCommandEncoder> active_encoder) +{ + ssbo_vertex_attribute_bind_active_ = false; + + /* If our mask is non-zero, we have unassigned attributes. */ + if (ssbo_vertex_attribute_bind_mask_ != 0) { + MTLShaderInterface *mtl_interface = this->get_interface(); + + /* Determine if there is a free slot we can bind the null buffer to -- We should have at + * least ONE free slot in this instance. */ + int null_attr_buffer_slot = -1; + for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) { + if (!ssbo_vbo_slot_used_[i]) { + null_attr_buffer_slot = i; + break; + } + } + BLI_assert_msg(null_attr_buffer_slot >= 0, + "No suitable bind location for a NULL buffer was found"); + + for (int i = 0; i < mtl_interface->get_total_attributes(); i++) { + if (ssbo_vertex_attribute_bind_mask_ & (1 << i)) { + const MTLShaderInputAttribute *mtl_shader_attribute = &mtl_interface->get_attribute(i); +#if MTL_DEBUG_SHADER_ATTRIBUTES == 1 + MTL_LOG_WARNING( + "SSBO Vertex Fetch missing attribute with index: %d. Shader: %s, Attr " + "Name: " + "%s - Null buffer bound\n", + i, + this->name_get(), + mtl_shader_attribute->name); +#endif + /* Bind Attribute with NULL buffer index and stride zero (for constant access). */ + MTLSSBOAttribute ssbo_attr( + i, null_attr_buffer_slot, 0, 0, GPU_SHADER_ATTR_TYPE_FLOAT, false); + ssbo_vertex_fetch_bind_attribute(ssbo_attr); + MTL_LOG_WARNING( + "Unassigned Shader attribute: %s, Attr Name: %s -- Binding NULL BUFFER to " + "slot %d\n", + this->name_get(), + mtl_interface->get_name_at_offset(mtl_shader_attribute->name_offset), + null_attr_buffer_slot); + } + } + + /* Bind NULL buffer to given VBO slot. */ + MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get()); + id<MTLBuffer> null_buf = ctx->get_null_attribute_buffer(); + BLI_assert(null_buf); + + MTLRenderPassState &rps = ctx->main_command_buffer.get_render_pass_state(); + rps.bind_vertex_buffer(null_buf, 0, null_attr_buffer_slot); + } +} + +GPUVertBuf *MTLShader::get_transform_feedback_active_buffer() +{ + if (transform_feedback_type_ == GPU_SHADER_TFB_NONE || !transform_feedback_active_) { + return nullptr; + } + return transform_feedback_vertbuf_; +} + +bool MTLShader::has_transform_feedback_varying(std::string str) +{ + if (this->transform_feedback_type_ == GPU_SHADER_TFB_NONE) { + return false; + } + + return (std::find(tf_output_name_list_.begin(), tf_output_name_list_.end(), str) != + tf_output_name_list_.end()); +} + +} // blender::gpu::shdaer diff --git a/source/blender/gpu/metal/mtl_shader_generator.hh b/source/blender/gpu/metal/mtl_shader_generator.hh new file mode 100644 index 00000000000..43890ca0170 --- /dev/null +++ b/source/blender/gpu/metal/mtl_shader_generator.hh @@ -0,0 +1,727 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#pragma once + +#include "gpu_shader_create_info.hh" +#include "gpu_shader_private.hh" + +/** -- Metal Shader Generator for GLSL -> MSL conversion -- + * + * The Metal shader generator class is used as a conversion utility for generating + * a compatible MSL shader from a source GLSL shader. 
There are several steps + * involved in creating a shader, and structural changes which enable the source + * to function in the same way. + * + * 1) Extraction and conversion of shaders input's and output's to their Metal-compatible + * version. This is a subtle data transformation from GPUShaderCreateInfo, allowing + * for Metal-specific parameters. + * + * 2) Determine usage of shader features such as GL global variable usage, depth write output, + * clip distances, multilayered rendering, barycentric coordinates etc; + * + * 3) Generate MSL shader. + * + * 4) Populate #MTLShaderInterface, describing input/output structure, bind-points, buffer size and + * alignment, shader feature usage etc; Everything required by the Metal back-end to + * successfully enable use of shaders and GPU back-end features. + * + * + * + * For each shading stage, we generate an MSL shader following these steps: + * + * 1) Output custom shader defines describing modes e.g. whether we are using + * sampler bindings or argument buffers; at the top of the shader. + * + * 2) Inject common Metal headers. + * - `mtl_shader_defines.msl` is used to map GLSL functions to MSL. + * - `mtl_shader_common.msl` is added to ALL MSL shaders to provide + * common functionality required by the back-end. This primarily + * contains function-constant hooks, used in PSO generation. + * + * 3) Create a class Scope which wraps the GLSL shader. This is used to + * create a global per-thread scope around the shader source, to allow + * access to common shader members (GLSL globals, shader inputs/outputs etc) + * + * 4) Generate shader interface structs and populate local members where required for: + * - `VertexInputs` + * - `VertexOutputs` + * - `Uniforms` + * - `Uniform Blocks` + * - `textures` ; + * etc; + * + * 5) Inject GLSL source. + * + * 6) Generate MSL shader entry point function. Every Metal shader must have a + * vertex/fragment/kernel entry-point, which contains the function binding table. + * This is where bindings are specified and passed into the shader. + * + * For converted shaders, the MSL entry-point will also instantiate a shader + * class per thread, and pass over bound resource references into the class. + * + * Finally, the shaders "main()" method will be called, and outputs are copied. + * + * NOTE: For position outputs, the default output position will be converted to + * the Metal coordinate space, which involves flipping the Y coordinate and + * re-mapping the depth range between 0 and 1, as with Vulkan. + * + * + * The final shader structure looks as follows: + * + * \code{.cc} + * -- Shader defines -- + * #define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 0 + * ... etc ...; + * + * class MetalShaderVertexImp { + * + * -- Common shader interface structs -- + * struct VertexIn { + * vec4 pos [[attribute(0)]] + * } + * struct VertexOut {...} + * struct PushConstantBlock {...} + * struct drw_Globals {...} + * ... + * + * -- GLSL source code -- + * ... 
+ * }; + * + * vertex MetalShaderVertexImp::VertexOut vertex_function_entry( + * MetalShaderVertexImp::VertexIn v_in [[stage_in]], + * constant PushConstantBlock& globals [[buffer(MTL_uniform_buffer_base_index)]]) { + * + * MetalShaderVertexImp impl; + * -- Copy input members into impl instance -- + * -- Execute GLSL main function -- + * impl.main(); + * + * -- Copy outputs and return -- + * MetalShaderVertexImp::VertexOut out; + * out.pos = impl.pos; + * -- transform position to Metal coordinate system -- + * return v_out; + * } + * \endcode + * + * -- SSBO-vertex-fetchmode -- + * + * SSBO-vertex-fetchmode is a special option wherein vertex buffers are bound directly + * as buffers in the shader, rather than using the VertexDescriptor and [[stage_in]] vertex + * assembly. + * + * The purpose of this mode is to enable random-access reading of all vertex data. This is + * particularly useful for efficiently converting geometry shaders to Metal shading language, + * as these techniques are not supported natively in Metal. + * + * Geometry shaders can be re-created by firing off a vertex shader with the desired number of + * total output vertices. Each vertex can then read whichever input attributes it needs to + * achieve the output result. + * This manual reading is also used to provide support for GPU_provoking_vertex, wherein the + * output vertex for flat shading needs to change. In these cases, the manual vertex assembly + * can flip which vertices are read within the primitive. + * + * From an efficiency perspective, this is more GPU-friendly than geometry shading, due to improved + * parallelism throughout the whole pipe, and for Apple hardware specifically, there is no + * significant performance loss from manual vertex assembly vs under-the-hood assembly. + * + * This mode works by passing the required vertex descriptor information into the shader + * as uniform data, describing the type, stride, offset, step-mode and buffer index of each + * attribute, such that the shader SSBO-vertex-fetch utility functions know how to extract data. + * + * This also works with indexed rendering, + * by similarly binding the index buffer as a manual buffer. + * + * When this mode is used, the code generation and shader interface generation varies to + * accommodate the required features. + * + * This mode can be enabled in a shader with: + * + * `#pragma USE_SSBO_VERTEX_FETCH(TriangleList/LineList, output_vertex_count_per_input_primitive)` + * + * This mirrors the geometry shader interface `layout(triangle_strip, max_vertices = 3) out;` + */ + +/* SSBO vertex fetch attribute uniform parameter names. + * These uniforms are used to pass the information + * required to perform manual vertex assembly within + * the vertex shader. + * Each vertex attribute requires a number of properties + * in order to correctly extract data from the bound vertex + * buffers. */ +#ifndef NDEBUG +/* Global. */ +# define UNIFORM_SSBO_USES_INDEXED_RENDERING_STR "uniform_ssbo_uses_indexed_rendering" +# define UNIFORM_SSBO_INDEX_MODE_U16_STR "uniform_ssbo_index_mode_u16" +# define UNIFORM_SSBO_INPUT_PRIM_TYPE_STR "uniform_ssbo_input_prim_type" +# define UNIFORM_SSBO_INPUT_VERT_COUNT_STR "uniform_ssbo_input_vert_count" +/* Per-attribute. 
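+ * For a hypothetical attribute named `pos`, the composed uniform names become
+ * `uniform_ssbo_offset_pos`, `uniform_ssbo_stride_pos`, `uniform_ssbo_fetchmode_pos`,
+ * `uniform_ssbo_vbo_id_pos` and `uniform_ssbo_type_pos` (or `_sopos`, `_sspos`, ... with the
+ * shortened release prefixes defined in the #else branch below).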
*/ +# define UNIFORM_SSBO_OFFSET_STR "uniform_ssbo_offset_" +# define UNIFORM_SSBO_STRIDE_STR "uniform_ssbo_stride_" +# define UNIFORM_SSBO_FETCHMODE_STR "uniform_ssbo_fetchmode_" +# define UNIFORM_SSBO_VBO_ID_STR "uniform_ssbo_vbo_id_" +# define UNIFORM_SSBO_TYPE_STR "uniform_ssbo_type_" +#else +/* Global. */ +# define UNIFORM_SSBO_USES_INDEXED_RENDERING_STR "_ir" +# define UNIFORM_SSBO_INDEX_MODE_U16_STR "_mu" +# define UNIFORM_SSBO_INPUT_PRIM_TYPE_STR "_pt" +# define UNIFORM_SSBO_INPUT_VERT_COUNT_STR "_vc" +/* Per-attribute. */ +# define UNIFORM_SSBO_OFFSET_STR "_so" +# define UNIFORM_SSBO_STRIDE_STR "_ss" +# define UNIFORM_SSBO_FETCHMODE_STR "_sf" +# define UNIFORM_SSBO_VBO_ID_STR "_sv" +# define UNIFORM_SSBO_TYPE_STR "_st" +#endif + +namespace blender::gpu { + +struct MSLUniform { + shader::Type type; + std::string name; + bool is_array; + int array_elems; + ShaderStage stage; + + MSLUniform(shader::Type uniform_type, + std::string uniform_name, + bool is_array_type, + uint32_t num_elems = 1) + : type(uniform_type), name(uniform_name), is_array(is_array_type), array_elems(num_elems) + { + } + + bool operator==(const MSLUniform &right) const + { + return (type == right.type && name == right.name && is_array == right.is_array && + array_elems == right.array_elems); + } +}; + +struct MSLUniformBlock { + std::string type_name; + std::string name; + ShaderStage stage; + bool is_array; + + bool operator==(const MSLUniformBlock &right) const + { + return (type_name == right.type_name && name == right.name); + } +}; + +enum MSLTextureSamplerAccess { + TEXTURE_ACCESS_NONE = 0, + TEXTURE_ACCESS_SAMPLE, + TEXTURE_ACCESS_READ, + TEXTURE_ACCESS_WRITE, + TEXTURE_ACCESS_READWRITE, +}; + +struct MSLTextureSampler { + ShaderStage stage; + shader::ImageType type; + std::string name; + MSLTextureSamplerAccess access; + uint location; + + eGPUTextureType get_texture_binding_type() const; + + void resolve_binding_indices(); + + MSLTextureSampler(ShaderStage in_stage, + shader::ImageType in_sampler_type, + std::string in_sampler_name, + MSLTextureSamplerAccess in_access, + uint in_location) + : stage(in_stage), + type(in_sampler_type), + name(in_sampler_name), + access(in_access), + location(in_location) + { + } + + bool operator==(const MSLTextureSampler &right) const + { + /* We do not compare stage as we want to avoid duplication of resources used across multiple + * stages. */ + return (type == right.type && name == right.name && access == right.access); + } + + std::string get_msl_access_str() const + { + switch (access) { + case TEXTURE_ACCESS_SAMPLE: + return "access::sample"; + case TEXTURE_ACCESS_READ: + return "access::read"; + case TEXTURE_ACCESS_WRITE: + return "access::write"; + case TEXTURE_ACCESS_READWRITE: + return "access::read_write"; + default: + BLI_assert(false); + return ""; + } + return ""; + } + + /* Get typestring for wrapped texture class members. + * wrapper struct type contains combined texture and sampler, templated + * against the texture type. + * See `COMBINED_SAMPLER_TYPE` in `mtl_shader_defines.msl`. */ + std::string get_msl_typestring_wrapper(bool is_addr) const + { + std::string str; + str = this->get_msl_wrapper_type_str() + "<" + this->get_msl_return_type_str() + "," + + this->get_msl_access_str() + ">" + ((is_addr) ? "* " : " ") + this->name; + return str; + } + + /* Get raw texture typestring -- used in entry-point function argument table. 
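+   * For example, assuming the usual MSL names for the 2D/float case, a sampled 2D float
+   * texture named `image` would yield roughly:
+   *
+   *   texture2d<float,access::sample> image
+   *
+   * with a `*` inserted before the name when `is_addr` requests the pointer form.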
*/ + std::string get_msl_typestring(bool is_addr) const + { + std::string str; + str = this->get_msl_texture_type_str() + "<" + this->get_msl_return_type_str() + "," + + this->get_msl_access_str() + ">" + ((is_addr) ? "* " : " ") + this->name; + return str; + } + + std::string get_msl_return_type_str() const; + std::string get_msl_texture_type_str() const; + std::string get_msl_wrapper_type_str() const; +}; + +struct MSLVertexInputAttribute { + /* layout_location of -1 means unspecified and will + * be populated manually. */ + int layout_location; + shader::Type type; + std::string name; + + bool operator==(const MSLVertexInputAttribute &right) const + { + return (layout_location == right.layout_location && type == right.type && name == right.name); + } +}; + +struct MSLVertexOutputAttribute { + std::string type; + std::string name; + /* Instance name specified if attributes belong to a struct. */ + std::string instance_name; + /* Interpolation qualifier can be any of smooth (default), flat, no_perspective. */ + std::string interpolation_qualifier; + bool is_array; + int array_elems; + + bool operator==(const MSLVertexOutputAttribute &right) const + { + return (type == right.type && name == right.name && + interpolation_qualifier == right.interpolation_qualifier && + is_array == right.is_array && array_elems == right.array_elems); + } + std::string get_mtl_interpolation_qualifier() const + { + if (interpolation_qualifier == "" || interpolation_qualifier == "smooth") { + return ""; + } + else if (interpolation_qualifier == "flat") { + return " [[flat]]"; + } + else if (interpolation_qualifier == "noperspective") { + return " [[center_no_perspective]]"; + } + return ""; + } +}; + +struct MSLFragmentOutputAttribute { + /* Explicit output binding location N for [[color(N)]] -1 = unspecified. */ + int layout_location; + /* Output index for dual source blending. -1 = unspecified. */ + int layout_index; + shader::Type type; + std::string name; + + bool operator==(const MSLFragmentOutputAttribute &right) const + { + return (layout_location == right.layout_location && type == right.type && name == right.name && + layout_index == right.layout_index); + } +}; + +class MSLGeneratorInterface { + static char *msl_patch_default; + + public: + /** Shader stage input/output binding information. + * Derived from shader source reflection or GPUShaderCreateInfo. */ + blender::Vector<MSLUniformBlock> uniform_blocks; + blender::Vector<MSLUniform> uniforms; + blender::Vector<MSLTextureSampler> texture_samplers; + blender::Vector<MSLVertexInputAttribute> vertex_input_attributes; + blender::Vector<MSLVertexOutputAttribute> vertex_output_varyings; + /* Should match vertex outputs, but defined separately as + * some shader permutations will not utilize all inputs/outputs. + * Final shader uses the intersection between the two sets. */ + blender::Vector<MSLVertexOutputAttribute> fragment_input_varyings; + blender::Vector<MSLFragmentOutputAttribute> fragment_outputs; + /* Transform feedback interface. */ + blender::Vector<MSLVertexOutputAttribute> vertex_output_varyings_tf; + /* Clip Distances. */ + blender::Vector<std::string> clip_distances; + + /** GL Global usage. */ + /* Whether GL position is used, or an alternative vertex output should be the default. */ + bool uses_gl_Position; + /* Whether gl_FragColor is used, or whether an alternative fragment output + * should be the default. */ + bool uses_gl_FragColor; + /* Whether gl_PointCoord is used in the fragment shader. 
If so, + * we define float2 gl_PointCoord [[point_coord]]. */ + bool uses_gl_PointCoord; + /* Writes out to gl_PointSize in the vertex shader output. */ + bool uses_gl_PointSize; + bool uses_gl_VertexID; + bool uses_gl_InstanceID; + bool uses_gl_BaseInstanceARB; + bool uses_gl_FrontFacing; + /* Sets the output render target array index when using multilayered rendering. */ + bool uses_gl_FragDepth; + bool uses_mtl_array_index_; + bool uses_transform_feedback; + bool uses_barycentrics; + + /* Parameters. */ + shader::DepthWrite depth_write; + + /* Shader buffer bind indices for argument buffers. */ + int sampler_argument_buffer_bind_index[2] = {-1, -1}; + + /*** SSBO Vertex fetch mode. ***/ + /* Indicates whether to pass in Vertex Buffer's as a regular buffers instead of using vertex + * assembly in the PSO descriptor. Enabled with special pragma. */ + bool uses_ssbo_vertex_fetch_mode; + + private: + /* Parent shader instance. */ + MTLShader &parent_shader_; + + /* If prepared from Create info. */ + const shader::ShaderCreateInfo *create_info_; + + public: + MSLGeneratorInterface(MTLShader &shader) : parent_shader_(shader){}; + + /** Prepare MSLGeneratorInterface from create-info. **/ + void prepare_from_createinfo(const shader::ShaderCreateInfo *info); + + /* When SSBO Vertex Fetch mode is used, uniforms are used to pass on the required information + * about vertex attribute bindings, in order to perform manual vertex assembly and random-access + * vertex lookup throughout the bound VBOs. + * + * Some parameters are global for the shader, others change with the currently bound + * VertexBuffers, and their format, as they do with regular GPUBatch's. + * + * (Where ##attr is the attributes name) + * uniform_ssbo_stride_##attr -- Representing the stride between elements of attribute(attr) + * uniform_ssbo_offset_##attr -- Representing the base offset within the vertex + * uniform_ssbo_fetchmode_##attr -- Whether using per-vertex fetch or per-instance fetch + * (0=vert, 1=inst) uniform_ssbo_vbo_id_##attr -- index of the vertex buffer within which the + * data for this attribute is contained uniform_ssbo_type_##attr - The type of data in the + * currently bound buffer -- Could be a mismatch with the Officially reported type. */ + void prepare_ssbo_vertex_fetch_uniforms(); + + /* Samplers. */ + bool use_argument_buffer_for_samplers() const; + uint32_t num_samplers_for_stage(ShaderStage stage) const; + + /* Returns the bind index, relative to MTL_uniform_buffer_base_index. */ + uint32_t get_sampler_argument_buffer_bind_index(ShaderStage stage); + + /* Code generation utility functions. 
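+   * As a rough sketch of how these compose for a vertex stage (the authoritative order lives
+   * in the .mm implementation): the shared patch from msl_patch_default_get(), the uniform and
+   * interface structs (generate_msl_uniform_structs, generate_msl_vertex_in_struct,
+   * generate_msl_vertex_out_struct), the wrapped GLSL body, and finally
+   * generate_msl_vertex_entry_stub(), which emits the vertex entry point and its argument
+   * table.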
*/ + std::string generate_msl_uniform_structs(ShaderStage shader_stage); + std::string generate_msl_vertex_in_struct(); + std::string generate_msl_vertex_out_struct(ShaderStage shader_stage); + std::string generate_msl_vertex_transform_feedback_out_struct(ShaderStage shader_stage); + std::string generate_msl_fragment_out_struct(); + std::string generate_msl_vertex_inputs_string(); + std::string generate_msl_fragment_inputs_string(); + std::string generate_msl_vertex_entry_stub(); + std::string generate_msl_fragment_entry_stub(); + std::string generate_msl_global_uniform_population(ShaderStage stage); + std::string generate_ubo_block_macro_chain(MSLUniformBlock block); + std::string generate_msl_uniform_block_population(ShaderStage stage); + std::string generate_msl_vertex_attribute_input_population(); + std::string generate_msl_vertex_output_population(); + std::string generate_msl_vertex_output_tf_population(); + std::string generate_msl_fragment_input_population(); + std::string generate_msl_fragment_output_population(); + std::string generate_msl_uniform_undefs(ShaderStage stage); + std::string generate_ubo_block_undef_chain(ShaderStage stage); + std::string generate_msl_texture_vars(ShaderStage shader_stage); + void generate_msl_textures_input_string(std::stringstream &out, ShaderStage stage); + void generate_msl_uniforms_input_string(std::stringstream &out, ShaderStage stage); + + /* Location is not always specified, so this will resolve outstanding locations. */ + void resolve_input_attribute_locations(); + void resolve_fragment_output_locations(); + + /* Create shader interface for converted GLSL shader. */ + MTLShaderInterface *bake_shader_interface(const char *name); + + /* Fetch combined shader source header. */ + char *msl_patch_default_get(); + + MEM_CXX_CLASS_ALLOC_FUNCS("MSLGeneratorInterface"); +}; + +inline std::string get_stage_class_name(ShaderStage stage) +{ + switch (stage) { + case ShaderStage::VERTEX: + return "MTLShaderVertexImpl"; + case ShaderStage::FRAGMENT: + return "MTLShaderFragmentImpl"; + default: + BLI_assert_unreachable(); + return ""; + } + return ""; +} + +inline bool is_builtin_type(std::string type) +{ + /* Add Types as needed. */ + /* TODO(Metal): Consider replacing this with a switch and constexpr hash and switch. + * Though most efficient and maintainable approach to be determined. */ + static std::map<std::string, eMTLDataType> glsl_builtin_types = { + {"float", MTL_DATATYPE_FLOAT}, + {"vec2", MTL_DATATYPE_FLOAT2}, + {"vec3", MTL_DATATYPE_FLOAT3}, + {"vec4", MTL_DATATYPE_FLOAT4}, + {"int", MTL_DATATYPE_INT}, + {"ivec2", MTL_DATATYPE_INT2}, + {"ivec3", MTL_DATATYPE_INT3}, + {"ivec4", MTL_DATATYPE_INT4}, + {"uint32_t", MTL_DATATYPE_UINT}, + {"uvec2", MTL_DATATYPE_UINT2}, + {"uvec3", MTL_DATATYPE_UINT3}, + {"uvec4", MTL_DATATYPE_UINT4}, + {"mat3", MTL_DATATYPE_FLOAT3x3}, + {"mat4", MTL_DATATYPE_FLOAT4x4}, + {"bool", MTL_DATATYPE_INT}, + {"uchar", MTL_DATATYPE_UCHAR}, + {"uchar2", MTL_DATATYPE_UCHAR2}, + {"uchar2", MTL_DATATYPE_UCHAR3}, + {"uchar4", MTL_DATATYPE_UCHAR4}, + {"vec3_1010102_Unorm", MTL_DATATYPE_UINT1010102_NORM}, + {"vec3_1010102_Inorm", MTL_DATATYPE_INT1010102_NORM}, + }; + return (glsl_builtin_types.find(type) != glsl_builtin_types.end()); +} + +inline bool is_matrix_type(const std::string &type) +{ + /* Matrix type support. Add types as necessary. */ + return (type == "mat4"); +} + +inline bool is_matrix_type(const shader::Type &type) +{ + /* Matrix type support. Add types as necessary. 
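+   * Matrices matter for location assignment because a matrix vertex input consumes one
+   * attribute location per column: e.g. a `mat4` input at location 0 occupies locations 0-3,
+   * each read as a `vec4` column (see get_matrix_location_count and get_matrix_subtype below).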
*/ + return (type == shader::Type::MAT4 || type == shader::Type::MAT3); +} + +inline int get_matrix_location_count(const std::string &type) +{ + /* Matrix type support. Add types as necessary. */ + if (type == "mat4") { + return 4; + } + if (type == "mat3") { + return 3; + } + return 1; +} + +inline int get_matrix_location_count(const shader::Type &type) +{ + /* Matrix type support. Add types as necessary. */ + if (type == shader::Type::MAT4) { + return 4; + } + else if (type == shader::Type::MAT3) { + return 3; + } + return 1; +} + +inline std::string get_matrix_subtype(const std::string &type) +{ + if (type == "mat4") { + return "vec4"; + } + return type; +} + +inline shader::Type get_matrix_subtype(const shader::Type &type) +{ + if (type == shader::Type::MAT4) { + return shader::Type::VEC4; + } + if (type == shader::Type::MAT3) { + return shader::Type::VEC3; + } + return type; +} + +inline std::string get_attribute_conversion_function(bool *uses_conversion, + const shader::Type &type) +{ + /* NOTE(Metal): Add more attribute types as required. */ + if (type == shader::Type::FLOAT) { + *uses_conversion = true; + return "internal_vertex_attribute_convert_read_float"; + } + else if (type == shader::Type::VEC2) { + *uses_conversion = true; + return "internal_vertex_attribute_convert_read_float2"; + } + else if (type == shader::Type::VEC3) { + *uses_conversion = true; + return "internal_vertex_attribute_convert_read_float3"; + } + else if (type == shader::Type::VEC4) { + *uses_conversion = true; + return "internal_vertex_attribute_convert_read_float4"; + } + *uses_conversion = false; + return ""; +} + +inline const char *to_string(const shader::PrimitiveOut &layout) +{ + switch (layout) { + case shader::PrimitiveOut::POINTS: + return "points"; + case shader::PrimitiveOut::LINE_STRIP: + return "line_strip"; + case shader::PrimitiveOut::TRIANGLE_STRIP: + return "triangle_strip"; + default: + BLI_assert(false); + return "unknown"; + } +} + +inline const char *to_string(const shader::PrimitiveIn &layout) +{ + switch (layout) { + case shader::PrimitiveIn::POINTS: + return "points"; + case shader::PrimitiveIn::LINES: + return "lines"; + case shader::PrimitiveIn::LINES_ADJACENCY: + return "lines_adjacency"; + case shader::PrimitiveIn::TRIANGLES: + return "triangles"; + case shader::PrimitiveIn::TRIANGLES_ADJACENCY: + return "triangles_adjacency"; + default: + BLI_assert(false); + return "unknown"; + } +} + +inline const char *to_string(const shader::Interpolation &interp) +{ + switch (interp) { + case shader::Interpolation::SMOOTH: + return "smooth"; + case shader::Interpolation::FLAT: + return "flat"; + case shader::Interpolation::NO_PERSPECTIVE: + return "noperspective"; + default: + BLI_assert(false); + return "unkown"; + } +} + +inline const char *to_string_msl(const shader::Interpolation &interp) +{ + switch (interp) { + case shader::Interpolation::SMOOTH: + return "[[smooth]]"; + case shader::Interpolation::FLAT: + return "[[flat]]"; + case shader::Interpolation::NO_PERSPECTIVE: + return "[[center_no_perspective]]"; + default: + return ""; + } +} + +inline const char *to_string(const shader::Type &type) +{ + switch (type) { + case shader::Type::FLOAT: + return "float"; + case shader::Type::VEC2: + return "vec2"; + case shader::Type::VEC3: + return "vec3"; + case shader::Type::VEC3_101010I2: + return "vec3_1010102_Inorm"; + case shader::Type::VEC4: + return "vec4"; + case shader::Type::MAT3: + return "mat3"; + case shader::Type::MAT4: + return "mat4"; + case shader::Type::UINT: + return 
"uint32_t"; + case shader::Type::UVEC2: + return "uvec2"; + case shader::Type::UVEC3: + return "uvec3"; + case shader::Type::UVEC4: + return "uvec4"; + case shader::Type::INT: + return "int"; + case shader::Type::IVEC2: + return "ivec2"; + case shader::Type::IVEC3: + return "ivec3"; + case shader::Type::IVEC4: + return "ivec4"; + case shader::Type::BOOL: + return "bool"; + case shader::Type::UCHAR: + return "uchar"; + case shader::Type::UCHAR2: + return "uchar2"; + case shader::Type::UCHAR3: + return "uchar3"; + case shader::Type::UCHAR4: + return "uchar4"; + case shader::Type::CHAR: + return "char"; + case shader::Type::CHAR2: + return "char2"; + case shader::Type::CHAR3: + return "char3"; + case shader::Type::CHAR4: + return "char4"; + default: + BLI_assert(false); + return "unkown"; + } +} + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm new file mode 100644 index 00000000000..977e97dbd82 --- /dev/null +++ b/source/blender/gpu/metal/mtl_shader_generator.mm @@ -0,0 +1,2980 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#include "BKE_global.h" + +#include "BLI_string.h" + +#include "BLI_string.h" +#include <algorithm> +#include <fstream> +#include <iostream> +#include <map> +#include <mutex> +#include <regex> +#include <sstream> +#include <string> + +#include <cstring> + +#include "GPU_platform.h" +#include "GPU_vertex_format.h" + +#include "gpu_shader_dependency_private.h" + +#include "mtl_common.hh" +#include "mtl_context.hh" +#include "mtl_debug.hh" +#include "mtl_shader.hh" +#include "mtl_shader_generator.hh" +#include "mtl_shader_interface.hh" +#include "mtl_texture.hh" + +extern char datatoc_mtl_shader_defines_msl[]; +extern char datatoc_mtl_shader_shared_h[]; + +using namespace blender; +using namespace blender::gpu; +using namespace blender::gpu::shader; + +namespace blender::gpu { + +char *MSLGeneratorInterface::msl_patch_default = nullptr; + +/* -------------------------------------------------------------------- */ +/** \name Shader Translation utility functions. 
+ * \{ */ + +static eMTLDataType to_mtl_type(Type type) +{ + switch (type) { + case Type::FLOAT: + return MTL_DATATYPE_FLOAT; + case Type::VEC2: + return MTL_DATATYPE_FLOAT2; + case Type::VEC3: + return MTL_DATATYPE_FLOAT3; + case Type::VEC4: + return MTL_DATATYPE_FLOAT4; + case Type::MAT3: + return MTL_DATATYPE_FLOAT3x3; + case Type::MAT4: + return MTL_DATATYPE_FLOAT4x4; + case Type::UINT: + return MTL_DATATYPE_UINT; + case Type::UVEC2: + return MTL_DATATYPE_UINT2; + case Type::UVEC3: + return MTL_DATATYPE_UINT3; + case Type::UVEC4: + return MTL_DATATYPE_UINT4; + case Type::INT: + return MTL_DATATYPE_INT; + case Type::IVEC2: + return MTL_DATATYPE_INT2; + case Type::IVEC3: + return MTL_DATATYPE_INT3; + case Type::IVEC4: + return MTL_DATATYPE_INT4; + case Type::VEC3_101010I2: + return MTL_DATATYPE_INT1010102_NORM; + case Type::BOOL: + return MTL_DATATYPE_BOOL; + case Type::UCHAR: + return MTL_DATATYPE_UCHAR; + case Type::UCHAR2: + return MTL_DATATYPE_UCHAR2; + case Type::UCHAR3: + return MTL_DATATYPE_UCHAR3; + case Type::UCHAR4: + return MTL_DATATYPE_UCHAR4; + case Type::CHAR: + return MTL_DATATYPE_CHAR; + case Type::CHAR2: + return MTL_DATATYPE_CHAR2; + case Type::CHAR3: + return MTL_DATATYPE_CHAR3; + case Type::CHAR4: + return MTL_DATATYPE_CHAR4; + default: { + BLI_assert_msg(false, "Unexpected data type"); + } + } + return MTL_DATATYPE_FLOAT; +} + +static std::regex remove_non_numeric_characters("[^0-9]"); + +#ifndef NDEBUG +static void remove_multiline_comments_func(std::string &str) +{ + char *current_str_begin = &*str.begin(); + char *current_str_end = &*str.end(); + + bool is_inside_comment = false; + for (char *c = current_str_begin; c < current_str_end; c++) { + if (is_inside_comment) { + if ((*c == '*') && (c < current_str_end - 1) && (*(c + 1) == '/')) { + is_inside_comment = false; + *c = ' '; + *(c + 1) = ' '; + } + else { + *c = ' '; + } + } + else { + if ((*c == '/') && (c < current_str_end - 1) && (*(c + 1) == '*')) { + is_inside_comment = true; + *c = ' '; + } + } + } +} + +static void remove_singleline_comments_func(std::string &str) +{ + char *current_str_begin = &*str.begin(); + char *current_str_end = &*str.end(); + + bool is_inside_comment = false; + for (char *c = current_str_begin; c < current_str_end; c++) { + if (is_inside_comment) { + if (*c == '\n') { + is_inside_comment = false; + } + else { + *c = ' '; + } + } + else { + if ((*c == '/') && (c < current_str_end - 1) && (*(c + 1) == '/')) { + is_inside_comment = true; + *c = ' '; + } + } + } +} +#endif + +static bool is_program_word(const char *chr, int *len) +{ + int numchars = 0; + for (const char *c = chr; *c != '\0'; c++) { + char ch = *c; + if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || + (numchars > 0 && ch >= '0' && ch <= '9') || ch == '_') { + numchars++; + } + else { + *len = numchars; + return (numchars > 0); + } + } + *len = numchars; + return true; +} + +/** + * Replace function parameter patterns containing: + * `out vec3 somevar` with `THD vec3&somevar`. + * which enables pass by reference via resolved macro: + * `thread vec3& somevar`. + */ +static void replace_outvars(std::string &str) +{ + char *current_str_begin = &*str.begin(); + char *current_str_end = &*str.end(); + + for (char *c = current_str_begin + 2; c < current_str_end - 6; c++) { + char *start = c; + if (strncmp(c, "out ", 4) == 0) { + if (strncmp(c - 2, "in", 2) == 0) { + start = c - 2; + } + + /* Check that the following are words. 
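+       * That is, `out <type> <name>` (or the `inout` form). When both words match, the
+       * declaration is rewritten in place, keeping the string length unchanged; for example
+       * `inout vec4 color` becomes `THD   vec4&color`, and the `&` becomes `*` when the
+       * parameter is an array.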
*/ + int len1, len2; + char *word_base1 = c + 4; + char *word_base2 = word_base1; + + if (is_program_word(word_base1, &len1) && (*(word_base1 + len1) == ' ')) { + word_base2 = word_base1 + len1 + 1; + if (is_program_word(word_base2, &len2)) { + /* Match found. */ + bool is_array = (*(word_base2 + len2) == '['); + + /* Generate out-variable pattern of form `THD type&var` from original `out vec4 var`. */ + *start = 'T'; + *(start + 1) = 'H'; + *(start + 2) = 'D'; + for (char *clear = start + 3; clear < c + 4; clear++) { + *clear = ' '; + } + *(word_base2 - 1) = is_array ? '*' : '&'; + } + } + } + } +} + +static void replace_array_initializers_func(std::string &str) +{ + char *current_str_begin = &*str.begin(); + char *current_str_end = &*str.end(); + + for (char *c = current_str_begin; c < current_str_end - 6; c++) { + char *base_scan = c; + int typelen = 0; + + if (is_program_word(c, &typelen) && *(c + typelen) == '[') { + + char *array_len_start = c + typelen + 1; + c = array_len_start; + char *closing_square_brace = strchr(c, ']'); + if (closing_square_brace != nullptr) { + c = closing_square_brace; + char *first_bracket = c + 1; + if (*first_bracket == '(') { + c += 1; + char *semi_colon = strchr(c, ';'); + if (semi_colon != nullptr && *(semi_colon - 1) == ')') { + char *closing_bracket = semi_colon - 1; + + /* Resolve to MSL-compatible array formatting. */ + *first_bracket = '{'; + *closing_bracket = '}'; + for (char *clear = base_scan; clear <= closing_square_brace; clear++) { + *clear = ' '; + } + } + } + } + else { + return; + } + } + } +} + +#ifndef NDEBUG + +static bool balanced_braces(char *current_str_begin, char *current_str_end) +{ + int nested_bracket_depth = 0; + for (char *c = current_str_begin; c < current_str_end; c++) { + /* Track whether we are in global scope. */ + if (*c == '{' || *c == '[' || *c == '(') { + nested_bracket_depth++; + continue; + } + if (*c == '}' || *c == ']' || *c == ')') { + nested_bracket_depth--; + continue; + } + } + return (nested_bracket_depth == 0); +} + +/** + * Certain Constants (such as arrays, or pointer types) declared in Global-scope + * end up being initialized per shader thread, resulting in high + * register pressure within the shader. + * Here we flag occurrences of these constants such that + * they can be moved to a place where this is not a problem. + * + * Constants declared within function-scope do not exhibit this problem. + */ +static void extract_global_scope_constants(std::string &str, std::stringstream &global_scope_out) +{ + char *current_str_begin = &*str.begin(); + char *current_str_end = &*str.end(); + + int nested_bracket_depth = 0; + for (char *c = current_str_begin; c < current_str_end - 6; c++) { + /* Track whether we are in global scope. */ + if (*c == '{' || *c == '[' || *c == '(') { + nested_bracket_depth++; + continue; + } + if (*c == '}' || *c == ']' || *c == ')') { + nested_bracket_depth--; + BLI_assert(nested_bracket_depth >= 0); + continue; + } + + /* Check For global const declarations */ + if (nested_bracket_depth == 0 && strncmp(c, "const ", 6) == 0 && + strncmp(c, "const constant ", 15) != 0) { + char *c_expr_end = strstr(c, ";"); + if (c_expr_end != nullptr && balanced_braces(c, c_expr_end)) { + MTL_LOG_INFO( + "[PERFORMANCE WARNING] Global scope constant expression found - These get allocated " + "per-thread in METAL - Best to use Macro's or uniforms to avoid overhead: '%.*s'\n", + (int)(c_expr_end + 1 - c), + c); + + /* Jump ptr forward as we know we remain in global scope. 
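The `out` parameter rewrite above edits the source buffer in place so that string length and character offsets stay stable. The simplified, regex-based sketch below only illustrates the intended before/after mapping, under the assumption that `THD` expands to Metal's `thread` address-space qualifier; the `project` function and its parameters are made-up examples.

#include <iostream>
#include <regex>
#include <string>

/* Simplified stand-in for replace_outvars(): rewrite `out`/`inout` parameters
 * as pass-by-reference. The real implementation scans characters in place. */
static std::string replace_outvars_sketch(std::string src)
{
  static const std::regex out_param("\\b(?:in)?out\\s+(\\w+)\\s+(\\w+)");
  return std::regex_replace(src, out_param, "THD $1& $2");
}

int main()
{
  std::cout << replace_outvars_sketch("void project(out vec4 result, inout vec3 p);") << "\n";
  /* Prints: void project(THD vec4& result, THD vec3& p); */
  return 0;
}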
*/ + c = c_expr_end - 1; + continue; + } + } + } +} +#endif + +static bool extract_ssbo_pragma_info(const MTLShader *shader, + const MSLGeneratorInterface &, + const std::string &in_vertex_src, + MTLPrimitiveType &out_prim_tye, + uint32_t &out_num_output_verts) +{ + /* SSBO Vertex-fetch parameter extraction. */ + static std::regex use_ssbo_fetch_mode_find( + "#pragma " + "USE_SSBO_VERTEX_FETCH\\(\\s*(TriangleList|LineList|\\w+)\\s*,\\s*([0-9]+)\\s*\\)"); + + /* Perform regex search if pragma string found. */ + std::smatch vertex_shader_ssbo_flags; + bool uses_ssbo_fetch = false; + if (in_vertex_src.find("#pragma USE_SSBO_VERTEX_FETCH") != std::string::npos) { + uses_ssbo_fetch = std::regex_search( + in_vertex_src, vertex_shader_ssbo_flags, use_ssbo_fetch_mode_find); + } + if (uses_ssbo_fetch) { + /* Extract Expected output primitive type: + * #pragma USE_SSBO_VERTEX_FETCH(Output Prim Type, num output vertices per input primitive) + * + * Supported Primitive Types (Others can be added if needed, but List types for efficiency): + * - TriangleList + * - LineList + * + * Output vertex count is determined by calculating the number of input primitives, and + * multiplying that by the number of output vertices specified. */ + std::string str_output_primitive_type = vertex_shader_ssbo_flags[1].str(); + std::string str_output_prim_count_per_vertex = vertex_shader_ssbo_flags[2].str(); + + /* Ensure output primitive type is valid. */ + if (str_output_primitive_type == "TriangleList") { + out_prim_tye = MTLPrimitiveTypeTriangle; + } + else if (str_output_primitive_type == "LineList") { + out_prim_tye = MTLPrimitiveTypeLine; + } + else { + MTL_LOG_ERROR("Unsupported output primitive type for SSBO VERTEX FETCH MODE. Shader: %s", + shader->name_get()); + return false; + } + + /* Assign output num vertices per primitive. */ + out_num_output_verts = std::stoi( + std::regex_replace(str_output_prim_count_per_vertex, remove_non_numeric_characters, "")); + BLI_assert(out_num_output_verts > 0); + return true; + } + + /* SSBO Vertex fetchmode not used. */ + return false; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name MTLShader builtin shader generation utilities. + * \{ */ + +static void print_resource(std::ostream &os, const ShaderCreateInfo::Resource &res) +{ + switch (res.bind_type) { + case ShaderCreateInfo::Resource::BindType::SAMPLER: + break; + case ShaderCreateInfo::Resource::BindType::IMAGE: + break; + case ShaderCreateInfo::Resource::BindType::UNIFORM_BUFFER: { + int64_t array_offset = res.uniformbuf.name.find_first_of("["); + if (array_offset == -1) { + /* Create local class member as constant pointer reference to bound UBO buffer. + * Given usage within a shader follows ubo_name.ubo_element syntax, we can + * dereference the pointer as the compiler will optimize this data fetch. + * To do this, we also give the UBO name a post-fix of `_local` to avoid + * macro accessor collisions. */ + os << "constant " << res.uniformbuf.type_name << " *" << res.uniformbuf.name + << "_local;\n"; + os << "#define " << res.uniformbuf.name << " (*" << res.uniformbuf.name << "_local)\n"; + } + else { + /* For arrays, we can directly provide the constant access pointer, as the array + * syntax will de-reference this at the correct fetch index. 
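For reference, a self-contained sketch of the pragma scan performed by extract_ssbo_pragma_info() above, assuming the same `#pragma USE_SSBO_VERTEX_FETCH(<output primitive>, <vertices per input primitive>)` convention; the shader string and counts here are illustrative only.

#include <iostream>
#include <regex>
#include <string>

int main()
{
  /* Hypothetical vertex source carrying the pragma. */
  const std::string vertex_src =
      "#pragma USE_SSBO_VERTEX_FETCH(TriangleList, 6)\n"
      "void main() {}\n";

  static const std::regex pragma_re(
      "#pragma USE_SSBO_VERTEX_FETCH\\(\\s*(\\w+)\\s*,\\s*([0-9]+)\\s*\\)");

  std::smatch match;
  if (std::regex_search(vertex_src, match, pragma_re)) {
    std::cout << "output primitive: " << match[1] << "\n";
    std::cout << "vertices per input primitive: " << match[2] << "\n";
  }
  return 0;
}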
*/ + StringRef name_no_array = StringRef(res.uniformbuf.name.c_str(), array_offset); + os << "constant " << res.uniformbuf.type_name << " *" << name_no_array << ";\n"; + } + break; + } + case ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER: + break; + } +} + +std::string MTLShader::resources_declare(const ShaderCreateInfo &info) const +{ + /* NOTE(Metal): We only use the upfront preparation functions to populate members which + * would exist in the original non-create-info variant. + * + * This function is only used to generate resource structs. + * Global-scope handles for Uniforms, UBOs, textures and samplers + * are generated during class-wrapper construction in `generate_msl_from_glsl`. */ + std::stringstream ss; + + /* Generate resource stubs for UBOs and textures. */ + ss << "\n/* Pass Resources. */\n"; + for (const ShaderCreateInfo::Resource &res : info.pass_resources_) { + print_resource(ss, res); + } + ss << "\n/* Batch Resources. */\n"; + for (const ShaderCreateInfo::Resource &res : info.batch_resources_) { + print_resource(ss, res); + } + /* NOTE: Push constant uniform data is generated during `generate_msl_from_glsl` + * as the generated output is needed for all paths. This includes generation + * of the push constant data structure (struct PushConstantBlock). + * As all shader generation paths require creation of this. */ + return ss.str(); +} + +std::string MTLShader::vertex_interface_declare(const shader::ShaderCreateInfo &info) const +{ + /* NOTE(Metal): We only use the upfront preparation functions to populate members which + * would exist in the original non-create-info variant. + * + * Here we generate the variables within class wrapper scope to allow reading of + * input attributes by the main code. */ + std::stringstream ss; + ss << "\n/* Vertex Inputs. */\n"; + for (const ShaderCreateInfo::VertIn &attr : info.vertex_inputs_) { + ss << to_string(attr.type) << " " << attr.name << ";\n"; + } + return ss.str(); +} + +std::string MTLShader::fragment_interface_declare(const shader::ShaderCreateInfo &info) const +{ + /* For shaders generated from MSL, the fragment-output struct is generated as part of the entry + * stub during glsl->MSL conversion in `generate_msl_from_glsl`. + * Here, we can instead generate the global-scope variables which will be populated during + * execution. + * + * NOTE: The output declaration for location and blend index are generated in the entry-point + * struct. This is simply a mirror class member which stores the value during main shader body + * execution. */ + std::stringstream ss; + ss << "\n/* Fragment Outputs. */\n"; + for (const ShaderCreateInfo::FragOut &output : info.fragment_outputs_) { + ss << to_string(output.type) << " " << output.name << ";\n"; + } + ss << "\n"; + + return ss.str(); +} + +std::string MTLShader::MTLShader::geometry_interface_declare( + const shader::ShaderCreateInfo &info) const +{ + BLI_assert_msg(false, "Geometry shading unsupported by Metal"); + return ""; +} + +std::string MTLShader::geometry_layout_declare(const shader::ShaderCreateInfo &info) const +{ + BLI_assert_msg(false, "Geometry shading unsupported by Metal"); + return ""; +} + +std::string MTLShader::compute_layout_declare(const ShaderCreateInfo &info) const +{ + /* TODO(Metal): Metal compute layout pending compute support. */ + BLI_assert_msg(false, "Compute shaders unsupported by Metal"); + return ""; +} + +/** \} */ + +/* -------------------------------------------------------------------- */ +/** \name Shader Translation. 
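The uniform-buffer case in print_resource() above binds each UBO as a constant-address-space pointer and hides the indirection behind a macro, so GLSL-style `ubo_name.member` access keeps compiling unchanged. A minimal sketch of that emission, using a hypothetical buffer `NodeTree node_tree` (both names are assumptions):

#include <iostream>
#include <sstream>
#include <string>

/* Emit the pointer member plus the macro alias for a single uniform buffer. */
static std::string declare_ubo(const std::string &type_name, const std::string &name)
{
  std::stringstream os;
  os << "constant " << type_name << " *" << name << "_local;\n";
  os << "#define " << name << " (*" << name << "_local)\n";
  return os.str();
}

int main()
{
  std::cout << declare_ubo("NodeTree", "node_tree");
  /* Prints:
   *   constant NodeTree *node_tree_local;
   *   #define node_tree (*node_tree_local)
   */
  return 0;
}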
+ * \{ */ + +char *MSLGeneratorInterface::msl_patch_default_get() +{ + if (msl_patch_default != nullptr) { + return msl_patch_default; + } + + std::stringstream ss_patch; + ss_patch << datatoc_mtl_shader_shared_h << std::endl; + ss_patch << datatoc_mtl_shader_defines_msl << std::endl; + size_t len = strlen(ss_patch.str().c_str()); + + msl_patch_default = (char *)malloc(len * sizeof(char)); + strcpy(msl_patch_default, ss_patch.str().c_str()); + return msl_patch_default; +} + +bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info) +{ + /* Verify if create-info is available. + * NOTE(Metal): For now, only support creation from CreateInfo. + * If needed, we can perform source translation without this using + * manual reflection. */ + bool uses_create_info = info != nullptr; + if (!uses_create_info) { + MTL_LOG_WARNING("Unable to compile shader %p '%s' as no create-info was provided!\n", + this, + this->name_get()); + valid_ = false; + return false; + } + + /* #MSLGeneratorInterface is a class populated to describe all parameters, resources, bindings + * and features used by the source GLSL shader. This information is then used to generate the + * appropriate Metal entry points and perform any required source translation. */ + MSLGeneratorInterface msl_iface(*this); + BLI_assert(shd_builder_ != nullptr); + + /* Populate #MSLGeneratorInterface from Create-Info. + * NOTE: this is a separate path as #MSLGeneratorInterface can also be manually populated + * from parsing, if support for shaders without create-info is required. */ + msl_iface.prepare_from_createinfo(info); + + /* Verify Source sizes are greater than zero. */ + BLI_assert(shd_builder_->glsl_vertex_source_.size() > 0); + if (!msl_iface.uses_transform_feedback) { + BLI_assert(shd_builder_->glsl_fragment_source_.size() > 0); + } + + /** Determine use of Transform Feedback. **/ + msl_iface.uses_transform_feedback = false; + if (transform_feedback_type_ != GPU_SHADER_TFB_NONE) { + /* Ensure #TransformFeedback is configured correctly. */ + BLI_assert(tf_output_name_list_.size() > 0); + msl_iface.uses_transform_feedback = true; + } + + /* Concatenate msl_shader_defines to provide functionality mapping + * from GLSL to MSL. Also include additional GPU defines for + * optional high-level feature support. */ + const std::string msl_defines_string = + "#define GPU_ARB_texture_cube_map_array 1\n\ + #define GPU_ARB_shader_draw_parameters 1\n\ + #define GPU_ARB_texture_gather 1\n"; + + shd_builder_->glsl_vertex_source_ = msl_defines_string + shd_builder_->glsl_vertex_source_; + if (!msl_iface.uses_transform_feedback) { + shd_builder_->glsl_fragment_source_ = msl_defines_string + shd_builder_->glsl_fragment_source_; + } + + /* Extract SSBO usage information from shader pragma: + * + * #pragma USE_SSBO_VERTEX_FETCH(Output Prim Type, num output vertices per input primitive) + * + * This will determine whether SSBO-vertex-fetch + * mode is used for this shader. Returns true if used, and populates output reference + * values with the output prim type and output number of vertices. 
*/ + MTLPrimitiveType vertex_fetch_ssbo_output_prim_type = MTLPrimitiveTypeTriangle; + uint32_t vertex_fetch_ssbo_num_output_verts = 0; + msl_iface.uses_ssbo_vertex_fetch_mode = extract_ssbo_pragma_info( + this, + msl_iface, + shd_builder_->glsl_vertex_source_, + vertex_fetch_ssbo_output_prim_type, + vertex_fetch_ssbo_num_output_verts); + + if (msl_iface.uses_ssbo_vertex_fetch_mode) { + shader_debug_printf( + "[Shader] SSBO VERTEX FETCH Enabled for Shader '%s' With Output primitive type: %s, " + "vertex count: %u\n", + this->name_get(), + output_primitive_type.c_str(), + vertex_fetch_ssbo_num_output_verts); + } + + /*** Regex Commands ***/ + /* Source cleanup and syntax replacement. */ + static std::regex remove_excess_newlines("\\n+"); + static std::regex replace_mat3("mat3\\s*\\("); + + /* Special condition - mat3 and array constructor replacement. + * Also replace excessive new lines to ensure cases are not missed. + * NOTE(Metal): May be able to skip excess-newline removal. */ + shd_builder_->glsl_vertex_source_ = std::regex_replace( + shd_builder_->glsl_vertex_source_, remove_excess_newlines, "\n"); + shd_builder_->glsl_vertex_source_ = std::regex_replace( + shd_builder_->glsl_vertex_source_, replace_mat3, "MAT3("); + replace_array_initializers_func(shd_builder_->glsl_vertex_source_); + + if (!msl_iface.uses_transform_feedback) { + shd_builder_->glsl_fragment_source_ = std::regex_replace( + shd_builder_->glsl_fragment_source_, remove_excess_newlines, "\n"); + shd_builder_->glsl_fragment_source_ = std::regex_replace( + shd_builder_->glsl_fragment_source_, replace_mat3, "MAT3("); + replace_array_initializers_func(shd_builder_->glsl_fragment_source_); + } + + /**** Extract usage of GL globals. ****/ + /* NOTE(METAL): Currently still performing fallback string scan, as info->builtins_ does + * not always contain the usage flag. This can be removed once all appropriate create-info's + * have been updated. In some cases, this may incur a false positive if access is guarded + * behind a macro. Though in these cases, unused code paths and parameters will be + * optimized out by the Metal shader compiler. */ + + /** Identify usage of vertex-shader builtins. */ + msl_iface.uses_gl_VertexID = bool(info->builtins_ & BuiltinBits::VERTEX_ID) || + shd_builder_->glsl_vertex_source_.find("gl_VertexID") != + std::string::npos; + msl_iface.uses_gl_InstanceID = bool(info->builtins_ & BuiltinBits::INSTANCE_ID) || + shd_builder_->glsl_vertex_source_.find("gl_InstanceID") != + std::string::npos || + shd_builder_->glsl_vertex_source_.find("gpu_InstanceIndex") != + std::string::npos || + msl_iface.uses_ssbo_vertex_fetch_mode; + + /* instance ID in GL is `[0, instance_count]` in metal it is + * `[base_instance, base_instance + instance_count]`, + * so we need to offset instance_ID by base instance in Metal -- + * Thus we expose the `[[base_instance]]` attribute if instance ID is used at all. 
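The clean-up pass described above boils down to two std::regex_replace calls: collapsing repeated newlines and renaming `mat3(` constructors so a `MAT3()` macro can supply the MSL equivalent. A minimal sketch with an illustrative input string:

#include <iostream>
#include <regex>
#include <string>

int main()
{
  std::string src = "mat3 m = mat3 (1.0);\n\n\nvec3 v = m * axis;\n";

  static const std::regex remove_excess_newlines("\\n+");
  static const std::regex replace_mat3("mat3\\s*\\(");

  src = std::regex_replace(src, remove_excess_newlines, "\n");
  src = std::regex_replace(src, replace_mat3, "MAT3(");

  /* Prints the cleaned source with "MAT3(1.0)" and single newlines. */
  std::cout << src;
  return 0;
}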
*/ + msl_iface.uses_gl_BaseInstanceARB = msl_iface.uses_gl_InstanceID || + shd_builder_->glsl_vertex_source_.find( + "gl_BaseInstanceARB") != std::string::npos || + shd_builder_->glsl_vertex_source_.find("gpu_BaseInstance") != + std::string::npos; + msl_iface.uses_gl_Position = shd_builder_->glsl_vertex_source_.find("gl_Position") != + std::string::npos; + msl_iface.uses_gl_PointSize = shd_builder_->glsl_vertex_source_.find("gl_PointSize") != + std::string::npos; + msl_iface.uses_mtl_array_index_ = shd_builder_->glsl_vertex_source_.find( + "MTLRenderTargetArrayIndex") != std::string::npos; + + /** Identify usage of fragment-shader builtins. */ + if (!msl_iface.uses_transform_feedback) { + std::smatch gl_special_cases; + msl_iface.uses_gl_PointCoord = bool(info->builtins_ & BuiltinBits::POINT_COORD) || + shd_builder_->glsl_fragment_source_.find("gl_PointCoord") != + std::string::npos; + msl_iface.uses_barycentrics = bool(info->builtins_ & BuiltinBits::BARYCENTRIC_COORD); + msl_iface.uses_gl_FrontFacing = bool(info->builtins_ & BuiltinBits::FRONT_FACING) || + shd_builder_->glsl_fragment_source_.find("gl_FrontFacing") != + std::string::npos; + + /* NOTE(Metal): If FragColor is not used, then we treat the first fragment output attachment + * as the primary output. */ + msl_iface.uses_gl_FragColor = shd_builder_->glsl_fragment_source_.find("gl_FragColor") != + std::string::npos; + + /* NOTE(Metal): FragDepth output mode specified in create-info 'DepthWrite depth_write_'. + * If parsing without create-info, manual extraction will be required. */ + msl_iface.uses_gl_FragDepth = shd_builder_->glsl_fragment_source_.find("gl_FragDepth") != + std::string::npos; + msl_iface.depth_write = info->depth_write_; + } + + /* Generate SSBO vertex fetch mode uniform data hooks. */ + if (msl_iface.uses_ssbo_vertex_fetch_mode) { + msl_iface.prepare_ssbo_vertex_fetch_uniforms(); + } + + /* Extract gl_ClipDistances. */ + static std::regex gl_clipdistance_find("gl_ClipDistance\\[([0-9])\\]"); + + std::string clip_search_str = shd_builder_->glsl_vertex_source_; + std::smatch vertex_clip_distances; + + while (std::regex_search(clip_search_str, vertex_clip_distances, gl_clipdistance_find)) { + shader_debug_printf("VERTEX CLIP DISTANCES FOUND: str: %s\n", + vertex_clip_distances[1].str().c_str()); + auto found = std::find(msl_iface.clip_distances.begin(), + msl_iface.clip_distances.end(), + vertex_clip_distances[1].str()); + if (found == msl_iface.clip_distances.end()) { + msl_iface.clip_distances.append(vertex_clip_distances[1].str()); + } + clip_search_str = vertex_clip_distances.suffix(); + } + shd_builder_->glsl_vertex_source_ = std::regex_replace( + shd_builder_->glsl_vertex_source_, gl_clipdistance_find, "gl_ClipDistance_$1"); + + /* Replace 'out' attribute on function parameters with pass-by-reference. */ + replace_outvars(shd_builder_->glsl_vertex_source_); + if (!msl_iface.uses_transform_feedback) { + replace_outvars(shd_builder_->glsl_fragment_source_); + } + + /**** METAL Shader source generation. ****/ + /* Setup `stringstream` for populating generated MSL shader vertex/frag shaders. */ + std::stringstream ss_vertex; + std::stringstream ss_fragment; + + /*** Generate VERTEX Stage ***/ + /* Conditional defines. 
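A compact sketch of the gl_ClipDistance handling above: collect the set of clip-plane indices the vertex source writes, then rename each access to the per-index global declared later in the class wrapper. The input string is illustrative.

#include <iostream>
#include <regex>
#include <set>
#include <string>

int main()
{
  std::string src = "gl_ClipDistance[0] = d0; gl_ClipDistance[1] = d1;";
  static const std::regex clip_re("gl_ClipDistance\\[([0-9])\\]");

  /* Gather distinct clip-plane indices. */
  std::set<std::string> indices;
  std::string search = src;
  std::smatch match;
  while (std::regex_search(search, match, clip_re)) {
    indices.insert(match[1].str());
    search = match.suffix().str();
  }

  /* Rewrite accesses to the per-index globals. */
  src = std::regex_replace(src, clip_re, "gl_ClipDistance_$1");

  std::cout << indices.size() << " clip distance(s) used\n" << src << "\n";
  return 0;
}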
*/ + if (msl_iface.use_argument_buffer_for_samplers()) { + ss_vertex << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl; + ss_vertex << "#define ARGUMENT_BUFFER_NUM_SAMPLERS " + << msl_iface.num_samplers_for_stage(ShaderStage::VERTEX) << std::endl; + } + if (msl_iface.uses_ssbo_vertex_fetch_mode) { + ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl; + ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_MAX_VBOS " << MTL_SSBO_VERTEX_FETCH_MAX_VBOS + << std::endl; + ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_IBO_INDEX " << MTL_SSBO_VERTEX_FETCH_IBO_INDEX + << std::endl; + for (const MSLVertexInputAttribute &attr : msl_iface.vertex_input_attributes) { + ss_vertex << "#define SSBO_ATTR_TYPE_" << attr.name << " " << attr.type << std::endl; + } + + /* Macro's */ + ss_vertex << "#define " + "UNIFORM_SSBO_USES_INDEXED_RENDERING_STR " UNIFORM_SSBO_USES_INDEXED_RENDERING_STR + "\n" + "#define UNIFORM_SSBO_INDEX_MODE_U16_STR " UNIFORM_SSBO_INDEX_MODE_U16_STR + "\n" + "#define UNIFORM_SSBO_INPUT_PRIM_TYPE_STR " UNIFORM_SSBO_INPUT_PRIM_TYPE_STR + "\n" + "#define UNIFORM_SSBO_INPUT_VERT_COUNT_STR " UNIFORM_SSBO_INPUT_VERT_COUNT_STR + "\n" + "#define UNIFORM_SSBO_OFFSET_STR " UNIFORM_SSBO_OFFSET_STR + "\n" + "#define UNIFORM_SSBO_STRIDE_STR " UNIFORM_SSBO_STRIDE_STR + "\n" + "#define UNIFORM_SSBO_FETCHMODE_STR " UNIFORM_SSBO_FETCHMODE_STR + "\n" + "#define UNIFORM_SSBO_VBO_ID_STR " UNIFORM_SSBO_VBO_ID_STR + "\n" + "#define UNIFORM_SSBO_TYPE_STR " UNIFORM_SSBO_TYPE_STR "\n"; + } + + /* Inject common Metal header. */ + ss_vertex << msl_iface.msl_patch_default_get() << std::endl << std::endl; + +#ifndef NDEBUG + /* Performance warning: Extract global-scope expressions. + * NOTE: This is dependent on stripping out comments + * to remove false positives. */ + remove_multiline_comments_func(shd_builder_->glsl_vertex_source_); + remove_singleline_comments_func(shd_builder_->glsl_vertex_source_); + extract_global_scope_constants(shd_builder_->glsl_vertex_source_, ss_vertex); +#endif + + /* Generate additional shader interface struct members from create-info. */ + for (const StageInterfaceInfo *iface : info->vertex_out_interfaces_) { + + /* Only generate struct for ones with instance names */ + if (!iface->instance_name.is_empty()) { + ss_vertex << "struct " << iface->name << " {" << std::endl; + for (const StageInterfaceInfo::InOut &inout : iface->inouts) { + ss_vertex << to_string(inout.type) << " " << inout.name << " " + << to_string_msl(inout.interp) << ";" << std::endl; + } + ss_vertex << "};" << std::endl; + } + } + + /* Wrap entire GLSL source inside class to create + * a scope within the class to enable use of global variables. + * e.g. global access to attributes, uniforms, UBOs, textures etc; */ + ss_vertex << "class " << get_stage_class_name(ShaderStage::VERTEX) << " {" << std::endl; + ss_vertex << "public:" << std::endl; + + /* Generate additional shader interface struct members from create-info. */ + for (const StageInterfaceInfo *iface : info->vertex_out_interfaces_) { + + bool is_inside_struct = false; + if (!iface->instance_name.is_empty()) { + /* If shader stage interface has an instance name, then it + * is using a struct format and as such we only need a local + * class member for the struct, not each element. */ + ss_vertex << iface->name << " " << iface->instance_name << ";" << std::endl; + is_inside_struct = true; + } + + /* Generate local variables, populate elems for vertex out struct gen. 
*/ + for (const StageInterfaceInfo::InOut &inout : iface->inouts) { + + /* Only output individual elements if they are not part of an interface struct instance. */ + if (!is_inside_struct) { + ss_vertex << to_string(inout.type) << " " << inout.name << ";" << std::endl; + } + + const char *arraystart = strstr(inout.name.c_str(), "["); + bool is_array = (arraystart != nullptr); + int array_len = (is_array) ? std::stoi(std::regex_replace( + arraystart, remove_non_numeric_characters, "")) : + 0; + + /* Remove array from string name. */ + std::string out_name = inout.name.c_str(); + std::size_t pos = out_name.find('['); + if (is_array && pos != std::string::npos) { + out_name.resize(pos); + } + + /* Add to vertex-output interface. */ + msl_iface.vertex_output_varyings.append( + {to_string(inout.type), + out_name.c_str(), + ((is_inside_struct) ? iface->instance_name.c_str() : ""), + to_string(inout.interp), + is_array, + array_len}); + + /* Add to fragment-input interface. */ + msl_iface.fragment_input_varyings.append( + {to_string(inout.type), + out_name.c_str(), + ((is_inside_struct) ? iface->instance_name.c_str() : ""), + to_string(inout.interp), + is_array, + array_len}); + } + } + + /** Generate structs from MSL Interface. **/ + /* Generate VertexIn struct. */ + if (!msl_iface.uses_ssbo_vertex_fetch_mode) { + ss_vertex << msl_iface.generate_msl_vertex_in_struct(); + } + /* Generate Uniform data structs. */ + ss_vertex << msl_iface.generate_msl_uniform_structs(ShaderStage::VERTEX); + + /* Conditionally use global GL variables. */ + if (msl_iface.uses_gl_Position) { + ss_vertex << "float4 gl_Position;" << std::endl; + } + if (msl_iface.uses_gl_PointSize) { + ss_vertex << "float gl_PointSize = 1.0;" << std::endl; + } + if (msl_iface.uses_gl_VertexID) { + ss_vertex << "int gl_VertexID;" << std::endl; + } + if (msl_iface.uses_gl_InstanceID) { + ss_vertex << "int gl_InstanceID;" << std::endl; + } + if (msl_iface.uses_gl_BaseInstanceARB) { + ss_vertex << "int gl_BaseInstanceARB;" << std::endl; + } + for (const int cd : IndexRange(msl_iface.clip_distances.size())) { + ss_vertex << "float gl_ClipDistance_" << cd << ";" << std::endl; + } + + /* Render target array index if using multilayered rendering. */ + if (msl_iface.uses_mtl_array_index_) { + ss_vertex << "int MTLRenderTargetArrayIndex = 0;" << std::endl; + } + + /* Global vertex data pointers when using SSBO vertex fetch mode. + * Bound vertex buffers passed in via the entry point function + * are assigned to these pointers to be globally accessible + * from any function within the GLSL source shader. */ + if (msl_iface.uses_ssbo_vertex_fetch_mode) { + ss_vertex << "constant uchar** MTL_VERTEX_DATA;" << std::endl; + ss_vertex << "constant ushort* MTL_INDEX_DATA_U16 = nullptr;" << std::endl; + ss_vertex << "constant uint32_t* MTL_INDEX_DATA_U32 = nullptr;" << std::endl; + } + + /* Add Texture members. + * These members pack both a texture and a sampler into a single + * struct, as both are needed within texture functions. + * e.g. `_mtl_combined_image_sampler_2d<float, access::read>` + * The exact typename is generated inside `get_msl_typestring_wrapper()`. */ + for (const MSLTextureSampler &tex : msl_iface.texture_samplers) { + if (bool(tex.stage & ShaderStage::VERTEX)) { + ss_vertex << "\tthread " << tex.get_msl_typestring_wrapper(false) << ";" << std::endl; + } + } + ss_vertex << std::endl; + + /* Inject main GLSL source into output stream. 
*/ + ss_vertex << shd_builder_->glsl_vertex_source_ << std::endl; + + /* Generate VertexOut and TransformFeedbackOutput structs. */ + ss_vertex << msl_iface.generate_msl_vertex_out_struct(ShaderStage::VERTEX); + if (msl_iface.uses_transform_feedback) { + ss_vertex << msl_iface.generate_msl_vertex_transform_feedback_out_struct(ShaderStage::VERTEX); + } + + /* Class Closing Bracket to end shader global scope. */ + ss_vertex << "};" << std::endl; + + /* Generate Vertex shader entry-point function containing resource bindings. */ + ss_vertex << msl_iface.generate_msl_vertex_entry_stub(); + + /*** Generate FRAGMENT Stage. ***/ + if (!msl_iface.uses_transform_feedback) { + + /* Conditional defines. */ + if (msl_iface.use_argument_buffer_for_samplers()) { + ss_fragment << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl; + ss_fragment << "#define ARGUMENT_BUFFER_NUM_SAMPLERS " + << msl_iface.num_samplers_for_stage(ShaderStage::FRAGMENT) << std::endl; + } + + /* Inject common Metal header. */ + ss_fragment << msl_iface.msl_patch_default_get() << std::endl << std::endl; + +#ifndef NDEBUG + /* Performance warning: Identify global-scope expressions. + * These cause excessive register pressure due to global arrays being instantiated per-thread. + * NOTE: This is dependent on stripping out comments to remove false positives. */ + remove_multiline_comments_func(shd_builder_->glsl_fragment_source_); + remove_singleline_comments_func(shd_builder_->glsl_fragment_source_); + extract_global_scope_constants(shd_builder_->glsl_fragment_source_, ss_fragment); +#endif + + /* Generate additional shader interface struct members from create-info. */ + for (const StageInterfaceInfo *iface : info->vertex_out_interfaces_) { + + /* Only generate struct for ones with instance names. */ + if (!iface->instance_name.is_empty()) { + ss_fragment << "struct " << iface->name << " {" << std::endl; + for (const StageInterfaceInfo::InOut &inout : iface->inouts) { + ss_fragment << to_string(inout.type) << " " << inout.name << "" + << to_string_msl(inout.interp) << ";" << std::endl; + } + ss_fragment << "};" << std::endl; + } + } + + /* Wrap entire GLSL source inside class to create + * a scope within the class to enable use of global variables. */ + ss_fragment << "class " << get_stage_class_name(ShaderStage::FRAGMENT) << " {" << std::endl; + ss_fragment << "public:" << std::endl; + + /* In/out interface values */ + /* Generate additional shader interface struct members from create-info. */ + for (const StageInterfaceInfo *iface : info->vertex_out_interfaces_) { + bool is_inside_struct = false; + if (!iface->instance_name.is_empty()) { + /* Struct local variable. */ + ss_fragment << iface->name << " " << iface->instance_name << ";" << std::endl; + is_inside_struct = true; + } + + /* Generate local variables, populate elems for vertex out struct gen. */ + for (const StageInterfaceInfo::InOut &inout : iface->inouts) { + /* Only output individual elements if they are not part of an interface struct instance. + */ + if (!is_inside_struct) { + ss_fragment << to_string(inout.type) << " " << inout.name << ";" << std::endl; + } + } + } + + /* Generate global structs */ + ss_fragment << msl_iface.generate_msl_vertex_out_struct(ShaderStage::FRAGMENT); + ss_fragment << msl_iface.generate_msl_fragment_out_struct(); + ss_fragment << msl_iface.generate_msl_uniform_structs(ShaderStage::FRAGMENT); + + /** GL globals. */ + /* gl_FragCoord will always be assigned to the output position from vertex shading. 
*/ + ss_fragment << "float4 gl_FragCoord;" << std::endl; + if (msl_iface.uses_gl_FragColor) { + ss_fragment << "float4 gl_FragColor;" << std::endl; + } + if (msl_iface.uses_gl_FragDepth) { + ss_fragment << "float gl_FragDepth;" << std::endl; + } + if (msl_iface.uses_gl_PointCoord) { + ss_fragment << "float2 gl_PointCoord;" << std::endl; + } + if (msl_iface.uses_gl_FrontFacing) { + ss_fragment << "MTLBOOL gl_FrontFacing;" << std::endl; + } + + /* Add Texture members. */ + for (const MSLTextureSampler &tex : msl_iface.texture_samplers) { + if (bool(tex.stage & ShaderStage::FRAGMENT)) { + ss_fragment << "\tthread " << tex.get_msl_typestring_wrapper(false) << ";" << std::endl; + } + } + + /* Inject Main GLSL Fragment Source into output stream. */ + ss_fragment << shd_builder_->glsl_fragment_source_ << std::endl; + + /* Class Closing Bracket to end shader global scope. */ + ss_fragment << "};" << std::endl; + + /* Generate Fragment entry-point function. */ + ss_fragment << msl_iface.generate_msl_fragment_entry_stub(); + } + + /* DEBUG: Export source to file for manual verification. */ +#if MTL_SHADER_DEBUG_EXPORT_SOURCE + NSFileManager *sharedFM = [NSFileManager defaultManager]; + NSURL *app_bundle_url = [[NSBundle mainBundle] bundleURL]; + NSURL *shader_dir = [[app_bundle_url URLByDeletingLastPathComponent] + URLByAppendingPathComponent:@"Shaders/" + isDirectory:YES]; + [sharedFM createDirectoryAtURL:shader_dir + withIntermediateDirectories:YES + attributes:nil + error:nil]; + const char *path_cstr = [shader_dir fileSystemRepresentation]; + + std::ofstream vertex_fs; + vertex_fs.open( + (std::string(path_cstr) + "/" + std::string(this->name) + "_GeneratedVertexShader.msl") + .c_str()); + vertex_fs << ss_vertex.str(); + vertex_fs.close(); + + if (!msl_iface.uses_transform_feedback) { + std::ofstream fragment_fs; + fragment_fs.open( + (std::string(path_cstr) + "/" + std::string(this->name) + "_GeneratedFragmentShader.msl") + .c_str()); + fragment_fs << ss_fragment.str(); + fragment_fs.close(); + } + + shader_debug_printf( + "Vertex Shader Saved to: %s\n", + (std::string(path_cstr) + std::string(this->name) + "_GeneratedFragmentShader.msl").c_str()); +#endif + + /* Set MSL source NSString's. Required by Metal API. */ + NSString *msl_final_vert = [NSString stringWithCString:ss_vertex.str().c_str() + encoding:[NSString defaultCStringEncoding]]; + NSString *msl_final_frag = (msl_iface.uses_transform_feedback) ? + (@"") : + ([NSString stringWithCString:ss_fragment.str().c_str() + encoding:[NSString defaultCStringEncoding]]); + + this->shader_source_from_msl(msl_final_vert, msl_final_frag); + shader_debug_printf("[METAL] BSL Converted into MSL\n"); + +#ifndef NDEBUG + /* In debug mode, we inject the name of the shader into the entry-point function + * name, as these are what show up in the Xcode GPU debugger. */ + this->set_vertex_function_name( + [[NSString stringWithFormat:@"vertex_function_entry_%s", this->name] retain]); + this->set_fragment_function_name( + [[NSString stringWithFormat:@"fragment_function_entry_%s", this->name] retain]); +#else + this->set_vertex_function_name(@"vertex_function_entry"); + this->set_fragment_function_name(@"fragment_function_entry"); +#endif + + /* Bake shader interface. */ + this->set_interface(msl_iface.bake_shader_interface(this->name)); + + /* Update other shader properties. 
*/ + uses_mtl_array_index_ = msl_iface.uses_mtl_array_index_; + use_ssbo_vertex_fetch_mode_ = msl_iface.uses_ssbo_vertex_fetch_mode; + if (msl_iface.uses_ssbo_vertex_fetch_mode) { + ssbo_vertex_fetch_output_prim_type_ = vertex_fetch_ssbo_output_prim_type; + ssbo_vertex_fetch_output_num_verts_ = vertex_fetch_ssbo_num_output_verts; + this->prepare_ssbo_vertex_fetch_metadata(); + } + + /* Successfully completed GLSL to MSL translation. */ + return true; +} + +constexpr size_t const_strlen(const char *str) +{ + return (*str == '\0') ? 0 : const_strlen(str + 1) + 1; +} + +void MTLShader::prepare_ssbo_vertex_fetch_metadata() +{ + BLI_assert(use_ssbo_vertex_fetch_mode_); + + /* Cache global SSBO-vertex-fetch uniforms locations. */ + const ShaderInput *inp_prim_type = interface->uniform_get(UNIFORM_SSBO_INPUT_PRIM_TYPE_STR); + const ShaderInput *inp_vert_count = interface->uniform_get(UNIFORM_SSBO_INPUT_VERT_COUNT_STR); + const ShaderInput *inp_uses_indexed_rendering = interface->uniform_get( + UNIFORM_SSBO_USES_INDEXED_RENDERING_STR); + const ShaderInput *inp_uses_index_mode_u16 = interface->uniform_get( + UNIFORM_SSBO_INDEX_MODE_U16_STR); + + this->uni_ssbo_input_prim_type_loc = (inp_prim_type != nullptr) ? inp_prim_type->location : -1; + this->uni_ssbo_input_vert_count_loc = (inp_vert_count != nullptr) ? inp_vert_count->location : + -1; + this->uni_ssbo_uses_indexed_rendering = (inp_uses_indexed_rendering != nullptr) ? + inp_uses_indexed_rendering->location : + -1; + this->uni_ssbo_uses_index_mode_u16 = (inp_uses_index_mode_u16 != nullptr) ? + inp_uses_index_mode_u16->location : + -1; + + BLI_assert_msg(this->uni_ssbo_input_prim_type_loc != -1, + "uni_ssbo_input_prim_type_loc uniform location invalid!"); + BLI_assert_msg(this->uni_ssbo_input_vert_count_loc != -1, + "uni_ssbo_input_vert_count_loc uniform location invalid!"); + BLI_assert_msg(this->uni_ssbo_uses_indexed_rendering != -1, + "uni_ssbo_uses_indexed_rendering uniform location invalid!"); + BLI_assert_msg(this->uni_ssbo_uses_index_mode_u16 != -1, + "uni_ssbo_uses_index_mode_u16 uniform location invalid!"); + + /* Prepare SSBO-vertex-fetch attribute uniform location cache. */ + MTLShaderInterface *mtl_interface = this->get_interface(); + for (int i = 0; i < mtl_interface->get_total_attributes(); i++) { + const MTLShaderInputAttribute &mtl_shader_attribute = mtl_interface->get_attribute(i); + const char *attr_name = mtl_interface->get_name_at_offset(mtl_shader_attribute.name_offset); + + /* SSBO-vertex-fetch Attribute data is passed via uniforms. here we need to extract the uniform + * address for each attribute, and we can cache it for later use. 
*/ + ShaderSSBOAttributeBinding &cached_ssbo_attr = cached_ssbo_attribute_bindings_[i]; + cached_ssbo_attr.attribute_index = i; + + constexpr int len_UNIFORM_SSBO_STRIDE_STR = const_strlen(UNIFORM_SSBO_STRIDE_STR); + constexpr int len_UNIFORM_SSBO_OFFSET_STR = const_strlen(UNIFORM_SSBO_OFFSET_STR); + constexpr int len_UNIFORM_SSBO_FETCHMODE_STR = const_strlen(UNIFORM_SSBO_FETCHMODE_STR); + constexpr int len_UNIFORM_SSBO_VBO_ID_STR = const_strlen(UNIFORM_SSBO_VBO_ID_STR); + constexpr int len_UNIFORM_SSBO_TYPE_STR = const_strlen(UNIFORM_SSBO_TYPE_STR); + + char strattr_buf_stride[GPU_VERT_ATTR_MAX_LEN + len_UNIFORM_SSBO_STRIDE_STR + 1] = + UNIFORM_SSBO_STRIDE_STR; + char strattr_buf_offset[GPU_VERT_ATTR_MAX_LEN + len_UNIFORM_SSBO_OFFSET_STR + 1] = + UNIFORM_SSBO_OFFSET_STR; + char strattr_buf_fetchmode[GPU_VERT_ATTR_MAX_LEN + len_UNIFORM_SSBO_FETCHMODE_STR + 1] = + UNIFORM_SSBO_FETCHMODE_STR; + char strattr_buf_vbo_id[GPU_VERT_ATTR_MAX_LEN + len_UNIFORM_SSBO_VBO_ID_STR + 1] = + UNIFORM_SSBO_VBO_ID_STR; + char strattr_buf_type[GPU_VERT_ATTR_MAX_LEN + len_UNIFORM_SSBO_TYPE_STR + 1] = + UNIFORM_SSBO_TYPE_STR; + + strcpy(&strattr_buf_stride[len_UNIFORM_SSBO_STRIDE_STR], attr_name); + strcpy(&strattr_buf_offset[len_UNIFORM_SSBO_OFFSET_STR], attr_name); + strcpy(&strattr_buf_fetchmode[len_UNIFORM_SSBO_FETCHMODE_STR], attr_name); + strcpy(&strattr_buf_vbo_id[len_UNIFORM_SSBO_VBO_ID_STR], attr_name); + strcpy(&strattr_buf_type[len_UNIFORM_SSBO_TYPE_STR], attr_name); + + /* Fetch uniform locations and cache for fast access. */ + const ShaderInput *inp_unf_stride = mtl_interface->uniform_get(strattr_buf_stride); + const ShaderInput *inp_unf_offset = mtl_interface->uniform_get(strattr_buf_offset); + const ShaderInput *inp_unf_fetchmode = mtl_interface->uniform_get(strattr_buf_fetchmode); + const ShaderInput *inp_unf_vbo_id = mtl_interface->uniform_get(strattr_buf_vbo_id); + const ShaderInput *inp_unf_attr_type = mtl_interface->uniform_get(strattr_buf_type); + + BLI_assert(inp_unf_stride != nullptr); + BLI_assert(inp_unf_offset != nullptr); + BLI_assert(inp_unf_fetchmode != nullptr); + BLI_assert(inp_unf_vbo_id != nullptr); + BLI_assert(inp_unf_attr_type != nullptr); + + cached_ssbo_attr.uniform_stride = (inp_unf_stride != nullptr) ? inp_unf_stride->location : -1; + cached_ssbo_attr.uniform_offset = (inp_unf_offset != nullptr) ? inp_unf_offset->location : -1; + cached_ssbo_attr.uniform_fetchmode = (inp_unf_fetchmode != nullptr) ? + inp_unf_fetchmode->location : + -1; + cached_ssbo_attr.uniform_vbo_id = (inp_unf_vbo_id != nullptr) ? inp_unf_vbo_id->location : -1; + cached_ssbo_attr.uniform_attr_type = (inp_unf_attr_type != nullptr) ? + inp_unf_attr_type->location : + -1; + + BLI_assert(cached_ssbo_attr.uniform_offset != -1); + BLI_assert(cached_ssbo_attr.uniform_stride != -1); + BLI_assert(cached_ssbo_attr.uniform_fetchmode != -1); + BLI_assert(cached_ssbo_attr.uniform_vbo_id != -1); + BLI_assert(cached_ssbo_attr.uniform_attr_type != -1); + } +} + +void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateInfo *info) +{ + /** Assign info. */ + create_info_ = info; + + /** Prepare Uniforms. */ + for (const shader::ShaderCreateInfo::PushConst &push_constant : create_info_->push_constants_) { + MSLUniform uniform(push_constant.type, + push_constant.name, + bool(push_constant.array_size > 1), + push_constant.array_size); + uniforms.append(uniform); + } + + /** Prepare textures and uniform blocks. 
+ * Perform across both resource categories and extract both + * texture samplers and image types. */ + for (int i = 0; i < 2; i++) { + const Vector<ShaderCreateInfo::Resource> &resources = (i == 0) ? info->pass_resources_ : + info->batch_resources_; + for (const ShaderCreateInfo::Resource &res : resources) { + /* TODO(Metal): Consider adding stage flags to textures in create info. */ + /* Handle sampler types. */ + switch (res.bind_type) { + case shader::ShaderCreateInfo::Resource::BindType::SAMPLER: { + + /* Samplers to have access::sample by default. */ + MSLTextureSamplerAccess access = MSLTextureSamplerAccess::TEXTURE_ACCESS_SAMPLE; + /* TextureBuffers must have read/write/read-write access pattern. */ + if (res.sampler.type == ImageType::FLOAT_BUFFER || + res.sampler.type == ImageType::INT_BUFFER || + res.sampler.type == ImageType::UINT_BUFFER) { + access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ; + } + BLI_assert(res.slot >= 0 && res.slot < MTL_MAX_TEXTURE_SLOTS); + MSLTextureSampler msl_tex( + ShaderStage::BOTH, res.sampler.type, res.sampler.name, access, res.slot); + texture_samplers.append(msl_tex); + } break; + + case shader::ShaderCreateInfo::Resource::BindType::IMAGE: { + /* Flatten qualifier flags into final access state. */ + MSLTextureSamplerAccess access; + if (bool(res.image.qualifiers & Qualifier::READ_WRITE)) { + access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READWRITE; + } + else if (bool(res.image.qualifiers & Qualifier::WRITE)) { + access = MSLTextureSamplerAccess::TEXTURE_ACCESS_WRITE; + } + else { + access = MSLTextureSamplerAccess::TEXTURE_ACCESS_READ; + } + BLI_assert(res.slot >= 0 && res.slot < MTL_MAX_TEXTURE_SLOTS); + MSLTextureSampler msl_tex( + ShaderStage::BOTH, res.image.type, res.image.name, access, res.slot); + texture_samplers.append(msl_tex); + } break; + + case shader::ShaderCreateInfo::Resource::BindType::UNIFORM_BUFFER: { + MSLUniformBlock ubo; + BLI_assert(res.uniformbuf.type_name.size() > 0); + BLI_assert(res.uniformbuf.name.size() > 0); + int64_t array_offset = res.uniformbuf.name.find_first_of("["); + + ubo.type_name = res.uniformbuf.type_name; + ubo.is_array = (array_offset > -1); + if (ubo.is_array) { + /* If is array UBO, strip out array tag from name. */ + StringRef name_no_array = StringRef(res.uniformbuf.name.c_str(), array_offset); + ubo.name = name_no_array; + } + else { + ubo.name = res.uniformbuf.name; + } + ubo.stage = ShaderStage::VERTEX | ShaderStage::FRAGMENT; + uniform_blocks.append(ubo); + } break; + + case shader::ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER: { + /* TODO(Metal): Support shader storage buffer in Metal. + * Pending compute support. */ + } break; + } + } + } + + /** Vertex Inputs. */ + bool all_attr_location_assigned = true; + for (const ShaderCreateInfo::VertIn &attr : info->vertex_inputs_) { + + /* Validate input. */ + BLI_assert(attr.name.size() > 0); + + /* NOTE(Metal): Input attributes may not have a location specified. + * unset locations are resolved during: `resolve_input_attribute_locations`. */ + MSLVertexInputAttribute msl_attr; + bool attr_location_assigned = (attr.index >= 0); + all_attr_location_assigned = all_attr_location_assigned && attr_location_assigned; + msl_attr.layout_location = attr_location_assigned ? attr.index : -1; + msl_attr.type = attr.type; + msl_attr.name = attr.name; + vertex_input_attributes.append(msl_attr); + } + + /* Ensure all attributes are assigned a location. 
*/ + if (!all_attr_location_assigned) { + this->resolve_input_attribute_locations(); + } + + /** Fragment outputs. */ + for (const shader::ShaderCreateInfo::FragOut &frag_out : create_info_->fragment_outputs_) { + + /* Validate input. */ + BLI_assert(frag_out.name.size() > 0); + BLI_assert(frag_out.index >= 0); + + /* Populate MSLGenerator attribute. */ + MSLFragmentOutputAttribute mtl_frag_out; + mtl_frag_out.layout_location = frag_out.index; + mtl_frag_out.layout_index = (frag_out.blend != DualBlend::NONE) ? + ((frag_out.blend == DualBlend::SRC_0) ? 0 : 1) : + -1; + mtl_frag_out.type = frag_out.type; + mtl_frag_out.name = frag_out.name; + + fragment_outputs.append(mtl_frag_out); + } +} + +bool MSLGeneratorInterface::use_argument_buffer_for_samplers() const +{ + /* We can only use argument buffers IF sampler count exceeds static limit of 16, + * AND we can support more samplers with an argument buffer. */ + return texture_samplers.size() >= 16 && GPU_max_samplers() > 16; +} + +uint32_t MSLGeneratorInterface::num_samplers_for_stage(ShaderStage stage) const +{ + /* NOTE: Sampler bindings and argument buffer shared across stages, + * in case stages share texture/sampler bindings. */ + return texture_samplers.size(); +} + +uint32_t MSLGeneratorInterface::get_sampler_argument_buffer_bind_index(ShaderStage stage) +{ + BLI_assert(stage == ShaderStage::VERTEX || stage == ShaderStage::FRAGMENT); + if (sampler_argument_buffer_bind_index[get_shader_stage_index(stage)] >= 0) { + return sampler_argument_buffer_bind_index[get_shader_stage_index(stage)]; + } + sampler_argument_buffer_bind_index[get_shader_stage_index(stage)] = + (this->uniform_blocks.size() + 1); + return sampler_argument_buffer_bind_index[get_shader_stage_index(stage)]; +} + +void MSLGeneratorInterface::prepare_ssbo_vertex_fetch_uniforms() +{ + BLI_assert(this->uses_ssbo_vertex_fetch_mode); + + /* Add Special Uniforms for SSBO vertex fetch mode. */ + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_INPUT_PRIM_TYPE_STR, false)); + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_INPUT_VERT_COUNT_STR, false)); + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_USES_INDEXED_RENDERING_STR, false)); + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_INDEX_MODE_U16_STR, false)); + + for (const MSLVertexInputAttribute &attr : this->vertex_input_attributes) { + const std::string &uname = attr.name; + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_STRIDE_STR + uname, false)); + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_OFFSET_STR + uname, false)); + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_FETCHMODE_STR + uname, false)); + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_VBO_ID_STR + uname, false)); + this->uniforms.append(MSLUniform(Type::INT, UNIFORM_SSBO_TYPE_STR + uname, false)); + } +} + +std::string MSLGeneratorInterface::generate_msl_vertex_entry_stub() +{ + std::stringstream out; + out << std::endl << "/*** AUTO-GENERATED MSL VERTEX SHADER STUB. ***/" << std::endl; + + /* Un-define texture defines from main source - avoid conflict with MSL texture. */ + out << "#undef texture" << std::endl; + out << "#undef textureLod" << std::endl; + + /* Disable special case for booleans being treated as ints in GLSL. */ + out << "#undef bool" << std::endl; + + /* Un-define uniform mappings to avoid name collisions. */ + out << generate_msl_uniform_undefs(ShaderStage::VERTEX); + + /* Generate function entry point signature w/ resource bindings and inputs.
*/ + out << "vertex "; + if (this->uses_transform_feedback) { + out << "void "; + } + else { + out << get_stage_class_name(ShaderStage::VERTEX) << "::VertexOut "; + } +#ifndef NDEBUG + out << "vertex_function_entry_" << parent_shader_.name_get() << "(\n\t"; +#else + out << "vertex_function_entry(\n\t"; +#endif + + out << this->generate_msl_vertex_inputs_string(); + out << ") {" << std::endl << std::endl; + out << "\tMTLShaderVertexImpl::VertexOut output;" << std::endl + << "\tMTLShaderVertexImpl vertex_shader_instance;" << std::endl; + + /* Copy Vertex Globals. */ + if (this->uses_gl_VertexID) { + out << "vertex_shader_instance.gl_VertexID = gl_VertexID;" << std::endl; + } + if (this->uses_gl_InstanceID) { + out << "vertex_shader_instance.gl_InstanceID = gl_InstanceID-gl_BaseInstanceARB;" << std::endl; + } + if (this->uses_gl_BaseInstanceARB) { + out << "vertex_shader_instance.gl_BaseInstanceARB = gl_BaseInstanceARB;" << std::endl; + } + + /* Copy vertex attributes into local variables. */ + out << this->generate_msl_vertex_attribute_input_population(); + + /* Populate Uniforms and uniform blocks. */ + out << this->generate_msl_texture_vars(ShaderStage::VERTEX); + out << this->generate_msl_global_uniform_population(ShaderStage::VERTEX); + out << this->generate_msl_uniform_block_population(ShaderStage::VERTEX); + + /* Execute original 'main' function within class scope. */ + out << "\t/* Execute Vertex main function */\t" << std::endl + << "\tvertex_shader_instance.main();" << std::endl + << std::endl; + + /* Populate Output values. */ + out << this->generate_msl_vertex_output_population(); + + /* Final point size, + * This is only compiled if the `MTL_global_pointsize` is specified + * as a function specialization in the PSO. This is restricted to + * point primitive types. */ + out << "if(is_function_constant_defined(MTL_global_pointsize)){ output.pointsize = " + "(MTL_global_pointsize > 0.0)?MTL_global_pointsize:output.pointsize; }" + << std::endl; + + /* Populate transform feedback buffer. */ + if (this->uses_transform_feedback) { + out << this->generate_msl_vertex_output_tf_population(); + } + else { + out << "\treturn output;" << std::endl; + } + out << "}"; + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_fragment_entry_stub() +{ + std::stringstream out; + out << std::endl << "/*** AUTO-GENERATED MSL FRAGMENT SHADER STUB. ***/" << std::endl; + + /* Undefine texture defines from main source - avoid conflict with MSL texture. */ + out << "#undef texture" << std::endl; + out << "#undef textureLod" << std::endl; + + /* Disable special case for booleans being treated as integers in GLSL. */ + out << "#undef bool" << std::endl; + + /* Undefine uniform mappings to avoid name collisions. */ + out << generate_msl_uniform_undefs(ShaderStage::FRAGMENT); + + /* Generate function entry point signature w/ resource bindings and inputs. */ +#ifndef NDEBUG + out << "fragment " << get_stage_class_name(ShaderStage::FRAGMENT) + << "::FragmentOut fragment_function_entry_" << parent_shader_.name_get() << "(\n\t"; +#else + out << "fragment " << get_stage_class_name(ShaderStage::FRAGMENT) + << "::FragmentOut fragment_function_entry(\n\t"; +#endif + out << this->generate_msl_fragment_inputs_string(); + out << ") {" << std::endl << std::endl; + out << "\tMTLShaderFragmentImpl::FragmentOut output;" << std::endl + << "\tMTLShaderFragmentImpl fragment_shader_instance;" << std::endl; + + /* Copy Fragment Globals. 
*/ + if (this->uses_gl_PointCoord) { + out << "fragment_shader_instance.gl_PointCoord = gl_PointCoord;" << std::endl; + } + if (this->uses_gl_FrontFacing) { + out << "fragment_shader_instance.gl_FrontFacing = gl_FrontFacing;" << std::endl; + } + + /* Copy vertex attributes into local variable.s */ + out << this->generate_msl_fragment_input_population(); + + /* Barycentrics. */ + if (this->uses_barycentrics) { + + /* Main barycentrics. */ + out << "fragment_shader_instance.gpu_BaryCoord = mtl_barycentric_coord.xyz;"; + + /* barycentricDist represents the world-space distance from the current world-space position + * to the opposite edge of the vertex. */ + out << "float3 worldPos = fragment_shader_instance.worldPosition.xyz;" << std::endl; + out << "float3 wpChange = (length(dfdx(worldPos))+length(dfdy(worldPos)));" << std::endl; + out << "float3 bcChange = " + "(length(dfdx(mtl_barycentric_coord))+length(dfdy(mtl_barycentric_coord)));" + << std::endl; + out << "float3 rateOfChange = wpChange/bcChange;" << std::endl; + + /* Distance to edge using inverse barycentric value, as rather than the length of 0.7 + * contribution, we'd want the distance to the opposite side. */ + out << "fragment_shader_instance.gpu_BarycentricDist.x = length(rateOfChange * " + "(1.0-mtl_barycentric_coord.x));" + << std::endl; + out << "fragment_shader_instance.gpu_BarycentricDist.y = length(rateOfChange * " + "(1.0-mtl_barycentric_coord.y));" + << std::endl; + out << "fragment_shader_instance.gpu_BarycentricDist.z = length(rateOfChange * " + "(1.0-mtl_barycentric_coord.z));" + << std::endl; + } + + /* Populate Uniforms and uniform blocks. */ + out << this->generate_msl_texture_vars(ShaderStage::FRAGMENT); + out << this->generate_msl_global_uniform_population(ShaderStage::FRAGMENT); + out << this->generate_msl_uniform_block_population(ShaderStage::FRAGMENT); + + /* Execute original 'main' function within class scope. */ + out << "\t/* Execute Fragment main function */\t" << std::endl + << "\tfragment_shader_instance.main();" << std::endl + << std::endl; + + /* Populate Output values. */ + out << this->generate_msl_fragment_output_population(); + out << " return output;" << std::endl << "}"; + + return out.str(); +} + +void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream &out, + ShaderStage stage) +{ + BLI_assert(stage == ShaderStage::VERTEX || stage == ShaderStage::FRAGMENT); + /* Generate texture signatures. */ + BLI_assert(this->texture_samplers.size() <= GPU_max_textures_vert()); + for (const MSLTextureSampler &tex : this->texture_samplers) { + if (bool(tex.stage & stage)) { + out << ",\n\t" << tex.get_msl_typestring(false) << " [[texture(" << tex.location << ")]]"; + } + } + + /* Generate sampler signatures. */ + /* NOTE: Currently textures and samplers share indices across shading stages, so the limit is + * shared. + * If we exceed the hardware-supported limit, then follow a bind-less model using argument + * buffers. */ + if (this->use_argument_buffer_for_samplers()) { + out << ",\n\tconstant SStruct& samplers [[buffer(MTL_uniform_buffer_base_index+" + << (this->get_sampler_argument_buffer_bind_index(stage)) << ")]]"; + } + else { + /* Maximum Limit of samplers defined in the function argument table is + * `MTL_MAX_DEFAULT_SAMPLERS=16`. 
*/ + BLI_assert(this->texture_samplers.size() <= MTL_MAX_DEFAULT_SAMPLERS); + for (const MSLTextureSampler &tex : this->texture_samplers) { + if (bool(tex.stage & stage)) { + out << ",\n\tsampler " << tex.name << "_sampler [[sampler(" << tex.location << ")]]"; + } + } + + /* Fallback. */ + if (this->texture_samplers.size() > 16) { + shader_debug_printf( + "[Metal] Warning: Shader exceeds limit of %u samplers on current hardware\n", + MTL_MAX_DEFAULT_SAMPLERS); + } + } +} + +void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream &out, + ShaderStage stage) +{ + int ubo_index = 0; + for (const MSLUniformBlock &ubo : this->uniform_blocks) { + if (bool(ubo.stage & stage)) { + /* For literal/existing global types, we do not need the class name-space accessor. */ + out << ",\n\tconstant "; + if (!is_builtin_type(ubo.type_name)) { + out << get_stage_class_name(stage) << "::"; + } + /* #UniformBuffer bind indices start at `MTL_uniform_buffer_base_index + 1`, as + * MTL_uniform_buffer_base_index is reserved for the #PushConstantBlock (push constants). + * MTL_uniform_buffer_base_index is an offset depending on the number of unique VBOs + * bound for the current PSO specialization. */ + out << ubo.type_name << "* " << ubo.name << "[[buffer(MTL_uniform_buffer_base_index+" + << (ubo_index + 1) << ")]]"; + } + ubo_index++; + } +} + +std::string MSLGeneratorInterface::generate_msl_vertex_inputs_string() +{ + std::stringstream out; + + if (this->uses_ssbo_vertex_fetch_mode) { + /* Vertex Buffers bound as raw buffers. */ + for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) { + out << "\tconstant uchar* MTL_VERTEX_DATA_" << i << " [[buffer(" << i << ")]],\n"; + } + out << "\tconstant ushort* MTL_INDEX_DATA[[buffer(MTL_SSBO_VERTEX_FETCH_IBO_INDEX)]],"; + } + else { + if (this->vertex_input_attributes.size() > 0) { + /* Vertex Buffers use input assembly. */ + out << get_stage_class_name(ShaderStage::VERTEX) << "::VertexIn v_in [[stage_in]],"; + } + } + out << "\n\tconstant " << get_stage_class_name(ShaderStage::VERTEX) + << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]"; + + this->generate_msl_uniforms_input_string(out, ShaderStage::VERTEX); + + /* Transform feedback buffer binding. */ + if (this->uses_transform_feedback) { + out << ",\n\tdevice " << get_stage_class_name(ShaderStage::VERTEX) + << "::VertexOut_TF* " + "transform_feedback_results[[buffer(MTL_transform_feedback_buffer_index)]]"; + } + + /* Generate texture signatures. */ + this->generate_msl_textures_input_string(out, ShaderStage::VERTEX); + + /* Entry point parameters for gl Globals. */ + if (this->uses_gl_VertexID) { + out << ",\n\tconst uint32_t gl_VertexID [[vertex_id]]"; + } + if (this->uses_gl_InstanceID) { + out << ",\n\tconst uint32_t gl_InstanceID [[instance_id]]"; + } + if (this->uses_gl_BaseInstanceARB) { + out << ",\n\tconst uint32_t gl_BaseInstanceARB [[base_instance]]"; + } + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_fragment_inputs_string() +{ + std::stringstream out; + out << get_stage_class_name(ShaderStage::FRAGMENT) + << "::VertexOut v_in [[stage_in]],\n\tconstant " + << get_stage_class_name(ShaderStage::FRAGMENT) + << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]"; + + this->generate_msl_uniforms_input_string(out, ShaderStage::FRAGMENT); + + /* Generate texture signatures. 
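Putting the binding comments above together, the buffer argument table for a stage is laid out as: the push-constant block at the base index, uniform blocks immediately after it, and, when argument buffers are in use, the sampler argument buffer one slot past the last uniform block. The sketch below just prints that layout for hypothetical counts; the concrete base index depends on how many VBOs the PSO binds and is an assumption here.

#include <iostream>

int main()
{
  const int uniform_buffer_base_index = 4; /* Assumed; depends on bound VBO count. */
  const int num_uniform_blocks = 2;        /* Assumed. */

  std::cout << "push constants  -> buffer(" << uniform_buffer_base_index << ")\n";
  for (int i = 0; i < num_uniform_blocks; i++) {
    std::cout << "uniform block " << i << " -> buffer(" << (uniform_buffer_base_index + 1 + i)
              << ")\n";
  }
  std::cout << "sampler arg-buf -> buffer("
            << (uniform_buffer_base_index + num_uniform_blocks + 1) << ")\n";
  return 0;
}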
*/ + this->generate_msl_textures_input_string(out, ShaderStage::FRAGMENT); + + if (this->uses_gl_PointCoord) { + out << ",\n\tconst float2 gl_PointCoord [[point_coord]]"; + } + if (this->uses_gl_FrontFacing) { + out << ",\n\tconst MTLBOOL gl_FrontFacing [[front_facing]]"; + } + + /* Barycentrics. */ + if (this->uses_barycentrics) { + out << ",\n\tconst float3 mtl_barycentric_coord [[barycentric_coord]]"; + } + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_uniform_structs(ShaderStage shader_stage) +{ + BLI_assert(shader_stage == ShaderStage::VERTEX || shader_stage == ShaderStage::FRAGMENT); + std::stringstream out; + + /* Common Uniforms. */ + out << "typedef struct {" << std::endl; + + for (const MSLUniform &uniform : this->uniforms) { + if (uniform.is_array) { + out << "\t" << to_string(uniform.type) << " " << uniform.name << "[" << uniform.array_elems + << "];" << std::endl; + } + else { + out << "\t" << to_string(uniform.type) << " " << uniform.name << ";" << std::endl; + } + } + out << "} PushConstantBlock;\n\n"; + + /* Member UBO block reference. */ + out << std::endl << "const constant PushConstantBlock *global_uniforms;" << std::endl; + + /* Macro define chain. + * To access uniforms, we generate a macro such that the uniform name can + * be used directly without using the struct's handle. */ + for (const MSLUniform &uniform : this->uniforms) { + out << "#define " << uniform.name << " global_uniforms->" << uniform.name << std::endl; + } + out << std::endl; + return out.str(); +} + +/* NOTE: Uniform macro definition vars can conflict with other parameters. */ +std::string MSLGeneratorInterface::generate_msl_uniform_undefs(ShaderStage shader_stage) +{ + std::stringstream out; + + /* Macro undef chain. */ + for (const MSLUniform &uniform : this->uniforms) { + out << "#undef " << uniform.name << std::endl; + } + /* UBO block undef. */ + for (const MSLUniformBlock &ubo : this->uniform_blocks) { + out << "#undef " << ubo.name << std::endl; + } + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_vertex_in_struct() +{ + std::stringstream out; + + /* Skip struct if no vert attributes. */ + if (this->vertex_input_attributes.size() == 0) { + return ""; + } + + /* Output */ + out << "typedef struct {" << std::endl; + for (const MSLVertexInputAttribute &in_attr : this->vertex_input_attributes) { + /* Matrix and array attributes are not trivially supported and thus + * require each element to be passed as an individual attribute. + * This requires shader source generation of sequential elements. + * The matrix type is then re-packed into a Mat4 inside the entry function. + * + * e.g. 
+ * float4 __internal_modelmatrix_0 [[attribute(0)]]; + * float4 __internal_modelmatrix_1 [[attribute(1)]]; + * float4 __internal_modelmatrix_2 [[attribute(2)]]; + * float4 __internal_modelmatrix_3 [[attribute(3)]]; + */ + if (is_matrix_type(in_attr.type) && !this->uses_ssbo_vertex_fetch_mode) { + for (int elem = 0; elem < get_matrix_location_count(in_attr.type); elem++) { + out << "\t" << get_matrix_subtype(in_attr.type) << " __internal_" << in_attr.name << elem + << " [[attribute(" << (in_attr.layout_location + elem) << ")]];" << std::endl; + } + } + else { + out << "\t" << in_attr.type << " " << in_attr.name << " [[attribute(" + << in_attr.layout_location << ")]];" << std::endl; + } + } + + out << "} VertexIn;" << std::endl << std::endl; + + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_vertex_out_struct(ShaderStage shader_stage) +{ + BLI_assert(shader_stage == ShaderStage::VERTEX || shader_stage == ShaderStage::FRAGMENT); + std::stringstream out; + + /* Vertex output struct. */ + out << "typedef struct {" << std::endl; + + /* If we use GL position, our standard output variable will be mapped to '_default_position_'. + * Otherwise, we use the FIRST element in the output array. + * If transform feedback is enabled, we do not need to output position, unless it + * is explicitly specified as a tf output. */ + bool first_attr_is_position = false; + if (this->uses_gl_Position) { + out << "\tfloat4 _default_position_ [[position]];" << std::endl; + } + else { + if (!this->uses_transform_feedback) { + /* Use first output element for position. */ + BLI_assert(this->vertex_output_varyings.size() > 0); + BLI_assert(this->vertex_output_varyings[0].type == "vec4"); + out << "\tfloat4 " << this->vertex_output_varyings[0].name << " [[position]];" << std::endl; + first_attr_is_position = true; + } + } + + /* Generate other vertex output members. */ + bool skip_first_index = first_attr_is_position; + for (const MSLVertexOutputAttribute &v_out : this->vertex_output_varyings) { + + /* Skip first index if used for position. */ + if (skip_first_index) { + skip_first_index = false; + continue; + } + + if (v_out.is_array) { + /* Array types cannot be trivially passed between shading stages. + * Instead we pass each component individually. E.g. vec4 pos[2] + * will be converted to: `vec4 pos_0; vec4 pos_1;` + * The specified interpolation qualifier will be applied per element. */ + /* TODO(Metal): Support array of matrix in-out types if required + * e.g. Mat4 out_matrices[3]. */ + for (int i = 0; i < v_out.array_elems; i++) { + out << "\t" << v_out.type << " " << v_out.instance_name << "_" << v_out.name << i + << v_out.get_mtl_interpolation_qualifier() << ";" << std::endl; + } + } + else { + /* Matrix types need to be expressed as their vector sub-components. */ + if (is_matrix_type(v_out.type)) { + BLI_assert(v_out.get_mtl_interpolation_qualifier() == " [[flat]]" && + "Matrix varying types must have [[flat]] interpolation"); + std::string subtype = get_matrix_subtype(v_out.type); + for (int elem = 0; elem < get_matrix_location_count(v_out.type); elem++) { + out << "\t" << subtype << v_out.instance_name << " __matrix_" << v_out.name << elem + << v_out.get_mtl_interpolation_qualifier() << ";" << std::endl; + } + } + else { + out << "\t" << v_out.type << " " << v_out.instance_name << "_" << v_out.name + << v_out.get_mtl_interpolation_qualifier() << ";" << std::endl; + } + } + } + + /* Add gl_PointSize if written to. 
*/ + if (shader_stage == ShaderStage::VERTEX) { + if (this->uses_gl_PointSize) { + /* If `gl_PointSize` is explicitly written to, + * we will output the written value directly. + * This value can still be overridden by the + * global point-size value. */ + out << "\tfloat pointsize [[point_size]];" << std::endl; + } + else { + /* Otherwise, if point-size is not written to inside the shader, + * then its usage is controlled by whether the `MTL_global_pointsize` + * function constant has been specified. + * This function constant is enabled for all point primitives being rendered. */ + out << "\tfloat pointsize [[point_size, function_constant(MTL_global_pointsize)]];" + << std::endl; + } + } + + /* Add gl_ClipDistance[n]. */ + if (shader_stage == ShaderStage::VERTEX) { + out << "#if defined(USE_CLIP_PLANES) || defined(USE_WORLD_CLIP_PLANES)" << std::endl; + if (this->clip_distances.size() > 1) { + /* Output array of clip distances if specified. */ + out << "\tfloat clipdistance [[clip_distance]] [" << this->clip_distances.size() << "];" + << std::endl; + } + else if (this->clip_distances.size() > 0) { + out << "\tfloat clipdistance [[clip_distance]];" << std::endl; + } + out << "#endif" << std::endl; + } + + /* Add MTL render target array index for multilayered rendering support. */ + if (uses_mtl_array_index_) { + out << "\tuint MTLRenderTargetArrayIndex [[render_target_array_index]];" << std::endl; + } + + out << "} VertexOut;" << std::endl << std::endl; + + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_vertex_transform_feedback_out_struct( + ShaderStage shader_stage) +{ + BLI_assert(shader_stage == ShaderStage::VERTEX || shader_stage == ShaderStage::FRAGMENT); + std::stringstream out; + vertex_output_varyings_tf.clear(); + + out << "typedef struct {" << std::endl; + + /* If we use GL position, our standard output variable will be mapped to '_default_position_'. + * Otherwise, we use the FIRST element in the output array -- If transform feedback is enabled, + * we do not need to output position */ + bool first_attr_is_position = false; + if (this->uses_gl_Position) { + + if (parent_shader_.has_transform_feedback_varying("gl_Position")) { + out << "\tfloat4 pos [[position]];" << std::endl; + vertex_output_varyings_tf.append({.type = "vec4", + .name = "gl_Position", + .interpolation_qualifier = "", + .is_array = false, + .array_elems = 1}); + } + } + else { + if (!this->uses_transform_feedback) { + /* Use first output element for position */ + BLI_assert(this->vertex_output_varyings.size() > 0); + BLI_assert(this->vertex_output_varyings[0].type == "vec4"); + first_attr_is_position = true; + } + } + + /* Generate other vertex outputs. */ + bool skip_first_index = first_attr_is_position; + for (const MSLVertexOutputAttribute &v_out : this->vertex_output_varyings) { + + /* Skip first index if used for position. */ + if (skip_first_index) { + skip_first_index = false; + continue; + } + + if (!parent_shader_.has_transform_feedback_varying(v_out.name)) { + continue; + } + vertex_output_varyings_tf.append(v_out); + + if (v_out.is_array) { + /* TODO(Metal): Support array of matrix types if required. */ + for (int i = 0; i < v_out.array_elems; i++) { + out << "\t" << v_out.type << " " << v_out.name << i + << v_out.get_mtl_interpolation_qualifier() << ";" << std::endl; + } + } + else { + /* Matrix types need to be expressed as their vector sub-components. 
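+       * e.g. a hypothetical `mat4` varying named `model_mat` is expected to
+       * expand into four rows:
+       *   `float4 __matrix_model_mat0 [[flat]]; ... float4 __matrix_model_mat3 [[flat]];`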
*/ + if (is_matrix_type(v_out.type)) { + BLI_assert(v_out.get_mtl_interpolation_qualifier() == " [[flat]]" && + "Matrix varying types must have [[flat]] interpolation"); + std::string subtype = get_matrix_subtype(v_out.type); + for (int elem = 0; elem < get_matrix_location_count(v_out.type); elem++) { + out << "\t" << subtype << " __matrix_" << v_out.name << elem + << v_out.get_mtl_interpolation_qualifier() << ";" << std::endl; + } + } + else { + out << "\t" << v_out.type << " " << v_out.name << v_out.get_mtl_interpolation_qualifier() + << ";" << std::endl; + } + } + } + + out << "} VertexOut_TF;" << std::endl << std::endl; + + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_fragment_out_struct() +{ + std::stringstream out; + + /* Output. */ + out << "typedef struct {" << std::endl; + for (int f_output = 0; f_output < this->fragment_outputs.size(); f_output++) { + out << "\t" << to_string(this->fragment_outputs[f_output].type) << " " + << this->fragment_outputs[f_output].name << " [[color(" + << this->fragment_outputs[f_output].layout_location << ")"; + if (this->fragment_outputs[f_output].layout_index >= 0) { + out << ", index(" << this->fragment_outputs[f_output].layout_index << ")"; + } + out << "]]" + << ";" << std::endl; + } + /* Add gl_FragDepth output if used. */ + if (this->uses_gl_FragDepth) { + std::string out_depth_argument = ((this->depth_write == DepthWrite::GREATER) ? + "greater" : + ((this->depth_write == DepthWrite::LESS) ? "less" : + "any")); + out << "\tfloat fragdepth [[depth(" << out_depth_argument << ")]];" << std::endl; + } + + out << "} FragmentOut;" << std::endl; + out << std::endl; + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_global_uniform_population(ShaderStage stage) +{ + /* Populate Global Uniforms. */ + std::stringstream out; + + /* Copy UBO block ref. */ + out << "\t/* Copy Uniform block member reference */" << std::endl; + out << "\t" + << ((stage == ShaderStage::VERTEX) ? "vertex_shader_instance." : "fragment_shader_instance.") + << "global_uniforms = uniforms;" << std::endl; + + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_uniform_block_population(ShaderStage stage) +{ + /* Populate Global Uniforms. */ + std::stringstream out; + out << "\t/* Copy UBO block references into local class variables */" << std::endl; + for (const MSLUniformBlock &ubo : this->uniform_blocks) { + + /* Only include blocks which are used within this stage. */ + if (bool(ubo.stage & stage)) { + /* Generate UBO reference assignment. + * NOTE(Metal): We append `_local` post-fix onto the class member name + * for the ubo to avoid name collision with the UBO accessor macro. + * We only need to add this post-fix for the non-array access variant, + * as the array is indexed directly, rather than requiring a dereference. */ + out << "\t" + << ((stage == ShaderStage::VERTEX) ? "vertex_shader_instance." : + "fragment_shader_instance.") + << ubo.name; + if (!ubo.is_array) { + out << "_local"; + } + out << " = " << ubo.name << ";" << std::endl; + } + } + out << std::endl; + return out.str(); +} + +/* Copy input attributes from stage_in into class local variables. */ +std::string MSLGeneratorInterface::generate_msl_vertex_attribute_input_population() +{ + + /* SSBO Vertex Fetch mode does not require local attribute population, + * we only need to pass over the buffer pointer references. 
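+   * In that mode the shader body reads attribute data manually from the raw
+   * `MTL_VERTEX_DATA_n` buffers (indexed via the bound index buffer) instead of
+   * relying on the fixed-function vertex descriptor / stage_in assembly.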
*/ + if (this->uses_ssbo_vertex_fetch_mode) { + std::stringstream out; + out << "const constant uchar* GLOBAL_MTL_VERTEX_DATA[MTL_SSBO_VERTEX_FETCH_MAX_VBOS] = {" + << std::endl; + for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) { + char delimiter = (i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS - 1) ? ',' : ' '; + out << "\t\tMTL_VERTEX_DATA_" << i << delimiter << std::endl; + } + out << "};" << std::endl; + out << "\tvertex_shader_instance.MTL_VERTEX_DATA = GLOBAL_MTL_VERTEX_DATA;" << std::endl; + out << "\tvertex_shader_instance.MTL_INDEX_DATA_U16 = MTL_INDEX_DATA;" << std::endl; + out << "\tvertex_shader_instance.MTL_INDEX_DATA_U32 = reinterpret_cast<constant " + "uint32_t*>(MTL_INDEX_DATA);" + << std::endl; + return out.str(); + } + + /* Populate local attribute variables. */ + std::stringstream out; + out << "\t/* Copy Vertex Stage-in attributes into local variables */" << std::endl; + for (int attribute = 0; attribute < this->vertex_input_attributes.size(); attribute++) { + + if (is_matrix_type(this->vertex_input_attributes[attribute].type)) { + /* Reading into an internal matrix from split attributes: Should generate the following: + * vertex_shader_instance.mat_attribute_type = + * mat4(v_in.__internal_mat_attribute_type0, + * v_in.__internal_mat_attribute_type1, + * v_in.__internal_mat_attribute_type2, + * v_in.__internal_mat_attribute_type3). */ + out << "\tvertex_shader_instance." << this->vertex_input_attributes[attribute].name << " = " + << this->vertex_input_attributes[attribute].type << "(v_in.__internal_" + << this->vertex_input_attributes[attribute].name << 0; + for (int elem = 1; + elem < get_matrix_location_count(this->vertex_input_attributes[attribute].type); + elem++) { + out << ",\n" + << "v_in.__internal_" << this->vertex_input_attributes[attribute].name << elem; + } + out << ");"; + } + else { + /* OpenGL uses the `GPU_FETCH_*` functions which can alter how an attribute value is + * interpreted. In Metal, we cannot support all implicit conversions within the vertex + * descriptor/vertex stage-in, so we need to perform value transformation on-read. + * + * This is handled by wrapping attribute reads to local shader registers in a + * suitable conversion function `attribute_conversion_func_name`. + * This conversion function performs a specific transformation on the source + * vertex data, depending on the specified GPU_FETCH_* mode for the current + * vertex format. + * + * The fetch_mode is specified per-attribute using specialization constants + * on the PSO, wherein a unique set of constants is passed in per vertex + * buffer/format configuration. Efficiently enabling pass-through reads + * if no special fetch is required. */ + bool do_attribute_conversion_on_read = false; + std::string attribute_conversion_func_name = get_attribute_conversion_function( + &do_attribute_conversion_on_read, this->vertex_input_attributes[attribute].type); + + if (do_attribute_conversion_on_read) { + out << "\t" << attribute_conversion_func_name << "(MTL_AttributeConvert" << attribute + << ", v_in." << this->vertex_input_attributes[attribute].name + << ", vertex_shader_instance." << this->vertex_input_attributes[attribute].name << ");" + << std::endl; + } + else { + out << "\tvertex_shader_instance." << this->vertex_input_attributes[attribute].name + << " = v_in." << this->vertex_input_attributes[attribute].name << ";" << std::endl; + } + } + } + out << std::endl; + return out.str(); +} + +/* Copy post-main, modified, local class variables into vertex-output struct. 
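+ * Copying gl_Position below also converts clip-space conventions: Metal's NDC
+ * depth range is [0, 1] versus OpenGL's [-1, 1], so z' = (z + w) / 2 yields
+ * (z_ndc + 1) / 2 after the perspective divide, and Y is negated to account
+ * for the differing window-coordinate origin between the two APIs.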
*/ +std::string MSLGeneratorInterface::generate_msl_vertex_output_population() +{ + + std::stringstream out; + out << "\t/* Copy Vertex Outputs into output struct */" << std::endl; + + /* Output gl_Position with conversion to Metal coordinate-space. */ + if (this->uses_gl_Position) { + out << "\toutput._default_position_ = vertex_shader_instance.gl_Position;" << std::endl; + + /* Invert Y and rescale depth range. + * This is an alternative method to modifying all projection matrices. */ + out << "\toutput._default_position_.y = -output._default_position_.y;" << std::endl; + out << "\toutput._default_position_.z = " + "(output._default_position_.z+output._default_position_.w)/2.0;" + << std::endl; + } + + /* Output Point-size. */ + if (this->uses_gl_PointSize) { + out << "\toutput.pointsize = vertex_shader_instance.gl_PointSize;" << std::endl; + } + + /* Output render target array Index. */ + if (uses_mtl_array_index_) { + out << "\toutput.MTLRenderTargetArrayIndex = " + "vertex_shader_instance.MTLRenderTargetArrayIndex;" + << std::endl; + } + + /* Output clip-distances. */ + out << "#if defined(USE_CLIP_PLANES) || defined(USE_WORLD_CLIP_PLANES)" << std::endl; + if (this->clip_distances.size() > 1) { + for (int cd = 0; cd < this->clip_distances.size(); cd++) { + out << "\toutput.clipdistance[" << cd << "] = vertex_shader_instance.gl_ClipDistance_" << cd + << ";" << std::endl; + } + } + else if (this->clip_distances.size() > 0) { + out << "\toutput.clipdistance = vertex_shader_instance.gl_ClipDistance_0;" << std::endl; + } + out << "#endif" << std::endl; + + /* Populate output vertex variables. */ + int output_id = 0; + for (const MSLVertexOutputAttribute &v_out : this->vertex_output_varyings) { + if (v_out.is_array) { + + for (int i = 0; i < v_out.array_elems; i++) { + out << "\toutput." << v_out.instance_name << "_" << v_out.name << i + << " = vertex_shader_instance."; + + if (v_out.instance_name != "") { + out << v_out.instance_name << "."; + } + + out << v_out.name << "[" << i << "]" + << ";" << std::endl; + } + } + else { + /* Matrix types are split into vectors and need to be reconstructed. */ + if (is_matrix_type(v_out.type)) { + for (int elem = 0; elem < get_matrix_location_count(v_out.type); elem++) { + out << "\toutput." << v_out.instance_name << "__matrix_" << v_out.name << elem + << " = vertex_shader_instance."; + + if (v_out.instance_name != "") { + out << v_out.instance_name << "."; + } + + out << v_out.name << "[" << elem << "];" << std::endl; + } + } + else { + /* If we are not using gl_Position, first vertex output is used for position. + * Ensure it is vec4. If transform feedback is enabled, we do not need position. */ + if (!this->uses_gl_Position && output_id == 0 && !this->uses_transform_feedback) { + + out << "\toutput." << v_out.instance_name << "_" << v_out.name + << " = to_vec4(vertex_shader_instance." << v_out.name << ");" << std::endl; + + /* Invert Y */ + out << "\toutput." << v_out.instance_name << "_" << v_out.name << ".y = -output." + << v_out.name << ".y;" << std::endl; + } + else { + + /* Assign vertex output. */ + out << "\toutput." 
<< v_out.instance_name << "_" << v_out.name + << " = vertex_shader_instance."; + + if (v_out.instance_name != "") { + out << v_out.instance_name << "."; + } + + out << v_out.name << ";" << std::endl; + } + } + } + output_id++; + } + out << std::endl; + return out.str(); +} + +/* Copy desired output varyings into transform feedback structure */ +std::string MSLGeneratorInterface::generate_msl_vertex_output_tf_population() +{ + + std::stringstream out; + out << "\t/* Copy Vertex TF Outputs into transform feedback buffer */" << std::endl; + + /* Populate output vertex variables */ + /* TODO(Metal): Currently do not need to support output matrix types etc; but may need to + * verify for other configurations if these occur in any cases. */ + for (int v_output = 0; v_output < this->vertex_output_varyings_tf.size(); v_output++) { + out << "transform_feedback_results[gl_VertexID]." + << this->vertex_output_varyings_tf[v_output].name << " = vertex_shader_instance." + << this->vertex_output_varyings_tf[v_output].name << ";" << std::endl; + } + out << std::endl; + return out.str(); +} + +/* Copy fragment stage inputs (Vertex Outputs) into local class variables. */ +std::string MSLGeneratorInterface::generate_msl_fragment_input_population() +{ + + /* Populate local attribute variables. */ + std::stringstream out; + out << "\t/* Copy Fragment input into local variables. */" << std::endl; + + /* Special common case for gl_FragCoord, assigning to input position. */ + if (this->uses_gl_Position) { + out << "\tfragment_shader_instance.gl_FragCoord = v_in._default_position_;" << std::endl; + } + else { + /* When gl_Position is not set, first VertexIn element is used for position. */ + out << "\tfragment_shader_instance.gl_FragCoord = v_in." + << this->vertex_output_varyings[0].name << ";" << std::endl; + } + + /* NOTE: We will only assign to the intersection of the vertex output and fragment input. + * Fragment input represents varying variables which are declared (but are not necessarily + * used). The Vertex out defines the set which is passed into the fragment shader, which + * contains out variables declared in the vertex shader, though these are not necessarily + * consumed by the fragment shader. + * + * In the cases where the fragment shader expects a variable, but it does not exist in the + * vertex shader, a warning will be provided. */ + for (int f_input = (this->uses_gl_Position) ? 0 : 1; + f_input < this->fragment_input_varyings.size(); + f_input++) { + bool exists_in_vertex_output = false; + for (int v_o = 0; v_o < this->vertex_output_varyings.size() && !exists_in_vertex_output; + v_o++) { + if (this->fragment_input_varyings[f_input].name == this->vertex_output_varyings[v_o].name) { + exists_in_vertex_output = true; + } + } + if (!exists_in_vertex_output) { + shader_debug_printf( + "[Warning] Fragment shader expects varying input '%s', but this is not passed from " + "the " + "vertex shader\n", + this->fragment_input_varyings[f_input].name.c_str()); + continue; + } + if (this->fragment_input_varyings[f_input].is_array) { + for (int i = 0; i < this->fragment_input_varyings[f_input].array_elems; i++) { + out << "\tfragment_shader_instance."; + + if (this->fragment_input_varyings[f_input].instance_name != "") { + out << this->fragment_input_varyings[f_input].instance_name << "."; + } + + out << this->fragment_input_varyings[f_input].name << "[" << i << "] = v_in." 
+ << this->fragment_input_varyings[f_input].instance_name << "_" + << this->fragment_input_varyings[f_input].name << i << ";" << std::endl; + } + } + else { + /* Matrix types are split into components and need to be regrouped into a matrix. */ + if (is_matrix_type(this->fragment_input_varyings[f_input].type)) { + out << "\tfragment_shader_instance."; + + if (this->fragment_input_varyings[f_input].instance_name != "") { + out << this->fragment_input_varyings[f_input].instance_name << "."; + } + + out << this->fragment_input_varyings[f_input].name << " = " + << this->fragment_input_varyings[f_input].type; + int count = get_matrix_location_count(this->fragment_input_varyings[f_input].type); + for (int elem = 0; elem < count; elem++) { + out << ((elem == 0) ? "(" : "") << "v_in." + << this->fragment_input_varyings[f_input].instance_name << "__matrix_" + << this->fragment_input_varyings[f_input].name << elem + << ((elem < count - 1) ? ",\n" : ""); + } + out << ");" << std::endl; + } + else { + out << "\tfragment_shader_instance."; + + if (this->fragment_input_varyings[f_input].instance_name != "") { + out << this->fragment_input_varyings[f_input].instance_name << "."; + } + + out << this->fragment_input_varyings[f_input].name << " = v_in." + << this->fragment_input_varyings[f_input].instance_name << "_" + << this->fragment_input_varyings[f_input].name << ";" << std::endl; + } + } + } + out << std::endl; + return out.str(); +} + +/* Copy post-main, modified, local class variables into fragment-output struct. */ +std::string MSLGeneratorInterface::generate_msl_fragment_output_population() +{ + + /* Populate output fragment variables. */ + std::stringstream out; + out << "\t/* Copy Fragment Outputs into output struct. */" << std::endl; + + /* Output gl_FragDepth. */ + if (this->uses_gl_FragDepth) { + out << "\toutput.fragdepth = fragment_shader_instance.gl_FragDepth;" << std::endl; + } + + /* Output attributes. */ + for (int f_output = 0; f_output < this->fragment_outputs.size(); f_output++) { + + out << "\toutput." << this->fragment_outputs[f_output].name << " = fragment_shader_instance." + << this->fragment_outputs[f_output].name << ";" << std::endl; + } + out << std::endl; + return out.str(); +} + +std::string MSLGeneratorInterface::generate_msl_texture_vars(ShaderStage shader_stage) +{ + BLI_assert(shader_stage == ShaderStage::VERTEX || shader_stage == ShaderStage::FRAGMENT); + + std::stringstream out; + out << "\t/* Populate local texture and sampler members */" << std::endl; + for (int i = 0; i < this->texture_samplers.size(); i++) { + if (bool(this->texture_samplers[i].stage & shader_stage)) { + + /* Assign texture reference. */ + out << "\t" + << ((shader_stage == ShaderStage::VERTEX) ? "vertex_shader_instance." : + "fragment_shader_instance.") + << this->texture_samplers[i].name << ".texture = &" << this->texture_samplers[i].name + << ";" << std::endl; + + /* Assign sampler reference. */ + if (this->use_argument_buffer_for_samplers()) { + out << "\t" + << ((shader_stage == ShaderStage::VERTEX) ? "vertex_shader_instance." : + "fragment_shader_instance.") + << this->texture_samplers[i].name << ".samp = &samplers.sampler_args[" << i << "];" + << std::endl; + } + else { + out << "\t" + << ((shader_stage == ShaderStage::VERTEX) ? "vertex_shader_instance." 
: + "fragment_shader_instance.") + << this->texture_samplers[i].name << ".samp = &" << this->texture_samplers[i].name + << "_sampler;" << std::endl; + } + } + } + out << std::endl; + return out.str(); +} + +void MSLGeneratorInterface::resolve_input_attribute_locations() +{ + /* Determine used-attribute-location mask. */ + uint32_t used_locations = 0; + for (const MSLVertexInputAttribute &attr : vertex_input_attributes) { + if (attr.layout_location >= 0) { + /* Matrix and array types span multiple location slots. */ + uint32_t location_element_count = get_matrix_location_count(attr.type); + for (uint32_t i = 1; i <= location_element_count; i++) { + /* Ensure our location hasn't already been used. */ + uint32_t location_mask = (i << attr.layout_location); + BLI_assert((used_locations & location_mask) == 0); + used_locations = used_locations | location_mask; + } + } + } + + /* Assign unused location slots to other attributes. */ + for (MSLVertexInputAttribute &attr : vertex_input_attributes) { + if (attr.layout_location == -1) { + /* Determine number of locations required. */ + uint32_t required_attr_slot_count = get_matrix_location_count(attr.type); + + /* Determine free location. + * Starting from 1 is slightly less efficient, however, + * given multi-sized attributes, an earlier slot may remain free. + * given GPU_VERT_ATTR_MAX_LEN is small, this wont matter. */ + for (int loc = 0; loc < GPU_VERT_ATTR_MAX_LEN - (required_attr_slot_count - 1); loc++) { + + uint32_t location_mask = (1 << loc); + /* Generate sliding mask using location and required number of slots, + * to ensure contiguous slots are free. + * slot mask will be a number containing N binary 1's, where N is the + * number of attributes needed. + * e.g. N=4 -> 1111. */ + uint32_t location_slot_mask = (1 << required_attr_slot_count) - 1; + uint32_t sliding_location_slot_mask = location_slot_mask << location_mask; + if ((used_locations & sliding_location_slot_mask) == 0) { + /* Assign location and update mask. */ + attr.layout_location = loc; + used_locations = used_locations | location_slot_mask; + continue; + } + } + + /* Error if could not assign attribute. */ + MTL_LOG_ERROR("Could not assign attribute location to attribute %s for shader %s\n", + attr.name.c_str(), + this->parent_shader_.name_get()); + } + } +} + +void MSLGeneratorInterface::resolve_fragment_output_locations() +{ + int running_location_ind = 0; + + /* This code works under the assumption that either all layout_locations are set, + * or none are. */ + for (int i = 0; i < this->fragment_outputs.size(); i++) { + BLI_assert_msg( + ((running_location_ind > 0) ? (this->fragment_outputs[i].layout_location == -1) : true), + "Error: Mismatched input attributes, some with location specified, some without"); + if (this->fragment_outputs[i].layout_location == -1) { + this->fragment_outputs[i].layout_location = running_location_ind; + running_location_ind++; + } + } +} + +/** + * Add string to name buffer. Utility function to be used in bake_shader_interface. + * Returns the offset of the inserted name. + */ +static uint32_t name_buffer_copystr(char **name_buffer_ptr, + const char *str_to_copy, + uint32_t &name_buffer_size, + uint32_t &name_buffer_offset) +{ + /* Verify input is valid. */ + BLI_assert(str_to_copy != nullptr); + + /* Determine length of new string, and ensure name buffer is large enough. */ + uint32_t ret_len = strlen(str_to_copy); + BLI_assert(ret_len > 0); + + /* If required name buffer size is larger, increase by at least 128 bytes. 
*/ + if (name_buffer_size + ret_len > name_buffer_size) { + name_buffer_size = name_buffer_size + max_ii(128, ret_len); + *name_buffer_ptr = (char *)MEM_reallocN(*name_buffer_ptr, name_buffer_size); + } + + /* Copy string into name buffer. */ + uint32_t insert_offset = name_buffer_offset; + char *current_offset = (*name_buffer_ptr) + insert_offset; + strcpy(current_offset, str_to_copy); + + /* Adjust offset including null terminator. */ + name_buffer_offset += ret_len + 1; + + /* Return offset into name buffer for inserted string. */ + return insert_offset; +} + +MTLShaderInterface *MSLGeneratorInterface::bake_shader_interface(const char *name) +{ + MTLShaderInterface *interface = new MTLShaderInterface(name); + interface->init(); + + /* Name buffer. */ + /* Initialize name buffer. */ + uint32_t name_buffer_size = 256; + uint32_t name_buffer_offset = 0; + interface->name_buffer_ = (char *)MEM_mallocN(name_buffer_size, "name_buffer"); + + /* Prepare Interface Input Attributes. */ + int c_offset = 0; + for (int attribute = 0; attribute < this->vertex_input_attributes.size(); attribute++) { + + /* We need a special case for handling matrix types, which splits the matrix into its vector + * components. */ + if (is_matrix_type(this->vertex_input_attributes[attribute].type)) { + + eMTLDataType mtl_type = to_mtl_type( + get_matrix_subtype(this->vertex_input_attributes[attribute].type)); + int size = mtl_get_data_type_size(mtl_type); + for (int elem = 0; + elem < get_matrix_location_count(this->vertex_input_attributes[attribute].type); + elem++) { + /* First attribute matches the core name -- subsequent attributes tagged with + * `__internal_<name><index>`. */ + std::string _internal_name = (elem == 0) ? + this->vertex_input_attributes[attribute].name : + "__internal_" + + this->vertex_input_attributes[attribute].name + + std::to_string(elem); + + /* IF Using SSBO vertex Fetch, we do not need to expose other dummy attributes in the + * shader interface, only the first one for the whole matrix, as we can pass whatever data + * we want in this mode, and do not need to split attributes. */ + if (elem == 0 || !this->uses_ssbo_vertex_fetch_mode) { + interface->add_input_attribute( + name_buffer_copystr(&interface->name_buffer_, + _internal_name.c_str(), + name_buffer_size, + name_buffer_offset), + this->vertex_input_attributes[attribute].layout_location + elem, + mtl_datatype_to_vertex_type(mtl_type), + 0, + size, + c_offset, + (elem == 0) ? + get_matrix_location_count(this->vertex_input_attributes[attribute].type) : + 0); + } + c_offset += size; + } + shader_debug_printf( + "[Note] Matrix Type '%s' added to shader interface as vertex attribute. (Elem Count: " + "%d)\n", + this->vertex_input_attributes[attribute].name.c_str(), + get_matrix_location_count(this->vertex_input_attributes[attribute].type)); + } + else { + + /* Normal attribute types. */ + eMTLDataType mtl_type = to_mtl_type(this->vertex_input_attributes[attribute].type); + int size = mtl_get_data_type_size(mtl_type); + interface->add_input_attribute( + name_buffer_copystr(&interface->name_buffer_, + this->vertex_input_attributes[attribute].name.c_str(), + name_buffer_size, + name_buffer_offset), + this->vertex_input_attributes[attribute].layout_location, + mtl_datatype_to_vertex_type(mtl_type), + 0, + size, + c_offset); + c_offset += size; + } + } + + /* Prepare Interface Default Uniform Block. 
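+   * All loose (non-UBO) uniforms are packed into this single push-constant
+   * block, bound at `MTL_uniform_buffer_base_index` (see the generated
+   * PushConstantBlock in generate_msl_uniform_structs), so push constants
+   * consume only one buffer binding regardless of the number of uniforms.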
*/ + interface->add_push_constant_block(name_buffer_copystr( + &interface->name_buffer_, "PushConstantBlock", name_buffer_size, name_buffer_offset)); + + for (int uniform = 0; uniform < this->uniforms.size(); uniform++) { + interface->add_uniform( + name_buffer_copystr(&interface->name_buffer_, + this->uniforms[uniform].name.c_str(), + name_buffer_size, + name_buffer_offset), + to_mtl_type(this->uniforms[uniform].type), + (this->uniforms[uniform].is_array) ? this->uniforms[uniform].array_elems : 1); + } + + /* Prepare Interface Uniform Blocks. */ + for (int uniform_block = 0; uniform_block < this->uniform_blocks.size(); uniform_block++) { + interface->add_uniform_block( + name_buffer_copystr(&interface->name_buffer_, + this->uniform_blocks[uniform_block].name.c_str(), + name_buffer_size, + name_buffer_offset), + uniform_block, + 0, + this->uniform_blocks[uniform_block].stage); + } + + /* Texture/sampler bindings to interface. */ + for (const MSLTextureSampler &texture_sampler : this->texture_samplers) { + interface->add_texture(name_buffer_copystr(&interface->name_buffer_, + texture_sampler.name.c_str(), + name_buffer_size, + name_buffer_offset), + texture_sampler.location, + texture_sampler.get_texture_binding_type(), + texture_sampler.stage); + } + + /* Sampler Parameters. */ + interface->set_sampler_properties( + this->use_argument_buffer_for_samplers(), + this->get_sampler_argument_buffer_bind_index(ShaderStage::VERTEX), + this->get_sampler_argument_buffer_bind_index(ShaderStage::FRAGMENT)); + + /* Map Metal bindings to standardized ShaderInput struct name/binding index. */ + interface->prepare_common_shader_inputs(); + + /* Resize name buffer to save some memory. */ + if (name_buffer_offset < name_buffer_size) { + interface->name_buffer_ = (char *)MEM_reallocN(interface->name_buffer_, name_buffer_offset); + } + + return interface; +} + +std::string MSLTextureSampler::get_msl_texture_type_str() const +{ + /* Add Types as needed. 
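+   * e.g. ImageType::FLOAT_2D maps to "texture2d"; combined with the element
+   * type from get_msl_return_type_str(), the final MSL declaration is expected
+   * to resemble `texture2d<float>`.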
*/ + switch (this->type) { + case ImageType::FLOAT_1D: { + return "texture1d"; + } + case ImageType::FLOAT_2D: { + return "texture2d"; + } + case ImageType::FLOAT_3D: { + return "texture3d"; + } + case ImageType::FLOAT_CUBE: { + return "texturecube"; + } + case ImageType::FLOAT_1D_ARRAY: { + return "texture1d_array"; + } + case ImageType::FLOAT_2D_ARRAY: { + return "texture2d_array"; + } + case ImageType::FLOAT_CUBE_ARRAY: { + return "texturecube_array"; + } + case ImageType::FLOAT_BUFFER: { + return "texture_buffer"; + } + case ImageType::DEPTH_2D: { + return "depth2d"; + } + case ImageType::SHADOW_2D: { + return "depth2d"; + } + case ImageType::DEPTH_2D_ARRAY: { + return "depth2d_array"; + } + case ImageType::SHADOW_2D_ARRAY: { + return "depth2d_array"; + } + case ImageType::DEPTH_CUBE: { + return "depthcube"; + } + case ImageType::SHADOW_CUBE: { + return "depthcube"; + } + case ImageType::DEPTH_CUBE_ARRAY: { + return "depthcube_array"; + } + case ImageType::SHADOW_CUBE_ARRAY: { + return "depthcube_array"; + } + case ImageType::INT_1D: { + return "texture1d"; + } + case ImageType::INT_2D: { + return "texture2d"; + } + case ImageType::INT_3D: { + return "texture3d"; + } + case ImageType::INT_CUBE: { + return "texturecube"; + } + case ImageType::INT_1D_ARRAY: { + return "texture1d_array"; + } + case ImageType::INT_2D_ARRAY: { + return "texture2d_array"; + } + case ImageType::INT_CUBE_ARRAY: { + return "texturecube_array"; + } + case ImageType::INT_BUFFER: { + return "texture_buffer"; + } + case ImageType::UINT_1D: { + return "texture1d"; + } + case ImageType::UINT_2D: { + return "texture2d"; + } + case ImageType::UINT_3D: { + return "texture3d"; + } + case ImageType::UINT_CUBE: { + return "texturecube"; + } + case ImageType::UINT_1D_ARRAY: { + return "texture1d_array"; + } + case ImageType::UINT_2D_ARRAY: { + return "texture2d_array"; + } + case ImageType::UINT_CUBE_ARRAY: { + return "texturecube_array"; + } + case ImageType::UINT_BUFFER: { + return "texture_buffer"; + } + default: { + /* Unrecognized type. */ + BLI_assert_unreachable(); + return "ERROR"; + } + }; +} + +std::string MSLTextureSampler::get_msl_wrapper_type_str() const +{ + /* Add Types as needed. 
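+   * Each wrapper struct bundles the texture handle together with a pointer to
+   * its sampler (generate_msl_texture_vars assigns the `.texture` and `.samp`
+   * members), letting generated code mimic GLSL's combined texture/sampler
+   * objects.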
*/ + switch (this->type) { + case ImageType::FLOAT_1D: { + return "_mtl_combined_image_sampler_1d"; + } + case ImageType::FLOAT_2D: { + return "_mtl_combined_image_sampler_2d"; + } + case ImageType::FLOAT_3D: { + return "_mtl_combined_image_sampler_3d"; + } + case ImageType::FLOAT_CUBE: { + return "_mtl_combined_image_sampler_cube"; + } + case ImageType::FLOAT_1D_ARRAY: { + return "_mtl_combined_image_sampler_1d_array"; + } + case ImageType::FLOAT_2D_ARRAY: { + return "_mtl_combined_image_sampler_2d_array"; + } + case ImageType::FLOAT_CUBE_ARRAY: { + return "_mtl_combined_image_sampler_cube_array"; + } + case ImageType::FLOAT_BUFFER: { + return "_mtl_combined_image_sampler_buffer"; + } + case ImageType::DEPTH_2D: { + return "_mtl_combined_image_sampler_depth_2d"; + } + case ImageType::SHADOW_2D: { + return "_mtl_combined_image_sampler_depth_2d"; + } + case ImageType::DEPTH_2D_ARRAY: { + return "_mtl_combined_image_sampler_depth_2d_array"; + } + case ImageType::SHADOW_2D_ARRAY: { + return "_mtl_combined_image_sampler_depth_2d_array"; + } + case ImageType::DEPTH_CUBE: { + return "_mtl_combined_image_sampler_depth_cube"; + } + case ImageType::SHADOW_CUBE: { + return "_mtl_combined_image_sampler_depth_cube"; + } + case ImageType::DEPTH_CUBE_ARRAY: { + return "_mtl_combined_image_sampler_depth_cube_array"; + } + case ImageType::SHADOW_CUBE_ARRAY: { + return "_mtl_combined_image_sampler_depth_cube_array"; + } + case ImageType::INT_1D: { + return "_mtl_combined_image_sampler_1d"; + } + case ImageType::INT_2D: { + return "_mtl_combined_image_sampler_2d"; + } + case ImageType::INT_3D: { + return "_mtl_combined_image_sampler_3d"; + } + case ImageType::INT_CUBE: { + return "_mtl_combined_image_sampler_cube"; + } + case ImageType::INT_1D_ARRAY: { + return "_mtl_combined_image_sampler_1d_array"; + } + case ImageType::INT_2D_ARRAY: { + return "_mtl_combined_image_sampler_2d_array"; + } + case ImageType::INT_CUBE_ARRAY: { + return "_mtl_combined_image_sampler_cube_array"; + } + case ImageType::INT_BUFFER: { + return "_mtl_combined_image_sampler_buffer"; + } + case ImageType::UINT_1D: { + return "_mtl_combined_image_sampler_1d"; + } + case ImageType::UINT_2D: { + return "_mtl_combined_image_sampler_2d"; + } + case ImageType::UINT_3D: { + return "_mtl_combined_image_sampler_3d"; + } + case ImageType::UINT_CUBE: { + return "_mtl_combined_image_sampler_cube"; + } + case ImageType::UINT_1D_ARRAY: { + return "_mtl_combined_image_sampler_1d_array"; + } + case ImageType::UINT_2D_ARRAY: { + return "_mtl_combined_image_sampler_2d_array"; + } + case ImageType::UINT_CUBE_ARRAY: { + return "_mtl_combined_image_sampler_cube_array"; + } + case ImageType::UINT_BUFFER: { + return "_mtl_combined_image_sampler_buffer"; + } + default: { + /* Unrecognized type. */ + BLI_assert_unreachable(); + return "ERROR"; + } + }; +} + +std::string MSLTextureSampler::get_msl_return_type_str() const +{ + /* Add Types as needed */ + switch (this->type) { + /* Floating point return. */ + case ImageType::FLOAT_1D: + case ImageType::FLOAT_2D: + case ImageType::FLOAT_3D: + case ImageType::FLOAT_CUBE: + case ImageType::FLOAT_1D_ARRAY: + case ImageType::FLOAT_2D_ARRAY: + case ImageType::FLOAT_CUBE_ARRAY: + case ImageType::FLOAT_BUFFER: + case ImageType::DEPTH_2D: + case ImageType::SHADOW_2D: + case ImageType::DEPTH_2D_ARRAY: + case ImageType::SHADOW_2D_ARRAY: + case ImageType::DEPTH_CUBE: + case ImageType::SHADOW_CUBE: + case ImageType::DEPTH_CUBE_ARRAY: + case ImageType::SHADOW_CUBE_ARRAY: { + return "float"; + } + /* Integer return. 
*/ + case ImageType::INT_1D: + case ImageType::INT_2D: + case ImageType::INT_3D: + case ImageType::INT_CUBE: + case ImageType::INT_1D_ARRAY: + case ImageType::INT_2D_ARRAY: + case ImageType::INT_CUBE_ARRAY: + case ImageType::INT_BUFFER: { + return "int"; + } + + /* Unsigned Integer return. */ + case ImageType::UINT_1D: + case ImageType::UINT_2D: + case ImageType::UINT_3D: + case ImageType::UINT_CUBE: + case ImageType::UINT_1D_ARRAY: + case ImageType::UINT_2D_ARRAY: + case ImageType::UINT_CUBE_ARRAY: + case ImageType::UINT_BUFFER: { + return "uint32_t"; + } + + default: { + /* Unrecognized type. */ + BLI_assert_unreachable(); + return "ERROR"; + } + }; +} + +eGPUTextureType MSLTextureSampler::get_texture_binding_type() const +{ + /* Add Types as needed */ + switch (this->type) { + case ImageType::FLOAT_1D: { + return GPU_TEXTURE_1D; + } + case ImageType::FLOAT_2D: { + return GPU_TEXTURE_2D; + } + case ImageType::FLOAT_3D: { + return GPU_TEXTURE_3D; + } + case ImageType::FLOAT_CUBE: { + return GPU_TEXTURE_CUBE; + } + case ImageType::FLOAT_1D_ARRAY: { + return GPU_TEXTURE_1D_ARRAY; + } + case ImageType::FLOAT_2D_ARRAY: { + return GPU_TEXTURE_2D_ARRAY; + } + case ImageType::FLOAT_CUBE_ARRAY: { + return GPU_TEXTURE_CUBE_ARRAY; + } + case ImageType::FLOAT_BUFFER: { + return GPU_TEXTURE_BUFFER; + } + case ImageType::DEPTH_2D: { + return GPU_TEXTURE_2D; + } + case ImageType::SHADOW_2D: { + return GPU_TEXTURE_2D; + } + case ImageType::DEPTH_2D_ARRAY: { + return GPU_TEXTURE_2D_ARRAY; + } + case ImageType::SHADOW_2D_ARRAY: { + return GPU_TEXTURE_2D_ARRAY; + } + case ImageType::DEPTH_CUBE: { + return GPU_TEXTURE_CUBE; + } + case ImageType::SHADOW_CUBE: { + return GPU_TEXTURE_CUBE; + } + case ImageType::DEPTH_CUBE_ARRAY: { + return GPU_TEXTURE_CUBE_ARRAY; + } + case ImageType::SHADOW_CUBE_ARRAY: { + return GPU_TEXTURE_CUBE_ARRAY; + } + case ImageType::INT_1D: { + return GPU_TEXTURE_1D; + } + case ImageType::INT_2D: { + return GPU_TEXTURE_2D; + } + case ImageType::INT_3D: { + return GPU_TEXTURE_3D; + } + case ImageType::INT_CUBE: { + return GPU_TEXTURE_CUBE; + } + case ImageType::INT_1D_ARRAY: { + return GPU_TEXTURE_1D_ARRAY; + } + case ImageType::INT_2D_ARRAY: { + return GPU_TEXTURE_2D_ARRAY; + } + case ImageType::INT_CUBE_ARRAY: { + return GPU_TEXTURE_CUBE_ARRAY; + } + case ImageType::INT_BUFFER: { + return GPU_TEXTURE_BUFFER; + } + case ImageType::UINT_1D: { + return GPU_TEXTURE_1D; + } + case ImageType::UINT_2D: { + return GPU_TEXTURE_2D; + } + case ImageType::UINT_3D: { + return GPU_TEXTURE_3D; + } + case ImageType::UINT_CUBE: { + return GPU_TEXTURE_CUBE; + } + case ImageType::UINT_1D_ARRAY: { + return GPU_TEXTURE_1D_ARRAY; + } + case ImageType::UINT_2D_ARRAY: { + return GPU_TEXTURE_2D_ARRAY; + } + case ImageType::UINT_CUBE_ARRAY: { + return GPU_TEXTURE_CUBE_ARRAY; + } + case ImageType::UINT_BUFFER: { + return GPU_TEXTURE_BUFFER; + } + default: { + BLI_assert_unreachable(); + return GPU_TEXTURE_2D; + } + }; +} + +/** \} */ + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_shader_interface.hh b/source/blender/gpu/metal/mtl_shader_interface.hh new file mode 100644 index 00000000000..0da84cad997 --- /dev/null +++ b/source/blender/gpu/metal/mtl_shader_interface.hh @@ -0,0 +1,267 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include "MEM_guardedalloc.h" + +#include "BLI_vector.hh" + +#include "gpu_shader_interface.hh" +#include "mtl_capabilities.hh" +#include "mtl_shader_interface_type.hh" + +#include "GPU_common.h" 
+#include "GPU_common_types.h" +#include "GPU_texture.h" +#include "gpu_texture_private.hh" +#include <Metal/Metal.h> +#include <functional> + +namespace blender::gpu { + +/* #MTLShaderInterface describes the layout and properties of a given shader, + * including input and output bindings, and any special properties or modes + * that the shader may require. + * + * -- Shader input/output bindings -- + * + * We require custom data-structures for the binding information in Metal. + * This is because certain bindings contain and require more information to + * be stored than can be tracked solely within the `ShaderInput` struct. + * e.g. data sizes and offsets. + * + * Upon interface completion, `prepare_common_shader_inputs` is used to + * populate the global `ShaderInput*` array to enable correct functionality + * of shader binding location lookups. These returned locations act as indices + * into the arrays stored here in the #MTLShaderInterface, such that extraction + * of required information can be performed within the back-end. + * + * e.g. `int loc = GPU_shader_get_uniform(...)` + * `loc` will match the index into the `MTLShaderUniform uniforms_[]` array + * to fetch the required Metal specific information. + * + * + * + * -- Argument Buffers and Argument Encoders -- + * + * We can use #ArgumentBuffers (AB's) in Metal to extend the resource bind limitations + * by providing bind-less support. + * + * Argument Buffers are used for sampler bindings when the builtin + * sampler limit of 16 is exceeded, as in all cases for Blender, + * each individual texture is associated with a given sampler, and this + * lower limit would otherwise reduce the total availability of textures + * used in shaders. + * + * In future, argument buffers may be extended to support other resource + * types, if overall bind limits are ever increased within Blender. + * + * The #ArgumentEncoder cache used to store the generated #ArgumentEncoders for a given + * shader permutation. The #ArgumentEncoder is the resource used to write resource binding + * information to a specified buffer, and is unique to the shader's resource interface. + */ + +enum class ShaderStage : uint32_t { + VERTEX = 1 << 0, + FRAGMENT = 1 << 1, + BOTH = (ShaderStage::VERTEX | ShaderStage::FRAGMENT), +}; +ENUM_OPERATORS(ShaderStage, ShaderStage::BOTH); + +inline uint get_shader_stage_index(ShaderStage stage) +{ + switch (stage) { + case ShaderStage::VERTEX: + return 0; + case ShaderStage::FRAGMENT: + return 1; + default: + BLI_assert_unreachable(); + return 0; + } + return 0; +} + +/* Shader input/output binding information. */ +struct MTLShaderInputAttribute { + uint32_t name_offset; + MTLVertexFormat format; + uint32_t index; + uint32_t location; + uint32_t size; + uint32_t buffer_index; + uint32_t offset; + /* For attributes of Matrix/array types, we need to insert "fake" attributes for + * each element, as matrix types are not natively supported. + * + * > 1 if matrix/arrays are used, specifying number of elements. + * = 1 for non-matrix types + * = 0 if used as a dummy slot for "fake" matrix attributes. */ + uint32_t matrix_element_count; +}; + +struct MTLShaderUniformBlock { + uint32_t name_offset; + uint32_t size = 0; + /* Buffer resource bind index in shader `[[buffer(index)]]`. */ + uint32_t buffer_index; + + /* Tracking for manual uniform addition. */ + uint32_t current_offset; + ShaderStage stage_mask; +}; + +struct MTLShaderUniform { + uint32_t name_offset; + /* Index of `MTLShaderUniformBlock` this uniform belongs to. 
*/ + uint32_t size_in_bytes; + uint32_t byte_offset; + eMTLDataType type; + uint32_t array_len; +}; + +struct MTLShaderTexture { + bool used; + uint32_t name_offset; + /* Texture resource bind slot in shader `[[texture(n)]]`. */ + int slot_index; + eGPUTextureType type; + ShaderStage stage_mask; +}; + +struct MTLShaderSampler { + uint32_t name_offset; + /* Sampler resource bind slot in shader `[[sampler(n)]]`. */ + uint32_t slot_index = 0; +}; + +/* Utility Functions. */ +MTLVertexFormat mtl_datatype_to_vertex_type(eMTLDataType type); + +/** + * Implementation of Shader interface for Metal Back-end. + **/ +class MTLShaderInterface : public ShaderInterface { + + private: + /* Argument encoders caching. + * Static size is based on common input permutation variations. */ + static const int ARGUMENT_ENCODERS_CACHE_SIZE = 3; + struct ArgumentEncoderCacheEntry { + id<MTLArgumentEncoder> encoder; + int buffer_index; + }; + ArgumentEncoderCacheEntry arg_encoders_[ARGUMENT_ENCODERS_CACHE_SIZE] = {}; + + /* Vertex input Attributes. */ + uint32_t total_attributes_; + uint32_t total_vert_stride_; + MTLShaderInputAttribute attributes_[MTL_MAX_VERTEX_INPUT_ATTRIBUTES]; + + /* Uniforms. */ + uint32_t total_uniforms_; + MTLShaderUniform uniforms_[MTL_MAX_UNIFORMS_PER_BLOCK]; + + /* Uniform Blocks. */ + uint32_t total_uniform_blocks_; + MTLShaderUniformBlock ubos_[MTL_MAX_UNIFORM_BUFFER_BINDINGS]; + MTLShaderUniformBlock push_constant_block_; + + /* Textures. */ + /* Textures support explicit binding indices, so some texture slots + * remain unused. */ + uint32_t total_textures_; + int max_texture_index_; + MTLShaderTexture textures_[MTL_MAX_TEXTURE_SLOTS]; + + /* Whether argument buffers are used for sampler bindings. */ + bool sampler_use_argument_buffer_; + int sampler_argument_buffer_bind_index_vert_; + int sampler_argument_buffer_bind_index_frag_; + + /* Attribute Mask. */ + uint32_t enabled_attribute_mask_; + + /* Debug. */ + char name[256]; + + public: + MTLShaderInterface(const char *name); + ~MTLShaderInterface(); + + void init(); + void add_input_attribute(uint32_t name_offset, + uint32_t attribute_location, + MTLVertexFormat format, + uint32_t buffer_index, + uint32_t size, + uint32_t offset, + int matrix_element_count = 1); + uint32_t add_uniform_block(uint32_t name_offset, + uint32_t buffer_index, + uint32_t size, + ShaderStage stage_mask = ShaderStage::BOTH); + void add_uniform(uint32_t name_offset, eMTLDataType type, int array_len = 1); + void add_texture(uint32_t name_offset, + uint32_t texture_slot, + eGPUTextureType tex_binding_type, + ShaderStage stage_mask = ShaderStage::FRAGMENT); + void add_push_constant_block(uint32_t name_offset); + + /* Resolve and cache locations of builtin uniforms and uniform blocks. */ + void map_builtins(); + void set_sampler_properties(bool use_argument_buffer, + uint32_t argument_buffer_bind_index_vert, + uint32_t argument_buffer_bind_index_frag); + + /* Prepare #ShaderInput interface for binding resolution. */ + void prepare_common_shader_inputs(); + + /* Fetch Uniforms. */ + const MTLShaderUniform &get_uniform(uint index) const; + uint32_t get_total_uniforms() const; + + /* Fetch Uniform Blocks. */ + const MTLShaderUniformBlock &get_uniform_block(uint index) const; + uint32_t get_total_uniform_blocks() const; + bool has_uniform_block(uint32_t block_index) const; + uint32_t get_uniform_block_size(uint32_t block_index) const; + + /* Push constant uniform data block should always be available. 
*/ + const MTLShaderUniformBlock &get_push_constant_block() const; + + /* Fetch textures. */ + const MTLShaderTexture &get_texture(uint index) const; + uint32_t get_total_textures() const; + uint32_t get_max_texture_index() const; + bool get_use_argument_buffer_for_samplers(int *vertex_arg_buffer_bind_index, + int *fragment_arg_buffer_bind_index) const; + + /* Fetch Attributes. */ + const MTLShaderInputAttribute &get_attribute(uint index) const; + uint32_t get_total_attributes() const; + uint32_t get_total_vertex_stride() const; + uint32_t get_enabled_attribute_mask() const; + + /* Name buffer fetching. */ + const char *get_name_at_offset(uint32_t offset) const; + + /* Interface name. */ + const char *get_name() const + { + return this->name; + } + + /* Argument buffer encoder management. */ + id<MTLArgumentEncoder> find_argument_encoder(int buffer_index) const; + + void insert_argument_encoder(int buffer_index, id encoder); + + MEM_CXX_CLASS_ALLOC_FUNCS("MTLShaderInterface"); +}; + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_shader_interface.mm b/source/blender/gpu/metal/mtl_shader_interface.mm new file mode 100644 index 00000000000..3703d5b5684 --- /dev/null +++ b/source/blender/gpu/metal/mtl_shader_interface.mm @@ -0,0 +1,604 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * GPU shader interface (C --> GLSL) + */ + +#include "BLI_bitmap.h" + +#include "GPU_capabilities.h" + +#include "mtl_common.hh" +#include "mtl_debug.hh" +#include "mtl_shader_interface.hh" +#include "mtl_shader_interface_type.hh" + +#include "BLI_blenlib.h" +#include "BLI_math_base.h" +#include "BLI_utildefines.h" +#include "MEM_guardedalloc.h" + +namespace blender::gpu { + +MTLShaderInterface::MTLShaderInterface(const char *name) +{ + /* Shared ShaderInputs array is populated later on in `prepare_common_shader_inputs` + * after Metal Shader Interface preparation. */ + inputs_ = nullptr; + + if (name != nullptr) { + strcpy(this->name, name); + } + + /* Ensure #ShaderInterface parameters are cleared. */ + this->init(); +} + +MTLShaderInterface::~MTLShaderInterface() +{ + for (const int i : IndexRange(ARGUMENT_ENCODERS_CACHE_SIZE)) { + if (arg_encoders_[i].encoder != nil) { + id<MTLArgumentEncoder> enc = arg_encoders_[i].encoder; + [enc release]; + } + } +} + +const char *MTLShaderInterface::get_name_at_offset(uint32_t offset) const +{ + return name_buffer_ + offset; +} + +void MTLShaderInterface::init() +{ + total_attributes_ = 0; + total_uniform_blocks_ = 0; + total_uniforms_ = 0; + total_textures_ = 0; + max_texture_index_ = -1; + enabled_attribute_mask_ = 0; + total_vert_stride_ = 0; + sampler_use_argument_buffer_ = false; + sampler_argument_buffer_bind_index_vert_ = -1; + sampler_argument_buffer_bind_index_frag_ = -1; + + /* NULL initialize uniform location markers for builtins. */ + for (const int u : IndexRange(GPU_NUM_UNIFORMS)) { + builtins_[u] = -1; + } + for (const int ubo : IndexRange(GPU_NUM_UNIFORM_BLOCKS)) { + builtin_blocks_[ubo] = -1; + } + for (const int tex : IndexRange(MTL_MAX_TEXTURE_SLOTS)) { + textures_[tex].used = false; + textures_[tex].slot_index = -1; + } + + /* Null initialization for argument encoders. 
*/ + for (const int i : IndexRange(ARGUMENT_ENCODERS_CACHE_SIZE)) { + arg_encoders_[i].encoder = nil; + arg_encoders_[i].buffer_index = -1; + } +} + +void MTLShaderInterface::add_input_attribute(uint32_t name_offset, + uint32_t attribute_location, + MTLVertexFormat format, + uint32_t buffer_index, + uint32_t size, + uint32_t offset, + int matrix_element_count) +{ + MTLShaderInputAttribute &input_attr = attributes_[total_attributes_]; + input_attr.name_offset = name_offset; + input_attr.format = format; + input_attr.location = attribute_location; + input_attr.size = size; + input_attr.buffer_index = buffer_index; + input_attr.offset = offset; + input_attr.matrix_element_count = matrix_element_count; + input_attr.index = total_attributes_; + total_attributes_++; + total_vert_stride_ = max_ii(total_vert_stride_, offset + size); + enabled_attribute_mask_ |= (1 << attribute_location); +} + +uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset, + uint32_t buffer_index, + uint32_t size, + ShaderStage stage_mask) +{ + /* Ensure Size is 16 byte aligned to guarantees alignment rules are satisfied. */ + if ((size % 16) != 0) { + size += 16 - (size % 16); + } + + MTLShaderUniformBlock &uni_block = ubos_[total_uniform_blocks_]; + uni_block.name_offset = name_offset; + /* We offset the buffer binding index by one, as the first slot is reserved for push constant + * data. */ + uni_block.buffer_index = buffer_index + 1; + uni_block.size = size; + uni_block.current_offset = 0; + uni_block.stage_mask = ShaderStage::BOTH; + return (total_uniform_blocks_++); +} + +void MTLShaderInterface::add_push_constant_block(uint32_t name_offset) +{ + push_constant_block_.name_offset = name_offset; + /* Push constant data block is always uniform buffer index 0. */ + push_constant_block_.buffer_index = 0; + /* Size starts at zero and grows as uniforms are added. */ + push_constant_block_.size = 0; + + push_constant_block_.current_offset = 0; + push_constant_block_.stage_mask = ShaderStage::BOTH; +} + +void MTLShaderInterface::add_uniform(uint32_t name_offset, eMTLDataType type, int array_len) +{ + BLI_assert(array_len > 0); + BLI_assert(total_uniforms_ < MTL_MAX_UNIFORMS_PER_BLOCK); + if (total_uniforms_ >= MTL_MAX_UNIFORMS_PER_BLOCK) { + MTL_LOG_WARNING( + "[Warning] Cannot add uniform '%s' to shader interface '%s' as the uniform limit of %d " + "has been reached.\n", + name, + name, + MTL_MAX_UNIFORMS_PER_BLOCK); + return; + } + MTLShaderUniform &uniform = uniforms_[total_uniforms_]; + uniform.name_offset = name_offset; + + /* Determine size and offset alignment -- C++ struct alignment rules: Base address of value must + * match alignment of type. GLSL follows minimum type alignment of 4. */ + int data_type_size = mtl_get_data_type_size(type) * array_len; + int data_type_alignment = max_ii(mtl_get_data_type_alignment(type), 4); + int current_offset = push_constant_block_.current_offset; + if ((current_offset % data_type_alignment) != 0) { + current_offset += data_type_alignment - (current_offset % data_type_alignment); + } + + uniform.size_in_bytes = data_type_size; + uniform.byte_offset = current_offset; + uniform.type = type; + uniform.array_len = array_len; + total_uniforms_++; + + /* Update Push constant block-- update offset, re-size and re-align total memory requirement to + * be 16-byte aligned. Following GLSL std140. 
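+   * Worked example (hypothetical layout, assuming a 4-byte aligned float and a
+   * 16-byte aligned vec4): a `float` lands at byte offset 0 and the block pads
+   * to 16; a following `vec4` aligns up to offset 16, growing the block to 32.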
*/ + push_constant_block_.current_offset = current_offset + data_type_size; + if (push_constant_block_.current_offset > push_constant_block_.size) { + push_constant_block_.size = push_constant_block_.current_offset; + if ((push_constant_block_.size % 16) != 0) { + push_constant_block_.size += 16 - (push_constant_block_.size % 16); + } + } + + /* Validate properties. */ + BLI_assert(uniform.size_in_bytes > 0); + BLI_assert_msg( + current_offset + data_type_size <= push_constant_block_.size, + "Uniform size and offset sits outside the specified size range for the uniform block"); +} + +void MTLShaderInterface::add_texture(uint32_t name_offset, + uint32_t texture_slot, + eGPUTextureType tex_binding_type, + ShaderStage stage_mask) +{ + BLI_assert(texture_slot >= 0 && texture_slot < GPU_max_textures()); + if (texture_slot >= 0 && texture_slot < GPU_max_textures()) { + + MTLShaderTexture &tex = textures_[texture_slot]; + BLI_assert_msg(tex.used == false, "Texture slot already in-use by another binding"); + tex.name_offset = name_offset; + tex.slot_index = texture_slot; + tex.type = tex_binding_type; + tex.stage_mask = stage_mask; + tex.used = true; + total_textures_++; + max_texture_index_ = max_ii(max_texture_index_, texture_slot); + } + else { + BLI_assert_msg(false, "Exceeding maximum supported texture count."); + MTL_LOG_WARNING( + "Could not add additional texture with index %d to shader interface. Maximum " + "supported texture count is %d\n", + texture_slot, + GPU_max_textures()); + } +} + +void MTLShaderInterface::map_builtins() +{ + /* Clear builtin arrays to NULL locations. */ + for (const int u : IndexRange(GPU_NUM_UNIFORMS)) { + builtins_[u] = -1; + } + for (const int ubo : IndexRange(GPU_NUM_UNIFORM_BLOCKS)) { + builtin_blocks_[ubo] = -1; + } + + /* Resolve and cache uniform locations for builtin uniforms. */ + for (const int u : IndexRange(GPU_NUM_UNIFORMS)) { + const ShaderInput *uni = this->uniform_get(builtin_uniform_name((GPUUniformBuiltin)u)); + if (uni != nullptr) { + BLI_assert(uni->location >= 0); + if (uni->location >= 0) { + builtins_[u] = uni->location; + MTL_LOG_INFO("Mapped builtin uniform '%s' NB: '%s' to location: %d\n", + builtin_uniform_name((GPUUniformBuiltin)u), + get_name_at_offset(uni->name_offset), + uni->location); + } + } + } + + /* Resolve and cache uniform locations for builtin uniform blocks. */ + for (const int u : IndexRange(GPU_NUM_UNIFORM_BLOCKS)) { + const ShaderInput *uni = this->ubo_get(builtin_uniform_block_name((GPUUniformBlockBuiltin)u)); + + if (uni != nullptr) { + BLI_assert(uni->location >= 0); + if (uni->location >= 0) { + builtin_blocks_[u] = uni->binding; + MTL_LOG_INFO("Mapped builtin uniform block '%s' to location %d\n", + builtin_uniform_block_name((GPUUniformBlockBuiltin)u), + uni->location); + } + } + } +} + +/* Populate #ShaderInput struct based on interface. */ +void MTLShaderInterface::prepare_common_shader_inputs() +{ + /* `ShaderInput inputs_` maps a uniform name to an external + * uniform location, which is used as an array index to look-up + * information in the local #MTLShaderInterface input structs. + * + * #ShaderInput population follows the ordering rules in #gpu_shader_interface. */ + + /* Populate #ShaderInterface counts. */ + attr_len_ = this->get_total_attributes(); + ubo_len_ = this->get_total_uniform_blocks(); + uniform_len_ = this->get_total_uniforms() + this->get_total_textures(); + + /* TODO(Metal): Support storage buffer bindings. Pending compute shader support. 
*/ + ssbo_len_ = 0; + + /* Calculate total inputs and allocate #ShaderInput array. */ + /* NOTE: We use the existing `name_buffer_` allocated for internal input structs. */ + int input_tot_len = attr_len_ + ubo_len_ + uniform_len_ + ssbo_len_; + inputs_ = (ShaderInput *)MEM_callocN(sizeof(ShaderInput) * input_tot_len, __func__); + ShaderInput *current_input = inputs_; + + /* Attributes. */ + for (const int attr_index : IndexRange(total_attributes_)) { + MTLShaderInputAttribute &shd_attr = attributes_[attr_index]; + current_input->name_offset = shd_attr.name_offset; + current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_attr.name_offset)); + current_input->location = attr_index; + current_input->binding = attr_index; + current_input++; + } + + /* UBOs. */ + BLI_assert(&inputs_[attr_len_] >= current_input); + current_input = &inputs_[attr_len_]; + for (const int ubo_index : IndexRange(total_uniform_blocks_)) { + MTLShaderUniformBlock &shd_ubo = ubos_[ubo_index]; + current_input->name_offset = shd_ubo.name_offset; + current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ubo.name_offset)); + /* Location refers to the index in the ubos_ array. */ + current_input->location = ubo_index; + /* Final binding location refers to the buffer binding index within the shader (Relative to + * MTL_uniform_buffer_base_index). */ + current_input->binding = shd_ubo.buffer_index; + current_input++; + } + + /* Uniforms. */ + BLI_assert(&inputs_[attr_len_ + ubo_len_] >= current_input); + current_input = &inputs_[attr_len_ + ubo_len_]; + for (const int uniform_index : IndexRange(total_uniforms_)) { + MTLShaderUniform &shd_uni = uniforms_[uniform_index]; + current_input->name_offset = shd_uni.name_offset; + current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_uni.name_offset)); + current_input->location = uniform_index; + current_input->binding = uniform_index; + current_input++; + } + + /* Textures. + * NOTE(Metal): Textures are externally treated as uniforms in #gpu_shader_interface. + * Location for textures resolved as `binding` value. This + * is the index into the local `MTLShaderTexture textures[]` array. + * + * In MSL, we cannot trivially remap which texture slot a given texture + * handle points to, unlike in GLSL, where a uniform sampler/image can be updated + * and queried as both a texture and a uniform. */ + for (int texture_index = 0; texture_index <= max_texture_index_; texture_index++) { + const MTLShaderTexture &shd_tex = textures_[texture_index]; + + /* Not all texture entries are used when explicit texture locations are specified. */ + if (shd_tex.used) { + BLI_assert_msg(shd_tex.slot_index == texture_index, + "Texture binding slot should match array index for texture."); + current_input->name_offset = shd_tex.name_offset; + current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_tex.name_offset)); + + /* Location represents look-up address. + * For Metal, this location is a unique value offset by + * total_uniforms such that it does not overlap. + * + * This range offset allows a check in the uniform look-up + * to ensure texture handles are not treated as standard uniforms in Metal. */ + current_input->location = texture_index + total_uniforms_; + + /* Binding represents texture slot `[[texture(n)]]`. */ + current_input->binding = shd_tex.slot_index; + current_input++; + } + } + + /* SSBO bindings. + * TODO(Metal): Support SSBOs. Pending compute support. 
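No SSBO entries are written yet, so the pointer reset below simply leaves current_input at the end of the uniform and texture entries.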
*/ + BLI_assert(&inputs_[attr_len_ + ubo_len_ + uniform_len_] >= current_input); + current_input = &inputs_[attr_len_ + ubo_len_ + uniform_len_]; + + /* Map builtin uniform indices to uniform binding locations. */ + this->map_builtins(); +} + +void MTLShaderInterface::set_sampler_properties(bool use_argument_buffer, + uint32_t argument_buffer_bind_index_vert, + uint32_t argument_buffer_bind_index_frag) +{ + sampler_use_argument_buffer_ = use_argument_buffer; + sampler_argument_buffer_bind_index_vert_ = argument_buffer_bind_index_vert; + sampler_argument_buffer_bind_index_frag_ = argument_buffer_bind_index_frag; +} + +/* Attributes. */ +const MTLShaderInputAttribute &MTLShaderInterface::get_attribute(uint index) const +{ + BLI_assert(index < MTL_MAX_VERTEX_INPUT_ATTRIBUTES); + BLI_assert(index < get_total_attributes()); + return attributes_[index]; +} + +uint32_t MTLShaderInterface::get_total_attributes() const +{ + return total_attributes_; +} + +uint32_t MTLShaderInterface::get_total_vertex_stride() const +{ + return total_vert_stride_; +} + +uint32_t MTLShaderInterface::get_enabled_attribute_mask() const +{ + return enabled_attribute_mask_; +} + +/* Uniforms. */ +const MTLShaderUniform &MTLShaderInterface::get_uniform(uint index) const +{ + BLI_assert(index < MTL_MAX_UNIFORMS_PER_BLOCK); + BLI_assert(index < get_total_uniforms()); + return uniforms_[index]; +} + +uint32_t MTLShaderInterface::get_total_uniforms() const +{ + return total_uniforms_; +} + +/* Uniform Blocks. */ +const MTLShaderUniformBlock &MTLShaderInterface::get_uniform_block(uint index) const +{ + BLI_assert(index < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + BLI_assert(index < get_total_uniform_blocks()); + return ubos_[index]; +} + +const MTLShaderUniformBlock &MTLShaderInterface::get_push_constant_block() const +{ + return push_constant_block_; +} + +uint32_t MTLShaderInterface::get_total_uniform_blocks() const +{ + return total_uniform_blocks_; +} + +bool MTLShaderInterface::has_uniform_block(uint32_t block_index) const +{ + return (block_index < total_uniform_blocks_); +} + +uint32_t MTLShaderInterface::get_uniform_block_size(uint32_t block_index) const +{ + return (block_index < total_uniform_blocks_) ? ubos_[block_index].size : 0; +} + +/* Textures. */ +const MTLShaderTexture &MTLShaderInterface::get_texture(uint index) const +{ + BLI_assert(index < MTL_MAX_TEXTURE_SLOTS); + BLI_assert(index <= get_max_texture_index()); + return textures_[index]; +} + +uint32_t MTLShaderInterface::get_total_textures() const +{ + return total_textures_; +} + +uint32_t MTLShaderInterface::get_max_texture_index() const +{ + return max_texture_index_; +} + +bool MTLShaderInterface::get_use_argument_buffer_for_samplers( + int *vertex_arg_buffer_bind_index, int *fragment_arg_buffer_bind_index) const +{ + /* Returns argument buffer binding slot for each shader stage. + * The exact bind slot may be different, as each stage has different buffer inputs. */ + *vertex_arg_buffer_bind_index = sampler_argument_buffer_bind_index_vert_; + *fragment_arg_buffer_bind_index = sampler_argument_buffer_bind_index_frag_; + return sampler_use_argument_buffer_; +} + +id<MTLArgumentEncoder> MTLShaderInterface::find_argument_encoder(int buffer_index) const +{ + id encoder = nil; + for (const int i : IndexRange(ARGUMENT_ENCODERS_CACHE_SIZE)) { + encoder = arg_encoders_[i].buffer_index == buffer_index ? 
arg_encoders_[i].encoder : encoder; + } + return encoder; +} + +void MTLShaderInterface::insert_argument_encoder(int buffer_index, id encoder) +{ + for (const int i : IndexRange(ARGUMENT_ENCODERS_CACHE_SIZE)) { + if (arg_encoders_[i].encoder == nil) { + arg_encoders_[i].encoder = encoder; + arg_encoders_[i].buffer_index = buffer_index; + return; + } + } + MTL_LOG_WARNING("could not insert encoder into cache!"); +} + +MTLVertexFormat mtl_datatype_to_vertex_type(eMTLDataType type) +{ + switch (type) { + case MTL_DATATYPE_CHAR: + return MTLVertexFormatChar; + case MTL_DATATYPE_UCHAR: + return MTLVertexFormatUChar; + case MTL_DATATYPE_BOOL: + return MTLVertexFormatUChar; + case MTL_DATATYPE_CHAR2: + return MTLVertexFormatChar2; + case MTL_DATATYPE_UCHAR2: + return MTLVertexFormatUChar2; + case MTL_DATATYPE_BOOL2: + return MTLVertexFormatUChar2; + case MTL_DATATYPE_SHORT: + return MTLVertexFormatShort; + case MTL_DATATYPE_USHORT: + return MTLVertexFormatUShort; + case MTL_DATATYPE_CHAR3: + return MTLVertexFormatChar3; + case MTL_DATATYPE_UCHAR3: + return MTLVertexFormatUChar3; + case MTL_DATATYPE_BOOL3: + return MTLVertexFormatUChar3; + case MTL_DATATYPE_CHAR4: + return MTLVertexFormatChar4; + case MTL_DATATYPE_UCHAR4: + return MTLVertexFormatUChar4; + case MTL_DATATYPE_INT: + return MTLVertexFormatInt; + case MTL_DATATYPE_UINT: + return MTLVertexFormatUInt; + case MTL_DATATYPE_BOOL4: + return MTLVertexFormatUChar4; + case MTL_DATATYPE_SHORT2: + return MTLVertexFormatShort2; + case MTL_DATATYPE_USHORT2: + return MTLVertexFormatUShort2; + case MTL_DATATYPE_FLOAT: + return MTLVertexFormatFloat; + case MTL_DATATYPE_HALF2x2: + case MTL_DATATYPE_HALF3x2: + case MTL_DATATYPE_HALF4x2: + BLI_assert_msg(false, "Unsupported raw vertex attribute types in Blender."); + return MTLVertexFormatInvalid; + + case MTL_DATATYPE_SHORT3: + return MTLVertexFormatShort3; + case MTL_DATATYPE_USHORT3: + return MTLVertexFormatUShort3; + case MTL_DATATYPE_SHORT4: + return MTLVertexFormatShort4; + case MTL_DATATYPE_USHORT4: + return MTLVertexFormatUShort4; + case MTL_DATATYPE_INT2: + return MTLVertexFormatInt2; + case MTL_DATATYPE_UINT2: + return MTLVertexFormatUInt2; + case MTL_DATATYPE_FLOAT2: + return MTLVertexFormatFloat2; + case MTL_DATATYPE_LONG: + return MTLVertexFormatInt; + case MTL_DATATYPE_ULONG: + return MTLVertexFormatUInt; + case MTL_DATATYPE_HALF2x3: + case MTL_DATATYPE_HALF2x4: + case MTL_DATATYPE_HALF3x3: + case MTL_DATATYPE_HALF3x4: + case MTL_DATATYPE_HALF4x3: + case MTL_DATATYPE_HALF4x4: + case MTL_DATATYPE_FLOAT2x2: + case MTL_DATATYPE_FLOAT3x2: + case MTL_DATATYPE_FLOAT4x2: + BLI_assert_msg(false, "Unsupported raw vertex attribute types in Blender."); + return MTLVertexFormatInvalid; + + case MTL_DATATYPE_INT3: + return MTLVertexFormatInt3; + case MTL_DATATYPE_INT4: + return MTLVertexFormatInt4; + case MTL_DATATYPE_UINT3: + return MTLVertexFormatUInt3; + case MTL_DATATYPE_UINT4: + return MTLVertexFormatUInt4; + case MTL_DATATYPE_FLOAT3: + return MTLVertexFormatFloat3; + case MTL_DATATYPE_FLOAT4: + return MTLVertexFormatFloat4; + case MTL_DATATYPE_LONG2: + return MTLVertexFormatInt2; + case MTL_DATATYPE_ULONG2: + return MTLVertexFormatUInt2; + case MTL_DATATYPE_FLOAT2x3: + case MTL_DATATYPE_FLOAT2x4: + case MTL_DATATYPE_FLOAT3x3: + case MTL_DATATYPE_FLOAT3x4: + case MTL_DATATYPE_FLOAT4x3: + case MTL_DATATYPE_FLOAT4x4: + BLI_assert_msg(false, "Unsupported raw vertex attribute types in Blender."); + return MTLVertexFormatInvalid; + + case MTL_DATATYPE_LONG3: + return MTLVertexFormatInt3; + case 
MTL_DATATYPE_LONG4: + return MTLVertexFormatInt4; + case MTL_DATATYPE_ULONG3: + return MTLVertexFormatUInt3; + case MTL_DATATYPE_ULONG4: + return MTLVertexFormatUInt4; + + /* Special Types */ + case MTL_DATATYPE_UINT1010102_NORM: + return MTLVertexFormatUInt1010102Normalized; + case MTL_DATATYPE_INT1010102_NORM: + return MTLVertexFormatInt1010102Normalized; + + default: + BLI_assert(false); + return MTLVertexFormatInvalid; + }; +} + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_shader_interface_type.hh b/source/blender/gpu/metal/mtl_shader_interface_type.hh new file mode 100644 index 00000000000..3c4c87ee25b --- /dev/null +++ b/source/blender/gpu/metal/mtl_shader_interface_type.hh @@ -0,0 +1,251 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ +#pragma once + +#include "BLI_assert.h" + +enum eMTLDataType { + MTL_DATATYPE_CHAR, + MTL_DATATYPE_CHAR2, + MTL_DATATYPE_CHAR3, + MTL_DATATYPE_CHAR4, + + MTL_DATATYPE_UCHAR, + MTL_DATATYPE_UCHAR2, + MTL_DATATYPE_UCHAR3, + MTL_DATATYPE_UCHAR4, + + MTL_DATATYPE_BOOL, + MTL_DATATYPE_BOOL2, + MTL_DATATYPE_BOOL3, + MTL_DATATYPE_BOOL4, + + MTL_DATATYPE_SHORT, + MTL_DATATYPE_SHORT2, + MTL_DATATYPE_SHORT3, + MTL_DATATYPE_SHORT4, + + MTL_DATATYPE_USHORT, + MTL_DATATYPE_USHORT2, + MTL_DATATYPE_USHORT3, + MTL_DATATYPE_USHORT4, + + MTL_DATATYPE_INT, + MTL_DATATYPE_INT2, + MTL_DATATYPE_INT3, + MTL_DATATYPE_INT4, + + MTL_DATATYPE_UINT, + MTL_DATATYPE_UINT2, + MTL_DATATYPE_UINT3, + MTL_DATATYPE_UINT4, + + MTL_DATATYPE_FLOAT, + MTL_DATATYPE_FLOAT2, + MTL_DATATYPE_FLOAT3, + MTL_DATATYPE_FLOAT4, + + MTL_DATATYPE_LONG, + MTL_DATATYPE_LONG2, + MTL_DATATYPE_LONG3, + MTL_DATATYPE_LONG4, + + MTL_DATATYPE_ULONG, + MTL_DATATYPE_ULONG2, + MTL_DATATYPE_ULONG3, + MTL_DATATYPE_ULONG4, + + MTL_DATATYPE_HALF2x2, + MTL_DATATYPE_HALF2x3, + MTL_DATATYPE_HALF2x4, + MTL_DATATYPE_HALF3x2, + MTL_DATATYPE_HALF3x3, + MTL_DATATYPE_HALF3x4, + MTL_DATATYPE_HALF4x2, + MTL_DATATYPE_HALF4x3, + MTL_DATATYPE_HALF4x4, + + MTL_DATATYPE_FLOAT2x2, + MTL_DATATYPE_FLOAT2x3, + MTL_DATATYPE_FLOAT2x4, + MTL_DATATYPE_FLOAT3x2, + MTL_DATATYPE_FLOAT3x3, + MTL_DATATYPE_FLOAT3x4, + MTL_DATATYPE_FLOAT4x2, + MTL_DATATYPE_FLOAT4x3, + MTL_DATATYPE_FLOAT4x4, + + MTL_DATATYPE_UINT1010102_NORM, + MTL_DATATYPE_INT1010102_NORM +}; + +inline uint mtl_get_data_type_size(eMTLDataType type) +{ + switch (type) { + case MTL_DATATYPE_CHAR: + case MTL_DATATYPE_UCHAR: + case MTL_DATATYPE_BOOL: + return 1; + case MTL_DATATYPE_CHAR2: + case MTL_DATATYPE_UCHAR2: + case MTL_DATATYPE_BOOL2: + case MTL_DATATYPE_SHORT: + case MTL_DATATYPE_USHORT: + return 2; + + case MTL_DATATYPE_CHAR3: + case MTL_DATATYPE_UCHAR3: + case MTL_DATATYPE_BOOL3: + return 3; + case MTL_DATATYPE_CHAR4: + case MTL_DATATYPE_UCHAR4: + case MTL_DATATYPE_INT: + case MTL_DATATYPE_UINT: + case MTL_DATATYPE_BOOL4: + case MTL_DATATYPE_SHORT2: + case MTL_DATATYPE_USHORT2: + case MTL_DATATYPE_FLOAT: + case MTL_DATATYPE_UINT1010102_NORM: + case MTL_DATATYPE_INT1010102_NORM: + return 4; + + case MTL_DATATYPE_SHORT3: + case MTL_DATATYPE_USHORT3: + case MTL_DATATYPE_SHORT4: + case MTL_DATATYPE_USHORT4: + case MTL_DATATYPE_INT2: + case MTL_DATATYPE_UINT2: + case MTL_DATATYPE_FLOAT2: + case MTL_DATATYPE_LONG: + case MTL_DATATYPE_ULONG: + case MTL_DATATYPE_HALF2x2: + return 8; + + case MTL_DATATYPE_HALF3x2: + return 12; + + case MTL_DATATYPE_INT3: + case MTL_DATATYPE_INT4: + case MTL_DATATYPE_UINT3: + case MTL_DATATYPE_UINT4: + case MTL_DATATYPE_FLOAT3: + case MTL_DATATYPE_FLOAT4: + case MTL_DATATYPE_LONG2: + case 
MTL_DATATYPE_ULONG2: + case MTL_DATATYPE_HALF2x3: + case MTL_DATATYPE_HALF2x4: + case MTL_DATATYPE_HALF4x2: + return 16; + + case MTL_DATATYPE_HALF3x3: + case MTL_DATATYPE_HALF3x4: + case MTL_DATATYPE_FLOAT3x2: + return 24; + + case MTL_DATATYPE_LONG3: + case MTL_DATATYPE_LONG4: + case MTL_DATATYPE_ULONG3: + case MTL_DATATYPE_ULONG4: + case MTL_DATATYPE_HALF4x3: + case MTL_DATATYPE_HALF4x4: + case MTL_DATATYPE_FLOAT2x3: + case MTL_DATATYPE_FLOAT2x4: + case MTL_DATATYPE_FLOAT4x2: + return 32; + + case MTL_DATATYPE_FLOAT3x3: + case MTL_DATATYPE_FLOAT3x4: + return 48; + + case MTL_DATATYPE_FLOAT4x3: + case MTL_DATATYPE_FLOAT4x4: + return 64; + default: + BLI_assert(false); + return 0; + }; +} + +inline uint mtl_get_data_type_alignment(eMTLDataType type) +{ + switch (type) { + case MTL_DATATYPE_CHAR: + case MTL_DATATYPE_UCHAR: + case MTL_DATATYPE_BOOL: + return 1; + case MTL_DATATYPE_CHAR2: + case MTL_DATATYPE_UCHAR2: + case MTL_DATATYPE_BOOL2: + case MTL_DATATYPE_SHORT: + case MTL_DATATYPE_USHORT: + return 2; + + case MTL_DATATYPE_CHAR3: + case MTL_DATATYPE_UCHAR3: + case MTL_DATATYPE_BOOL3: + return 3; + case MTL_DATATYPE_CHAR4: + case MTL_DATATYPE_UCHAR4: + case MTL_DATATYPE_INT: + case MTL_DATATYPE_UINT: + case MTL_DATATYPE_BOOL4: + case MTL_DATATYPE_SHORT2: + case MTL_DATATYPE_USHORT2: + case MTL_DATATYPE_FLOAT: + case MTL_DATATYPE_HALF2x2: + case MTL_DATATYPE_HALF3x2: + case MTL_DATATYPE_HALF4x2: + case MTL_DATATYPE_UINT1010102_NORM: + case MTL_DATATYPE_INT1010102_NORM: + return 4; + + case MTL_DATATYPE_SHORT3: + case MTL_DATATYPE_USHORT3: + case MTL_DATATYPE_SHORT4: + case MTL_DATATYPE_USHORT4: + case MTL_DATATYPE_INT2: + case MTL_DATATYPE_UINT2: + case MTL_DATATYPE_FLOAT2: + case MTL_DATATYPE_LONG: + case MTL_DATATYPE_ULONG: + case MTL_DATATYPE_HALF2x3: + case MTL_DATATYPE_HALF2x4: + case MTL_DATATYPE_HALF3x3: + case MTL_DATATYPE_HALF3x4: + case MTL_DATATYPE_HALF4x3: + case MTL_DATATYPE_HALF4x4: + case MTL_DATATYPE_FLOAT2x2: + case MTL_DATATYPE_FLOAT3x2: + case MTL_DATATYPE_FLOAT4x2: + return 8; + + case MTL_DATATYPE_INT3: + case MTL_DATATYPE_INT4: + case MTL_DATATYPE_UINT3: + case MTL_DATATYPE_UINT4: + case MTL_DATATYPE_FLOAT3: + case MTL_DATATYPE_FLOAT4: + case MTL_DATATYPE_LONG2: + case MTL_DATATYPE_ULONG2: + case MTL_DATATYPE_FLOAT2x3: + case MTL_DATATYPE_FLOAT2x4: + case MTL_DATATYPE_FLOAT3x3: + case MTL_DATATYPE_FLOAT3x4: + case MTL_DATATYPE_FLOAT4x3: + case MTL_DATATYPE_FLOAT4x4: + return 16; + + case MTL_DATATYPE_LONG3: + case MTL_DATATYPE_LONG4: + case MTL_DATATYPE_ULONG3: + case MTL_DATATYPE_ULONG4: + return 32; + + default: + BLI_assert_msg(false, "Unrecognized MTL datatype."); + return 0; + }; +} diff --git a/source/blender/gpu/metal/mtl_shader_shared.h b/source/blender/gpu/metal/mtl_shader_shared.h new file mode 100644 index 00000000000..f6fd9035001 --- /dev/null +++ b/source/blender/gpu/metal/mtl_shader_shared.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* Global parameters. */ +#define MTL_SSBO_VERTEX_FETCH_MAX_VBOS 6 /* buffer bind 0..5 */ +#define MTL_SSBO_VERTEX_FETCH_IBO_INDEX MTL_SSBO_VERTEX_FETCH_MAX_VBOS + +/* Add Types as needed (Also need to be added to mtl_shader.h). 
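These identifiers are shared between the host C++ code and the generated MSL (hence the _shared header); presumably they describe the source format of each attribute for the SSBO vertex-fetch path parameterized above.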
*/ +#define GPU_SHADER_ATTR_TYPE_FLOAT 0 +#define GPU_SHADER_ATTR_TYPE_INT 1 +#define GPU_SHADER_ATTR_TYPE_SHORT 2 +#define GPU_SHADER_ATTR_TYPE_CHAR 3 +#define GPU_SHADER_ATTR_TYPE_VEC2 4 +#define GPU_SHADER_ATTR_TYPE_VEC3 5 +#define GPU_SHADER_ATTR_TYPE_VEC4 6 +#define GPU_SHADER_ATTR_TYPE_UVEC2 7 +#define GPU_SHADER_ATTR_TYPE_UVEC3 8 +#define GPU_SHADER_ATTR_TYPE_UVEC4 9 +#define GPU_SHADER_ATTR_TYPE_IVEC2 10 +#define GPU_SHADER_ATTR_TYPE_IVEC3 11 +#define GPU_SHADER_ATTR_TYPE_IVEC4 12 +#define GPU_SHADER_ATTR_TYPE_MAT3 13 +#define GPU_SHADER_ATTR_TYPE_MAT4 14 +#define GPU_SHADER_ATTR_TYPE_UCHAR_NORM 15 +#define GPU_SHADER_ATTR_TYPE_UCHAR2_NORM 16 +#define GPU_SHADER_ATTR_TYPE_UCHAR3_NORM 17 +#define GPU_SHADER_ATTR_TYPE_UCHAR4_NORM 18 +#define GPU_SHADER_ATTR_TYPE_INT1010102_NORM 19 +#define GPU_SHADER_ATTR_TYPE_SHORT3_NORM 20 +#define GPU_SHADER_ATTR_TYPE_CHAR2 21 +#define GPU_SHADER_ATTR_TYPE_CHAR3 22 +#define GPU_SHADER_ATTR_TYPE_CHAR4 23 +#define GPU_SHADER_ATTR_TYPE_UINT 24 diff --git a/source/blender/gpu/metal/mtl_state.hh b/source/blender/gpu/metal/mtl_state.hh index f2d85f9648b..1af56378c5a 100644 --- a/source/blender/gpu/metal/mtl_state.hh +++ b/source/blender/gpu/metal/mtl_state.hh @@ -1,6 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + /** \file * \ingroup gpu */ +#pragma once #include "MEM_guardedalloc.h" @@ -9,6 +12,8 @@ #include "GPU_state.h" #include "gpu_state_private.hh" +#include "mtl_pso_descriptor_state.hh" + namespace blender::gpu { /* Forward Declarations. */ @@ -19,7 +24,7 @@ class MTLContext; * Metal Implementation. **/ class MTLStateManager : public StateManager { - public: + private: /* Current state of the associated MTLContext. * Avoids resetting the whole state for every change. */ @@ -27,24 +32,33 @@ class MTLStateManager : public StateManager { GPUStateMutable current_mutable_; MTLContext *context_; + /* Global pipeline descriptors. */ + MTLRenderPipelineStateDescriptor pipeline_descriptor_; + public: MTLStateManager(MTLContext *ctx); - void apply_state(void) override; - void force_state(void) override; + void apply_state() override; + void force_state() override; void issue_barrier(eGPUBarrier barrier_bits) override; void texture_bind(Texture *tex, eGPUSamplerState sampler, int unit) override; void texture_unbind(Texture *tex) override; - void texture_unbind_all(void) override; + void texture_unbind_all() override; void image_bind(Texture *tex, int unit) override; void image_unbind(Texture *tex) override; - void image_unbind_all(void) override; + void image_unbind_all() override; void texture_unpack_row_length_set(uint len) override; + /* Global pipeline descriptors. */ + MTLRenderPipelineStateDescriptor &get_pipeline_descriptor() + { + return pipeline_descriptor_; + } + private: void set_write_mask(const eGPUWriteMask value); void set_depth_test(const eGPUDepthTest value); @@ -62,10 +76,10 @@ class MTLStateManager : public StateManager { void set_mutable_state(const GPUStateMutable &state); /* METAL State utility functions. 
*/ - void mtl_state_init(void); + void mtl_state_init(); void mtl_depth_range(float near, float far); - void mtl_stencil_mask(unsigned int mask); - void mtl_stencil_set_func(eGPUStencilTest stencil_func, int ref, unsigned int mask); + void mtl_stencil_mask(uint mask); + void mtl_stencil_set_func(eGPUStencilTest stencil_func, int ref, uint mask); MEM_CXX_CLASS_ALLOC_FUNCS("MTLStateManager") }; diff --git a/source/blender/gpu/metal/mtl_state.mm b/source/blender/gpu/metal/mtl_state.mm index fa2f5c54391..31182cf91d1 100644 --- a/source/blender/gpu/metal/mtl_state.mm +++ b/source/blender/gpu/metal/mtl_state.mm @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + /** \file * \ingroup gpu */ @@ -8,6 +10,8 @@ #include "GPU_framebuffer.h" #include "mtl_context.hh" +#include "mtl_framebuffer.hh" +#include "mtl_shader_interface_type.hh" #include "mtl_state.hh" namespace blender::gpu { @@ -16,16 +20,16 @@ namespace blender::gpu { /** \name MTLStateManager * \{ */ -void MTLStateManager::mtl_state_init(void) +void MTLStateManager::mtl_state_init() { - BLI_assert(this->context_); - this->context_->pipeline_state_init(); + BLI_assert(context_); + context_->pipeline_state_init(); } MTLStateManager::MTLStateManager(MTLContext *ctx) : StateManager() { /* Initialize State. */ - this->context_ = ctx; + context_ = ctx; mtl_state_init(); /* Force update using default state. */ @@ -35,15 +39,16 @@ MTLStateManager::MTLStateManager(MTLContext *ctx) : StateManager() set_mutable_state(mutable_state); } -void MTLStateManager::apply_state(void) +void MTLStateManager::apply_state() { this->set_state(this->state); this->set_mutable_state(this->mutable_state); - /* TODO(Metal): Enable after integration of MTLFrameBuffer. */ - /* static_cast<MTLFrameBuffer *>(this->context_->active_fb)->apply_state(); */ + + /* Apply active FrameBuffer state. */ + static_cast<MTLFrameBuffer *>(context_->active_fb)->apply_state(); }; -void MTLStateManager::force_state(void) +void MTLStateManager::force_state() { /* Little exception for clip distances since they need to keep the old count correct. 
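For example, if two clip planes were enabled beforehand, the forced state must still carry a clip_distances count of 2 rather than resetting it.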
*/ uint32_t clip_distances = current_.clip_distances; @@ -103,10 +108,10 @@ void MTLStateManager::set_state(const GPUState &state) void MTLStateManager::mtl_depth_range(float near, float far) { - BLI_assert(this->context_); + BLI_assert(context_); BLI_assert(near >= 0.0 && near < 1.0); BLI_assert(far > 0.0 && far <= 1.0); - MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state; + MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state; MTLContextDepthStencilState &ds_state = pipeline_state.depth_stencil_state; ds_state.depth_range_near = near; @@ -117,7 +122,7 @@ void MTLStateManager::mtl_depth_range(float near, float far) void MTLStateManager::set_mutable_state(const GPUStateMutable &state) { GPUStateMutable changed = state ^ current_mutable_; - MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state; + MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state; if (float_as_uint(changed.point_size) != 0) { pipeline_state.point_size = state.point_size; @@ -150,8 +155,8 @@ void MTLStateManager::set_mutable_state(const GPUStateMutable &state) void MTLStateManager::set_write_mask(const eGPUWriteMask value) { - BLI_assert(this->context_); - MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state; + BLI_assert(context_); + MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state; pipeline_state.depth_stencil_state.depth_write_enable = ((value & GPU_WRITE_DEPTH) != 0); pipeline_state.color_write_mask = (((value & GPU_WRITE_RED) != 0) ? MTLColorWriteMaskRed : MTLColorWriteMaskNone) | @@ -197,7 +202,7 @@ static MTLCompareFunction gpu_stencil_func_to_metal(eGPUStencilTest stencil_func case GPU_STENCIL_ALWAYS: return MTLCompareFunctionAlways; default: - BLI_assert(false && "Unrecognised eGPUStencilTest function"); + BLI_assert(false && "Unrecognized eGPUStencilTest function"); break; } return MTLCompareFunctionAlways; @@ -205,8 +210,8 @@ static MTLCompareFunction gpu_stencil_func_to_metal(eGPUStencilTest stencil_func void MTLStateManager::set_depth_test(const eGPUDepthTest value) { - BLI_assert(this->context_); - MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state; + BLI_assert(context_); + MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state; MTLContextDepthStencilState &ds_state = pipeline_state.depth_stencil_state; ds_state.depth_test_enabled = (value != GPU_DEPTH_NONE); @@ -214,20 +219,18 @@ void MTLStateManager::set_depth_test(const eGPUDepthTest value) pipeline_state.dirty_flags |= MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG; } -void MTLStateManager::mtl_stencil_mask(unsigned int mask) +void MTLStateManager::mtl_stencil_mask(uint mask) { - BLI_assert(this->context_); - MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state; + BLI_assert(context_); + MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state; pipeline_state.depth_stencil_state.stencil_write_mask = mask; pipeline_state.dirty_flags |= MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG; } -void MTLStateManager::mtl_stencil_set_func(eGPUStencilTest stencil_func, - int ref, - unsigned int mask) +void MTLStateManager::mtl_stencil_set_func(eGPUStencilTest stencil_func, int ref, uint mask) { - BLI_assert(this->context_); - MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state; + BLI_assert(context_); + MTLContextGlobalShaderPipelineState &pipeline_state = 
context_->pipeline_state; MTLContextDepthStencilState &ds_state = pipeline_state.depth_stencil_state; ds_state.stencil_func = gpu_stencil_func_to_metal(stencil_func); @@ -275,19 +278,17 @@ void MTLStateManager::set_stencil_test(const eGPUStencilTest test, const eGPUSte { switch (operation) { case GPU_STENCIL_OP_REPLACE: - mtl_stencil_set_op(this->context_, - MTLStencilOperationKeep, - MTLStencilOperationKeep, - MTLStencilOperationReplace); + mtl_stencil_set_op( + context_, MTLStencilOperationKeep, MTLStencilOperationKeep, MTLStencilOperationReplace); break; case GPU_STENCIL_OP_COUNT_DEPTH_PASS: /* Winding inversed due to flipped Y coordinate system in Metal. */ - mtl_stencil_set_op_separate(this->context_, + mtl_stencil_set_op_separate(context_, GPU_CULL_FRONT, MTLStencilOperationKeep, MTLStencilOperationKeep, MTLStencilOperationIncrementWrap); - mtl_stencil_set_op_separate(this->context_, + mtl_stencil_set_op_separate(context_, GPU_CULL_BACK, MTLStencilOperationKeep, MTLStencilOperationKeep, @@ -295,12 +296,12 @@ void MTLStateManager::set_stencil_test(const eGPUStencilTest test, const eGPUSte break; case GPU_STENCIL_OP_COUNT_DEPTH_FAIL: /* Winding inversed due to flipped Y coordinate system in Metal. */ - mtl_stencil_set_op_separate(this->context_, + mtl_stencil_set_op_separate(context_, GPU_CULL_FRONT, MTLStencilOperationKeep, MTLStencilOperationDecrementWrap, MTLStencilOperationKeep); - mtl_stencil_set_op_separate(this->context_, + mtl_stencil_set_op_separate(context_, GPU_CULL_BACK, MTLStencilOperationKeep, MTLStencilOperationIncrementWrap, @@ -308,14 +309,12 @@ void MTLStateManager::set_stencil_test(const eGPUStencilTest test, const eGPUSte break; case GPU_STENCIL_OP_NONE: default: - mtl_stencil_set_op(this->context_, - MTLStencilOperationKeep, - MTLStencilOperationKeep, - MTLStencilOperationKeep); + mtl_stencil_set_op( + context_, MTLStencilOperationKeep, MTLStencilOperationKeep, MTLStencilOperationKeep); } - BLI_assert(this->context_); - MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state; + BLI_assert(context_); + MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state; pipeline_state.depth_stencil_state.stencil_test_enabled = (test != GPU_STENCIL_NONE); pipeline_state.dirty_flags |= MTL_PIPELINE_STATE_DEPTHSTENCIL_FLAG; } @@ -347,8 +346,8 @@ void MTLStateManager::set_logic_op(const bool enable) void MTLStateManager::set_facing(const bool invert) { /* Check Current Context. */ - BLI_assert(this->context_); - MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state; + BLI_assert(context_); + MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state; /* Apply State -- opposite of GL, as METAL default is GPU_CLOCKWISE, GL default is * COUNTERCLOCKWISE. This needs to be the inverse of the default. */ @@ -362,8 +361,8 @@ void MTLStateManager::set_facing(const bool invert) void MTLStateManager::set_backface_culling(const eGPUFaceCullTest test) { /* Check Current Context. */ - BLI_assert(this->context_); - MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state; + BLI_assert(context_); + MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state; /* Apply State. */ pipeline_state.culling_enabled = (test != GPU_CULL_NONE); @@ -386,8 +385,8 @@ void MTLStateManager::set_provoking_vert(const eGPUProvokingVertex vert) void MTLStateManager::set_shadow_bias(const bool enable) { /* Check Current Context. 
*/ - BLI_assert(this->context_); - MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state; + BLI_assert(context_); + MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state; MTLContextDepthStencilState &ds_state = pipeline_state.depth_stencil_state; /* Apply State. */ @@ -500,8 +499,8 @@ void MTLStateManager::set_blend(const eGPUBlend value) } /* Check Current Context. */ - BLI_assert(this->context_); - MTLContextGlobalShaderPipelineState &pipeline_state = this->context_->pipeline_state; + BLI_assert(context_); + MTLContextGlobalShaderPipelineState &pipeline_state = context_->pipeline_state; if (value == GPU_BLEND_SUBTRACT) { pipeline_state.rgb_blend_op = MTLBlendOperationReverseSubtract; @@ -549,58 +548,18 @@ void MTLStateManager::issue_barrier(eGPUBarrier barrier_bits) MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get()); BLI_assert(ctx); - if (ctx->is_render_pass_active()) { - - /* Apple Silicon does not support memory barriers. - * We do not currently need these due to implicit API guarantees. - * NOTE(Metal): MTLFence/MTLEvent may be required to synchronize work if - * untracked resources are ever used. */ - if ([ctx->device hasUnifiedMemory]) { - return; - } - /* Issue barrier. */ - /* TODO(Metal): To be completed pending implementation of RenderCommandEncoder management. */ - id<MTLRenderCommandEncoder> rec = nil; // ctx->get_active_render_command_encoder(); - BLI_assert(rec); - - /* Only supporting Metal on 10.15 onward anyway - Check required for warnings. */ - if (@available(macOS 10.14, *)) { - MTLBarrierScope scope = 0; - if (barrier_bits & GPU_BARRIER_SHADER_IMAGE_ACCESS || - barrier_bits & GPU_BARRIER_TEXTURE_FETCH) { - scope = scope | MTLBarrierScopeTextures | MTLBarrierScopeRenderTargets; - } - if (barrier_bits & GPU_BARRIER_SHADER_STORAGE || - barrier_bits & GPU_BARRIER_VERTEX_ATTRIB_ARRAY || - barrier_bits & GPU_BARRIER_ELEMENT_ARRAY) { - scope = scope | MTLBarrierScopeBuffers; - } - - MTLRenderStages before_stage_flags = 0; - MTLRenderStages after_stage_flags = 0; - if (before_stages & GPU_BARRIER_STAGE_VERTEX && - !(before_stages & GPU_BARRIER_STAGE_FRAGMENT)) { - before_stage_flags = before_stage_flags | MTLRenderStageVertex; - } - if (before_stages & GPU_BARRIER_STAGE_FRAGMENT) { - before_stage_flags = before_stage_flags | MTLRenderStageFragment; - } - if (after_stages & GPU_BARRIER_STAGE_VERTEX) { - after_stage_flags = after_stage_flags | MTLRenderStageVertex; - } - if (after_stages & GPU_BARRIER_STAGE_FRAGMENT) { - after_stage_flags = MTLRenderStageFragment; - } - - if (scope != 0) { - [rec memoryBarrierWithScope:scope - afterStages:after_stage_flags - beforeStages:before_stage_flags]; - } - } + /* Apple Silicon does not support memory barriers. + * We do not currently need these due to implicit API guarantees. + * NOTE(Metal): MTLFence/MTLEvent may be required to synchronize work if + * untracked resources are ever used. 
*/ + if ([ctx->device hasUnifiedMemory]) { + return; } + + ctx->main_command_buffer.insert_memory_barrier(barrier_bits, before_stages, after_stages); } + /** \} */ /* -------------------------------------------------------------------- */ @@ -644,7 +603,7 @@ void MTLStateManager::texture_unbind(Texture *tex_) ctx->texture_unbind(mtl_tex); } -void MTLStateManager::texture_unbind_all(void) +void MTLStateManager::texture_unbind_all() { MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(ctx); @@ -667,7 +626,7 @@ void MTLStateManager::image_unbind(Texture *tex_) this->texture_unbind(tex_); } -void MTLStateManager::image_unbind_all(void) +void MTLStateManager::image_unbind_all() { this->texture_unbind_all(); } diff --git a/source/blender/gpu/metal/mtl_texture.hh b/source/blender/gpu/metal/mtl_texture.hh index b820256ec36..be6f3a3a02b 100644 --- a/source/blender/gpu/metal/mtl_texture.hh +++ b/source/blender/gpu/metal/mtl_texture.hh @@ -40,7 +40,7 @@ struct TextureUpdateRoutineSpecialisation { /* Number of channels the destination texture has (min=1, max=4). */ int component_count_output; - inline bool operator==(const TextureUpdateRoutineSpecialisation &other) const + bool operator==(const TextureUpdateRoutineSpecialisation &other) const { return ((input_data_type == other.input_data_type) && (output_data_type == other.output_data_type) && @@ -48,7 +48,7 @@ struct TextureUpdateRoutineSpecialisation { (component_count_output == other.component_count_output)); } - inline uint64_t hash() const + uint64_t hash() const { blender::DefaultHash<std::string> string_hasher; return (uint64_t)string_hasher( @@ -71,12 +71,12 @@ typedef enum { struct DepthTextureUpdateRoutineSpecialisation { DepthTextureUpdateMode data_mode; - inline bool operator==(const DepthTextureUpdateRoutineSpecialisation &other) const + bool operator==(const DepthTextureUpdateRoutineSpecialisation &other) const { return ((data_mode == other.data_mode)); } - inline uint64_t hash() const + uint64_t hash() const { return (uint64_t)(this->data_mode); } @@ -93,10 +93,10 @@ struct TextureReadRoutineSpecialisation { * 0 = Not a Depth format, * 1 = FLOAT DEPTH, * 2 = 24Bit Integer Depth, - * 4 = 32bit unsigned Integer Depth. */ + * 4 = 32bit Unsigned-Integer Depth. */ int depth_format_mode; - inline bool operator==(const TextureReadRoutineSpecialisation &other) const + bool operator==(const TextureReadRoutineSpecialisation &other) const { return ((input_data_type == other.input_data_type) && (output_data_type == other.output_data_type) && @@ -105,7 +105,7 @@ struct TextureReadRoutineSpecialisation { (depth_format_mode == other.depth_format_mode)); } - inline uint64_t hash() const + uint64_t hash() const { blender::DefaultHash<std::string> string_hasher; return (uint64_t)string_hasher(this->input_data_type + this->output_data_type + @@ -125,28 +125,27 @@ static const int MTL_MAX_MIPMAP_COUNT = 15; /* Max: 16384x16384 */ static const int MTL_MAX_FBO_ATTACHED = 16; /* Samplers */ -typedef struct MTLSamplerState { +struct MTLSamplerState { eGPUSamplerState state; /* Mip min and mip max on sampler state always the same. * Level range now controlled with textureView to be consistent with GL baseLevel. */ - inline bool operator==(const MTLSamplerState &other) const + bool operator==(const MTLSamplerState &other) const { /* Add other parameters as needed. 
*/ return (this->state == other.state); } - operator unsigned int() const + operator uint() const { - return (unsigned int)state; + return (uint)state; } operator uint64_t() const { return (uint64_t)state; } - -} MTLSamplerState; +}; const MTLSamplerState DEFAULT_SAMPLER_STATE = {GPU_SAMPLER_DEFAULT /*, 0, 9999*/}; @@ -174,12 +173,12 @@ class MTLTexture : public Texture { /* Texture Storage. */ id<MTLBuffer> texture_buffer_; - unsigned int aligned_w_ = 0; + uint aligned_w_ = 0; /* Blit Frame-buffer. */ GPUFrameBuffer *blit_fb_ = nullptr; - unsigned int blit_fb_slice_ = 0; - unsigned int blit_fb_mip_ = 0; + uint blit_fb_slice_ = 0; + uint blit_fb_mip_ = 0; /* Texture view properties */ /* In Metal, we use texture views to either limit mipmap ranges, @@ -238,7 +237,7 @@ class MTLTexture : public Texture { void update_sub( int mip, int offset[3], int extent[3], eGPUDataFormat type, const void *data) override; - void generate_mipmap(void) override; + void generate_mipmap() override; void copy_to(Texture *dst) override; void clear(eGPUDataFormat format, const void *data) override; void swizzle_set(const char swizzle_mask[4]) override; @@ -249,16 +248,16 @@ class MTLTexture : public Texture { void *read(int mip, eGPUDataFormat type) override; /* Remove once no longer required -- will just return 0 for now in MTL path*/ - uint gl_bindcode_get(void) const override; + uint gl_bindcode_get() const override; bool texture_is_baked(); - inline const char *get_name() + const char *get_name() { return name_; } protected: - bool init_internal(void) override; + bool init_internal() override; bool init_internal(GPUVertBuf *vbo) override; bool init_internal(const GPUTexture *src, int mip_offset, @@ -280,7 +279,7 @@ class MTLTexture : public Texture { void ensure_mipmaps(int miplvl); /* Flags a given mip level as being used. */ - void add_subresource(unsigned int level); + void add_subresource(uint level); void read_internal(int mip, int x_off, @@ -299,31 +298,31 @@ class MTLTexture : public Texture { id<MTLTexture> get_metal_handle_base(); MTLSamplerState get_sampler_state(); void blit(id<MTLBlitCommandEncoder> blit_encoder, - unsigned int src_x_offset, - unsigned int src_y_offset, - unsigned int src_z_offset, - unsigned int src_slice, - unsigned int src_mip, + uint src_x_offset, + uint src_y_offset, + uint src_z_offset, + uint src_slice, + uint src_mip, gpu::MTLTexture *dest, - unsigned int dst_x_offset, - unsigned int dst_y_offset, - unsigned int dst_z_offset, - unsigned int dst_slice, - unsigned int dst_mip, - unsigned int width, - unsigned int height, - unsigned int depth); + uint dst_x_offset, + uint dst_y_offset, + uint dst_z_offset, + uint dst_slice, + uint dst_mip, + uint width, + uint height, + uint depth); void blit(gpu::MTLTexture *dest, - unsigned int src_x_offset, - unsigned int src_y_offset, - unsigned int dst_x_offset, - unsigned int dst_y_offset, - unsigned int src_mip, - unsigned int dst_mip, - unsigned int dst_slice, + uint src_x_offset, + uint src_y_offset, + uint dst_x_offset, + uint dst_y_offset, + uint src_mip, + uint dst_mip, + uint dst_slice, int width, int height); - GPUFrameBuffer *get_blit_framebuffer(unsigned int dst_slice, unsigned int dst_mip); + GPUFrameBuffer *get_blit_framebuffer(uint dst_slice, uint dst_mip); MEM_CXX_CLASS_ALLOC_FUNCS("gpu::MTLTexture") @@ -349,7 +348,7 @@ class MTLTexture : public Texture { * - Per-component size matches (e.g. GPU_DATA_UBYTE) * OR GPU_DATA_10_11_11_REV && GPU_R11G11B10 (equiv) * OR D24S8 and GPU_DATA_UINT_24_8 - * We can Use BLIT ENCODER. 
+ * We can use BLIT ENCODER. * * OTHERWISE TRIGGER COMPUTE: * - Compute sizes will vary. Threads per grid WILL match 'extent'. @@ -364,20 +363,20 @@ class MTLTexture : public Texture { }; id<MTLComputePipelineState> texture_update_1d_get_kernel( - TextureUpdateRoutineSpecialisation specialisation); + TextureUpdateRoutineSpecialisation specialization); id<MTLComputePipelineState> texture_update_1d_array_get_kernel( - TextureUpdateRoutineSpecialisation specialisation); + TextureUpdateRoutineSpecialisation specialization); id<MTLComputePipelineState> texture_update_2d_get_kernel( - TextureUpdateRoutineSpecialisation specialisation); + TextureUpdateRoutineSpecialisation specialization); id<MTLComputePipelineState> texture_update_2d_array_get_kernel( - TextureUpdateRoutineSpecialisation specialisation); + TextureUpdateRoutineSpecialisation specialization); id<MTLComputePipelineState> texture_update_3d_get_kernel( - TextureUpdateRoutineSpecialisation specialisation); + TextureUpdateRoutineSpecialisation specialization); id<MTLComputePipelineState> mtl_texture_update_impl( - TextureUpdateRoutineSpecialisation specialisation_params, + TextureUpdateRoutineSpecialisation specialization_params, blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>> - &specialisation_cache, + &specialization_cache, eGPUTextureType texture_type); /* Depth Update Utilities */ @@ -385,7 +384,7 @@ class MTLTexture : public Texture { * use a compute shader to write to depth, so we must instead render to a depth target. * These processes use vertex/fragment shaders to render texture data from an intermediate * source, in order to prime the depth buffer*/ - GPUShader *depth_2d_update_sh_get(DepthTextureUpdateRoutineSpecialisation specialisation); + GPUShader *depth_2d_update_sh_get(DepthTextureUpdateRoutineSpecialisation specialization); void update_sub_depth_2d( int mip, int offset[3], int extent[3], eGPUDataFormat type, const void *data); @@ -398,20 +397,20 @@ class MTLTexture : public Texture { }; id<MTLComputePipelineState> texture_read_1d_get_kernel( - TextureReadRoutineSpecialisation specialisation); + TextureReadRoutineSpecialisation specialization); id<MTLComputePipelineState> texture_read_1d_array_get_kernel( - TextureReadRoutineSpecialisation specialisation); + TextureReadRoutineSpecialisation specialization); id<MTLComputePipelineState> texture_read_2d_get_kernel( - TextureReadRoutineSpecialisation specialisation); + TextureReadRoutineSpecialisation specialization); id<MTLComputePipelineState> texture_read_2d_array_get_kernel( - TextureReadRoutineSpecialisation specialisation); + TextureReadRoutineSpecialisation specialization); id<MTLComputePipelineState> texture_read_3d_get_kernel( - TextureReadRoutineSpecialisation specialisation); + TextureReadRoutineSpecialisation specialization); id<MTLComputePipelineState> mtl_texture_read_impl( - TextureReadRoutineSpecialisation specialisation_params, + TextureReadRoutineSpecialisation specialization_params, blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>> - &specialisation_cache, + &specialization_cache, eGPUTextureType texture_type); /* fullscreen blit utilities. */ diff --git a/source/blender/gpu/metal/mtl_texture.mm b/source/blender/gpu/metal/mtl_texture.mm index ca19d1f9e4b..2b7c2333bff 100644 --- a/source/blender/gpu/metal/mtl_texture.mm +++ b/source/blender/gpu/metal/mtl_texture.mm @@ -23,13 +23,6 @@ #include "GHOST_C-api.h" -/* Debug assistance. 
*/ -/* Capture texture update routine for analysis in XCode GPU Frame Debugger. */ -#define DEBUG_TEXTURE_UPDATE_CAPTURE false - -/* Capture texture read routine for analysis in XCode GPU Frame Debugger. */ -#define DEBUG_TEXTURE_READ_CAPTURE false - namespace blender::gpu { /* -------------------------------------------------------------------- */ @@ -41,34 +34,34 @@ void gpu::MTLTexture::mtl_texture_init() BLI_assert(MTLContext::get() != nullptr); /* Status. */ - this->is_baked_ = false; - this->is_dirty_ = false; - this->resource_mode_ = MTL_TEXTURE_MODE_DEFAULT; - this->mtl_max_mips_ = 1; + is_baked_ = false; + is_dirty_ = false; + resource_mode_ = MTL_TEXTURE_MODE_DEFAULT; + mtl_max_mips_ = 1; /* Metal properties. */ - this->texture_ = nil; - this->texture_buffer_ = nil; - this->mip_swizzle_view_ = nil; + texture_ = nil; + texture_buffer_ = nil; + mip_swizzle_view_ = nil; /* Binding information. */ - this->is_bound_ = false; + is_bound_ = false; /* VBO. */ - this->vert_buffer_ = nullptr; - this->vert_buffer_mtl_ = nil; - this->vert_buffer_offset_ = -1; + vert_buffer_ = nullptr; + vert_buffer_mtl_ = nil; + vert_buffer_offset_ = -1; /* Default Swizzle. */ - this->tex_swizzle_mask_[0] = 'r'; - this->tex_swizzle_mask_[1] = 'g'; - this->tex_swizzle_mask_[2] = 'b'; - this->tex_swizzle_mask_[3] = 'a'; - this->mtl_swizzle_mask_ = MTLTextureSwizzleChannelsMake( + tex_swizzle_mask_[0] = 'r'; + tex_swizzle_mask_[1] = 'g'; + tex_swizzle_mask_[2] = 'b'; + tex_swizzle_mask_[3] = 'a'; + mtl_swizzle_mask_ = MTLTextureSwizzleChannelsMake( MTLTextureSwizzleRed, MTLTextureSwizzleGreen, MTLTextureSwizzleBlue, MTLTextureSwizzleAlpha); /* TODO(Metal): Find a way of specifying texture usage externally. */ - this->gpu_image_usage_flags_ = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT; + gpu_image_usage_flags_ = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT; } gpu::MTLTexture::MTLTexture(const char *name) : Texture(name) @@ -89,23 +82,23 @@ gpu::MTLTexture::MTLTexture(const char *name, /* Prep texture from METAL handle. */ BLI_assert(metal_texture != nil); BLI_assert(type == GPU_TEXTURE_2D); - this->type_ = type; + type_ = type; init_2D(metal_texture.width, metal_texture.height, 0, 1, format); /* Assign MTLTexture. */ - this->texture_ = metal_texture; - [this->texture_ retain]; + texture_ = metal_texture; + [texture_ retain]; /* Flag as Baked. */ - this->is_baked_ = true; - this->is_dirty_ = false; - this->resource_mode_ = MTL_TEXTURE_MODE_EXTERNAL; + is_baked_ = true; + is_dirty_ = false; + resource_mode_ = MTL_TEXTURE_MODE_EXTERNAL; } gpu::MTLTexture::~MTLTexture() { /* Unbind if bound. */ - if (this->is_bound_) { + if (is_bound_) { MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); if (ctx != nullptr) { ctx->state_manager->texture_unbind(this); @@ -123,49 +116,49 @@ void gpu::MTLTexture::bake_mip_swizzle_view() { if (texture_view_dirty_flags_) { /* if a texture view was previously created we release it. */ - if (this->mip_swizzle_view_ != nil) { - [this->mip_swizzle_view_ release]; + if (mip_swizzle_view_ != nil) { + [mip_swizzle_view_ release]; + mip_swizzle_view_ = nil; } /* Determine num slices */ int num_slices = 1; - switch (this->type_) { + switch (type_) { case GPU_TEXTURE_1D_ARRAY: - num_slices = this->h_; + num_slices = h_; break; case GPU_TEXTURE_2D_ARRAY: - num_slices = this->d_; + num_slices = d_; break; case GPU_TEXTURE_CUBE: num_slices = 6; break; case GPU_TEXTURE_CUBE_ARRAY: /* d_ is equal to array levels * 6, including face count. 
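A cube-map array with two array layers, for example, reports d_ == 12 (one slice per face).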
*/ - num_slices = this->d_; + num_slices = d_; break; default: num_slices = 1; break; } - int range_len = min_ii((this->mip_texture_max_level_ - this->mip_texture_base_level_) + 1, - this->texture_.mipmapLevelCount); + int range_len = min_ii((mip_texture_max_level_ - mip_texture_base_level_) + 1, + texture_.mipmapLevelCount); BLI_assert(range_len > 0); - BLI_assert(mip_texture_base_level_ < this->texture_.mipmapLevelCount); - BLI_assert(this->mip_texture_base_layer_ < num_slices); - this->mip_swizzle_view_ = [this->texture_ - newTextureViewWithPixelFormat:this->texture_.pixelFormat - textureType:this->texture_.textureType - levels:NSMakeRange(this->mip_texture_base_level_, range_len) - slices:NSMakeRange(this->mip_texture_base_layer_, num_slices) - swizzle:this->mtl_swizzle_mask_]; + BLI_assert(mip_texture_base_level_ < texture_.mipmapLevelCount); + BLI_assert(mip_texture_base_layer_ < num_slices); + mip_swizzle_view_ = [texture_ + newTextureViewWithPixelFormat:texture_.pixelFormat + textureType:texture_.textureType + levels:NSMakeRange(mip_texture_base_level_, range_len) + slices:NSMakeRange(mip_texture_base_layer_, num_slices) + swizzle:mtl_swizzle_mask_]; MTL_LOG_INFO( "Updating texture view - MIP TEXTURE BASE LEVEL: %d, MAX LEVEL: %d (Range len: %d)\n", - this->mip_texture_base_level_, - min_ii(this->mip_texture_max_level_, this->texture_.mipmapLevelCount), + mip_texture_base_level_, + min_ii(mip_texture_max_level_, texture_.mipmapLevelCount), range_len); - [this->mip_swizzle_view_ retain]; - this->mip_swizzle_view_.label = [this->texture_ label]; + mip_swizzle_view_.label = [texture_ label]; texture_view_dirty_flags_ = TEXTURE_VIEW_NOT_DIRTY; } } @@ -180,29 +173,29 @@ id<MTLTexture> gpu::MTLTexture::get_metal_handle() this->ensure_baked(); /* Verify VBO texture shares same buffer. */ - if (this->resource_mode_ == MTL_TEXTURE_MODE_VBO) { + if (resource_mode_ == MTL_TEXTURE_MODE_VBO) { int r_offset = -1; /* TODO(Metal): Fetch buffer from MTLVertBuf when implemented. */ id<MTLBuffer> buf = nil; /*vert_buffer_->get_metal_buffer(&r_offset);*/ - BLI_assert(this->vert_buffer_mtl_ != nil); - BLI_assert(buf == this->vert_buffer_mtl_ && r_offset == this->vert_buffer_offset_); + BLI_assert(vert_buffer_mtl_ != nil); + BLI_assert(buf == vert_buffer_mtl_ && r_offset == vert_buffer_offset_); UNUSED_VARS(buf); UNUSED_VARS_NDEBUG(r_offset); } - if (this->is_baked_) { + if (is_baked_) { /* For explicit texture views, ensure we always return the texture view. */ - if (this->resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) { - BLI_assert(this->mip_swizzle_view_ && "Texture view should always have a valid handle."); + if (resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) { + BLI_assert(mip_swizzle_view_ && "Texture view should always have a valid handle."); } - if (this->mip_swizzle_view_ != nil || texture_view_dirty_flags_) { + if (mip_swizzle_view_ != nil || texture_view_dirty_flags_) { bake_mip_swizzle_view(); - return this->mip_swizzle_view_; + return mip_swizzle_view_; } - return this->texture_; + return texture_; } return nil; } @@ -214,36 +207,36 @@ id<MTLTexture> gpu::MTLTexture::get_metal_handle_base() this->ensure_baked(); /* For explicit texture views, always return the texture view. 
*/ - if (this->resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) { - BLI_assert(this->mip_swizzle_view_ && "Texture view should always have a valid handle."); - if (this->mip_swizzle_view_ != nil || texture_view_dirty_flags_) { + if (resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) { + BLI_assert(mip_swizzle_view_ && "Texture view should always have a valid handle."); + if (mip_swizzle_view_ != nil || texture_view_dirty_flags_) { bake_mip_swizzle_view(); } - return this->mip_swizzle_view_; + return mip_swizzle_view_; } /* Return base handle. */ - if (this->is_baked_) { - return this->texture_; + if (is_baked_) { + return texture_; } return nil; } void gpu::MTLTexture::blit(id<MTLBlitCommandEncoder> blit_encoder, - unsigned int src_x_offset, - unsigned int src_y_offset, - unsigned int src_z_offset, - unsigned int src_slice, - unsigned int src_mip, + uint src_x_offset, + uint src_y_offset, + uint src_z_offset, + uint src_slice, + uint src_mip, gpu::MTLTexture *dest, - unsigned int dst_x_offset, - unsigned int dst_y_offset, - unsigned int dst_z_offset, - unsigned int dst_slice, - unsigned int dst_mip, - unsigned int width, - unsigned int height, - unsigned int depth) + uint dst_x_offset, + uint dst_y_offset, + uint dst_z_offset, + uint dst_slice, + uint dst_mip, + uint width, + uint height, + uint depth) { BLI_assert(this && dest); @@ -273,13 +266,13 @@ void gpu::MTLTexture::blit(id<MTLBlitCommandEncoder> blit_encoder, } void gpu::MTLTexture::blit(gpu::MTLTexture *dst, - unsigned int src_x_offset, - unsigned int src_y_offset, - unsigned int dst_x_offset, - unsigned int dst_y_offset, - unsigned int src_mip, - unsigned int dst_mip, - unsigned int dst_slice, + uint src_x_offset, + uint src_y_offset, + uint dst_x_offset, + uint dst_y_offset, + uint src_mip, + uint dst_mip, + uint dst_slice, int width, int height) { @@ -348,19 +341,19 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst, } } -GPUFrameBuffer *gpu::MTLTexture::get_blit_framebuffer(unsigned int dst_slice, unsigned int dst_mip) +GPUFrameBuffer *gpu::MTLTexture::get_blit_framebuffer(uint dst_slice, uint dst_mip) { /* Check if layer has changed. */ bool update_attachments = false; - if (!this->blit_fb_) { - this->blit_fb_ = GPU_framebuffer_create("gpu_blit"); + if (!blit_fb_) { + blit_fb_ = GPU_framebuffer_create("gpu_blit"); update_attachments = true; } /* Check if current blit FB has the correct attachment properties. 
*/ - if (this->blit_fb_) { - if (this->blit_fb_slice_ != dst_slice || this->blit_fb_mip_ != dst_mip) { + if (blit_fb_) { + if (blit_fb_slice_ != dst_slice || blit_fb_mip_ != dst_mip) { update_attachments = true; } } @@ -369,7 +362,7 @@ GPUFrameBuffer *gpu::MTLTexture::get_blit_framebuffer(unsigned int dst_slice, un if (format_flag_ & GPU_FORMAT_DEPTH || format_flag_ & GPU_FORMAT_STENCIL) { /* DEPTH TEX */ GPU_framebuffer_ensure_config( - &this->blit_fb_, + &blit_fb_, {GPU_ATTACHMENT_TEXTURE_LAYER_MIP(wrap(static_cast<Texture *>(this)), static_cast<int>(dst_slice), static_cast<int>(dst_mip)), @@ -378,18 +371,18 @@ GPUFrameBuffer *gpu::MTLTexture::get_blit_framebuffer(unsigned int dst_slice, un else { /* COLOR TEX */ GPU_framebuffer_ensure_config( - &this->blit_fb_, + &blit_fb_, {GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE_LAYER_MIP(wrap(static_cast<Texture *>(this)), static_cast<int>(dst_slice), static_cast<int>(dst_mip))}); } - this->blit_fb_slice_ = dst_slice; - this->blit_fb_mip_ = dst_mip; + blit_fb_slice_ = dst_slice; + blit_fb_mip_ = dst_mip; } - BLI_assert(this->blit_fb_); - return this->blit_fb_; + BLI_assert(blit_fb_); + return blit_fb_; } MTLSamplerState gpu::MTLTexture::get_sampler_state() @@ -408,7 +401,7 @@ void gpu::MTLTexture::update_sub( BLI_assert(ctx); /* Do not update texture view. */ - BLI_assert(this->resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW); + BLI_assert(resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW); /* Ensure mipmaps. */ this->ensure_mipmaps(mip); @@ -418,16 +411,16 @@ void gpu::MTLTexture::update_sub( /* Safety checks. */ #if TRUST_NO_ONE - BLI_assert(mip >= this->mip_min_ && mip <= this->mip_max_); - BLI_assert(mip < this->texture_.mipmapLevelCount); - BLI_assert(this->texture_.mipmapLevelCount >= this->mip_max_); + BLI_assert(mip >= mip_min_ && mip <= mip_max_); + BLI_assert(mip < texture_.mipmapLevelCount); + BLI_assert(texture_.mipmapLevelCount >= mip_max_); #endif /* DEPTH FLAG - Depth formats cannot use direct BLIT - pass off to their own routine which will * do a depth-only render. */ - bool is_depth_format = (this->format_flag_ & GPU_FORMAT_DEPTH); + bool is_depth_format = (format_flag_ & GPU_FORMAT_DEPTH); if (is_depth_format) { - switch (this->type_) { + switch (type_) { case GPU_TEXTURE_2D: { update_sub_depth_2d(mip, offset, extent, type, data); @@ -444,7 +437,7 @@ void gpu::MTLTexture::update_sub( @autoreleasepool { /* Determine totalsize of INPUT Data. */ - int num_channels = to_component_len(this->format_); + int num_channels = to_component_len(format_); int input_bytes_per_pixel = num_channels * to_bytesize(type); int totalsize = 0; @@ -482,29 +475,12 @@ void gpu::MTLTexture::update_sub( BLI_assert(totalsize > 0); /* Determine expected destination data size. 
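For example, a GPU_RGBA16F destination resolves to 8 bytes per pixel across 4 components.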
*/ - MTLPixelFormat destination_format = gpu_texture_format_to_metal(this->format_); + MTLPixelFormat destination_format = gpu_texture_format_to_metal(format_); int expected_dst_bytes_per_pixel = get_mtl_format_bytesize(destination_format); int destination_num_channels = get_mtl_format_num_components(destination_format); - int destination_totalsize = 0; - switch (this->dimensions_count()) { - case 1: - destination_totalsize = expected_dst_bytes_per_pixel * max_ii(expected_update_w, 1); - break; - case 2: - destination_totalsize = expected_dst_bytes_per_pixel * max_ii(expected_update_w, 1) * - max_ii(extent[1], 1); - break; - case 3: - destination_totalsize = expected_dst_bytes_per_pixel * max_ii(expected_update_w, 1) * - max_ii(extent[1], 1) * max_ii(extent[2], 1); - break; - default: - BLI_assert(false); - break; - } - /* Prepare specialisation struct (For texture update routine). */ - TextureUpdateRoutineSpecialisation compute_specialisation_kernel = { + /* Prepare specialization struct (For texture update routine). */ + TextureUpdateRoutineSpecialisation compute_specialization_kernel = { tex_data_format_to_msl_type_str(type), /* INPUT DATA FORMAT */ tex_data_format_to_msl_texture_template_type(type), /* TEXTURE DATA FORMAT */ num_channels, @@ -517,21 +493,21 @@ void gpu::MTLTexture::update_sub( can_use_direct_blit = false; } -#if MTL_VALIDATION_CRASH_DEPTH_1_1_1_WA - if (this->type_ == GPU_TEXTURE_2D || this->type_ == GPU_TEXTURE_2D_ARRAY) { - /* Workaround for crash in validation layer when blitting to depth2D target with - * dimensions (1, 1, 1); */ - if (extent[0] == 1 && extent[1] == 1 && extent[2] == 1 && totalsize == 4) { - can_use_direct_blit = false; + if (is_depth_format) { + if (type_ == GPU_TEXTURE_2D || type_ == GPU_TEXTURE_2D_ARRAY) { + /* Workaround for crash in validation layer when blitting to depth2D target with + * dimensions (1, 1, 1); */ + if (extent[0] == 1 && extent[1] == 1 && extent[2] == 1 && totalsize == 4) { + can_use_direct_blit = false; + } } } -#endif - if (this->format_ == GPU_SRGB8_A8 && !can_use_direct_blit) { + if (format_ == GPU_SRGB8_A8 && !can_use_direct_blit) { MTL_LOG_WARNING( "SRGB data upload does not work correctly using compute upload. " "texname '%s'\n", - this->name_); + name_); } /* Safety Checks. */ @@ -573,49 +549,15 @@ void gpu::MTLTexture::update_sub( } } - /* Debug hook for performing GPU capture of routine. */ - bool DO_CAPTURE = false; -#if DEBUG_TEXTURE_UPDATE_CAPTURE == 1 - DO_CAPTURE = true; - if (DO_CAPTURE) { - MTLCaptureManager *capture_manager = [MTLCaptureManager sharedCaptureManager]; - MTLCaptureDescriptor *capture_descriptor = [[MTLCaptureDescriptor alloc] init]; - capture_descriptor.captureObject = ctx->device; - NSError *error; - if (![capture_manager startCaptureWithDescriptor:capture_descriptor error:&error]) { - NSString *error_str = [NSString stringWithFormat:@"%@", error]; - const char *error_c_str = [error_str UTF8String]; - MTL_LOG_ERROR("Failed to start capture. Error: %s\n", error_c_str); - } - } -#endif - - /* Fetch or Create command buffer. */ - id<MTLCommandBuffer> cmd_buffer = ctx->get_active_command_buffer(); - bool own_command_buffer = false; - if (cmd_buffer == nil || DO_CAPTURE) { - cmd_buffer = [ctx->queue commandBuffer]; - own_command_buffer = true; - } - else { - /* Finish graphics work. */ - ctx->end_render_pass(); - } - /* Prepare staging buffer for data. 
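The source data is copied into a 256-byte-aligned range of the context's scratch buffer, which then serves as the source for the blit or compute upload.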
*/ id<MTLBuffer> staging_buffer = nil; - unsigned long long staging_buffer_offset = 0; + uint64_t staging_buffer_offset = 0; /* Fetch allocation from scratch buffer. */ - MTLTemporaryBufferRange allocation; /* TODO(Metal): Metal Memory manager. */ - /* = ctx->get_memory_manager().scratch_buffer_allocate_range_aligned(totalsize, 256);*/ - memcpy(allocation.host_ptr, data, totalsize); + MTLTemporaryBuffer allocation = + ctx->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(totalsize, 256); + memcpy(allocation.data, data, totalsize); staging_buffer = allocation.metal_buffer; - if (own_command_buffer) { - if (allocation.requires_flush()) { - [staging_buffer didModifyRange:NSMakeRange(allocation.buffer_offset, allocation.size)]; - } - } staging_buffer_offset = allocation.buffer_offset; /* Common Properties. */ @@ -629,23 +571,23 @@ void gpu::MTLTexture::update_sub( return; } id<MTLTexture> texture_handle = ((compatible_write_format == destination_format)) ? - this->texture_ : - [this->texture_ + texture_ : + [texture_ newTextureViewWithPixelFormat:compatible_write_format]; - /* Prepare encoders */ + /* Prepare command encoders. */ id<MTLBlitCommandEncoder> blit_encoder = nil; id<MTLComputeCommandEncoder> compute_encoder = nil; if (can_use_direct_blit) { - blit_encoder = [cmd_buffer blitCommandEncoder]; + blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder(); BLI_assert(blit_encoder != nil); } else { - compute_encoder = [cmd_buffer computeCommandEncoder]; + compute_encoder = ctx->main_command_buffer.ensure_begin_compute_encoder(); BLI_assert(compute_encoder != nil); } - switch (this->type_) { + switch (type_) { /* 1D */ case GPU_TEXTURE_1D: @@ -657,28 +599,28 @@ void gpu::MTLTexture::update_sub( extent[0] : ctx->pipeline_state.unpack_row_length); int bytes_per_image = bytes_per_row; - int max_array_index = ((this->type_ == GPU_TEXTURE_1D_ARRAY) ? extent[1] : 1); + int max_array_index = ((type_ == GPU_TEXTURE_1D_ARRAY) ? extent[1] : 1); for (int array_index = 0; array_index < max_array_index; array_index++) { int buffer_array_offset = staging_buffer_offset + (bytes_per_image * array_index); - [blit_encoder copyFromBuffer:staging_buffer - sourceOffset:buffer_array_offset - sourceBytesPerRow:bytes_per_row - sourceBytesPerImage:bytes_per_image - sourceSize:MTLSizeMake(extent[0], 1, 1) - toTexture:texture_handle - destinationSlice:((this->type_ == GPU_TEXTURE_1D_ARRAY) ? - (array_index + offset[1]) : - 0) - destinationLevel:mip - destinationOrigin:MTLOriginMake(offset[0], 0, 0)]; + [blit_encoder + copyFromBuffer:staging_buffer + sourceOffset:buffer_array_offset + sourceBytesPerRow:bytes_per_row + sourceBytesPerImage:bytes_per_image + sourceSize:MTLSizeMake(extent[0], 1, 1) + toTexture:texture_handle + destinationSlice:((type_ == GPU_TEXTURE_1D_ARRAY) ? (array_index + offset[1]) : + 0) + destinationLevel:mip + destinationOrigin:MTLOriginMake(offset[0], 0, 0)]; } } else { /* Use Compute Based update. 
*/ - if (this->type_ == GPU_TEXTURE_1D) { + if (type_ == GPU_TEXTURE_1D) { id<MTLComputePipelineState> pso = texture_update_1d_get_kernel( - compute_specialisation_kernel); + compute_specialization_kernel); TextureUpdateParams params = {mip, {extent[0], 1, 1}, {offset[0], 0, 0}, @@ -693,9 +635,9 @@ void gpu::MTLTexture::update_sub( dispatchThreads:MTLSizeMake(extent[0], 1, 1) /* Width, Height, Layer */ threadsPerThreadgroup:MTLSizeMake(64, 1, 1)]; } - else if (this->type_ == GPU_TEXTURE_1D_ARRAY) { + else if (type_ == GPU_TEXTURE_1D_ARRAY) { id<MTLComputePipelineState> pso = texture_update_1d_array_get_kernel( - compute_specialisation_kernel); + compute_specialization_kernel); TextureUpdateParams params = {mip, {extent[0], extent[1], 1}, {offset[0], offset[1], 0}, @@ -725,14 +667,14 @@ void gpu::MTLTexture::update_sub( int bytes_per_image = bytes_per_row * extent[1]; int texture_array_relative_offset = 0; - int base_slice = (this->type_ == GPU_TEXTURE_2D_ARRAY) ? offset[2] : 0; - int final_slice = base_slice + ((this->type_ == GPU_TEXTURE_2D_ARRAY) ? extent[2] : 1); + int base_slice = (type_ == GPU_TEXTURE_2D_ARRAY) ? offset[2] : 0; + int final_slice = base_slice + ((type_ == GPU_TEXTURE_2D_ARRAY) ? extent[2] : 1); for (int array_slice = base_slice; array_slice < final_slice; array_slice++) { if (array_slice > 0) { - BLI_assert(this->type_ == GPU_TEXTURE_2D_ARRAY); - BLI_assert(array_slice < this->d_); + BLI_assert(type_ == GPU_TEXTURE_2D_ARRAY); + BLI_assert(array_slice < d_); } [blit_encoder copyFromBuffer:staging_buffer @@ -750,9 +692,9 @@ void gpu::MTLTexture::update_sub( } else { /* Use Compute texture update. */ - if (this->type_ == GPU_TEXTURE_2D) { + if (type_ == GPU_TEXTURE_2D) { id<MTLComputePipelineState> pso = texture_update_2d_get_kernel( - compute_specialisation_kernel); + compute_specialization_kernel); TextureUpdateParams params = {mip, {extent[0], extent[1], 1}, {offset[0], offset[1], 0}, @@ -768,9 +710,9 @@ void gpu::MTLTexture::update_sub( extent[0], extent[1], 1) /* Width, Height, Layer */ threadsPerThreadgroup:MTLSizeMake(8, 8, 1)]; } - else if (this->type_ == GPU_TEXTURE_2D_ARRAY) { + else if (type_ == GPU_TEXTURE_2D_ARRAY) { id<MTLComputePipelineState> pso = texture_update_2d_array_get_kernel( - compute_specialisation_kernel); + compute_specialization_kernel); TextureUpdateParams params = {mip, {extent[0], extent[1], extent[2]}, {offset[0], offset[1], offset[2]}, @@ -810,7 +752,7 @@ void gpu::MTLTexture::update_sub( } else { id<MTLComputePipelineState> pso = texture_update_3d_get_kernel( - compute_specialisation_kernel); + compute_specialization_kernel); TextureUpdateParams params = {mip, {extent[0], extent[1], extent[2]}, {offset[0], offset[1], offset[2]}, @@ -918,35 +860,15 @@ void gpu::MTLTexture::update_sub( if (texture_.storageMode == MTLStorageModeManaged) { [blit_encoder synchronizeResource:texture_buffer_]; } - - /* End Encoding. */ - [blit_encoder endEncoding]; } else { - - /* End Encoding. */ - [compute_encoder endEncoding]; - /* Textures which use MTLStorageModeManaged need to have updated contents * synced back to CPU to avoid an automatic flush overwriting contents. 
*/ if (texture_.storageMode == MTLStorageModeManaged) { - blit_encoder = [cmd_buffer blitCommandEncoder]; + blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder(); [blit_encoder synchronizeResource:texture_buffer_]; - [blit_encoder endEncoding]; } } - - if (own_command_buffer) { - [cmd_buffer commit]; - } - -#if DEBUG_TEXTURE_UPDATE_CAPTURE == 1 - if (DO_CAPTURE) { - [cmd_buffer waitUntilCompleted]; - MTLCaptureManager *capture_manager = [MTLCaptureManager sharedCaptureManager]; - [capture_manager stopCapture]; - } -#endif } } @@ -954,12 +876,12 @@ void gpu::MTLTexture::ensure_mipmaps(int miplvl) { /* Do not update texture view. */ - BLI_assert(this->resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW); + BLI_assert(resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW); /* Clamp level to maximum. */ - int effective_h = (this->type_ == GPU_TEXTURE_1D_ARRAY) ? 0 : this->h_; - int effective_d = (this->type_ != GPU_TEXTURE_3D) ? 0 : this->d_; - int max_dimension = max_iii(this->w_, effective_h, effective_d); + int effective_h = (type_ == GPU_TEXTURE_1D_ARRAY) ? 0 : h_; + int effective_d = (type_ != GPU_TEXTURE_3D) ? 0 : d_; + int max_dimension = max_iii(w_, effective_h, effective_d); int max_miplvl = floor(log2(max_dimension)); miplvl = min_ii(max_miplvl, miplvl); @@ -968,15 +890,15 @@ void gpu::MTLTexture::ensure_mipmaps(int miplvl) mipmaps_ = miplvl; /* Check if baked. */ - if (this->is_baked_ && mipmaps_ > mtl_max_mips_) { - this->is_dirty_ = true; + if (is_baked_ && mipmaps_ > mtl_max_mips_) { + is_dirty_ = true; MTL_LOG_WARNING("Texture requires regenerating due to increase in mip-count\n"); } } this->mip_range_set(0, mipmaps_); } -void gpu::MTLTexture::generate_mipmap(void) +void gpu::MTLTexture::generate_mipmap() { /* Fetch Active Context. */ MTLContext *ctx = reinterpret_cast<MTLContext *>(GPU_context_active_get()); @@ -993,44 +915,29 @@ void gpu::MTLTexture::generate_mipmap(void) /* Ensure texture is baked. */ this->ensure_baked(); - BLI_assert(this->is_baked_ && this->texture_ && "MTLTexture is not valid"); + BLI_assert(is_baked_ && texture_ && "MTLTexture is not valid"); - if (this->mipmaps_ == 1 || this->mtl_max_mips_ == 1) { + if (mipmaps_ == 1 || mtl_max_mips_ == 1) { MTL_LOG_WARNING("Call to generate mipmaps on texture with 'mipmaps_=1\n'"); return; } /* Verify if we can perform mipmap generation. */ - if (this->format_ == GPU_DEPTH_COMPONENT32F || this->format_ == GPU_DEPTH_COMPONENT24 || - this->format_ == GPU_DEPTH_COMPONENT16 || this->format_ == GPU_DEPTH32F_STENCIL8 || - this->format_ == GPU_DEPTH24_STENCIL8) { + if (format_ == GPU_DEPTH_COMPONENT32F || format_ == GPU_DEPTH_COMPONENT24 || + format_ == GPU_DEPTH_COMPONENT16 || format_ == GPU_DEPTH32F_STENCIL8 || + format_ == GPU_DEPTH24_STENCIL8) { MTL_LOG_WARNING("Cannot generate mipmaps for textures using DEPTH formats\n"); return; } @autoreleasepool { - id<MTLCommandBuffer> cmd_buffer = ctx->get_active_command_buffer(); - bool own_command_buffer = false; - if (cmd_buffer == nil) { - cmd_buffer = [ctx->queue commandBuffer]; - own_command_buffer = true; - } - else { - /* End active graphics work. */ - ctx->end_render_pass(); - } - - id<MTLBlitCommandEncoder> enc = [cmd_buffer blitCommandEncoder]; -#if MTL_DEBUG_COMMAND_BUFFER_EXECUTION - [enc insertDebugSignpost:@"Generate MipMaps"]; -#endif - [enc generateMipmapsForTexture:this->texture_]; - [enc endEncoding]; - - if (own_command_buffer) { - [cmd_buffer commit]; + /* Fetch active BlitCommandEncoder. 
*/ + id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder(); + if (G.debug & G_DEBUG_GPU) { + [enc insertDebugSignpost:@"Generate MipMaps"]; } + [enc generateMipmapsForTexture:texture_]; } return; } @@ -1055,13 +962,8 @@ void gpu::MTLTexture::copy_to(Texture *dst) this->ensure_baked(); @autoreleasepool { - /* End render pass. */ - ctx->end_render_pass(); - /* Setup blit encoder. */ - id<MTLCommandBuffer> cmd_buffer = ctx->get_active_command_buffer(); - BLI_assert(cmd_buffer != nil); - id<MTLBlitCommandEncoder> blit_encoder = [cmd_buffer blitCommandEncoder]; + id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder(); BLI_assert(blit_encoder != nil); /* TODO(Metal): Consider supporting multiple mip levels IF the GL implementation @@ -1077,7 +979,7 @@ void gpu::MTLTexture::copy_to(Texture *dst) case GPU_TEXTURE_CUBE_ARRAY: case GPU_TEXTURE_3D: { /* Do full texture copy for 3D textures */ - BLI_assert(mt_dst->d_ == this->d_); + BLI_assert(mt_dst->d_ == d_); [blit_encoder copyFromTexture:this->get_metal_handle_base() toTexture:mt_dst->get_metal_handle_base()]; } break; @@ -1100,9 +1002,6 @@ void gpu::MTLTexture::copy_to(Texture *dst) extent[2]); } break; } - - /* End encoding */ - [blit_encoder endEncoding]; } } @@ -1144,8 +1043,8 @@ static MTLTextureSwizzle swizzle_to_mtl(const char swizzle) void gpu::MTLTexture::swizzle_set(const char swizzle_mask[4]) { - if (memcmp(this->tex_swizzle_mask_, swizzle_mask, 4) != 0) { - memcpy(this->tex_swizzle_mask_, swizzle_mask, 4); + if (memcmp(tex_swizzle_mask_, swizzle_mask, 4) != 0) { + memcpy(tex_swizzle_mask_, swizzle_mask, 4); /* Creating the swizzle mask and flagging as dirty if changed. */ MTLTextureSwizzleChannels new_swizzle_mask = MTLTextureSwizzleChannelsMake( @@ -1154,8 +1053,8 @@ void gpu::MTLTexture::swizzle_set(const char swizzle_mask[4]) swizzle_to_mtl(swizzle_mask[2]), swizzle_to_mtl(swizzle_mask[3])); - this->mtl_swizzle_mask_ = new_swizzle_mask; - this->texture_view_dirty_flags_ |= TEXTURE_VIEW_SWIZZLE_DIRTY; + mtl_swizzle_mask_ = new_swizzle_mask; + texture_view_dirty_flags_ |= TEXTURE_VIEW_SWIZZLE_DIRTY; } } @@ -1172,25 +1071,24 @@ void gpu::MTLTexture::mip_range_set(int min, int max) * * TODO(Metal): Add texture initialization flag to determine whether mipmaps are used * or not. Will be important for saving memory for big textures. */ - this->mip_min_ = min; - this->mip_max_ = max; + mip_min_ = min; + mip_max_ = max; - if ((this->type_ == GPU_TEXTURE_1D || this->type_ == GPU_TEXTURE_1D_ARRAY || - this->type_ == GPU_TEXTURE_BUFFER) && + if ((type_ == GPU_TEXTURE_1D || type_ == GPU_TEXTURE_1D_ARRAY || type_ == GPU_TEXTURE_BUFFER) && max > 1) { MTL_LOG_ERROR( " MTLTexture of type TEXTURE_1D_ARRAY or TEXTURE_BUFFER cannot have a mipcount " "greater than 1\n"); - this->mip_min_ = 0; - this->mip_max_ = 0; - this->mipmaps_ = 0; + mip_min_ = 0; + mip_max_ = 0; + mipmaps_ = 0; BLI_assert(false); } /* Mip range for texture view. */ - this->mip_texture_base_level_ = this->mip_min_; - this->mip_texture_max_level_ = this->mip_max_; + mip_texture_base_level_ = mip_min_; + mip_texture_max_level_ = mip_max_; texture_view_dirty_flags_ |= TEXTURE_VIEW_MIP_DIRTY; } @@ -1199,7 +1097,7 @@ void *gpu::MTLTexture::read(int mip, eGPUDataFormat type) /* Prepare Array for return data. 
*/ BLI_assert(!(format_flag_ & GPU_FORMAT_COMPRESSED)); BLI_assert(mip <= mipmaps_); - BLI_assert(validate_data_format_mtl(this->format_, type)); + BLI_assert(validate_data_format_mtl(format_, type)); /* NOTE: mip_size_get() won't override any dimension that is equal to 0. */ int extent[3] = {1, 1, 1}; @@ -1208,12 +1106,12 @@ void *gpu::MTLTexture::read(int mip, eGPUDataFormat type) size_t sample_len = extent[0] * extent[1] * extent[2]; size_t sample_size = to_bytesize(format_, type); size_t texture_size = sample_len * sample_size; - int num_channels = to_component_len(this->format_); + int num_channels = to_component_len(format_); void *data = MEM_mallocN(texture_size + 8, "GPU_texture_read"); /* Ensure texture is baked. */ - if (this->is_baked_) { + if (is_baked_) { this->read_internal( mip, 0, 0, 0, extent[0], extent[1], extent[2], type, num_channels, texture_size + 8, data); } @@ -1239,7 +1137,7 @@ void gpu::MTLTexture::read_internal(int mip, void *r_data) { /* Verify textures are baked. */ - if (!this->is_baked_) { + if (!is_baked_) { MTL_LOG_WARNING("gpu::MTLTexture::read_internal - Trying to read from a non-baked texture!\n"); return; } @@ -1248,14 +1146,14 @@ void gpu::MTLTexture::read_internal(int mip, BLI_assert(ctx); /* Calculate Desired output size. */ - int num_channels = to_component_len(this->format_); + int num_channels = to_component_len(format_); BLI_assert(num_output_components <= num_channels); - unsigned int desired_output_bpp = num_output_components * to_bytesize(desired_output_format); + uint desired_output_bpp = num_output_components * to_bytesize(desired_output_format); /* Calculate Metal data output for trivial copy. */ - unsigned int image_bpp = get_mtl_format_bytesize(this->texture_.pixelFormat); - unsigned int image_components = get_mtl_format_num_components(this->texture_.pixelFormat); - bool is_depth_format = (this->format_flag_ & GPU_FORMAT_DEPTH); + uint image_bpp = get_mtl_format_bytesize(texture_.pixelFormat); + uint image_components = get_mtl_format_num_components(texture_.pixelFormat); + bool is_depth_format = (format_flag_ & GPU_FORMAT_DEPTH); /* Verify if we need to use compute read. */ eGPUDataFormat data_format = to_mtl_internal_data_format(this->format_get()); @@ -1272,12 +1170,12 @@ void gpu::MTLTexture::read_internal(int mip, BLI_assert(num_output_components == 1); BLI_assert(image_components == 1); BLI_assert(data_format == GPU_DATA_FLOAT || data_format == GPU_DATA_UINT_24_8); - BLI_assert(validate_data_format_mtl(this->format_, data_format)); + BLI_assert(validate_data_format_mtl(format_, data_format)); } /* SPECIAL Workaround for R11G11B10 textures requesting a read using: GPU_DATA_10_11_11_REV. */ if (desired_output_format == GPU_DATA_10_11_11_REV) { - BLI_assert(this->format_ == GPU_R11F_G11F_B10F); + BLI_assert(format_ == GPU_R11F_G11F_B10F); /* override parameters - we'll be able to use simple copy, as bpp will match at 4 bytes. */ image_bpp = sizeof(int); @@ -1291,9 +1189,9 @@ void gpu::MTLTexture::read_internal(int mip, } /* Determine size of output data. 
*/ - unsigned int bytes_per_row = desired_output_bpp * width; - unsigned int bytes_per_image = bytes_per_row * height; - unsigned int total_bytes = bytes_per_image * depth; + uint bytes_per_row = desired_output_bpp * width; + uint bytes_per_image = bytes_per_row * height; + uint total_bytes = bytes_per_image * depth; if (can_use_simple_read) { /* DEBUG check that if direct copy is being used, then both the expected output size matches @@ -1307,7 +1205,7 @@ void gpu::MTLTexture::read_internal(int mip, /* Fetch allocation from scratch buffer. */ id<MTLBuffer> destination_buffer = nil; - unsigned int destination_offset = 0; + uint destination_offset = 0; void *destination_buffer_host_ptr = nullptr; /* TODO(Metal): Optimize buffer allocation. */ @@ -1315,10 +1213,10 @@ void gpu::MTLTexture::read_internal(int mip, destination_buffer = [ctx->device newBufferWithLength:max_ii(total_bytes, 256) options:bufferOptions]; destination_offset = 0; - destination_buffer_host_ptr = (void *)((unsigned char *)([destination_buffer contents]) + + destination_buffer_host_ptr = (void *)((uint8_t *)([destination_buffer contents]) + destination_offset); - /* Prepare specialisation struct (For non-trivial texture read routine). */ + /* Prepare specialization struct (For non-trivial texture read routine). */ int depth_format_mode = 0; if (is_depth_format) { depth_format_mode = 1; @@ -1338,7 +1236,7 @@ void gpu::MTLTexture::read_internal(int mip, } } - TextureReadRoutineSpecialisation compute_specialisation_kernel = { + TextureReadRoutineSpecialisation compute_specialization_kernel = { tex_data_format_to_msl_texture_template_type(data_format), /* TEXTURE DATA TYPE */ tex_data_format_to_msl_type_str(desired_output_format), /* OUTPUT DATA TYPE */ num_channels, /* TEXTURE COMPONENT COUNT */ @@ -1348,53 +1246,25 @@ void gpu::MTLTexture::read_internal(int mip, bool copy_successful = false; @autoreleasepool { - bool DO_CAPTURE = false; -#if DEBUG_TEXTURE_READ_CAPTURE == 1 - DO_CAPTURE = true; - if (DO_CAPTURE) { - MTLCaptureManager *capture_manager = [MTLCaptureManager sharedCaptureManager]; - MTLCaptureDescriptor *capture_descriptor = [[MTLCaptureDescriptor alloc] init]; - capture_descriptor.captureObject = ctx->device; - NSError *error; - if (![capture_manager startCaptureWithDescriptor:capture_descriptor error:&error]) { - NSString *error_str = [NSString stringWithFormat:@"%@", error]; - const char *error_c_str = [error_str UTF8String]; - MTL_LOG_ERROR("Failed to start capture. Error: %s\n", error_c_str); - } - } -#endif - /* TODO(Metal): Verify whether we need some form of barrier here to ensure reads * happen after work with associated texture is finished. */ GPU_finish(); - /* Fetch or Create command buffer. */ - id<MTLCommandBuffer> cmd_buffer = ctx->get_active_command_buffer(); - bool own_command_buffer = false; - if (cmd_buffer == nil || DO_CAPTURE || true) { - cmd_buffer = [ctx->queue commandBuffer]; - own_command_buffer = true; - } - else { - /* End any graphics workloads. */ - ctx->end_render_pass(); - } - /* Texture View for SRGB special case. */ - id<MTLTexture> read_texture = this->texture_; - if (this->format_ == GPU_SRGB8_A8) { - read_texture = [this->texture_ newTextureViewWithPixelFormat:MTLPixelFormatRGBA8Unorm]; + id<MTLTexture> read_texture = texture_; + if (format_ == GPU_SRGB8_A8) { + read_texture = [texture_ newTextureViewWithPixelFormat:MTLPixelFormatRGBA8Unorm]; } /* Perform per-texture type read. 
*/ - switch (this->type_) { + switch (type_) { case GPU_TEXTURE_2D: { if (can_use_simple_read) { /* Use Blit Encoder READ. */ - id<MTLBlitCommandEncoder> enc = [cmd_buffer blitCommandEncoder]; -#if MTL_DEBUG_COMMAND_BUFFER_EXECUTION - [enc insertDebugSignpost:@"GPUTextureRead"]; -#endif + id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder(); + if (G.debug & G_DEBUG_GPU) { + [enc insertDebugSignpost:@"GPUTextureRead"]; + } [enc copyFromTexture:read_texture sourceSlice:0 sourceLevel:mip @@ -1405,15 +1275,15 @@ void gpu::MTLTexture::read_internal(int mip, destinationBytesPerRow:bytes_per_row destinationBytesPerImage:bytes_per_image]; [enc synchronizeResource:destination_buffer]; - [enc endEncoding]; copy_successful = true; } else { /* Use Compute READ. */ - id<MTLComputeCommandEncoder> compute_encoder = [cmd_buffer computeCommandEncoder]; + id<MTLComputeCommandEncoder> compute_encoder = + ctx->main_command_buffer.ensure_begin_compute_encoder(); id<MTLComputePipelineState> pso = texture_read_2d_get_kernel( - compute_specialisation_kernel); + compute_specialization_kernel); TextureReadParams params = { mip, {width, height, 1}, @@ -1425,15 +1295,13 @@ void gpu::MTLTexture::read_internal(int mip, [compute_encoder setTexture:read_texture atIndex:0]; [compute_encoder dispatchThreads:MTLSizeMake(width, height, 1) /* Width, Height, Layer */ threadsPerThreadgroup:MTLSizeMake(8, 8, 1)]; - [compute_encoder endEncoding]; /* Use Blit encoder to synchronize results back to CPU. */ - id<MTLBlitCommandEncoder> enc = [cmd_buffer blitCommandEncoder]; -#if MTL_DEBUG_COMMAND_BUFFER_EXECUTION - [enc insertDebugSignpost:@"GPUTextureRead-syncResource"]; -#endif + id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder(); + if (G.debug & G_DEBUG_GPU) { + [enc insertDebugSignpost:@"GPUTextureRead-syncResource"]; + } [enc synchronizeResource:destination_buffer]; - [enc endEncoding]; copy_successful = true; } } break; @@ -1441,10 +1309,10 @@ void gpu::MTLTexture::read_internal(int mip, case GPU_TEXTURE_2D_ARRAY: { if (can_use_simple_read) { /* Use Blit Encoder READ. */ - id<MTLBlitCommandEncoder> enc = [cmd_buffer blitCommandEncoder]; -#if MTL_DEBUG_COMMAND_BUFFER_EXECUTION - [enc insertDebugSignpost:@"GPUTextureRead"]; -#endif + id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder(); + if (G.debug & G_DEBUG_GPU) { + [enc insertDebugSignpost:@"GPUTextureRead"]; + } int base_slice = z_off; int final_slice = base_slice + depth; int texture_array_relative_offset = 0; @@ -1463,15 +1331,15 @@ void gpu::MTLTexture::read_internal(int mip, texture_array_relative_offset += bytes_per_image; } - [enc endEncoding]; copy_successful = true; } else { /* Use Compute READ */ - id<MTLComputeCommandEncoder> compute_encoder = [cmd_buffer computeCommandEncoder]; + id<MTLComputeCommandEncoder> compute_encoder = + ctx->main_command_buffer.ensure_begin_compute_encoder(); id<MTLComputePipelineState> pso = texture_read_2d_array_get_kernel( - compute_specialisation_kernel); + compute_specialization_kernel); TextureReadParams params = { mip, {width, height, depth}, @@ -1484,25 +1352,23 @@ void gpu::MTLTexture::read_internal(int mip, [compute_encoder dispatchThreads:MTLSizeMake(width, height, depth) /* Width, Height, Layer */ threadsPerThreadgroup:MTLSizeMake(8, 8, 1)]; - [compute_encoder endEncoding]; /* Use Blit encoder to synchronize results back to CPU. 
*/ - id<MTLBlitCommandEncoder> enc = [cmd_buffer blitCommandEncoder]; -#if MTL_DEBUG_COMMAND_BUFFER_EXECUTION - [enc insertDebugSignpost:@"GPUTextureRead-syncResource"]; -#endif + id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder(); + if (G.debug & G_DEBUG_GPU) { + [enc insertDebugSignpost:@"GPUTextureRead-syncResource"]; + } [enc synchronizeResource:destination_buffer]; - [enc endEncoding]; copy_successful = true; } } break; case GPU_TEXTURE_CUBE_ARRAY: { if (can_use_simple_read) { - id<MTLBlitCommandEncoder> enc = [cmd_buffer blitCommandEncoder]; -#if MTL_DEBUG_COMMAND_BUFFER_EXECUTION - [enc insertDebugSignpost:@"GPUTextureRead"]; -#endif + id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder(); + if (G.debug & G_DEBUG_GPU) { + [enc insertDebugSignpost:@"GPUTextureRead"]; + } int base_slice = z_off; int final_slice = base_slice + depth; int texture_array_relative_offset = 0; @@ -1522,7 +1388,6 @@ void gpu::MTLTexture::read_internal(int mip, texture_array_relative_offset += bytes_per_image; } MTL_LOG_INFO("Copying texture data to buffer GPU_TEXTURE_CUBE_ARRAY\n"); - [enc endEncoding]; copy_successful = true; } else { @@ -1534,27 +1399,13 @@ void gpu::MTLTexture::read_internal(int mip, MTL_LOG_WARNING( "[Warning] gpu::MTLTexture::read_internal simple-copy not yet supported for texture " "type: %d\n", - (int)this->type_); + (int)type_); break; } if (copy_successful) { - /* Ensure GPU copy from texture to host-accessible buffer is complete. */ - if (own_command_buffer) { - [cmd_buffer commit]; - [cmd_buffer waitUntilCompleted]; - } - else { - /* Ensure GPU copy commands have completed. */ - GPU_finish(); - } - -#if DEBUG_TEXTURE_READ_CAPTURE == 1 - if (DO_CAPTURE) { - MTLCaptureManager *capture_manager = [MTLCaptureManager sharedCaptureManager]; - [capture_manager stopCapture]; - } -#endif + /* Ensure GPU copy commands have completed. */ + GPU_finish(); /* Copy data from Shared Memory into ptr. */ memcpy(r_data, destination_buffer_host_ptr, total_bytes); @@ -1576,16 +1427,16 @@ void gpu::MTLTexture::read_internal(int mip, } /* Remove once no longer required -- will just return 0 for now in MTL path. */ -uint gpu::MTLTexture::gl_bindcode_get(void) const +uint gpu::MTLTexture::gl_bindcode_get() const { return 0; } -bool gpu::MTLTexture::init_internal(void) +bool gpu::MTLTexture::init_internal() { - if (this->format_ == GPU_DEPTH24_STENCIL8) { + if (format_ == GPU_DEPTH24_STENCIL8) { /* Apple Silicon requires GPU_DEPTH32F_STENCIL8 instead of GPU_DEPTH24_STENCIL8. */ - this->format_ = GPU_DEPTH32F_STENCIL8; + format_ = GPU_DEPTH32F_STENCIL8; } this->prepare_internal(); @@ -1609,20 +1460,20 @@ bool gpu::MTLTexture::init_internal(const GPUTexture *src, int mip_offset, int l this->prepare_internal(); /* Flag as using texture view. */ - this->resource_mode_ = MTL_TEXTURE_MODE_TEXTURE_VIEW; - this->source_texture_ = src; - this->mip_texture_base_level_ = mip_offset; - this->mip_texture_base_layer_ = layer_offset; + resource_mode_ = MTL_TEXTURE_MODE_TEXTURE_VIEW; + source_texture_ = src; + mip_texture_base_level_ = mip_offset; + mip_texture_base_layer_ = layer_offset; /* Assign texture as view. */ const gpu::MTLTexture *mtltex = static_cast<const gpu::MTLTexture *>(unwrap(src)); - this->texture_ = mtltex->texture_; - BLI_assert(this->texture_); - [this->texture_ retain]; + texture_ = mtltex->texture_; + BLI_assert(texture_); + [texture_ retain]; /* Flag texture as baked -- we do not need explicit initialization. 
*/ - this->is_baked_ = true; - this->is_dirty_ = false; + is_baked_ = true; + is_dirty_ = false; /* Bake mip swizzle view. */ bake_mip_swizzle_view(); @@ -1637,7 +1488,7 @@ bool gpu::MTLTexture::init_internal(const GPUTexture *src, int mip_offset, int l bool gpu::MTLTexture::texture_is_baked() { - return this->is_baked_; + return is_baked_; } /* Prepare texture parameters after initialization, but before baking. */ @@ -1645,22 +1496,21 @@ void gpu::MTLTexture::prepare_internal() { /* Derive implicit usage flags for Depth/Stencil attachments. */ - if (this->format_flag_ & GPU_FORMAT_DEPTH || this->format_flag_ & GPU_FORMAT_STENCIL) { - this->gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_ATTACHMENT; + if (format_flag_ & GPU_FORMAT_DEPTH || format_flag_ & GPU_FORMAT_STENCIL) { + gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_ATTACHMENT; } /* Derive maximum number of mip levels by default. * TODO(Metal): This can be removed if max mip counts are specified upfront. */ - if (this->type_ == GPU_TEXTURE_1D || this->type_ == GPU_TEXTURE_1D_ARRAY || - this->type_ == GPU_TEXTURE_BUFFER) { - this->mtl_max_mips_ = 1; + if (type_ == GPU_TEXTURE_1D || type_ == GPU_TEXTURE_1D_ARRAY || type_ == GPU_TEXTURE_BUFFER) { + mtl_max_mips_ = 1; } else { - int effective_h = (this->type_ == GPU_TEXTURE_1D_ARRAY) ? 0 : this->h_; - int effective_d = (this->type_ != GPU_TEXTURE_3D) ? 0 : this->d_; - int max_dimension = max_iii(this->w_, effective_h, effective_d); + int effective_h = (type_ == GPU_TEXTURE_1D_ARRAY) ? 0 : h_; + int effective_d = (type_ != GPU_TEXTURE_3D) ? 0 : d_; + int max_dimension = max_iii(w_, effective_h, effective_d); int max_miplvl = max_ii(floor(log2(max_dimension)) + 1, 1); - this->mtl_max_mips_ = max_miplvl; + mtl_max_mips_ = max_miplvl; } } @@ -1669,101 +1519,91 @@ void gpu::MTLTexture::ensure_baked() /* If properties have changed, re-bake. */ bool copy_previous_contents = false; - if (this->is_baked_ && this->is_dirty_) { + if (is_baked_ && is_dirty_) { copy_previous_contents = true; - id<MTLTexture> previous_texture = this->texture_; + id<MTLTexture> previous_texture = texture_; [previous_texture retain]; this->reset(); } - if (!this->is_baked_) { + if (!is_baked_) { MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(ctx); /* Ensure texture mode is valid. */ - BLI_assert(this->resource_mode_ != MTL_TEXTURE_MODE_EXTERNAL); - BLI_assert(this->resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW); - BLI_assert(this->resource_mode_ != MTL_TEXTURE_MODE_VBO); + BLI_assert(resource_mode_ != MTL_TEXTURE_MODE_EXTERNAL); + BLI_assert(resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW); + BLI_assert(resource_mode_ != MTL_TEXTURE_MODE_VBO); /* Format and mip levels (TODO(Metal): Optimize mipmaps counts, specify up-front). */ - MTLPixelFormat mtl_format = gpu_texture_format_to_metal(this->format_); + MTLPixelFormat mtl_format = gpu_texture_format_to_metal(format_); /* Create texture descriptor. */ - switch (this->type_) { + switch (type_) { /* 1D */ case GPU_TEXTURE_1D: case GPU_TEXTURE_1D_ARRAY: { - BLI_assert(this->w_ > 0); - this->texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; - this->texture_descriptor_.pixelFormat = mtl_format; - this->texture_descriptor_.textureType = (this->type_ == GPU_TEXTURE_1D_ARRAY) ? - MTLTextureType1DArray : - MTLTextureType1D; - this->texture_descriptor_.width = this->w_; - this->texture_descriptor_.height = 1; - this->texture_descriptor_.depth = 1; - this->texture_descriptor_.arrayLength = (this->type_ == GPU_TEXTURE_1D_ARRAY) ? 
this->h_ : - 1; - this->texture_descriptor_.mipmapLevelCount = (this->mtl_max_mips_ > 0) ? - this->mtl_max_mips_ : - 1; - this->texture_descriptor_.usage = + BLI_assert(w_ > 0); + texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; + texture_descriptor_.pixelFormat = mtl_format; + texture_descriptor_.textureType = (type_ == GPU_TEXTURE_1D_ARRAY) ? MTLTextureType1DArray : + MTLTextureType1D; + texture_descriptor_.width = w_; + texture_descriptor_.height = 1; + texture_descriptor_.depth = 1; + texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_1D_ARRAY) ? h_ : 1; + texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1; + texture_descriptor_.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite | MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. */ - this->texture_descriptor_.storageMode = MTLStorageModePrivate; - this->texture_descriptor_.sampleCount = 1; - this->texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; - this->texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault; + texture_descriptor_.storageMode = MTLStorageModePrivate; + texture_descriptor_.sampleCount = 1; + texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; + texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault; } break; /* 2D */ case GPU_TEXTURE_2D: case GPU_TEXTURE_2D_ARRAY: { - BLI_assert(this->w_ > 0 && this->h_ > 0); - this->texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; - this->texture_descriptor_.pixelFormat = mtl_format; - this->texture_descriptor_.textureType = (this->type_ == GPU_TEXTURE_2D_ARRAY) ? - MTLTextureType2DArray : - MTLTextureType2D; - this->texture_descriptor_.width = this->w_; - this->texture_descriptor_.height = this->h_; - this->texture_descriptor_.depth = 1; - this->texture_descriptor_.arrayLength = (this->type_ == GPU_TEXTURE_2D_ARRAY) ? this->d_ : - 1; - this->texture_descriptor_.mipmapLevelCount = (this->mtl_max_mips_ > 0) ? - this->mtl_max_mips_ : - 1; - this->texture_descriptor_.usage = + BLI_assert(w_ > 0 && h_ > 0); + texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; + texture_descriptor_.pixelFormat = mtl_format; + texture_descriptor_.textureType = (type_ == GPU_TEXTURE_2D_ARRAY) ? MTLTextureType2DArray : + MTLTextureType2D; + texture_descriptor_.width = w_; + texture_descriptor_.height = h_; + texture_descriptor_.depth = 1; + texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_2D_ARRAY) ? d_ : 1; + texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1; + texture_descriptor_.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite | MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. 
*/ - this->texture_descriptor_.storageMode = MTLStorageModePrivate; - this->texture_descriptor_.sampleCount = 1; - this->texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; - this->texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault; + texture_descriptor_.storageMode = MTLStorageModePrivate; + texture_descriptor_.sampleCount = 1; + texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; + texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault; } break; /* 3D */ case GPU_TEXTURE_3D: { - BLI_assert(this->w_ > 0 && this->h_ > 0 && this->d_ > 0); - this->texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; - this->texture_descriptor_.pixelFormat = mtl_format; - this->texture_descriptor_.textureType = MTLTextureType3D; - this->texture_descriptor_.width = this->w_; - this->texture_descriptor_.height = this->h_; - this->texture_descriptor_.depth = this->d_; - this->texture_descriptor_.arrayLength = 1; - this->texture_descriptor_.mipmapLevelCount = (this->mtl_max_mips_ > 0) ? - this->mtl_max_mips_ : - 1; - this->texture_descriptor_.usage = + BLI_assert(w_ > 0 && h_ > 0 && d_ > 0); + texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; + texture_descriptor_.pixelFormat = mtl_format; + texture_descriptor_.textureType = MTLTextureType3D; + texture_descriptor_.width = w_; + texture_descriptor_.height = h_; + texture_descriptor_.depth = d_; + texture_descriptor_.arrayLength = 1; + texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1; + texture_descriptor_.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite | MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. */ - this->texture_descriptor_.storageMode = MTLStorageModePrivate; - this->texture_descriptor_.sampleCount = 1; - this->texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; - this->texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault; + texture_descriptor_.storageMode = MTLStorageModePrivate; + texture_descriptor_.sampleCount = 1; + texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; + texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault; } break; /* CUBE TEXTURES */ @@ -1771,69 +1611,63 @@ void gpu::MTLTexture::ensure_baked() case GPU_TEXTURE_CUBE_ARRAY: { /* NOTE: For a cube-map 'Texture::d_' refers to total number of faces, * not just array slices. */ - BLI_assert(this->w_ > 0 && this->h_ > 0); - this->texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; - this->texture_descriptor_.pixelFormat = mtl_format; - this->texture_descriptor_.textureType = (this->type_ == GPU_TEXTURE_CUBE_ARRAY) ? - MTLTextureTypeCubeArray : - MTLTextureTypeCube; - this->texture_descriptor_.width = this->w_; - this->texture_descriptor_.height = this->h_; - this->texture_descriptor_.depth = 1; - this->texture_descriptor_.arrayLength = (this->type_ == GPU_TEXTURE_CUBE_ARRAY) ? - this->d_ / 6 : - 1; - this->texture_descriptor_.mipmapLevelCount = (this->mtl_max_mips_ > 0) ? - this->mtl_max_mips_ : - 1; - this->texture_descriptor_.usage = + BLI_assert(w_ > 0 && h_ > 0); + texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; + texture_descriptor_.pixelFormat = mtl_format; + texture_descriptor_.textureType = (type_ == GPU_TEXTURE_CUBE_ARRAY) ? 
+ MTLTextureTypeCubeArray : + MTLTextureTypeCube; + texture_descriptor_.width = w_; + texture_descriptor_.height = h_; + texture_descriptor_.depth = 1; + texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_CUBE_ARRAY) ? d_ / 6 : 1; + texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1; + texture_descriptor_.usage = MTLTextureUsageRenderTarget | MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite | MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. */ - this->texture_descriptor_.storageMode = MTLStorageModePrivate; - this->texture_descriptor_.sampleCount = 1; - this->texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; - this->texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault; + texture_descriptor_.storageMode = MTLStorageModePrivate; + texture_descriptor_.sampleCount = 1; + texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; + texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault; } break; /* GPU_TEXTURE_BUFFER */ case GPU_TEXTURE_BUFFER: { - this->texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; - this->texture_descriptor_.pixelFormat = mtl_format; - this->texture_descriptor_.textureType = MTLTextureTypeTextureBuffer; - this->texture_descriptor_.width = this->w_; - this->texture_descriptor_.height = 1; - this->texture_descriptor_.depth = 1; - this->texture_descriptor_.arrayLength = 1; - this->texture_descriptor_.mipmapLevelCount = (this->mtl_max_mips_ > 0) ? - this->mtl_max_mips_ : - 1; - this->texture_descriptor_.usage = + texture_descriptor_ = [[MTLTextureDescriptor alloc] init]; + texture_descriptor_.pixelFormat = mtl_format; + texture_descriptor_.textureType = MTLTextureTypeTextureBuffer; + texture_descriptor_.width = w_; + texture_descriptor_.height = 1; + texture_descriptor_.depth = 1; + texture_descriptor_.arrayLength = 1; + texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1; + texture_descriptor_.usage = MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite | MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. */ - this->texture_descriptor_.storageMode = MTLStorageModePrivate; - this->texture_descriptor_.sampleCount = 1; - this->texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; - this->texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault; + texture_descriptor_.storageMode = MTLStorageModePrivate; + texture_descriptor_.sampleCount = 1; + texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache; + texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault; } break; default: { - MTL_LOG_ERROR("[METAL] Error: Cannot create texture with unknown type: %d\n", this->type_); + MTL_LOG_ERROR("[METAL] Error: Cannot create texture with unknown type: %d\n", type_); return; } break; } /* Determine Resource Mode. */ - this->resource_mode_ = MTL_TEXTURE_MODE_DEFAULT; + resource_mode_ = MTL_TEXTURE_MODE_DEFAULT; /* Create texture. 
*/ - this->texture_ = [ctx->device newTextureWithDescriptor:this->texture_descriptor_]; - - [this->texture_descriptor_ release]; - this->texture_descriptor_ = nullptr; - this->texture_.label = [NSString stringWithUTF8String:this->get_name()]; - BLI_assert(this->texture_); - this->is_baked_ = true; - this->is_dirty_ = false; + texture_ = [ctx->device newTextureWithDescriptor:texture_descriptor_]; + + [texture_descriptor_ release]; + texture_descriptor_ = nullptr; + texture_.label = [NSString stringWithUTF8String:this->get_name()]; + BLI_assert(texture_); + is_baked_ = true; + is_dirty_ = false; } /* Re-apply previous contents. */ @@ -1850,30 +1684,30 @@ void gpu::MTLTexture::reset() MTL_LOG_INFO("Texture %s reset. Size %d, %d, %d\n", this->get_name(), w_, h_, d_); /* Delete associated METAL resources. */ - if (this->texture_ != nil) { - [this->texture_ release]; - this->texture_ = nil; - this->is_baked_ = false; - this->is_dirty_ = true; + if (texture_ != nil) { + [texture_ release]; + texture_ = nil; + is_baked_ = false; + is_dirty_ = true; } - if (this->mip_swizzle_view_ != nil) { - [this->mip_swizzle_view_ release]; - this->mip_swizzle_view_ = nil; + if (mip_swizzle_view_ != nil) { + [mip_swizzle_view_ release]; + mip_swizzle_view_ = nil; } - if (this->texture_buffer_ != nil) { - [this->texture_buffer_ release]; + if (texture_buffer_ != nil) { + [texture_buffer_ release]; } /* Blit framebuffer. */ - if (this->blit_fb_) { - GPU_framebuffer_free(this->blit_fb_); - this->blit_fb_ = nullptr; + if (blit_fb_) { + GPU_framebuffer_free(blit_fb_); + blit_fb_ = nullptr; } - BLI_assert(this->texture_ == nil); - BLI_assert(this->mip_swizzle_view_ == nil); + BLI_assert(texture_ == nil); + BLI_assert(mip_swizzle_view_ == nil); } /** \} */ diff --git a/source/blender/gpu/metal/mtl_texture_util.mm b/source/blender/gpu/metal/mtl_texture_util.mm index 27efc770e73..928393fb39e 100644 --- a/source/blender/gpu/metal/mtl_texture_util.mm +++ b/source/blender/gpu/metal/mtl_texture_util.mm @@ -124,7 +124,7 @@ MTLPixelFormat gpu_texture_format_to_metal(eGPUTextureFormat tex_format) return MTLPixelFormatDepth16Unorm; default: - BLI_assert(!"Unrecognised GPU pixel format!\n"); + BLI_assert(!"Unrecognized GPU pixel format!\n"); return MTLPixelFormatRGBA8Unorm; } } @@ -183,7 +183,7 @@ int get_mtl_format_bytesize(MTLPixelFormat tex_format) return 2; default: - BLI_assert(!"Unrecognised GPU pixel format!\n"); + BLI_assert(!"Unrecognized GPU pixel format!\n"); return 1; } } @@ -238,7 +238,7 @@ int get_mtl_format_num_components(MTLPixelFormat tex_format) return 1; default: - BLI_assert(!"Unrecognised GPU pixel format!\n"); + BLI_assert(!"Unrecognized GPU pixel format!\n"); return 1; } } @@ -305,13 +305,13 @@ bool mtl_format_supports_blending(MTLPixelFormat format) * \{ */ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl( - TextureUpdateRoutineSpecialisation specialisation_params, + TextureUpdateRoutineSpecialisation specialization_params, blender::Map<TextureUpdateRoutineSpecialisation, id<MTLComputePipelineState>> - &specialisation_cache, + &specialization_cache, eGPUTextureType texture_type) { /* Check whether the Kernel exists. 
*/ - id<MTLComputePipelineState> *result = specialisation_cache.lookup_ptr(specialisation_params); + id<MTLComputePipelineState> *result = specialization_cache.lookup_ptr(specialization_params); if (result != nullptr) { return *result; } @@ -332,18 +332,18 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl( options.languageVersion = MTLLanguageVersion2_2; options.preprocessorMacros = @{ @"INPUT_DATA_TYPE" : - [NSString stringWithUTF8String:specialisation_params.input_data_type.c_str()], + [NSString stringWithUTF8String:specialization_params.input_data_type.c_str()], @"OUTPUT_DATA_TYPE" : - [NSString stringWithUTF8String:specialisation_params.output_data_type.c_str()], + [NSString stringWithUTF8String:specialization_params.output_data_type.c_str()], @"COMPONENT_COUNT_INPUT" : - [NSNumber numberWithInt:specialisation_params.component_count_input], + [NSNumber numberWithInt:specialization_params.component_count_input], @"COMPONENT_COUNT_OUTPUT" : - [NSNumber numberWithInt:specialisation_params.component_count_output], + [NSNumber numberWithInt:specialization_params.component_count_output], @"TEX_TYPE" : [NSNumber numberWithInt:((int)(texture_type))] }; /* Prepare shader library for conversion routine. */ - NSError *error = NULL; + NSError *error = nullptr; id<MTLLibrary> temp_lib = [[ctx->device newLibraryWithSource:tex_update_kernel_src options:options error:&error] autorelease]; @@ -370,7 +370,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl( /* Store PSO. */ [compute_pso retain]; - specialisation_cache.add_new(specialisation_params, compute_pso); + specialization_cache.add_new(specialization_params, compute_pso); return_pso = compute_pso; } @@ -379,53 +379,53 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl( } id<MTLComputePipelineState> gpu::MTLTexture::texture_update_1d_get_kernel( - TextureUpdateRoutineSpecialisation specialisation) + TextureUpdateRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); - return mtl_texture_update_impl(specialisation, + return mtl_texture_update_impl(specialization, mtl_context->get_texture_utils().texture_1d_update_compute_psos, GPU_TEXTURE_1D); } id<MTLComputePipelineState> gpu::MTLTexture::texture_update_1d_array_get_kernel( - TextureUpdateRoutineSpecialisation specialisation) + TextureUpdateRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); return mtl_texture_update_impl( - specialisation, + specialization, mtl_context->get_texture_utils().texture_1d_array_update_compute_psos, GPU_TEXTURE_1D_ARRAY); } id<MTLComputePipelineState> gpu::MTLTexture::texture_update_2d_get_kernel( - TextureUpdateRoutineSpecialisation specialisation) + TextureUpdateRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); - return mtl_texture_update_impl(specialisation, + return mtl_texture_update_impl(specialization, mtl_context->get_texture_utils().texture_2d_update_compute_psos, GPU_TEXTURE_2D); } id<MTLComputePipelineState> gpu::MTLTexture::texture_update_2d_array_get_kernel( - TextureUpdateRoutineSpecialisation specialisation) + TextureUpdateRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); 
BLI_assert(mtl_context != nullptr); return mtl_texture_update_impl( - specialisation, + specialization, mtl_context->get_texture_utils().texture_2d_array_update_compute_psos, GPU_TEXTURE_2D_ARRAY); } id<MTLComputePipelineState> gpu::MTLTexture::texture_update_3d_get_kernel( - TextureUpdateRoutineSpecialisation specialisation) + TextureUpdateRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); - return mtl_texture_update_impl(specialisation, + return mtl_texture_update_impl(specialization, mtl_context->get_texture_utils().texture_3d_update_compute_psos, GPU_TEXTURE_3D); } @@ -434,7 +434,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::texture_update_3d_get_kernel( * Currently does not appear to be hit. */ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get( - DepthTextureUpdateRoutineSpecialisation specialisation) + DepthTextureUpdateRoutineSpecialisation specialization) { /* Check whether the Kernel exists. */ @@ -442,13 +442,13 @@ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get( BLI_assert(mtl_context != nullptr); GPUShader **result = mtl_context->get_texture_utils().depth_2d_update_shaders.lookup_ptr( - specialisation); + specialization); if (result != nullptr) { return *result; } const char *fragment_source = nullptr; - switch (specialisation.data_mode) { + switch (specialization.data_mode) { case MTL_DEPTH_UPDATE_MODE_FLOAT: fragment_source = datatoc_depth_2d_update_float_frag_glsl; break; @@ -469,7 +469,7 @@ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get( nullptr, nullptr, "depth_2d_update_sh_get"); - mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialisation, shader); + mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialization, shader); return shader; } @@ -493,13 +493,13 @@ void gpu::MTLTexture::update_sub_depth_2d( int mip, int offset[3], int extent[3], eGPUDataFormat type, const void *data) { /* Verify we are in a valid configuration. */ - BLI_assert(ELEM(this->format_, + BLI_assert(ELEM(format_, GPU_DEPTH_COMPONENT24, GPU_DEPTH_COMPONENT32F, GPU_DEPTH_COMPONENT16, GPU_DEPTH24_STENCIL8, GPU_DEPTH32F_STENCIL8)); - BLI_assert(validate_data_format_mtl(this->format_, type)); + BLI_assert(validate_data_format_mtl(format_, type)); BLI_assert(ELEM(type, GPU_DATA_FLOAT, GPU_DATA_UINT_24_8, GPU_DATA_UINT)); /* Determine whether we are in GPU_DATA_UINT_24_8 or GPU_DATA_FLOAT mode. */ @@ -507,18 +507,18 @@ void gpu::MTLTexture::update_sub_depth_2d( eGPUTextureFormat format = (is_float) ? GPU_R32F : GPU_R32I; /* Shader key - Add parameters here for different configurations. */ - DepthTextureUpdateRoutineSpecialisation specialisation; + DepthTextureUpdateRoutineSpecialisation specialization; switch (type) { case GPU_DATA_FLOAT: - specialisation.data_mode = MTL_DEPTH_UPDATE_MODE_FLOAT; + specialization.data_mode = MTL_DEPTH_UPDATE_MODE_FLOAT; break; case GPU_DATA_UINT_24_8: - specialisation.data_mode = MTL_DEPTH_UPDATE_MODE_INT24; + specialization.data_mode = MTL_DEPTH_UPDATE_MODE_INT24; break; case GPU_DATA_UINT: - specialisation.data_mode = MTL_DEPTH_UPDATE_MODE_INT32; + specialization.data_mode = MTL_DEPTH_UPDATE_MODE_INT32; break; default: @@ -528,7 +528,7 @@ void gpu::MTLTexture::update_sub_depth_2d( /* Push contents into an r32_tex and render contents to depth using a shader. 
*/ GPUTexture *r32_tex_tmp = GPU_texture_create_2d( - "depth_intermediate_copy_tex", this->w_, this->h_, 1, format, nullptr); + "depth_intermediate_copy_tex", w_, h_, 1, format, nullptr); GPU_texture_filter_mode(r32_tex_tmp, false); GPU_texture_wrap_mode(r32_tex_tmp, false, true); gpu::MTLTexture *mtl_tex = static_cast<gpu::MTLTexture *>(unwrap(r32_tex_tmp)); @@ -538,13 +538,13 @@ void gpu::MTLTexture::update_sub_depth_2d( GPUFrameBuffer *depth_fb_temp = GPU_framebuffer_create("depth_intermediate_copy_fb"); GPU_framebuffer_texture_attach(depth_fb_temp, wrap(static_cast<Texture *>(this)), 0, mip); GPU_framebuffer_bind(depth_fb_temp); - if (extent[0] == this->w_ && extent[1] == this->h_) { + if (extent[0] == w_ && extent[1] == h_) { /* Skip load if the whole texture is being updated. */ GPU_framebuffer_clear_depth(depth_fb_temp, 0.0); GPU_framebuffer_clear_stencil(depth_fb_temp, 0); } - GPUShader *depth_2d_update_sh = depth_2d_update_sh_get(specialisation); + GPUShader *depth_2d_update_sh = depth_2d_update_sh_get(specialization); BLI_assert(depth_2d_update_sh != nullptr); GPUBatch *quad = GPU_batch_preset_quad(); GPU_batch_set_shader(quad, depth_2d_update_sh); @@ -553,7 +553,7 @@ void gpu::MTLTexture::update_sub_depth_2d( GPU_batch_uniform_1i(quad, "mip", mip); GPU_batch_uniform_2f(quad, "extent", (float)extent[0], (float)extent[1]); GPU_batch_uniform_2f(quad, "offset", (float)offset[0], (float)offset[1]); - GPU_batch_uniform_2f(quad, "size", (float)this->w_, (float)this->h_); + GPU_batch_uniform_2f(quad, "size", (float)w_, (float)h_); bool depth_write_prev = GPU_depth_mask_get(); uint stencil_mask_prev = GPU_stencil_mask_get(); @@ -591,13 +591,13 @@ void gpu::MTLTexture::update_sub_depth_2d( * \{ */ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl( - TextureReadRoutineSpecialisation specialisation_params, + TextureReadRoutineSpecialisation specialization_params, blender::Map<TextureReadRoutineSpecialisation, id<MTLComputePipelineState>> - &specialisation_cache, + &specialization_cache, eGPUTextureType texture_type) { /* Check whether the Kernel exists. */ - id<MTLComputePipelineState> *result = specialisation_cache.lookup_ptr(specialisation_params); + id<MTLComputePipelineState> *result = specialization_cache.lookup_ptr(specialization_params); if (result != nullptr) { return *result; } @@ -615,24 +615,24 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl( /* Defensive Debug Checks. 
*/ long long int depth_scale_factor = 1; - if (specialisation_params.depth_format_mode > 0) { - BLI_assert(specialisation_params.component_count_input == 1); - BLI_assert(specialisation_params.component_count_output == 1); - switch (specialisation_params.depth_format_mode) { + if (specialization_params.depth_format_mode > 0) { + BLI_assert(specialization_params.component_count_input == 1); + BLI_assert(specialization_params.component_count_output == 1); + switch (specialization_params.depth_format_mode) { case 1: /* FLOAT */ depth_scale_factor = 1; break; case 2: - /* D24 unsigned int */ + /* D24 uint */ depth_scale_factor = 0xFFFFFFu; break; case 4: - /* D32 unsigned int */ + /* D32 uint */ depth_scale_factor = 0xFFFFFFFFu; break; default: - BLI_assert_msg(0, "Unrecognised mode"); + BLI_assert_msg(0, "Unrecognized mode"); break; } } @@ -642,24 +642,24 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl( options.languageVersion = MTLLanguageVersion2_2; options.preprocessorMacros = @{ @"INPUT_DATA_TYPE" : - [NSString stringWithUTF8String:specialisation_params.input_data_type.c_str()], + [NSString stringWithUTF8String:specialization_params.input_data_type.c_str()], @"OUTPUT_DATA_TYPE" : - [NSString stringWithUTF8String:specialisation_params.output_data_type.c_str()], + [NSString stringWithUTF8String:specialization_params.output_data_type.c_str()], @"COMPONENT_COUNT_INPUT" : - [NSNumber numberWithInt:specialisation_params.component_count_input], + [NSNumber numberWithInt:specialization_params.component_count_input], @"COMPONENT_COUNT_OUTPUT" : - [NSNumber numberWithInt:specialisation_params.component_count_output], + [NSNumber numberWithInt:specialization_params.component_count_output], @"WRITE_COMPONENT_COUNT" : - [NSNumber numberWithInt:min_ii(specialisation_params.component_count_input, - specialisation_params.component_count_output)], + [NSNumber numberWithInt:min_ii(specialization_params.component_count_input, + specialization_params.component_count_output)], @"IS_DEPTH_FORMAT" : - [NSNumber numberWithInt:((specialisation_params.depth_format_mode > 0) ? 1 : 0)], + [NSNumber numberWithInt:((specialization_params.depth_format_mode > 0) ? 1 : 0)], @"DEPTH_SCALE_FACTOR" : [NSNumber numberWithLongLong:depth_scale_factor], @"TEX_TYPE" : [NSNumber numberWithInt:((int)(texture_type))] }; /* Prepare shader library for conversion routine. */ - NSError *error = NULL; + NSError *error = nullptr; id<MTLLibrary> temp_lib = [[ctx->device newLibraryWithSource:tex_update_kernel_src options:options error:&error] autorelease]; @@ -687,7 +687,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl( /* Store PSO. 
*/ [compute_pso retain]; - specialisation_cache.add_new(specialisation_params, compute_pso); + specialization_cache.add_new(specialization_params, compute_pso); return_pso = compute_pso; } @@ -696,51 +696,51 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl( } id<MTLComputePipelineState> gpu::MTLTexture::texture_read_2d_get_kernel( - TextureReadRoutineSpecialisation specialisation) + TextureReadRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); - return mtl_texture_read_impl(specialisation, + return mtl_texture_read_impl(specialization, mtl_context->get_texture_utils().texture_2d_read_compute_psos, GPU_TEXTURE_2D); } id<MTLComputePipelineState> gpu::MTLTexture::texture_read_2d_array_get_kernel( - TextureReadRoutineSpecialisation specialisation) + TextureReadRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); - return mtl_texture_read_impl(specialisation, + return mtl_texture_read_impl(specialization, mtl_context->get_texture_utils().texture_2d_array_read_compute_psos, GPU_TEXTURE_2D_ARRAY); } id<MTLComputePipelineState> gpu::MTLTexture::texture_read_1d_get_kernel( - TextureReadRoutineSpecialisation specialisation) + TextureReadRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); - return mtl_texture_read_impl(specialisation, + return mtl_texture_read_impl(specialization, mtl_context->get_texture_utils().texture_1d_read_compute_psos, GPU_TEXTURE_1D); } id<MTLComputePipelineState> gpu::MTLTexture::texture_read_1d_array_get_kernel( - TextureReadRoutineSpecialisation specialisation) + TextureReadRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); - return mtl_texture_read_impl(specialisation, + return mtl_texture_read_impl(specialization, mtl_context->get_texture_utils().texture_1d_array_read_compute_psos, GPU_TEXTURE_1D_ARRAY); } id<MTLComputePipelineState> gpu::MTLTexture::texture_read_3d_get_kernel( - TextureReadRoutineSpecialisation specialisation) + TextureReadRoutineSpecialisation specialization) { MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); BLI_assert(mtl_context != nullptr); - return mtl_texture_read_impl(specialisation, + return mtl_texture_read_impl(specialization, mtl_context->get_texture_utils().texture_3d_read_compute_psos, GPU_TEXTURE_3D); } diff --git a/source/blender/gpu/metal/mtl_uniform_buffer.hh b/source/blender/gpu/metal/mtl_uniform_buffer.hh new file mode 100644 index 00000000000..789a85f0a92 --- /dev/null +++ b/source/blender/gpu/metal/mtl_uniform_buffer.hh @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#pragma once + +#include "MEM_guardedalloc.h" +#include "gpu_uniform_buffer_private.hh" + +#include "mtl_context.hh" + +namespace blender::gpu { + +/** + * Implementation of Uniform Buffers using Metal. + **/ +class MTLUniformBuf : public UniformBuf { + private: + /* Allocation Handle. */ + gpu::MTLBuffer *metal_buffer_ = nullptr; + + /* Whether buffer has contents, if false, no GPU buffer will + * have yet been allocated. */ + bool has_data_ = false; + + /* Bind-state tracking. 
*/ + int bind_slot_ = -1; + MTLContext *bound_ctx_ = nullptr; + + public: + MTLUniformBuf(size_t size, const char *name); + ~MTLUniformBuf(); + + void update(const void *data) override; + void bind(int slot) override; + void unbind() override; + + id<MTLBuffer> get_metal_buffer(int *r_offset); + int get_size(); + const char *get_name() + { + return name_; + } + + MEM_CXX_CLASS_ALLOC_FUNCS("MTLUniformBuf"); +}; + +} // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_uniform_buffer.mm b/source/blender/gpu/metal/mtl_uniform_buffer.mm new file mode 100644 index 00000000000..4893014dedf --- /dev/null +++ b/source/blender/gpu/metal/mtl_uniform_buffer.mm @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + */ + +#include "BKE_global.h" + +#include "BLI_string.h" + +#include "gpu_backend.hh" +#include "gpu_context_private.hh" + +#include "mtl_backend.hh" +#include "mtl_context.hh" +#include "mtl_debug.hh" +#include "mtl_uniform_buffer.hh" + +namespace blender::gpu { + +MTLUniformBuf::MTLUniformBuf(size_t size, const char *name) : UniformBuf(size, name) +{ +} + +MTLUniformBuf::~MTLUniformBuf() +{ + if (metal_buffer_ != nullptr) { + metal_buffer_->free(); + metal_buffer_ = nullptr; + } + has_data_ = false; + + /* Ensure UBO is not bound to active CTX. + * UBO bindings are reset upon Context-switch so we do not need + * to check deactivated context's. */ + MTLContext *ctx = MTLContext::get(); + if (ctx) { + for (int i = 0; i < MTL_MAX_UNIFORM_BUFFER_BINDINGS; i++) { + MTLUniformBufferBinding &slot = ctx->pipeline_state.ubo_bindings[i]; + if (slot.bound && slot.ubo == this) { + slot.bound = false; + slot.ubo = nullptr; + } + } + } +} + +void MTLUniformBuf::update(const void *data) +{ + BLI_assert(this); + BLI_assert(size_in_bytes_ > 0); + + /* Free existing allocation. + * The previous UBO resource will be tracked by the memory manager, + * in case dependent GPU work is still executing. */ + if (metal_buffer_ != nullptr) { + metal_buffer_->free(); + metal_buffer_ = nullptr; + } + + /* Allocate MTL buffer */ + MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get())); + BLI_assert(ctx); + BLI_assert(ctx->device); + UNUSED_VARS_NDEBUG(ctx); + + if (data != nullptr) { + metal_buffer_ = MTLContext::get_global_memory_manager().allocate_with_data( + size_in_bytes_, true, data); + has_data_ = true; + + metal_buffer_->set_label(@"Uniform Buffer"); + BLI_assert(metal_buffer_ != nullptr); + BLI_assert(metal_buffer_->get_metal_buffer() != nil); + } + else { + /* If data is not yet present, no buffer will be allocated and MTLContext will use an empty + * null buffer, containing zeroes, if the UBO is bound. */ + metal_buffer_ = nullptr; + has_data_ = false; + } +} + +void MTLUniformBuf::bind(int slot) +{ + if (slot < 0) { + MTL_LOG_WARNING("Failed to bind UBO %p. uniform location %d invalid.\n", this, slot); + return; + } + + BLI_assert(slot < MTL_MAX_UNIFORM_BUFFER_BINDINGS); + + /* Bind current UBO to active context. */ + MTLContext *ctx = MTLContext::get(); + BLI_assert(ctx); + + MTLUniformBufferBinding &ctx_ubo_bind_slot = ctx->pipeline_state.ubo_bindings[slot]; + ctx_ubo_bind_slot.ubo = this; + ctx_ubo_bind_slot.bound = true; + + bind_slot_ = slot; + bound_ctx_ = ctx; + + /* Check if we have any deferred data to upload. */ + if (data_ != nullptr) { + this->update(data_); + MEM_SAFE_FREE(data_); + } + + /* Ensure there is at least an empty dummy buffer. 
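For context, the new MTLUniformBuf above is only reached through the generic GPU_uniformbuf_* C API. A minimal usage sketch, assuming the existing front-end declared in GPU_uniform_buffer.h; the struct and function below are illustrative only and not part of this patch:

  #include "GPU_uniform_buffer.h"

  /* Hypothetical 16-byte block matching a std140 UBO declared in a shader. */
  struct ExampleColorBlock {
    float color[4];
  };

  static void example_uniformbuf_usage()
  {
    ExampleColorBlock block = {{1.0f, 0.5f, 0.0f, 1.0f}};
    /* Routed to MTLUniformBuf on the Metal back-end, GLUniformBuf on OpenGL. */
    GPUUniformBuf *ubo = GPU_uniformbuf_create_ex(sizeof(block), &block, "ExampleColorBlock");
    GPU_uniformbuf_bind(ubo, 0); /* Slot must stay below MTL_MAX_UNIFORM_BUFFER_BINDINGS. */
    /* ... issue draws that reference binding 0 ... */
    GPU_uniformbuf_unbind(ubo);
    GPU_uniformbuf_free(ubo);
  }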
*/ + if (metal_buffer_ == nullptr) { + this->update(nullptr); + } +} + +void MTLUniformBuf::unbind() +{ + /* Unbind in debug mode to validate missing binds. + * Otherwise, only perform a full unbind upon destruction + * to ensure no lingering references. */ +#ifndef NDEBUG + if (true) { +#else + if (G.debug & G_DEBUG_GPU) { +#endif + if (bound_ctx_ != nullptr && bind_slot_ > -1) { + MTLUniformBufferBinding &ctx_ubo_bind_slot = + bound_ctx_->pipeline_state.ubo_bindings[bind_slot_]; + if (ctx_ubo_bind_slot.bound && ctx_ubo_bind_slot.ubo == this) { + ctx_ubo_bind_slot.bound = false; + ctx_ubo_bind_slot.ubo = nullptr; + } + } + } + + /* Reset bind index. */ + bind_slot_ = -1; + bound_ctx_ = nullptr; +} + +id<MTLBuffer> MTLUniformBuf::get_metal_buffer(int *r_offset) +{ + BLI_assert(this); + *r_offset = 0; + if (metal_buffer_ != nullptr && has_data_) { + *r_offset = 0; + metal_buffer_->debug_ensure_used(); + return metal_buffer_->get_metal_buffer(); + } + else { + *r_offset = 0; + return nil; + } +} + +int MTLUniformBuf::get_size() +{ + BLI_assert(this); + return size_in_bytes_; +} + +} // blender::gpu diff --git a/source/blender/gpu/opengl/gl_backend.cc b/source/blender/gpu/opengl/gl_backend.cc index 98998e2e902..9051003bcd5 100644 --- a/source/blender/gpu/opengl/gl_backend.cc +++ b/source/blender/gpu/opengl/gl_backend.cc @@ -10,8 +10,6 @@ #include "gpu_capabilities_private.hh" #include "gpu_platform_private.hh" -#include "glew-mx.h" - #include "gl_debug.hh" #include "gl_backend.hh" @@ -53,7 +51,12 @@ void GLBackend::platform_init() os = GPU_OS_UNIX; #endif - if (strstr(vendor, "ATI") || strstr(vendor, "AMD")) { + if (!vendor) { + printf("Warning: No OpenGL vendor detected.\n"); + device = GPU_DEVICE_UNKNOWN; + driver = GPU_DRIVER_ANY; + } + else if (strstr(vendor, "ATI") || strstr(vendor, "AMD")) { device = GPU_DEVICE_ATI; driver = GPU_DRIVER_OFFICIAL; } @@ -115,7 +118,7 @@ void GLBackend::platform_init() } /* Detect support level */ - if (!GLEW_VERSION_3_3) { + if (!(epoxy_gl_version() >= 33)) { support_level = GPU_SUPPORT_LEVEL_UNSUPPORTED; } else { @@ -224,6 +227,7 @@ static void detect_workarounds() GLContext::unused_fb_slot_workaround = true; /* Turn off extensions. */ GCaps.shader_image_load_store_support = false; + GCaps.shader_draw_parameters_support = false; GCaps.shader_storage_buffer_objects_support = false; GLContext::base_instance_support = false; GLContext::clear_texture_support = false; @@ -245,14 +249,14 @@ static void detect_workarounds() return; } - /* Limit support for GLEW_ARB_base_instance to OpenGL 4.0 and higher. NVIDIA Quadro FX 4800 - * (TeraScale) report that they support GLEW_ARB_base_instance, but the driver does not support + /* Limit support for GL_ARB_base_instance to OpenGL 4.0 and higher. NVIDIA Quadro FX 4800 + * (TeraScale) report that they support GL_ARB_base_instance, but the driver does not support * GLEW_ARB_draw_indirect as it has an OpenGL3 context what also matches the minimum needed * requirements. * * We use it as a target for glMapBuffer(Range) what is part of the OpenGL 4 API. 
So better * disable it when we don't have an OpenGL4 context (See T77657) */ - if (!GLEW_VERSION_4_0) { + if (!(epoxy_gl_version() >= 40)) { GLContext::base_instance_support = false; } if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_WIN, GPU_DRIVER_OFFICIAL) && @@ -268,6 +272,7 @@ static void detect_workarounds() GLContext::unused_fb_slot_workaround = true; GCaps.mip_render_workaround = true; GCaps.shader_image_load_store_support = false; + GCaps.shader_draw_parameters_support = false; GCaps.broken_amd_driver = true; } /* Compute shaders have some issues with those versions (see T94936). */ @@ -281,12 +286,14 @@ static void detect_workarounds() strstr(renderer, "AMD TAHITI"))) { GLContext::unused_fb_slot_workaround = true; GCaps.shader_image_load_store_support = false; + GCaps.shader_draw_parameters_support = false; GCaps.broken_amd_driver = true; } /* Fix slowdown on this particular driver. (see T77641) */ if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_UNIX, GPU_DRIVER_OPENSOURCE) && strstr(version, "Mesa 19.3.4")) { GCaps.shader_image_load_store_support = false; + GCaps.shader_draw_parameters_support = false; GCaps.broken_amd_driver = true; } /* See T82856: AMD drivers since 20.11 running on a polaris architecture doesn't support the @@ -315,7 +322,8 @@ static void detect_workarounds() /* Limit this fix to older hardware with GL < 4.5. This means Broadwell GPUs are * covered since they only support GL 4.4 on windows. * This fixes some issues with workbench anti-aliasing on Win + Intel GPU. (see T76273) */ - if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_WIN, GPU_DRIVER_OFFICIAL) && !GLEW_VERSION_4_5) { + if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_WIN, GPU_DRIVER_OFFICIAL) && + !(epoxy_gl_version() >= 45)) { GLContext::copy_image_support = false; } /* Special fix for these specific GPUs. @@ -330,7 +338,7 @@ static void detect_workarounds() strstr(renderer, "HD Graphics 2500"))) { GLContext::texture_cube_map_array_support = false; } - /* Maybe not all of these drivers have problems with `GLEW_ARB_base_instance`. + /* Maybe not all of these drivers have problems with `GL_ARB_base_instance`. * But it's hard to test each case. * We get crashes from some crappy Intel drivers don't work well with shaders created in * different rendering contexts. */ @@ -355,7 +363,8 @@ static void detect_workarounds() } /* There is a bug on older Nvidia GPU where GL_ARB_texture_gather * is reported to be supported but yield a compile error (see T55802). */ - if (GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_ANY) && !GLEW_VERSION_4_0) { + if (GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_ANY) && + !(epoxy_gl_version() >= 40)) { GLContext::texture_gather_support = false; } @@ -377,6 +386,11 @@ static void detect_workarounds() } } + /* Disable TF on macOS. */ + if (GPU_type_matches(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY)) { + GCaps.transform_feedback_support = false; + } + /* Some Intel drivers have issues with using mips as frame-buffer targets if * GL_TEXTURE_MAX_LEVEL is higher than the target MIP. * Only check at the end after all other workarounds because this uses the drawing code. @@ -422,7 +436,6 @@ static void detect_workarounds() /** Internal capabilities. 
*/ GLint GLContext::max_cubemap_size = 0; -GLint GLContext::max_texture_3d_size = 0; GLint GLContext::max_ubo_binds = 0; GLint GLContext::max_ubo_size = 0; GLint GLContext::max_ssbo_binds = 0; @@ -459,7 +472,7 @@ float GLContext::derivative_signs[2] = {1.0f, 1.0f}; void GLBackend::capabilities_init() { - BLI_assert(GLEW_VERSION_3_3); + BLI_assert(epoxy_gl_version() >= 33); /* Common Capabilities. */ glGetIntegerv(GL_MAX_TEXTURE_SIZE, &GCaps.max_texture_size); glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &GCaps.max_texture_layers); @@ -484,9 +497,15 @@ void GLBackend::capabilities_init() glGetIntegerv(GL_NUM_EXTENSIONS, &GCaps.extensions_len); GCaps.extension_get = gl_extension_get; - GCaps.mem_stats_support = GLEW_NVX_gpu_memory_info || GLEW_ATI_meminfo; - GCaps.shader_image_load_store_support = GLEW_ARB_shader_image_load_store; - GCaps.compute_shader_support = GLEW_ARB_compute_shader && GLEW_VERSION_4_3; + GCaps.max_samplers = GCaps.max_textures; + GCaps.mem_stats_support = epoxy_has_gl_extension("GL_NVX_gpu_memory_info") || + epoxy_has_gl_extension("GL_ATI_meminfo"); + GCaps.shader_image_load_store_support = epoxy_has_gl_extension("GL_ARB_shader_image_load_store"); + GCaps.shader_draw_parameters_support = epoxy_has_gl_extension("GL_ARB_shader_draw_parameters"); + GCaps.compute_shader_support = epoxy_has_gl_extension("GL_ARB_compute_shader") && + epoxy_gl_version() >= 43; + GCaps.max_samplers = GCaps.max_textures; + if (GCaps.compute_shader_support) { glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &GCaps.max_work_group_count[0]); glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &GCaps.max_work_group_count[1]); @@ -498,35 +517,56 @@ void GLBackend::capabilities_init() &GCaps.max_shader_storage_buffer_bindings); glGetIntegerv(GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, &GCaps.max_compute_shader_storage_blocks); } - GCaps.shader_storage_buffer_objects_support = GLEW_ARB_shader_storage_buffer_object; + GCaps.shader_storage_buffer_objects_support = epoxy_has_gl_extension( + "GL_ARB_shader_storage_buffer_object"); + GCaps.transform_feedback_support = true; + /* GL specific capabilities. */ - glGetIntegerv(GL_MAX_3D_TEXTURE_SIZE, &GLContext::max_texture_3d_size); + glGetIntegerv(GL_MAX_3D_TEXTURE_SIZE, &GCaps.max_texture_3d_size); glGetIntegerv(GL_MAX_CUBE_MAP_TEXTURE_SIZE, &GLContext::max_cubemap_size); glGetIntegerv(GL_MAX_FRAGMENT_UNIFORM_BLOCKS, &GLContext::max_ubo_binds); glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &GLContext::max_ubo_size); if (GCaps.shader_storage_buffer_objects_support) { - glGetIntegerv(GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, &GLContext::max_ssbo_binds); + GLint max_ssbo_binds; + GLContext::max_ssbo_binds = 999999; + glGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &max_ssbo_binds); + GLContext::max_ssbo_binds = min_ii(GLContext::max_ssbo_binds, max_ssbo_binds); + glGetIntegerv(GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, &max_ssbo_binds); + GLContext::max_ssbo_binds = min_ii(GLContext::max_ssbo_binds, max_ssbo_binds); + glGetIntegerv(GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, &max_ssbo_binds); + GLContext::max_ssbo_binds = min_ii(GLContext::max_ssbo_binds, max_ssbo_binds); + if (GLContext::max_ssbo_binds < 8) { + /* Does not meet our minimum requirements. 
*/ + GCaps.shader_storage_buffer_objects_support = false; + } glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &GLContext::max_ssbo_size); } - GLContext::base_instance_support = GLEW_ARB_base_instance; - GLContext::clear_texture_support = GLEW_ARB_clear_texture; - GLContext::copy_image_support = GLEW_ARB_copy_image; - GLContext::debug_layer_support = GLEW_VERSION_4_3 || GLEW_KHR_debug || GLEW_ARB_debug_output; - GLContext::direct_state_access_support = GLEW_ARB_direct_state_access; - GLContext::explicit_location_support = GLEW_VERSION_4_3; - GLContext::geometry_shader_invocations = GLEW_ARB_gpu_shader5; - GLContext::fixed_restart_index_support = GLEW_ARB_ES3_compatibility; - GLContext::layered_rendering_support = GLEW_AMD_vertex_shader_layer; - GLContext::native_barycentric_support = GLEW_AMD_shader_explicit_vertex_parameter; - GLContext::multi_bind_support = GLEW_ARB_multi_bind; - GLContext::multi_draw_indirect_support = GLEW_ARB_multi_draw_indirect; - GLContext::shader_draw_parameters_support = GLEW_ARB_shader_draw_parameters; - GLContext::stencil_texturing_support = GLEW_VERSION_4_3; - GLContext::texture_cube_map_array_support = GLEW_ARB_texture_cube_map_array; - GLContext::texture_filter_anisotropic_support = GLEW_EXT_texture_filter_anisotropic; - GLContext::texture_gather_support = GLEW_ARB_texture_gather; - GLContext::texture_storage_support = GLEW_VERSION_4_3; - GLContext::vertex_attrib_binding_support = GLEW_ARB_vertex_attrib_binding; + GLContext::base_instance_support = epoxy_has_gl_extension("GL_ARB_base_instance"); + GLContext::clear_texture_support = epoxy_has_gl_extension("GL_ARB_clear_texture"); + GLContext::copy_image_support = epoxy_has_gl_extension("GL_ARB_copy_image"); + GLContext::debug_layer_support = epoxy_gl_version() >= 43 || + epoxy_has_gl_extension("GL_KHR_debug") || + epoxy_has_gl_extension("GL_ARB_debug_output"); + GLContext::direct_state_access_support = epoxy_has_gl_extension("GL_ARB_direct_state_access"); + GLContext::explicit_location_support = epoxy_gl_version() >= 43; + GLContext::geometry_shader_invocations = epoxy_has_gl_extension("GL_ARB_gpu_shader5"); + GLContext::fixed_restart_index_support = epoxy_has_gl_extension("GL_ARB_ES3_compatibility"); + GLContext::layered_rendering_support = epoxy_has_gl_extension("GL_AMD_vertex_shader_layer"); + GLContext::native_barycentric_support = epoxy_has_gl_extension( + "GL_AMD_shader_explicit_vertex_parameter"); + GLContext::multi_bind_support = epoxy_has_gl_extension("GL_ARB_multi_bind"); + GLContext::multi_draw_indirect_support = epoxy_has_gl_extension("GL_ARB_multi_draw_indirect"); + GLContext::shader_draw_parameters_support = epoxy_has_gl_extension( + "GL_ARB_shader_draw_parameters"); + GLContext::stencil_texturing_support = epoxy_gl_version() >= 43; + GLContext::texture_cube_map_array_support = epoxy_has_gl_extension( + "GL_ARB_texture_cube_map_array"); + GLContext::texture_filter_anisotropic_support = epoxy_has_gl_extension( + "GL_EXT_texture_filter_anisotropic"); + GLContext::texture_gather_support = epoxy_has_gl_extension("GL_ARB_texture_gather"); + GLContext::texture_storage_support = epoxy_gl_version() >= 43; + GLContext::vertex_attrib_binding_support = epoxy_has_gl_extension( + "GL_ARB_vertex_attrib_binding"); detect_workarounds(); diff --git a/source/blender/gpu/opengl/gl_backend.hh b/source/blender/gpu/opengl/gl_backend.hh index 29249111294..8646d94e2fd 100644 --- a/source/blender/gpu/opengl/gl_backend.hh +++ b/source/blender/gpu/opengl/gl_backend.hh @@ -42,11 +42,15 @@ class GLBackend : public GPUBackend 
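The GLEW-to-libepoxy conversion above follows one mechanical pattern: version booleans become integer comparisons and GLEW_ARB_* flags become runtime extension queries. A self-contained sketch of the two epoxy entry points relied on, assuming a GL context is already current:

  #include <epoxy/gl.h>
  #include <cstdio>

  static void example_epoxy_queries()
  {
    /* epoxy_gl_version() returns major * 10 + minor, e.g. 43 for OpenGL 4.3,
     * so a GLEW_VERSION_4_3 check becomes (epoxy_gl_version() >= 43). */
    const int version = epoxy_gl_version();
    /* GLEW_ARB_xxx booleans become extension-string queries by name. */
    const bool has_dsa = epoxy_has_gl_extension("GL_ARB_direct_state_access");
    printf("OpenGL %d.%d, direct state access: %s\n",
           version / 10, version % 10, has_dsa ? "yes" : "no");
  }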
{ } ~GLBackend() { - GLTexture::samplers_free(); - GLBackend::platform_exit(); } + void delete_resources() override + { + /* Delete any resources with context active. */ + GLTexture::samplers_free(); + } + static GLBackend *get() { return static_cast<GLBackend *>(GPUBackend::get()); @@ -129,11 +133,11 @@ class GLBackend : public GPUBackend { dynamic_cast<GLStorageBuf *>(indirect_buf)->bind_as(GL_DISPATCH_INDIRECT_BUFFER); /* This barrier needs to be here as it only work on the currently bound indirect buffer. */ - glMemoryBarrier(GL_DRAW_INDIRECT_BUFFER); + glMemoryBarrier(GL_COMMAND_BARRIER_BIT); glDispatchComputeIndirect((GLintptr)0); /* Unbind. */ - glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); + glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, 0); } /* Render Frame Coordination */ diff --git a/source/blender/gpu/opengl/gl_batch.cc b/source/blender/gpu/opengl/gl_batch.cc index e738413879e..ff8867fe3e6 100644 --- a/source/blender/gpu/opengl/gl_batch.cc +++ b/source/blender/gpu/opengl/gl_batch.cc @@ -11,16 +11,14 @@ #include "BLI_assert.h" -#include "glew-mx.h" - #include "gpu_batch_private.hh" #include "gpu_shader_private.hh" -#include "gl_backend.hh" #include "gl_context.hh" #include "gl_debug.hh" #include "gl_index_buffer.hh" #include "gl_primitive.hh" +#include "gl_storage_buffer.hh" #include "gl_vertex_array.hh" #include "gl_batch.hh" @@ -329,4 +327,55 @@ void GLBatch::draw(int v_first, int v_count, int i_first, int i_count) } } +void GLBatch::draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) +{ + GL_CHECK_RESOURCES("Batch"); + + this->bind(0); + + /* TODO(fclem): Make the barrier and binding optional if consecutive draws are issued. */ + dynamic_cast<GLStorageBuf *>(unwrap(indirect_buf))->bind_as(GL_DRAW_INDIRECT_BUFFER); + /* This barrier needs to be here as it only work on the currently bound indirect buffer. */ + glMemoryBarrier(GL_COMMAND_BARRIER_BIT); + + GLenum gl_type = to_gl(prim_type); + if (elem) { + const GLIndexBuf *el = this->elem_(); + GLenum index_type = to_gl(el->index_type_); + glDrawElementsIndirect(gl_type, index_type, (GLvoid *)offset); + } + else { + glDrawArraysIndirect(gl_type, (GLvoid *)offset); + } + /* Unbind. */ + glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); +} + +void GLBatch::multi_draw_indirect(GPUStorageBuf *indirect_buf, + int count, + intptr_t offset, + intptr_t stride) +{ + GL_CHECK_RESOURCES("Batch"); + + this->bind(0); + + /* TODO(fclem): Make the barrier and binding optional if consecutive draws are issued. */ + dynamic_cast<GLStorageBuf *>(unwrap(indirect_buf))->bind_as(GL_DRAW_INDIRECT_BUFFER); + /* This barrier needs to be here as it only work on the currently bound indirect buffer. */ + glMemoryBarrier(GL_COMMAND_BARRIER_BIT); + + GLenum gl_type = to_gl(prim_type); + if (elem) { + const GLIndexBuf *el = this->elem_(); + GLenum index_type = to_gl(el->index_type_); + glMultiDrawElementsIndirect(gl_type, index_type, (GLvoid *)offset, count, stride); + } + else { + glMultiDrawArraysIndirect(gl_type, (GLvoid *)offset, count, stride); + } + /* Unbind. 
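The GLBatch::draw_indirect() and multi_draw_indirect() paths added above read their arguments from the buffer bound to GL_DRAW_INDIRECT_BUFFER, with the stride argument being the byte distance between consecutive commands. The command layouts are fixed by the OpenGL specification and are sketched here for reference (not part of the patch):

  #include <epoxy/gl.h>

  /* Layout consumed by glDrawElementsIndirect / glMultiDrawElementsIndirect. */
  struct DrawElementsIndirectCommand {
    GLuint count;         /* Number of indices to draw. */
    GLuint instanceCount; /* Number of instances. */
    GLuint firstIndex;    /* Offset (in indices) into the bound index buffer. */
    GLint baseVertex;     /* Added to each fetched index. */
    GLuint baseInstance;  /* First instance ID. */
  };

  /* Layout consumed by glDrawArraysIndirect / glMultiDrawArraysIndirect. */
  struct DrawArraysIndirectCommand {
    GLuint count;         /* Number of vertices to draw. */
    GLuint instanceCount; /* Number of instances. */
    GLuint first;         /* First vertex. */
    GLuint baseInstance;  /* First instance ID. */
  };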
*/ + glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); +} + /** \} */ diff --git a/source/blender/gpu/opengl/gl_batch.hh b/source/blender/gpu/opengl/gl_batch.hh index a25e495b3b1..714aa1220be 100644 --- a/source/blender/gpu/opengl/gl_batch.hh +++ b/source/blender/gpu/opengl/gl_batch.hh @@ -17,8 +17,6 @@ #include "gl_index_buffer.hh" #include "gl_vertex_buffer.hh" -#include "glew-mx.h" - namespace blender { namespace gpu { @@ -35,9 +33,9 @@ class GLShaderInterface; class GLVaoCache { private: /** Context for which the vao_cache_ was generated. */ - GLContext *context_ = NULL; + GLContext *context_ = nullptr; /** Last interface this batch was drawn with. */ - GLShaderInterface *interface_ = NULL; + GLShaderInterface *interface_ = nullptr; /** Cached VAO for the last interface. */ GLuint vao_id_ = 0; /** Used when arb_base_instance is not supported. */ @@ -93,6 +91,11 @@ class GLBatch : public Batch { public: void draw(int v_first, int v_count, int i_first, int i_count) override; + void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override; + void multi_draw_indirect(GPUStorageBuf *indirect_buf, + int count, + intptr_t offset, + intptr_t stride) override; void bind(int i_first); /* Convenience getters. */ diff --git a/source/blender/gpu/opengl/gl_compute.cc b/source/blender/gpu/opengl/gl_compute.cc index 2fbf23c227d..1f8bb69dc3a 100644 --- a/source/blender/gpu/opengl/gl_compute.cc +++ b/source/blender/gpu/opengl/gl_compute.cc @@ -8,8 +8,6 @@ #include "gl_debug.hh" -#include "glew-mx.h" - namespace blender::gpu { void GLCompute::dispatch(int group_x_len, int group_y_len, int group_z_len) diff --git a/source/blender/gpu/opengl/gl_context.cc b/source/blender/gpu/opengl/gl_context.cc index 72892ffcd34..31bd7e0c4dd 100644 --- a/source/blender/gpu/opengl/gl_context.cc +++ b/source/blender/gpu/opengl/gl_context.cc @@ -6,7 +6,6 @@ */ #include "BLI_assert.h" -#include "BLI_system.h" #include "BLI_utildefines.h" #include "BKE_global.h" @@ -77,7 +76,7 @@ GLContext::GLContext(void *ghost_window, GLSharedOrphanLists &shared_orphan_list } } else { - /* For off-screen contexts. Default frame-buffer is NULL. */ + /* For off-screen contexts. Default frame-buffer is null. */ back_left = new GLFrameBuffer("back_left", this, GL_NONE, 0, 0, 0); } @@ -150,6 +149,16 @@ void GLContext::deactivate() is_active_ = false; } +void GLContext::begin_frame() +{ + /* No-op. */ +} + +void GLContext::end_frame() +{ + /* No-op. */ +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -295,12 +304,12 @@ void GLContext::vao_cache_unregister(GLVaoCache *cache) void GLContext::memory_statistics_get(int *r_total_mem, int *r_free_mem) { /* TODO(merwin): use Apple's platform API to get this info. */ - if (GLEW_NVX_gpu_memory_info) { + if (epoxy_has_gl_extension("GL_NVX_gpu_memory_info")) { /* Returned value in Kb. 
*/ glGetIntegerv(GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX, r_total_mem); glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, r_free_mem); } - else if (GLEW_ATI_meminfo) { + else if (epoxy_has_gl_extension("GL_ATI_meminfo")) { int stats[4]; glGetIntegerv(GL_TEXTURE_FREE_MEMORY_ATI, stats); diff --git a/source/blender/gpu/opengl/gl_context.hh b/source/blender/gpu/opengl/gl_context.hh index c333c8a4afd..1d413750fd4 100644 --- a/source/blender/gpu/opengl/gl_context.hh +++ b/source/blender/gpu/opengl/gl_context.hh @@ -16,8 +16,6 @@ #include "gl_state.hh" -#include "glew-mx.h" - #include <mutex> namespace blender { @@ -42,7 +40,6 @@ class GLContext : public Context { /** Capabilities. */ static GLint max_cubemap_size; - static GLint max_texture_3d_size; static GLint max_ubo_size; static GLint max_ubo_binds; static GLint max_ssbo_size; @@ -106,6 +103,8 @@ class GLContext : public Context { void activate() override; void deactivate() override; + void begin_frame() override; + void end_frame() override; void flush() override; void finish() override; diff --git a/source/blender/gpu/opengl/gl_debug.cc b/source/blender/gpu/opengl/gl_debug.cc index f82138e0d65..4c9f766c93c 100644 --- a/source/blender/gpu/opengl/gl_debug.cc +++ b/source/blender/gpu/opengl/gl_debug.cc @@ -19,8 +19,6 @@ #include "CLG_log.h" -#include "glew-mx.h" - #include "gl_context.hh" #include "gl_uniform_buffer.hh" @@ -138,8 +136,8 @@ void init_gl_callbacks() char msg[256] = ""; const char format[] = "Successfully hooked OpenGL debug callback using %s"; - if (GLEW_VERSION_4_3 || GLEW_KHR_debug) { - SNPRINTF(msg, format, GLEW_VERSION_4_3 ? "OpenGL 4.3" : "KHR_debug extension"); + if (epoxy_gl_version() >= 43 || epoxy_has_gl_extension("GL_KHR_debug")) { + SNPRINTF(msg, format, epoxy_gl_version() >= 43 ? "OpenGL 4.3" : "KHR_debug extension"); glEnable(GL_DEBUG_OUTPUT); glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); glDebugMessageCallback((GLDEBUGPROC)debug_callback, nullptr); @@ -151,7 +149,7 @@ void init_gl_callbacks() -1, msg); } - else if (GLEW_ARB_debug_output) { + else if (epoxy_has_gl_extension("GL_ARB_debug_output")) { SNPRINTF(msg, format, "ARB_debug_output"); glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); glDebugMessageCallbackARB((GLDEBUGPROCARB)debug_callback, nullptr); @@ -189,7 +187,7 @@ void check_gl_error(const char *info) case err: { \ char msg[256]; \ SNPRINTF(msg, "%s : %s", #err, info); \ - debug_callback(0, GL_DEBUG_TYPE_ERROR, 0, GL_DEBUG_SEVERITY_HIGH, 0, msg, NULL); \ + debug_callback(0, GL_DEBUG_TYPE_ERROR, 0, GL_DEBUG_SEVERITY_HIGH, 0, msg, nullptr); \ break; \ } @@ -327,7 +325,8 @@ static const char *to_str_suffix(GLenum type) void object_label(GLenum type, GLuint object, const char *name) { - if ((G.debug & G_DEBUG_GPU) && (GLEW_VERSION_4_3 || GLEW_KHR_debug)) { + if ((G.debug & G_DEBUG_GPU) && + (epoxy_gl_version() >= 43 || epoxy_has_gl_extension("GL_KHR_debug"))) { char label[64]; SNPRINTF(label, "%s%s%s", to_str_prefix(type), name, to_str_suffix(type)); /* Small convenience for caller. */ @@ -365,7 +364,8 @@ namespace blender::gpu { void GLContext::debug_group_begin(const char *name, int index) { - if ((G.debug & G_DEBUG_GPU) && (GLEW_VERSION_4_3 || GLEW_KHR_debug)) { + if ((G.debug & G_DEBUG_GPU) && + (epoxy_gl_version() >= 43 || epoxy_has_gl_extension("GL_KHR_debug"))) { /* Add 10 to avoid collision with other indices from other possible callback layers. 
*/ index += 10; glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, index, -1, name); @@ -374,7 +374,8 @@ void GLContext::debug_group_begin(const char *name, int index) void GLContext::debug_group_end() { - if ((G.debug & G_DEBUG_GPU) && (GLEW_VERSION_4_3 || GLEW_KHR_debug)) { + if ((G.debug & G_DEBUG_GPU) && + (epoxy_gl_version() >= 43 || epoxy_has_gl_extension("GL_KHR_debug"))) { glPopDebugGroup(); } } diff --git a/source/blender/gpu/opengl/gl_debug.hh b/source/blender/gpu/opengl/gl_debug.hh index e24b6f2bb23..b573196216d 100644 --- a/source/blender/gpu/opengl/gl_debug.hh +++ b/source/blender/gpu/opengl/gl_debug.hh @@ -8,8 +8,6 @@ #include "gl_context.hh" -#include "glew-mx.h" - /* Manual line breaks for readability. */ /* clang-format off */ #define _VA_ARG_LIST1(t) t diff --git a/source/blender/gpu/opengl/gl_debug_layer.cc b/source/blender/gpu/opengl/gl_debug_layer.cc index c02b6b26068..79d1b54828d 100644 --- a/source/blender/gpu/opengl/gl_debug_layer.cc +++ b/source/blender/gpu/opengl/gl_debug_layer.cc @@ -10,8 +10,6 @@ #include "BLI_utildefines.h" -#include "glew-mx.h" - #include "gl_debug.hh" using GPUvoidptr = void *; diff --git a/source/blender/gpu/opengl/gl_drawlist.cc b/source/blender/gpu/opengl/gl_drawlist.cc index 2f87c859273..fd76d8c58f8 100644 --- a/source/blender/gpu/opengl/gl_drawlist.cc +++ b/source/blender/gpu/opengl/gl_drawlist.cc @@ -11,9 +11,6 @@ #include "BLI_assert.h" #include "GPU_batch.h" -#include "GPU_capabilities.h" - -#include "glew-mx.h" #include "gpu_context_private.hh" #include "gpu_drawlist_private.hh" diff --git a/source/blender/gpu/opengl/gl_framebuffer.cc b/source/blender/gpu/opengl/gl_framebuffer.cc index 57eeabba0a0..bd9fba4250d 100644 --- a/source/blender/gpu/opengl/gl_framebuffer.cc +++ b/source/blender/gpu/opengl/gl_framebuffer.cc @@ -7,8 +7,6 @@ #include "BKE_global.h" -#include "GPU_capabilities.h" - #include "gl_backend.hh" #include "gl_debug.hh" #include "gl_state.hh" diff --git a/source/blender/gpu/opengl/gl_framebuffer.hh b/source/blender/gpu/opengl/gl_framebuffer.hh index 3927ff27532..8ee04a584bd 100644 --- a/source/blender/gpu/opengl/gl_framebuffer.hh +++ b/source/blender/gpu/opengl/gl_framebuffer.hh @@ -11,8 +11,6 @@ #include "MEM_guardedalloc.h" -#include "glew-mx.h" - #include "gpu_framebuffer_private.hh" namespace blender::gpu { @@ -30,9 +28,9 @@ class GLFrameBuffer : public FrameBuffer { /** OpenGL handle. */ GLuint fbo_id_ = 0; /** Context the handle is from. Frame-buffers are not shared across contexts. */ - GLContext *context_ = NULL; + GLContext *context_ = nullptr; /** State Manager of the same contexts. */ - GLStateManager *state_manager_ = NULL; + GLStateManager *state_manager_ = nullptr; /** Copy of the GL state. Contains ONLY color attachments enums for slot binding. */ GLenum gl_attachments_[GPU_FB_MAX_COLOR_ATTACHMENT]; /** Internal frame-buffers are immutable. */ @@ -77,6 +75,11 @@ class GLFrameBuffer : public FrameBuffer { eGPUDataFormat data_format, const void *clear_value) override; + /* Attachment load-stores are currently no-op's in OpenGL. 
*/ + void attachment_set_loadstore_op(GPUAttachmentType /*type*/, + eGPULoadOp /*load_action*/, + eGPUStoreOp /*store_action*/) override{}; + void read(eGPUFrameBufferBits planes, eGPUDataFormat format, const int area[4], diff --git a/source/blender/gpu/opengl/gl_immediate.cc b/source/blender/gpu/opengl/gl_immediate.cc index c32a6afd8cf..a332a2fbc7c 100644 --- a/source/blender/gpu/opengl/gl_immediate.cc +++ b/source/blender/gpu/opengl/gl_immediate.cc @@ -7,8 +7,6 @@ * Mimics old style opengl immediate mode drawing. */ -#include "BKE_global.h" - #include "gpu_context_private.hh" #include "gpu_shader_private.hh" #include "gpu_vertex_format_private.h" diff --git a/source/blender/gpu/opengl/gl_immediate.hh b/source/blender/gpu/opengl/gl_immediate.hh index eb94dc20e21..5c6ff510cef 100644 --- a/source/blender/gpu/opengl/gl_immediate.hh +++ b/source/blender/gpu/opengl/gl_immediate.hh @@ -11,8 +11,6 @@ #include "MEM_guardedalloc.h" -#include "glew-mx.h" - #include "gpu_immediate_private.hh" namespace blender::gpu { diff --git a/source/blender/gpu/opengl/gl_index_buffer.cc b/source/blender/gpu/opengl/gl_index_buffer.cc index 8cedb831272..566169182e3 100644 --- a/source/blender/gpu/opengl/gl_index_buffer.cc +++ b/source/blender/gpu/opengl/gl_index_buffer.cc @@ -6,7 +6,6 @@ */ #include "gl_context.hh" -#include "gl_debug.hh" #include "gl_index_buffer.hh" diff --git a/source/blender/gpu/opengl/gl_index_buffer.hh b/source/blender/gpu/opengl/gl_index_buffer.hh index 8a10884d48b..974c01d2b65 100644 --- a/source/blender/gpu/opengl/gl_index_buffer.hh +++ b/source/blender/gpu/opengl/gl_index_buffer.hh @@ -11,7 +11,7 @@ #include "gpu_index_buffer_private.hh" -#include "glew-mx.h" +#include <epoxy/gl.h> namespace blender::gpu { @@ -35,9 +35,11 @@ class GLIndexBuf : public IndexBuf { { additional_vertex_offset += index_start_; if (index_type_ == GPU_INDEX_U32) { - return (GLuint *)0 + additional_vertex_offset; + return reinterpret_cast<void *>(static_cast<intptr_t>(additional_vertex_offset) * + sizeof(GLuint)); } - return (GLushort *)0 + additional_vertex_offset; + return reinterpret_cast<void *>(static_cast<intptr_t>(additional_vertex_offset) * + sizeof(GLushort)); } GLuint restart_index() const @@ -51,6 +53,10 @@ class GLIndexBuf : public IndexBuf { private: bool is_active() const; + void strip_restart_indices() override + { + /* No-op. 
*/ + } MEM_CXX_CLASS_ALLOC_FUNCS("GLIndexBuf") }; diff --git a/source/blender/gpu/opengl/gl_primitive.hh b/source/blender/gpu/opengl/gl_primitive.hh index 2a8590e8b3e..c4c7734a2cd 100644 --- a/source/blender/gpu/opengl/gl_primitive.hh +++ b/source/blender/gpu/opengl/gl_primitive.hh @@ -13,8 +13,6 @@ #include "GPU_primitive.h" -#include "glew-mx.h" - namespace blender::gpu { static inline GLenum to_gl(GPUPrimType prim_type) diff --git a/source/blender/gpu/opengl/gl_query.hh b/source/blender/gpu/opengl/gl_query.hh index e15a2584e07..a851ab4ecdd 100644 --- a/source/blender/gpu/opengl/gl_query.hh +++ b/source/blender/gpu/opengl/gl_query.hh @@ -11,7 +11,7 @@ #include "gpu_query.hh" -#include "glew-mx.h" +#include <epoxy/gl.h> namespace blender::gpu { diff --git a/source/blender/gpu/opengl/gl_shader.cc b/source/blender/gpu/opengl/gl_shader.cc index 83b00da0446..1f2ef36716e 100644 --- a/source/blender/gpu/opengl/gl_shader.cc +++ b/source/blender/gpu/opengl/gl_shader.cc @@ -13,7 +13,6 @@ #include "GPU_capabilities.h" #include "GPU_platform.h" -#include "gl_backend.hh" #include "gl_debug.hh" #include "gl_vertex_buffer.hh" @@ -546,7 +545,7 @@ std::string GLShader::vertex_interface_declare(const ShaderCreateInfo &info) con if (!GLContext::native_barycentric_support) { /* Disabled or unsupported. */ } - else if (GLEW_AMD_shader_explicit_vertex_parameter) { + else if (epoxy_has_gl_extension("GL_AMD_shader_explicit_vertex_parameter")) { /* Need this for stable barycentric. */ ss << "flat out vec4 gpu_pos_flat;\n"; ss << "out vec4 gpu_pos;\n"; @@ -582,7 +581,7 @@ std::string GLShader::fragment_interface_declare(const ShaderCreateInfo &info) c ss << "noperspective in vec3 gpu_BaryCoordNoPersp;\n"; ss << "#define gpu_position_at_vertex(v) gpu_pos[v]\n"; } - else if (GLEW_AMD_shader_explicit_vertex_parameter) { + else if (epoxy_has_gl_extension("GL_AMD_shader_explicit_vertex_parameter")) { std::cout << "native" << std::endl; /* NOTE(fclem): This won't work with geometry shader. Hopefully, we don't need geometry * shader workaround if this extension/feature is detected. */ @@ -613,7 +612,7 @@ std::string GLShader::fragment_interface_declare(const ShaderCreateInfo &info) c if (info.early_fragment_test_) { ss << "layout(early_fragment_tests) in;\n"; } - if (GLEW_ARB_conservative_depth) { + if (epoxy_has_gl_extension("GL_ARB_conservative_depth")) { ss << "layout(" << to_string(info.depth_write_) << ") out float gl_FragDepth;\n"; } ss << "\n/* Outputs. */\n"; @@ -806,7 +805,7 @@ static char *glsl_patch_default_get() size_t slen = 0; /* Version need to go first. */ - if (GLEW_VERSION_4_3) { + if (epoxy_gl_version() >= 43) { STR_CONCAT(patch, slen, "#version 430\n"); } else { @@ -817,8 +816,8 @@ static char *glsl_patch_default_get() * don't use an extension for something already available! */ if (GLContext::texture_gather_support) { STR_CONCAT(patch, slen, "#extension GL_ARB_texture_gather: enable\n"); - /* Some drivers don't agree on GLEW_ARB_texture_gather and the actual support in the - * shader so double check the preprocessor define (see T56544). */ + /* Some drivers don't agree on epoxy_has_gl_extension("GL_ARB_texture_gather") and the actual + * support in the shader so double check the preprocessor define (see T56544). 
*/ STR_CONCAT(patch, slen, "#ifdef GL_ARB_texture_gather\n"); STR_CONCAT(patch, slen, "# define GPU_ARB_texture_gather\n"); STR_CONCAT(patch, slen, "#endif\n"); @@ -836,7 +835,7 @@ static char *glsl_patch_default_get() STR_CONCAT(patch, slen, "#extension GL_ARB_texture_cube_map_array : enable\n"); STR_CONCAT(patch, slen, "#define GPU_ARB_texture_cube_map_array\n"); } - if (GLEW_ARB_conservative_depth) { + if (epoxy_has_gl_extension("GL_ARB_conservative_depth")) { STR_CONCAT(patch, slen, "#extension GL_ARB_conservative_depth : enable\n"); } if (GPU_shader_image_load_store_support()) { @@ -1137,108 +1136,6 @@ void GLShader::uniform_int(int location, int comp_len, int array_size, const int /** \name GPUVertFormat from Shader * \{ */ -static uint calc_component_size(const GLenum gl_type) -{ - switch (gl_type) { - case GL_FLOAT_VEC2: - case GL_INT_VEC2: - case GL_UNSIGNED_INT_VEC2: - return 2; - case GL_FLOAT_VEC3: - case GL_INT_VEC3: - case GL_UNSIGNED_INT_VEC3: - return 3; - case GL_FLOAT_VEC4: - case GL_FLOAT_MAT2: - case GL_INT_VEC4: - case GL_UNSIGNED_INT_VEC4: - return 4; - case GL_FLOAT_MAT3: - return 9; - case GL_FLOAT_MAT4: - return 16; - case GL_FLOAT_MAT2x3: - case GL_FLOAT_MAT3x2: - return 6; - case GL_FLOAT_MAT2x4: - case GL_FLOAT_MAT4x2: - return 8; - case GL_FLOAT_MAT3x4: - case GL_FLOAT_MAT4x3: - return 12; - default: - return 1; - } -} - -static void get_fetch_mode_and_comp_type(int gl_type, - GPUVertCompType *r_comp_type, - GPUVertFetchMode *r_fetch_mode) -{ - switch (gl_type) { - case GL_FLOAT: - case GL_FLOAT_VEC2: - case GL_FLOAT_VEC3: - case GL_FLOAT_VEC4: - case GL_FLOAT_MAT2: - case GL_FLOAT_MAT3: - case GL_FLOAT_MAT4: - case GL_FLOAT_MAT2x3: - case GL_FLOAT_MAT2x4: - case GL_FLOAT_MAT3x2: - case GL_FLOAT_MAT3x4: - case GL_FLOAT_MAT4x2: - case GL_FLOAT_MAT4x3: - *r_comp_type = GPU_COMP_F32; - *r_fetch_mode = GPU_FETCH_FLOAT; - break; - case GL_INT: - case GL_INT_VEC2: - case GL_INT_VEC3: - case GL_INT_VEC4: - *r_comp_type = GPU_COMP_I32; - *r_fetch_mode = GPU_FETCH_INT; - break; - case GL_UNSIGNED_INT: - case GL_UNSIGNED_INT_VEC2: - case GL_UNSIGNED_INT_VEC3: - case GL_UNSIGNED_INT_VEC4: - *r_comp_type = GPU_COMP_U32; - *r_fetch_mode = GPU_FETCH_INT; - break; - default: - BLI_assert(0); - } -} - -void GLShader::vertformat_from_shader(GPUVertFormat *format) const -{ - GPU_vertformat_clear(format); - - GLint attr_len; - glGetProgramiv(shader_program_, GL_ACTIVE_ATTRIBUTES, &attr_len); - - for (int i = 0; i < attr_len; i++) { - char name[256]; - GLenum gl_type; - GLint size; - glGetActiveAttrib(shader_program_, i, sizeof(name), nullptr, &size, &gl_type, name); - - /* Ignore OpenGL names like `gl_BaseInstanceARB`, `gl_InstanceID` and `gl_VertexID`. 
*/ - if (glGetAttribLocation(shader_program_, name) == -1) { - continue; - } - - GPUVertCompType comp_type; - GPUVertFetchMode fetch_mode; - get_fetch_mode_and_comp_type(gl_type, &comp_type, &fetch_mode); - - int comp_len = calc_component_size(gl_type) * size; - - GPU_vertformat_attr_add(format, name, comp_type, comp_len, fetch_mode); - } -} - int GLShader::program_handle_get() const { return (int)this->shader_program_; diff --git a/source/blender/gpu/opengl/gl_shader.hh b/source/blender/gpu/opengl/gl_shader.hh index 9c21d0c6230..bebbb2fa82e 100644 --- a/source/blender/gpu/opengl/gl_shader.hh +++ b/source/blender/gpu/opengl/gl_shader.hh @@ -9,7 +9,7 @@ #include "MEM_guardedalloc.h" -#include "glew-mx.h" +#include <epoxy/gl.h> #include "gpu_shader_create_info.hh" #include "gpu_shader_private.hh" @@ -67,8 +67,6 @@ class GLShader : public Shader { void uniform_float(int location, int comp_len, int array_size, const float *data) override; void uniform_int(int location, int comp_len, int array_size, const int *data) override; - void vertformat_from_shader(GPUVertFormat *format) const override; - /** DEPRECATED: Kept only because of BGL API. */ int program_handle_get() const override; diff --git a/source/blender/gpu/opengl/gl_shader_interface.cc b/source/blender/gpu/opengl/gl_shader_interface.cc index f6a7eee80c3..b230706b020 100644 --- a/source/blender/gpu/opengl/gl_shader_interface.cc +++ b/source/blender/gpu/opengl/gl_shader_interface.cc @@ -9,7 +9,6 @@ #include "BLI_bitmap.h" -#include "gl_backend.hh" #include "gl_batch.hh" #include "gl_context.hh" @@ -17,6 +16,7 @@ #include "GPU_capabilities.h" +using namespace blender::gpu::shader; namespace blender::gpu { /* -------------------------------------------------------------------- */ @@ -152,6 +152,52 @@ static inline int ssbo_binding(int32_t program, uint32_t ssbo_index) /** \name Creation / Destruction * \{ */ +static Type gpu_type_from_gl_type(int gl_type) +{ + switch (gl_type) { + case GL_FLOAT: + return Type::FLOAT; + case GL_FLOAT_VEC2: + return Type::VEC2; + case GL_FLOAT_VEC3: + return Type::VEC3; + case GL_FLOAT_VEC4: + return Type::VEC4; + case GL_FLOAT_MAT3: + return Type::MAT3; + case GL_FLOAT_MAT4: + return Type::MAT4; + case GL_UNSIGNED_INT: + return Type::UINT; + case GL_UNSIGNED_INT_VEC2: + return Type::UVEC2; + case GL_UNSIGNED_INT_VEC3: + return Type::UVEC3; + case GL_UNSIGNED_INT_VEC4: + return Type::UVEC4; + case GL_INT: + return Type::INT; + case GL_INT_VEC2: + return Type::IVEC2; + case GL_INT_VEC3: + return Type::IVEC3; + case GL_INT_VEC4: + return Type::IVEC4; + case GL_BOOL: + return Type::BOOL; + case GL_FLOAT_MAT2: + case GL_FLOAT_MAT2x3: + case GL_FLOAT_MAT2x4: + case GL_FLOAT_MAT3x2: + case GL_FLOAT_MAT3x4: + case GL_FLOAT_MAT4x2: + case GL_FLOAT_MAT4x3: + default: + BLI_assert(0); + } + return Type::FLOAT; +} + GLShaderInterface::GLShaderInterface(GLuint program) { /* Necessary to make #glUniform works. */ @@ -247,6 +293,9 @@ GLShaderInterface::GLShaderInterface(GLuint program) name_buffer_offset += set_input_name(input, name, name_len); enabled_attr_mask_ |= (1 << input->location); + + /* Used in `GPU_shader_get_attribute_info`. */ + attr_types_[input->location] = (uint8_t)gpu_type_from_gl_type(type); } /* Uniform Blocks */ @@ -319,6 +368,13 @@ GLShaderInterface::GLShaderInterface(GLuint program) builtin_blocks_[u] = (block != nullptr) ? 
block->binding : -1; } + /* Builtin Storage Buffers */ + for (int32_t u_int = 0; u_int < GPU_NUM_STORAGE_BUFFERS; u_int++) { + GPUStorageBufferBuiltin u = static_cast<GPUStorageBufferBuiltin>(u_int); + const ShaderInput *block = this->ssbo_get(builtin_storage_block_name(u)); + builtin_buffers_[u] = (block != nullptr) ? block->binding : -1; + } + MEM_freeN(uniforms_from_blocks); /* Resize name buffer to save some memory. */ @@ -399,7 +455,11 @@ GLShaderInterface::GLShaderInterface(GLuint program, const shader::ShaderCreateI } if (input->location != -1) { enabled_attr_mask_ |= (1 << input->location); + + /* Used in `GPU_shader_get_attribute_info`. */ + attr_types_[input->location] = (uint8_t)attr.type; } + input++; } @@ -482,6 +542,13 @@ GLShaderInterface::GLShaderInterface(GLuint program, const shader::ShaderCreateI builtin_blocks_[u] = (block != nullptr) ? block->binding : -1; } + /* Builtin Storage Buffers */ + for (int32_t u_int = 0; u_int < GPU_NUM_STORAGE_BUFFERS; u_int++) { + GPUStorageBufferBuiltin u = static_cast<GPUStorageBufferBuiltin>(u_int); + const ShaderInput *block = this->ssbo_get(builtin_storage_block_name(u)); + builtin_buffers_[u] = (block != nullptr) ? block->binding : -1; + } + this->sort_inputs(); // this->debug_print(); diff --git a/source/blender/gpu/opengl/gl_shader_interface.hh b/source/blender/gpu/opengl/gl_shader_interface.hh index e3dce31758b..e31879d4340 100644 --- a/source/blender/gpu/opengl/gl_shader_interface.hh +++ b/source/blender/gpu/opengl/gl_shader_interface.hh @@ -16,8 +16,6 @@ #include "BLI_vector.hh" -#include "glew-mx.h" - #include "gpu_shader_create_info.hh" #include "gpu_shader_interface.hh" diff --git a/source/blender/gpu/opengl/gl_state.cc b/source/blender/gpu/opengl/gl_state.cc index 68a88938f69..46422124112 100644 --- a/source/blender/gpu/opengl/gl_state.cc +++ b/source/blender/gpu/opengl/gl_state.cc @@ -12,10 +12,7 @@ #include "GPU_capabilities.h" -#include "glew-mx.h" - #include "gl_context.hh" -#include "gl_debug.hh" #include "gl_framebuffer.hh" #include "gl_texture.hh" @@ -566,14 +563,14 @@ void GLStateManager::image_bind(Texture *tex_, int unit) } images_[unit] = tex->tex_id_; formats_[unit] = to_gl_internal_format(tex->format_); - tex->is_bound_ = true; + tex->is_bound_image_ = true; dirty_image_binds_ |= 1ULL << unit; } void GLStateManager::image_unbind(Texture *tex_) { GLTexture *tex = static_cast<GLTexture *>(tex_); - if (!tex->is_bound_) { + if (!tex->is_bound_image_) { return; } @@ -584,7 +581,7 @@ void GLStateManager::image_unbind(Texture *tex_) dirty_image_binds_ |= 1ULL << i; } } - tex->is_bound_ = false; + tex->is_bound_image_ = false; } void GLStateManager::image_unbind_all() diff --git a/source/blender/gpu/opengl/gl_state.hh b/source/blender/gpu/opengl/gl_state.hh index f29eefbca82..74c68e51755 100644 --- a/source/blender/gpu/opengl/gl_state.hh +++ b/source/blender/gpu/opengl/gl_state.hh @@ -13,7 +13,7 @@ #include "gpu_state_private.hh" -#include "glew-mx.h" +#include <epoxy/gl.h> namespace blender { namespace gpu { diff --git a/source/blender/gpu/opengl/gl_storage_buffer.cc b/source/blender/gpu/opengl/gl_storage_buffer.cc index b30674fe5fa..5d876308b3c 100644 --- a/source/blender/gpu/opengl/gl_storage_buffer.cc +++ b/source/blender/gpu/opengl/gl_storage_buffer.cc @@ -5,8 +5,6 @@ * \ingroup gpu */ -#include "BKE_global.h" - #include "BLI_string.h" #include "gpu_backend.hh" @@ -74,7 +72,7 @@ void GLStorageBuf::bind(int slot) if (slot >= GLContext::max_ssbo_binds) { fprintf( stderr, - "Error: Trying to bind \"%s\" ssbo to slot 
%d which is above the reported limit of %d.", + "Error: Trying to bind \"%s\" ssbo to slot %d which is above the reported limit of %d.\n", name_, slot, GLContext::max_ssbo_binds); @@ -168,6 +166,23 @@ void GLStorageBuf::copy_sub(VertBuf *src_, uint dst_offset, uint src_offset, uin } } +void GLStorageBuf::read(void *data) +{ + if (ssbo_id_ == 0) { + this->init(); + } + + if (GLContext::direct_state_access_support) { + glGetNamedBufferSubData(ssbo_id_, 0, size_in_bytes_, data); + } + else { + /* This binds the buffer to GL_ARRAY_BUFFER and upload the data if any. */ + glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo_id_); + glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, size_in_bytes_, data); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + } +} + /** \} */ } // namespace blender::gpu diff --git a/source/blender/gpu/opengl/gl_storage_buffer.hh b/source/blender/gpu/opengl/gl_storage_buffer.hh index 96052fe0065..680ce911bc7 100644 --- a/source/blender/gpu/opengl/gl_storage_buffer.hh +++ b/source/blender/gpu/opengl/gl_storage_buffer.hh @@ -11,8 +11,6 @@ #include "gpu_storage_buffer_private.hh" -#include "glew-mx.h" - namespace blender { namespace gpu { @@ -37,6 +35,7 @@ class GLStorageBuf : public StorageBuf { void unbind() override; void clear(eGPUTextureFormat internal_format, eGPUDataFormat data_format, void *data) override; void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) override; + void read(void *data) override; /* Special internal function to bind SSBOs to indirect argument targets. */ void bind_as(GLenum target); diff --git a/source/blender/gpu/opengl/gl_texture.cc b/source/blender/gpu/opengl/gl_texture.cc index 14f84273925..02fc7dfdb5f 100644 --- a/source/blender/gpu/opengl/gl_texture.cc +++ b/source/blender/gpu/opengl/gl_texture.cc @@ -5,8 +5,6 @@ * \ingroup gpu */ -#include "BKE_global.h" - #include "DNA_userdef_types.h" #include "GPU_capabilities.h" @@ -42,6 +40,7 @@ GLTexture::~GLTexture() if (ctx != nullptr && is_bound_) { /* This avoid errors when the texture is still inside the bound texture array. */ ctx->state_manager->texture_unbind(this); + ctx->state_manager->image_unbind(this); } GLContext::tex_free(tex_id_); } @@ -312,6 +311,12 @@ void GLTexture::update_sub( */ void GLTexture::generate_mipmap() { + /* Allow users to provide mipmaps stored in compressed textures. + * Skip generating mipmaps to avoid overriding the existing ones. */ + if (format_flag_ & GPU_FORMAT_COMPRESSED) { + return; + } + /* Some drivers have bugs when using #glGenerateMipmap with depth textures (see T56789). * In this case we just create a complete texture with mipmaps manually without * down-sampling. You must initialize the texture levels using other methods like @@ -598,7 +603,7 @@ bool GLTexture::proxy_check(int mip) { /* Manual validation first, since some implementation have issues with proxy creation. */ int max_size = GPU_max_texture_size(); - int max_3d_size = GLContext::max_texture_3d_size; + int max_3d_size = GPU_max_texture_3d_size(); int max_cube_size = GLContext::max_cubemap_size; int size[3] = {1, 1, 1}; this->mip_size_get(mip, size); diff --git a/source/blender/gpu/opengl/gl_texture.hh b/source/blender/gpu/opengl/gl_texture.hh index e5b879f1f15..b7d72455c25 100644 --- a/source/blender/gpu/opengl/gl_texture.hh +++ b/source/blender/gpu/opengl/gl_texture.hh @@ -13,8 +13,6 @@ #include "gpu_texture_private.hh" -#include "glew-mx.h" - struct GPUFrameBuffer; namespace blender { @@ -33,10 +31,12 @@ class GLTexture : public Texture { /** opengl identifier for texture. 
*/ GLuint tex_id_ = 0; /** Legacy workaround for texture copy. Created when using framebuffer_get(). */ - struct GPUFrameBuffer *framebuffer_ = NULL; + struct GPUFrameBuffer *framebuffer_ = nullptr; /** True if this texture is bound to at least one texture unit. */ /* TODO(fclem): How do we ensure thread safety here? */ bool is_bound_ = false; + /** Same as is_bound_ but for image slots. */ + bool is_bound_image_ = false; /** True if pixels in the texture have been initialized. */ bool has_pixels_ = false; diff --git a/source/blender/gpu/opengl/gl_uniform_buffer.cc b/source/blender/gpu/opengl/gl_uniform_buffer.cc index b8bcaf0047e..022fbcfdf29 100644 --- a/source/blender/gpu/opengl/gl_uniform_buffer.cc +++ b/source/blender/gpu/opengl/gl_uniform_buffer.cc @@ -5,14 +5,10 @@ * \ingroup gpu */ -#include "BKE_global.h" - #include "BLI_string.h" -#include "gpu_backend.hh" #include "gpu_context_private.hh" -#include "gl_backend.hh" #include "gl_debug.hh" #include "gl_uniform_buffer.hh" @@ -69,11 +65,12 @@ void GLUniformBuf::update(const void *data) void GLUniformBuf::bind(int slot) { if (slot >= GLContext::max_ubo_binds) { - fprintf(stderr, - "Error: Trying to bind \"%s\" ubo to slot %d which is above the reported limit of %d.", - name_, - slot, - GLContext::max_ubo_binds); + fprintf( + stderr, + "Error: Trying to bind \"%s\" ubo to slot %d which is above the reported limit of %d.\n", + name_, + slot, + GLContext::max_ubo_binds); return; } diff --git a/source/blender/gpu/opengl/gl_uniform_buffer.hh b/source/blender/gpu/opengl/gl_uniform_buffer.hh index 8d945a8e7dc..e602532dc5a 100644 --- a/source/blender/gpu/opengl/gl_uniform_buffer.hh +++ b/source/blender/gpu/opengl/gl_uniform_buffer.hh @@ -11,8 +11,6 @@ #include "gpu_uniform_buffer_private.hh" -#include "glew-mx.h" - namespace blender { namespace gpu { diff --git a/source/blender/gpu/opengl/gl_vertex_array.cc b/source/blender/gpu/opengl/gl_vertex_array.cc index cfcf77fe705..6897ac9f4a2 100644 --- a/source/blender/gpu/opengl/gl_vertex_array.cc +++ b/source/blender/gpu/opengl/gl_vertex_array.cc @@ -7,11 +7,11 @@ #include "gpu_shader_interface.hh" #include "gpu_vertex_buffer_private.hh" -#include "gpu_vertex_format_private.h" #include "gl_batch.hh" #include "gl_context.hh" #include "gl_index_buffer.hh" +#include "gl_storage_buffer.hh" #include "gl_vertex_buffer.hh" #include "gl_vertex_array.hh" @@ -22,7 +22,7 @@ namespace blender::gpu { /** \name Vertex Array Bindings * \{ */ -/* Returns enabled vertex pointers as a bitflag (one bit per attrib). */ +/** Returns enabled vertex pointers as a bit-flag (one bit per attribute). 
*/ static uint16_t vbo_bind(const ShaderInterface *interface, const GPUVertFormat *format, uint v_first, @@ -119,6 +119,18 @@ void GLVertArray::update_bindings(const GLuint vao, } } + if (batch->resource_id_buf) { + const ShaderInput *input = interface->attr_get("drw_ResourceID"); + if (input) { + dynamic_cast<GLStorageBuf *>(unwrap(batch->resource_id_buf))->bind_as(GL_ARRAY_BUFFER); + glEnableVertexAttribArray(input->location); + glVertexAttribDivisor(input->location, 1); + glVertexAttribIPointer( + input->location, 1, to_gl(GPU_COMP_I32), sizeof(uint32_t), (GLvoid *)nullptr); + attr_mask &= ~(1 << input->location); + } + } + if (attr_mask != 0 && GLContext::vertex_attrib_binding_support) { for (uint16_t mask = 1, a = 0; a < 16; a++, mask <<= 1) { if (attr_mask & mask) { diff --git a/source/blender/gpu/opengl/gl_vertex_array.hh b/source/blender/gpu/opengl/gl_vertex_array.hh index d1d6c5604b5..4f417beed29 100644 --- a/source/blender/gpu/opengl/gl_vertex_array.hh +++ b/source/blender/gpu/opengl/gl_vertex_array.hh @@ -7,8 +7,6 @@ #pragma once -#include "glew-mx.h" - #include "GPU_batch.h" #include "gl_shader_interface.hh" diff --git a/source/blender/gpu/opengl/gl_vertex_buffer.hh b/source/blender/gpu/opengl/gl_vertex_buffer.hh index e0a21587b60..deb966961f2 100644 --- a/source/blender/gpu/opengl/gl_vertex_buffer.hh +++ b/source/blender/gpu/opengl/gl_vertex_buffer.hh @@ -9,8 +9,6 @@ #include "MEM_guardedalloc.h" -#include "glew-mx.h" - #include "GPU_texture.h" #include "gpu_vertex_buffer_private.hh" diff --git a/source/blender/gpu/shaders/common/gpu_shader_common_color_utils.glsl b/source/blender/gpu/shaders/common/gpu_shader_common_color_utils.glsl index fe89985ae7f..33108d3a989 100644 --- a/source/blender/gpu/shaders/common/gpu_shader_common_color_utils.glsl +++ b/source/blender/gpu/shaders/common/gpu_shader_common_color_utils.glsl @@ -140,6 +140,84 @@ void hsl_to_rgb(vec4 hsl, out vec4 outcol) outcol = vec4((nr - 0.5) * chroma + l, (ng - 0.5) * chroma + l, (nb - 0.5) * chroma + l, hsl.w); } +/* ** YCCA to RGBA ** */ + +void ycca_to_rgba_itu_601(vec4 ycca, out vec4 color) +{ + ycca.xyz *= 255.0; + ycca.xyz -= vec3(16.0, 128.0, 128.0); + color.rgb = mat3(vec3(1.164), 0.0, -0.392, 2.017, 1.596, -0.813, 0.0) * ycca.xyz; + color.rgb /= 255.0; + color.a = ycca.a; +} + +void ycca_to_rgba_itu_709(vec4 ycca, out vec4 color) +{ + ycca.xyz *= 255.0; + ycca.xyz -= vec3(16.0, 128.0, 128.0); + color.rgb = mat3(vec3(1.164), 0.0, -0.213, 2.115, 1.793, -0.534, 0.0) * ycca.xyz; + color.rgb /= 255.0; + color.a = ycca.a; +} + +void ycca_to_rgba_jpeg(vec4 ycca, out vec4 color) +{ + ycca.xyz *= 255.0; + color.rgb = mat3(vec3(1.0), 0.0, -0.34414, 1.772, 1.402, -0.71414, 0.0) * ycca.xyz; + color.rgb += vec3(-179.456, 135.45984, -226.816); + color.rgb /= 255.0; + color.a = ycca.a; +} + +/* ** RGBA to YCCA ** */ + +void rgba_to_ycca_itu_601(vec4 rgba, out vec4 ycca) +{ + rgba.rgb *= 255.0; + ycca.xyz = mat3(0.257, -0.148, 0.439, 0.504, -0.291, -0.368, 0.098, 0.439, -0.071) * rgba.rgb; + ycca.xyz += vec3(16.0, 128.0, 128.0); + ycca.xyz /= 255.0; + ycca.a = rgba.a; +} + +void rgba_to_ycca_itu_709(vec4 rgba, out vec4 ycca) +{ + rgba.rgb *= 255.0; + ycca.xyz = mat3(0.183, -0.101, 0.439, 0.614, -0.338, -0.399, 0.062, 0.439, -0.040) * rgba.rgb; + ycca.xyz += vec3(16.0, 128.0, 128.0); + ycca.xyz /= 255.0; + ycca.a = rgba.a; +} + +void rgba_to_ycca_jpeg(vec4 rgba, out vec4 ycca) +{ + rgba.rgb *= 255.0; + ycca.xyz = mat3(0.299, -0.16874, 0.5, 0.587, -0.33126, -0.41869, 0.114, 0.5, -0.08131) * + rgba.rgb; + ycca.xyz += 
vec3(0.0, 128.0, 128.0); + ycca.xyz /= 255.0; + ycca.a = rgba.a; +} + +/* ** YUVA to RGBA ** */ + +void yuva_to_rgba_itu_709(vec4 yuva, out vec4 color) +{ + color.rgb = mat3(vec3(1.0), 0.0, -0.21482, 2.12798, 1.28033, -0.38059, 0.0) * yuva.xyz; + color.a = yuva.a; +} + +/* ** RGBA to YUVA ** */ + +void rgba_to_yuva_itu_709(vec4 rgba, out vec4 yuva) +{ + yuva.xyz = mat3(0.2126, -0.09991, 0.615, 0.7152, -0.33609, -0.55861, 0.0722, 0.436, -0.05639) * + rgba.rgb; + yuva.a = rgba.a; +} + +/* ** Alpha Handling ** */ + void color_alpha_clear(vec4 color, out vec4 result) { result = vec4(color.rgb, 1.0); @@ -147,15 +225,50 @@ void color_alpha_clear(vec4 color, out vec4 result) void color_alpha_premultiply(vec4 color, out vec4 result) { - result = vec4(color.rgb * color.a, 1.0); + result = vec4(color.rgb * color.a, color.a); } void color_alpha_unpremultiply(vec4 color, out vec4 result) { if (color.a == 0.0 || color.a == 1.0) { - result = vec4(color.rgb, 1.0); + result = color; } else { - result = vec4(color.rgb / color.a, 1.0); + result = vec4(color.rgb / color.a, color.a); + } +} + +float linear_rgb_to_srgb(float color) +{ + if (color < 0.0031308) { + return (color < 0.0) ? 0.0 : color * 12.92; + } + + return 1.055 * pow(color, 1.0 / 2.4) - 0.055; +} + +vec3 linear_rgb_to_srgb(vec3 color) +{ + return vec3( + linear_rgb_to_srgb(color.r), linear_rgb_to_srgb(color.g), linear_rgb_to_srgb(color.b)); +} + +float srgb_to_linear_rgb(float color) +{ + if (color < 0.04045) { + return (color < 0.0) ? 0.0 : color * (1.0 / 12.92); } + + return pow((color + 0.055) * (1.0 / 1.055), 2.4); +} + +vec3 srgb_to_linear_rgb(vec3 color) +{ + return vec3( + srgb_to_linear_rgb(color.r), srgb_to_linear_rgb(color.g), srgb_to_linear_rgb(color.b)); +} + +float get_luminance(vec3 color, vec3 luminance_coefficients) +{ + return dot(color, luminance_coefficients); } diff --git a/source/blender/gpu/shaders/common/gpu_shader_common_curves.glsl b/source/blender/gpu/shaders/common/gpu_shader_common_curves.glsl index 8948ed77557..db8e114ec7a 100644 --- a/source/blender/gpu/shaders/common/gpu_shader_common_curves.glsl +++ b/source/blender/gpu/shaders/common/gpu_shader_common_curves.glsl @@ -95,6 +95,81 @@ void curves_combined_only(float factor, result = mix(color, result, factor); } +/* Contrary to standard tone curve implementations, the film-like implementation tries to preserve + * the hue of the colors as much as possible. To understand why this might be a problem, consider + * the violet color (0.5, 0.0, 1.0). If this color was to be evaluated at a power curve x^4, the + * color will be blue (0.0625, 0.0, 1.0). So the color changes and not just its luminosity, which + * is what film-like tone curves tries to avoid. + * + * First, the channels with the lowest and highest values are identified and evaluated at the + * curve. Then, the third channel---the median---is computed while maintaining the original hue of + * the color. To do that, we look at the equation for deriving the hue from RGB values. 
Assuming + * the maximum, minimum, and median channels are known, and ignoring the 1/3 period offset of the + * hue, the equation is: + * + * hue = (median - min) / (max - min) [1] + * + * Since we have the new values for the minimum and maximum after evaluating at the curve, we also + * have: + * + * hue = (new_median - new_min) / (new_max - new_min) [2] + * + * Since we want the hue to be equivalent, by equating [1] and [2] and rearranging: + * + * (new_median - new_min) / (new_max - new_min) = (median - min) / (max - min) + * new_median - new_min = (new_max - new_min) * (median - min) / (max - min) + * new_median = new_min + (new_max - new_min) * (median - min) / (max - min) + * new_median = new_min + (median - min) * ((new_max - new_min) / (max - min)) [QED] + * + * Which gives us the median color that preserves the hue. More intuitively, the median is computed + * such that the change in the distance from the median to the minimum is proportional to the + * change in the distance from the minimum to the maximum. Finally, each of the new minimum, + * maximum, and median values are written to the color channel that they were originally extracted + * from. */ +void curves_film_like(float factor, + vec4 color, + vec4 black_level, + vec4 white_level, + sampler1DArray curve_map, + const float layer, + float range_minimum, + float range_divider, + float start_slope, + float end_slope, + out vec4 result) +{ + vec4 balanced = white_balance(color, black_level, white_level); + + /* Find the maximum, minimum, and median of the color channels. */ + float minimum = min(balanced.r, min(balanced.g, balanced.b)); + float maximum = max(balanced.r, max(balanced.g, balanced.b)); + float median = max(min(balanced.r, balanced.g), min(balanced.b, max(balanced.r, balanced.g))); + + /* Evaluate alpha curve map at the maximum and minimum channels. The alpha curve is the Combined + * curve in the UI. */ + float min_parameter = NORMALIZE_PARAMETER(minimum, range_minimum, range_divider); + float max_parameter = NORMALIZE_PARAMETER(maximum, range_minimum, range_divider); + float new_min = texture(curve_map, vec2(min_parameter, layer)).a; + float new_max = texture(curve_map, vec2(max_parameter, layer)).a; + + /* Then, extrapolate if needed. */ + new_min = extrapolate_if_needed(min_parameter, new_min, start_slope, end_slope); + new_max = extrapolate_if_needed(max_parameter, new_max, start_slope, end_slope); + + /* Compute the new median using the ratio between the new and the original range. */ + float scaling_ratio = (new_max - new_min) / (maximum - minimum); + float new_median = new_min + (median - minimum) * scaling_ratio; + + /* Write each value to its original channel. 
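 *
 * (Editorial aside, illustration only, numbers chosen arbitrarily: a quick worked example of
 * the hue-preserving median computed above. With balanced = (0.8, 0.5, 0.2), the minimum is
 * 0.2, the maximum is 0.8, and the median is 0.5. Suppose the curve maps the minimum to 0.1
 * and the maximum to 0.9. Then:
 *
 *   scaling_ratio = (0.9 - 0.1) / (0.8 - 0.2) = 4 / 3
 *   new_median    = 0.1 + (0.5 - 0.2) * (4 / 3) = 0.5
 *
 * The median sits halfway between the minimum and maximum both before and after evaluation,
 * which is exactly the property that keeps the hue unchanged.)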
*/ + bvec3 channel_is_min = equal(balanced.rgb, vec3(minimum)); + vec3 median_or_min = mix(vec3(new_median), vec3(new_min), channel_is_min); + bvec3 channel_is_max = equal(balanced.rgb, vec3(maximum)); + result.rgb = mix(median_or_min, vec3(new_max), channel_is_max); + result.a = color.a; + + result = mix(color, result, clamp(factor, 0.0, 1.0)); +} + void curves_vector(vec3 vector, sampler1DArray curve_map, const float layer, diff --git a/source/blender/gpu/shaders/common/gpu_shader_common_math_utils.glsl b/source/blender/gpu/shaders/common/gpu_shader_common_math_utils.glsl index 124654963fd..1ba22b4c5da 100644 --- a/source/blender/gpu/shaders/common/gpu_shader_common_math_utils.glsl +++ b/source/blender/gpu/shaders/common/gpu_shader_common_math_utils.glsl @@ -34,6 +34,17 @@ float compatible_pow(float x, float y) return pow(x, y); } +/* A version of pow that returns a fallback value if the computation is undefined. From the spec: + * The result is undefined if x < 0 or if x = 0 and y is less than or equal 0. */ +float fallback_pow(float x, float y, float fallback) +{ + if (x < 0.0 || (x == 0.0 && y <= 0.0)) { + return fallback; + } + + return pow(x, y); +} + float wrap(float a, float b, float c) { float range = b - c; @@ -114,8 +125,24 @@ void vector_copy(vec3 normal, out vec3 outnormal) outnormal = normal; } +vec3 fallback_pow(vec3 a, float b, vec3 fallback) +{ + return vec3(fallback_pow(a.x, b, fallback.x), + fallback_pow(a.y, b, fallback.y), + fallback_pow(a.z, b, fallback.z)); +} + /* Matirx Math */ +/* Return a 2D rotation matrix with the angle that the input 2D vector makes with the x axis. */ +mat2 vector_to_rotation_matrix(vec2 vector) +{ + vec2 normalized_vector = normalize(vector); + float cos_angle = normalized_vector.x; + float sin_angle = normalized_vector.y; + return mat2(cos_angle, sin_angle, -sin_angle, cos_angle); +} + mat3 euler_to_mat3(vec3 euler) { float cx = cos(euler.x); diff --git a/source/blender/gpu/shaders/common/gpu_shader_common_mix_rgb.glsl b/source/blender/gpu/shaders/common/gpu_shader_common_mix_rgb.glsl index f9652f1150b..39f3c722dd2 100644 --- a/source/blender/gpu/shaders/common/gpu_shader_common_mix_rgb.glsl +++ b/source/blender/gpu/shaders/common/gpu_shader_common_mix_rgb.glsl @@ -2,28 +2,24 @@ void mix_blend(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); outcol = mix(col1, col2, fac); outcol.a = col1.a; } void mix_add(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); outcol = mix(col1, col1 + col2, fac); outcol.a = col1.a; } void mix_mult(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); outcol = mix(col1, col1 * col2, fac); outcol.a = col1.a; } void mix_screen(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); float facm = 1.0 - fac; outcol = vec4(1.0) - (vec4(facm) + fac * (vec4(1.0) - col2)) * (vec4(1.0) - col1); @@ -32,7 +28,6 @@ void mix_screen(float fac, vec4 col1, vec4 col2, out vec4 outcol) void mix_overlay(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); float facm = 1.0 - fac; outcol = col1; @@ -61,14 +56,30 @@ void mix_overlay(float fac, vec4 col1, vec4 col2, out vec4 outcol) void mix_sub(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); outcol = mix(col1, col1 - col2, fac); outcol.a = col1.a; } void mix_div(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); + float facm = 1.0 - fac; + + outcol = vec4(vec3(0.0), col1.a); 
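  /* (Editorial aside, not part of the patch: the factor clamp was removed from these mix
   * functions, so the factor is used exactly as passed in. As an arbitrary illustration of the
   * zero-division behavior, mix_div(1.0, vec4(0.5, 0.5, 0.5, 1.0), vec4(2.0, 0.0, 1.0, 1.0), out)
   * yields (0.25, 0.0, 0.5, 1.0): the zero green divisor leaves that channel at zero, whereas
   * mix_div_fallback below keeps the first color's 0.5 for that channel.) */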
+ + if (col2.r != 0.0) { + outcol.r = facm * col1.r + fac * col1.r / col2.r; + } + if (col2.g != 0.0) { + outcol.g = facm * col1.g + fac * col1.g / col2.g; + } + if (col2.b != 0.0) { + outcol.b = facm * col1.b + fac * col1.b / col2.b; + } +} + +/* A variant of mix_div that fallback to the first color upon zero division. */ +void mix_div_fallback(float fac, vec4 col1, vec4 col2, out vec4 outcol) +{ float facm = 1.0 - fac; outcol = col1; @@ -86,28 +97,24 @@ void mix_div(float fac, vec4 col1, vec4 col2, out vec4 outcol) void mix_diff(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); outcol = mix(col1, abs(col1 - col2), fac); outcol.a = col1.a; } void mix_dark(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); outcol.rgb = mix(col1.rgb, min(col1.rgb, col2.rgb), fac); outcol.a = col1.a; } void mix_light(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); outcol.rgb = mix(col1.rgb, max(col1.rgb, col2.rgb), fac); outcol.a = col1.a; } void mix_dodge(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); outcol = col1; if (outcol.r != 0.0) { @@ -150,7 +157,6 @@ void mix_dodge(float fac, vec4 col1, vec4 col2, out vec4 outcol) void mix_burn(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); float tmp, facm = 1.0 - fac; outcol = col1; @@ -200,7 +206,6 @@ void mix_burn(float fac, vec4 col1, vec4 col2, out vec4 outcol) void mix_hue(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); float facm = 1.0 - fac; outcol = col1; @@ -220,7 +225,6 @@ void mix_hue(float fac, vec4 col1, vec4 col2, out vec4 outcol) void mix_sat(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); float facm = 1.0 - fac; outcol = col1; @@ -238,7 +242,6 @@ void mix_sat(float fac, vec4 col1, vec4 col2, out vec4 outcol) void mix_val(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); float facm = 1.0 - fac; vec4 hsv, hsv2; @@ -251,7 +254,6 @@ void mix_val(float fac, vec4 col1, vec4 col2, out vec4 outcol) void mix_color(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); float facm = 1.0 - fac; outcol = col1; @@ -272,22 +274,26 @@ void mix_color(float fac, vec4 col1, vec4 col2, out vec4 outcol) void mix_soft(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); float facm = 1.0 - fac; vec4 one = vec4(1.0); vec4 scr = one - (one - col2) * (one - col1); outcol = facm * col1 + fac * ((one - col1) * col2 * col1 + col1 * scr); + outcol.a = col1.a; } void mix_linear(float fac, vec4 col1, vec4 col2, out vec4 outcol) { - fac = clamp(fac, 0.0, 1.0); - outcol = col1 + fac * (2.0 * (col2 - vec4(0.5))); + outcol.a = col1.a; } -void clamp_color(vec3 vec, vec3 min, vec3 max, out vec3 out_vec) +void clamp_color(vec4 vec, const vec4 min, const vec4 max, out vec4 out_vec) { out_vec = clamp(vec, min, max); } + +void multiply_by_alpha(float factor, vec4 color, out float result) +{ + result = factor * color.a; +} diff --git a/source/blender/gpu/shaders/compositor/compositor_alpha_crop.glsl b/source/blender/gpu/shaders/compositor/compositor_alpha_crop.glsl new file mode 100644 index 00000000000..d55c8efd4c6 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_alpha_crop.glsl @@ -0,0 +1,11 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + /* The lower bound is 
inclusive and upper bound is exclusive. */ + bool is_inside = all(greaterThanEqual(texel, lower_bound)) && all(lessThan(texel, upper_bound)); + /* Write the pixel color if it is inside the cropping region, otherwise, write zero. */ + vec4 color = is_inside ? texture_load(input_tx, texel) : vec4(0.0); + imageStore(output_img, texel, color); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_bilateral_blur.glsl b/source/blender/gpu/shaders/compositor/compositor_bilateral_blur.glsl new file mode 100644 index 00000000000..c7c5ada7a9f --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_bilateral_blur.glsl @@ -0,0 +1,31 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + + vec4 center_determinator = texture_load(determinator_tx, texel); + + /* Go over the pixels in the blur window of the specified radius around the center pixel, and for + * pixels whose determinator is close enough to the determinator of the center pixel, accumulate + * their color as well as their weights. */ + float accumulated_weight = 0.0; + vec4 accumulated_color = vec4(0.0); + for (int y = -radius; y <= radius; y++) { + for (int x = -radius; x <= radius; x++) { + vec4 determinator = texture_load(determinator_tx, texel + ivec2(x, y)); + float difference = dot(abs(center_determinator - determinator).rgb, vec3(1.0)); + + if (difference < threshold) { + accumulated_weight += 1.0; + accumulated_color += texture_load(input_tx, texel + ivec2(x, y)); + } + } + } + + /* Write the accumulated color divided by the accumulated weight if any pixel in the window was + * accumulated, otherwise, write a fallback black color. */ + vec4 fallback = vec4(vec3(0.0), 1.0); + vec4 color = (accumulated_weight != 0.0) ? (accumulated_color / accumulated_weight) : fallback; + imageStore(output_img, texel, color); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_bokeh_image.glsl b/source/blender/gpu/shaders/compositor/compositor_bokeh_image.glsl new file mode 100644 index 00000000000..6e98aa9fe17 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_bokeh_image.glsl @@ -0,0 +1,118 @@ +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +/* Get the 2D vertex position of the vertex with the given index in the regular polygon + * representing this bokeh. The polygon is rotated by the rotation amount and have a unit + * circumradius. The regular polygon is one whose vertices' exterior angles are given by + * exterior_angle. See the bokeh function for more information. */ +vec2 get_regular_polygon_vertex_position(int vertex_index) +{ + float angle = exterior_angle * vertex_index - rotation; + return vec2(cos(angle), sin(angle)); +} + +/* Find the closest point to the given point on the given line. This assumes the length of the + * given line is not zero. */ +vec2 closest_point_on_line(vec2 point, vec2 line_start, vec2 line_end) +{ + vec2 line_vector = line_end - line_start; + vec2 point_vector = point - line_start; + float line_length_squared = dot(line_vector, line_vector); + float parameter = dot(point_vector, line_vector) / line_length_squared; + return line_start + line_vector * parameter; +} + +/* Compute the value of the bokeh at the given point. The computed bokeh is essentially a regular + * polygon centered in space having the given circumradius. 
The regular polygon is one whose + * vertices' exterior angles are given by "exterior_angle", which relates to the number of vertices + * n through the equation "exterior angle = 2 pi / n". The regular polygon may additionally morph + * into a shape with the given properties: + * + * - The regular polygon may have a circular hole in its center whose radius is controlled by the + * "catadioptric" value. + * - The regular polygon is rotated by the "rotation" value. + * - The regular polygon can morph into a circle controlled by the "roundness" value, such that it + * becomes a full circle at unit roundness. + * + * The function returns 0 when the point lies inside the regular polygon and 1 otherwise. However, + * at the edges, it returns a narrow band gradient as a form of anti-aliasing. */ +float bokeh(vec2 point, float circumradius) +{ + /* Get the index of the vertex of the regular polygon whose polar angle is maximum but less than + * the polar angle of the given point, taking rotation into account. This essentially finds the + * vertex closest to the given point in the clock-wise direction. */ + float angle = mod(atan(point.y, point.x) + rotation, M_2PI); + int vertex_index = int(angle / exterior_angle); + + /* Compute the shortest distance between the origin and the polygon edge composed from the + * previously selected vertex and the one following it. */ + vec2 first_vertex = get_regular_polygon_vertex_position(vertex_index) * circumradius; + vec2 second_vertex = get_regular_polygon_vertex_position(vertex_index + 1) * circumradius; + vec2 closest_point = closest_point_on_line(point, first_vertex, second_vertex); + float distance_to_edge = length(closest_point); + + /* Mix the distance to the edge with the circumradius, making it tend to the distance to a + * circle when roundness tends to 1. */ + float distance_to_edge_round = mix(distance_to_edge, circumradius, roundness); + + /* The point is outside of the bokeh, so we return 0. */ + float distance = length(point); + if (distance > distance_to_edge_round) { + return 0.0; + } + + /* The point is inside the catadioptric hole and is not part of the bokeh, so we return 0. */ + float catadioptric_distance = distance_to_edge_round * catadioptric; + if (distance < catadioptric_distance) { + return 0.0; + } + + /* The point is very close to the edge of the bokeh, so we return the difference between the + * distance to the edge and the distance as a form of anti-aliasing. */ + if (distance_to_edge_round - distance < 1.0) { + return distance_to_edge_round - distance; + } + + /* The point is very close to the edge of the catadioptric hole, so we return the difference + * between the distance to the hole and the distance as a form of anti-aliasing. */ + if (catadioptric != 0.0 && distance - catadioptric_distance < 1.0) { + return distance - catadioptric_distance; + } + + /* Otherwise, the point is part of the bokeh and we return 1. */ + return 1.0; +} + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + + /* Since we need the regular polygon to occupy the entirety of the output image, the circumradius + * of the regular polygon is half the width of the output image. */ + float circumradius = float(imageSize(output_img).x) / 2.0; + + /* Move the texel coordinates such that the regular polygon is centered. */ + vec2 point = vec2(texel) - circumradius; + + /* Each of the color channels of the output image contains a bokeh with a different circumradius. 
+ * The largest one occupies the whole image as stated above, while the other two have circumradii + * that are shifted by an amount that is proportional to the "lens_shift" value. The alpha + * channel of the output is the average of all three values. */ + float min_shift = abs(lens_shift * circumradius); + float min = mix(bokeh(point, circumradius - min_shift), 0.0, min_shift == circumradius); + + float median_shift = min_shift / 2.0; + float median = bokeh(point, circumradius - median_shift); + + float max = bokeh(point, circumradius); + vec4 bokeh = vec4(min, median, max, (max + median + min) / 3.0); + + /* If the lens shift is negative, swap the min and max bokeh values, which are stored in the red + * and blue channels respectively. Note that we take the absolute value of the lens shift above, + * so the sign of the lens shift only controls this swap. */ + if (lens_shift < 0) { + bokeh = bokeh.zyxw; + } + + imageStore(output_img, texel, bokeh); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_box_mask.glsl b/source/blender/gpu/shaders/compositor/compositor_box_mask.glsl new file mode 100644 index 00000000000..fad23f28fde --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_box_mask.glsl @@ -0,0 +1,27 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + + vec2 uv = vec2(texel) / vec2(domain_size - ivec2(1)); + uv -= location; + uv.y *= float(domain_size.y) / float(domain_size.x); + uv = mat2(cos_angle, -sin_angle, sin_angle, cos_angle) * uv; + bool is_inside = all(lessThan(abs(uv), size)); + + float base_mask_value = texture_load(base_mask_tx, texel).x; + float value = texture_load(mask_value_tx, texel).x; + +#if defined(CMP_NODE_MASKTYPE_ADD) + float output_mask_value = is_inside ? max(base_mask_value, value) : base_mask_value; +#elif defined(CMP_NODE_MASKTYPE_SUBTRACT) + float output_mask_value = is_inside ? clamp(base_mask_value - value, 0.0, 1.0) : base_mask_value; +#elif defined(CMP_NODE_MASKTYPE_MULTIPLY) + float output_mask_value = is_inside ? base_mask_value * value : 0.0; +#elif defined(CMP_NODE_MASKTYPE_NOT) + float output_mask_value = is_inside ? (base_mask_value > 0.0 ? 0.0 : value) : base_mask_value; +#endif + + imageStore(output_mask_img, texel, vec4(output_mask_value)); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_convert.glsl b/source/blender/gpu/shaders/compositor/compositor_convert.glsl new file mode 100644 index 00000000000..044fb057ca5 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_convert.glsl @@ -0,0 +1,8 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + vec4 value = texture_load(input_tx, texel); + imageStore(output_img, texel, CONVERT_EXPRESSION(value)); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_despeckle.glsl b/source/blender/gpu/shaders/compositor/compositor_despeckle.glsl new file mode 100644 index 00000000000..e4743d69d17 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_despeckle.glsl @@ -0,0 +1,70 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +/* Returns true if the given color is close enough to the given reference color within the + * threshold supplied by the user, and returns false otherwise. 
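 *
 * (Editorial aside, illustration only: with threshold = 0.1, the colors (0.50, 0.30, 0.20) and
 * (0.55, 0.35, 0.25) are considered close, since every channel differs by 0.05 < 0.1, while
 * (0.50, 0.30, 0.20) and (0.65, 0.30, 0.20) are not, because the red channel differs by 0.15.
 * The alpha channel is ignored by the comparison.)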
*/ +bool is_close(vec4 reference_color, vec4 color) +{ + return all(lessThan(abs(reference_color - color).rgb, vec3(threshold))); +} + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + + /* A 3x3 weights kernel whose weights are the inverse of the distance to the center of the + * kernel. So the center weight is zero, the corners weights are (1 / sqrt(2)), and the rest + * of the weights are 1. The total sum of weights is 4 plus quadruple the corner weight. */ + float corner_weight = 1.0 / sqrt(2.0); + float sum_of_weights = 4.0 + corner_weight * 4.0; + mat3 weights = mat3(vec3(corner_weight, 1.0, corner_weight), + vec3(1.0, 0.0, 1.0), + vec3(corner_weight, 1.0, corner_weight)); + + vec4 center_color = texture_load(input_tx, texel); + + /* Go over the pixels in the 3x3 window around the center pixel and compute the total sum of + * their colors multiplied by their weights. Additionally, for pixels whose colors are not close + * enough to the color of the center pixel, accumulate their color as well as their weights. */ + vec4 sum_of_colors = vec4(0); + float accumulated_weight = 0.0; + vec4 accumulated_color = vec4(0); + for (int j = 0; j < 3; j++) { + for (int i = 0; i < 3; i++) { + float weight = weights[j][i]; + vec4 color = texture_load(input_tx, texel + ivec2(i - 1, j - 1)) * weight; + sum_of_colors += color; + if (!is_close(center_color, color)) { + accumulated_color += color; + accumulated_weight += weight; + } + } + } + + /* If the accumulated weight is zero, that means all pixels in the 3x3 window are similar and no + * need to despeckle anything, so write the original center color and return. */ + if (accumulated_weight == 0.0) { + imageStore(output_img, texel, center_color); + return; + } + + /* If the ratio between the accumulated weights and the total sum of weights is not larger than + * the user specified neighbor threshold, then the number of pixels in the neighborhood that are + * not close enough to the center pixel is low, and no need to despeckle anything, so write the + * original center color and return. */ + if (accumulated_weight / sum_of_weights < neighbor_threshold) { + imageStore(output_img, texel, center_color); + return; + } + + /* If the weighted average color of the neighborhood is close enough to the center pixel, then no + * need to despeckle anything, so write the original center color and return. */ + if (is_close(center_color, sum_of_colors / sum_of_weights)) { + imageStore(output_img, texel, center_color); + return; + } + + /* We need to despeckle, so write the mean accumulated color. */ + float factor = texture_load(factor_tx, texel).x; + vec4 mean_color = accumulated_color / accumulated_weight; + imageStore(output_img, texel, mix(center_color, mean_color, factor)); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_directional_blur.glsl b/source/blender/gpu/shaders/compositor/compositor_directional_blur.glsl new file mode 100644 index 00000000000..1805cb5a7f5 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_directional_blur.glsl @@ -0,0 +1,21 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + ivec2 input_size = texture_size(input_tx); + + /* Add 0.5 to evaluate the input sampler at the center of the pixel. 
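 * (Editorial aside, illustration only: for a 100x80 input, texel (10, 20) becomes
 * coordinates = (10.5, 20.5), which the loop below samples at the normalized position
 * (10.5 / 100, 20.5 / 80) = (0.105, 0.25625), i.e. exactly the center of that pixel.)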
*/ + vec2 coordinates = vec2(texel) + vec2(0.5); + + /* For each iteration, accumulate the input at the normalize coordinates, hence the divide by + * input size, then transform the coordinates for the next iteration. */ + vec4 accumulated_color = vec4(0.0); + for (int i = 0; i < iterations; i++) { + accumulated_color += texture(input_tx, coordinates / input_size); + coordinates = (mat3(inverse_transformation) * vec3(coordinates, 1.0)).xy; + } + + /* Write the accumulated color divided by the number of iterations. */ + imageStore(output_img, texel, accumulated_color / iterations); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_edge_filter.glsl b/source/blender/gpu/shaders/compositor/compositor_edge_filter.glsl new file mode 100644 index 00000000000..67e27c22602 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_edge_filter.glsl @@ -0,0 +1,31 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + + /* Compute the dot product between the 3x3 window around the pixel and the edge detection kernel + * in the X direction and Y direction. The Y direction kernel is computed by transposing the + * given X direction kernel. */ + vec3 color_x = vec3(0); + vec3 color_y = vec3(0); + for (int j = 0; j < 3; j++) { + for (int i = 0; i < 3; i++) { + vec3 color = texture_load(input_tx, texel + ivec2(i - 1, j - 1)).rgb; + color_x += color * kernel[j][i]; + color_y += color * kernel[i][j]; + } + } + + /* Compute the channel-wise magnitude of the 2D vector composed from the X and Y edge detection + * filter results. */ + vec3 magnitude = sqrt(color_x * color_x + color_y * color_y); + + /* Mix the channel-wise magnitude with the original color at the center of the kernel using the + * input factor. */ + vec4 color = texture_load(input_tx, texel); + magnitude = mix(color.rgb, magnitude, texture_load(factor_tx, texel).x); + + /* Store the channel-wise magnitude with the original alpha of the input. */ + imageStore(output_img, texel, vec4(magnitude, color.a)); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_ellipse_mask.glsl b/source/blender/gpu/shaders/compositor/compositor_ellipse_mask.glsl new file mode 100644 index 00000000000..28f725067e0 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_ellipse_mask.glsl @@ -0,0 +1,27 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + + vec2 uv = vec2(texel) / vec2(domain_size - ivec2(1)); + uv -= location; + uv.y *= float(domain_size.y) / float(domain_size.x); + uv = mat2(cos_angle, -sin_angle, sin_angle, cos_angle) * uv; + bool is_inside = length(uv / radius) < 1.0; + + float base_mask_value = texture_load(base_mask_tx, texel).x; + float value = texture_load(mask_value_tx, texel).x; + +#if defined(CMP_NODE_MASKTYPE_ADD) + float output_mask_value = is_inside ? max(base_mask_value, value) : base_mask_value; +#elif defined(CMP_NODE_MASKTYPE_SUBTRACT) + float output_mask_value = is_inside ? clamp(base_mask_value - value, 0.0, 1.0) : base_mask_value; +#elif defined(CMP_NODE_MASKTYPE_MULTIPLY) + float output_mask_value = is_inside ? base_mask_value * value : 0.0; +#elif defined(CMP_NODE_MASKTYPE_NOT) + float output_mask_value = is_inside ? (base_mask_value > 0.0 ? 
0.0 : value) : base_mask_value; +#endif + + imageStore(output_mask_img, texel, vec4(output_mask_value)); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_filter.glsl b/source/blender/gpu/shaders/compositor/compositor_filter.glsl new file mode 100644 index 00000000000..e501c563dda --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_filter.glsl @@ -0,0 +1,20 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + + /* Compute the dot product between the 3x3 window around the pixel and the filter kernel. */ + vec4 color = vec4(0); + for (int j = 0; j < 3; j++) { + for (int i = 0; i < 3; i++) { + color += texture_load(input_tx, texel + ivec2(i - 1, j - 1)) * kernel[j][i]; + } + } + + /* Mix with the original color at the center of the kernel using the input factor. */ + color = mix(texture_load(input_tx, texel), color, texture_load(factor_tx, texel).x); + + /* Store the color making sure it is not negative. */ + imageStore(output_img, texel, max(color, 0.0)); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_flip.glsl b/source/blender/gpu/shaders/compositor/compositor_flip.glsl new file mode 100644 index 00000000000..919c454ee63 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_flip.glsl @@ -0,0 +1,15 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + ivec2 size = texture_size(input_tx); + ivec2 flipped_texel = texel; + if (flip_x) { + flipped_texel.x = size.x - texel.x - 1; + } + if (flip_y) { + flipped_texel.y = size.y - texel.y - 1; + } + imageStore(output_img, texel, texture_load(input_tx, flipped_texel)); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_image_crop.glsl b/source/blender/gpu/shaders/compositor/compositor_image_crop.glsl new file mode 100644 index 00000000000..f20e033dee4 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_image_crop.glsl @@ -0,0 +1,7 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + imageStore(output_img, texel, texture_load(input_tx, texel + lower_bound)); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_morphological_distance.glsl b/source/blender/gpu/shaders/compositor/compositor_morphological_distance.glsl new file mode 100644 index 00000000000..09f896b7a9d --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_morphological_distance.glsl @@ -0,0 +1,24 @@ +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + + /* Find the minimum/maximum value in the circular window of the given radius around the pixel. By + * circular window, we mean that pixels in the window whose distance to the center of window is + * larger than the given radius are skipped and not considered. Consequently, the dilation or + * erosion that take place produces round results as opposed to squarish ones. This is + * essentially a morphological operator with a circular structuring element. The LIMIT value + * should be FLT_MAX if OPERATOR is min and FLT_MIN if OPERATOR is max. 
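 *
 * (Editorial aside: OPERATOR and LIMIT are presumably supplied by the matching create-info,
 * which is not part of this hunk. A plausible pairing for the dilate case, shown purely for
 * illustration, would be the equivalent of:
 *
 *   #define OPERATOR(a, b) max(a, b)
 *   #define LIMIT FLT_MIN
 *
 * so that the loop below reduces the circular window to its maximum value.)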
*/ + float value = LIMIT; + for (int y = -radius; y <= radius; y++) { + for (int x = -radius; x <= radius; x++) { + if (x * x + y * y <= radius * radius) { + value = OPERATOR(value, texture_load(input_tx, texel + ivec2(x, y), vec4(LIMIT)).x); + } + } + } + + imageStore(output_img, texel, vec4(value)); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_morphological_distance_feather.glsl b/source/blender/gpu/shaders/compositor/compositor_morphological_distance_feather.glsl new file mode 100644 index 00000000000..acdd8a40342 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_morphological_distance_feather.glsl @@ -0,0 +1,101 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +/* The Morphological Distance Feather operation is a linear combination between the result of two + * operations. The first operation is a Gaussian blur with a radius equivalent to the dilate/erode + * distance, which is straightforward and implemented as a separable filter similar to the blur + * operation. + * + * The second operation is an approximation of a morphological inverse distance operation evaluated + * at a distance falloff function. The result of a morphological inverse distance operation is a + * narrow band distance field that starts at its maximum value at boundaries where a difference in + * values took place and linearly deceases until it reaches zero in the span of a number of pixels + * equivalent to the erode/dilate distance. Additionally, instead of linearly decreasing, the user + * may choose a different falloff which is evaluated at the computed distance. For dilation, the + * distance field decreases outwards, and for erosion, the distance field decreased inwards. + * + * The reason why the result of a Gaussian blur is mixed in with the distance field is because the + * distance field is merely approximated and not accurately computed, the defects of which is more + * apparent away from boundaries and especially at corners where the distance field should take a + * circular shape. That's why the Gaussian blur is mostly mixed only further from boundaries. + * + * The morphological inverse distance operation is approximated using a separable implementation + * and intertwined with the Gaussian blur implementation as follows. A search window of a radius + * equivalent to the dilate/erode distance is applied on the image to find either the minimum or + * maximum pixel value multiplied by its corresponding falloff value in the window. For dilation, + * we try to find the maximum, and for erosion, we try to find the minimum. Additionally, we also + * save the falloff value where the minimum or maximum was found. The found value will be that of + * the narrow band distance field and the saved falloff value will be used as the mixing factor + * with the Gaussian blur. + * + * To make sense of the aforementioned algorithm, assume we are dilating a binary image by 5 pixels + * whose half has a value of 1 and the other half has a value of zero. Consider the following: + * + * - A pixel of value 1 already has the maximum possible value, so its value will remain unchanged + * regardless of its position. + * - A pixel of value 0 that is right at the boundary of the 1's region will have a maximum value + * of around 0.8 depending on the falloff. 
That's because the search window intersects the 1's + * region, which when multiplied by the falloff gives the first value of the falloff, which is + * larger than the initially zero value computed at the center of the search window. + * - A pixel of value 0 that is 3 pixels away from the boundary will have a maximum value of around + * 0.4 depending on the falloff. That's because the search window intersects the 1's region, + * which when multiplied by the falloff gives the third value of the falloff, which is larger + * than the initially zero value computed at the center of the search window. + * - Finally, a pixel of value 0 that is 6 pixels away from the boundary will have a maximum value + * of 0, because the search window doesn't intersects the 1's region and only spans zero values. + * + * The previous example demonstrates how the distance field naturally arises, and the same goes for + * the erode case, except the minimum value is computed instead. + */ +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + + /* A value for accumulating the blur result. */ + float accumulated_value = 0.0; + + /* Compute the contribution of the center pixel to the blur result. */ + float center_value = texture_load(input_tx, texel).x; + accumulated_value += center_value * texture_load(weights_tx, 0).x; + + /* Start with the center value as the maximum/minimum distance and reassign to the true maximum + * or minimum in the search loop below. Additionally, the center falloff is always 1.0, so start + * with that. */ + float limit_distance = center_value; + float limit_distance_falloff = 1.0; + + /* Compute the contributions of the pixels to the right and left, noting that the weights and + * falloffs textures only store the weights and falloffs for the positive half, but since the + * they are both symmetric, the same weights and falloffs are used for the negative half and we + * compute both of their contributions. */ + for (int i = 1; i < texture_size(weights_tx); i++) { + float weight = texture_load(weights_tx, i).x; + float falloff = texture_load(falloffs_tx, i).x; + + /* Loop for two iterations, where s takes the value of -1 and 1, which is used as the sign + * needed to evaluated the positive and negative sides as explain above. */ + for (int s = -1; s < 2; s += 2) { + /* Compute the contribution of the pixel to the blur result. */ + float value = texture_load(input_tx, texel + ivec2(s * i, 0)).x; + accumulated_value += value * weight; + + /* The distance is computed such that its highest value is the pixel value itself, so + * multiply the distance falloff by the pixel value. */ + float falloff_distance = value * falloff; + + /* Find either the maximum or the minimum for the dilate and erode cases respectively. */ + if (COMPARE(falloff_distance, limit_distance)) { + limit_distance = falloff_distance; + limit_distance_falloff = falloff; + } + } + } + + /* Mix between the limit distance and the blurred accumulated value such that the limit distance + * is used for pixels closer to the boundary and the blurred value is used for pixels away from + * the boundary. */ + float value = mix(accumulated_value, limit_distance, limit_distance_falloff); + + /* Write the value using the transposed texel. See the execute_distance_feather_horizontal_pass + * method for more information on the rational behind this. 
*/ + imageStore(output_img, texel.yx, vec4(value)); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_morphological_distance_threshold.glsl b/source/blender/gpu/shaders/compositor/compositor_morphological_distance_threshold.glsl new file mode 100644 index 00000000000..e6625e7419f --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_morphological_distance_threshold.glsl @@ -0,0 +1,88 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +/* The Morphological Distance Threshold operation is effectively three consecutive operations + * implemented as a single operation. The three operations are as follows: + * + * .-----------. .--------------. .----------------. + * | Threshold |-->| Dilate/Erode |-->| Distance Inset | + * '-----------' '--------------' '----------------' + * + * The threshold operation just converts the input into a binary image, where the pixel is 1 if it + * is larger than 0.5 and 0 otherwise. Pixels that are 1 in the output of the threshold operation + * are said to be masked. The dilate/erode operation is a dilate or erode morphological operation + * with a circular structuring element depending on the sign of the distance, where it is a dilate + * operation if the distance is positive and an erode operation otherwise. This is equivalent to + * the Morphological Distance operation, see its implementation for more information. Finally, the + * distance inset is an operation that converts the binary image into a narrow band distance field. + * That is, pixels that are unmasked will remain 0, while pixels that are masked will start from + * zero at the boundary of the masked region and linearly increase until reaching 1 in the span of + * a number pixels given by the inset value. + * + * As a performance optimization, the dilate/erode operation is omitted and its effective result is + * achieved by slightly adjusting the distance inset operation. The base distance inset operation + * works by computing the signed distance from the current center pixel to the nearest pixel with a + * different value. Since our image is a binary image, that means that if the pixel is masked, we + * compute the signed distance to the nearest unmasked pixel, and if the pixel unmasked, we compute + * the signed distance to the nearest masked pixel. The distance is positive if the pixel is masked + * and negative otherwise. The distance is then normalized by dividing by the given inset value and + * clamped to the [0, 1] range. Since distances larger than the inset value are eventually clamped, + * the distance search window is limited to a radius equivalent to the inset value. + * + * To archive the effective result of the omitted dilate/erode operation, we adjust the distance + * inset operation as follows. First, we increase the radius of the distance search window by the + * radius of the dilate/erode operation. Then we adjust the resulting narrow band signed distance + * field as follows. + * + * For the erode case, we merely subtract the erode distance, which makes the outermost erode + * distance number of pixels zero due to clamping, consequently achieving the result of the erode, + * while retaining the needed inset because we increased the distance search window by the same + * amount we subtracted. 
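 *
 * (Editorial aside, a small worked example of the erode case with arbitrary numbers: with an
 * inset of 5 and an erode distance of 3, the search radius becomes 5 + 3 = 8. A masked pixel
 * whose nearest unmasked pixel is 4 pixels away gets a signed distance of 4, so its final value
 * is clamp((4 - 3) / 5, 0, 1) = 0.2, while a masked pixel only 2 pixels from the boundary clamps
 * to 0 and is effectively eroded away.)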
+ * + * Similarly, for the dilate case, we add the dilate distance, which makes the dilate distance + * number of pixels just outside of the masked region positive and part of the narrow band distance + * field, consequently achieving the result of the dilate, while at the same time, the innermost + * dilate distance number of pixels become 1 due to clamping, retaining the needed inset because we + * increased the distance search window by the same amount we added. + * + * Since the erode/dilate distance is already signed appropriately as described before, we just add + * it in both cases. */ +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + + /* Apply a threshold operation on the center pixel, where the threshold is currently hard-coded + * at 0.5. The pixels with values larger than the threshold are said to be masked. */ + bool is_center_masked = texture_load(input_tx, texel).x > 0.5; + + /* Since the distance search window will access pixels outside of the bounds of the image, we use + * a texture loader with a fallback value. And since we don't want those values to affect the + * result, the fallback value is chosen such that the inner condition fails, which is when the + * sampled pixel and the center pixel are the same, so choose a fallback that will be considered + * masked if the center pixel is masked and unmasked otherwise. */ + vec4 fallback = vec4(is_center_masked ? 1.0 : 0.0); + + /* Since the distance search window is limited to the given radius, the maximum possible squared + * distance to the center is double the squared radius. */ + int minimum_squared_distance = radius * radius * 2; + + /* Find the squared distance to the nearest different pixel in the search window of the given + * radius. */ + for (int y = -radius; y <= radius; y++) { + for (int x = -radius; x <= radius; x++) { + bool is_sample_masked = texture_load(input_tx, texel + ivec2(x, y), fallback).x > 0.5; + if (is_center_masked != is_sample_masked) { + minimum_squared_distance = min(minimum_squared_distance, x * x + y * y); + } + } + } + + /* Compute the actual distance from the squared distance and assign it an appropriate sign + * depending on whether it lies in a masked region or not. */ + float signed_minimum_distance = sqrt(minimum_squared_distance) * (is_center_masked ? 1.0 : -1.0); + + /* Add the erode/dilate distance and divide by the inset amount as described in the discussion, + * then clamp to the [0, 1] range. */ + float value = clamp((signed_minimum_distance + distance) / inset, 0.0, 1.0); + + imageStore(output_img, texel, vec4(value)); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_morphological_step.glsl b/source/blender/gpu/shaders/compositor/compositor_morphological_step.glsl new file mode 100644 index 00000000000..6992bc2afa5 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_morphological_step.glsl @@ -0,0 +1,19 @@ +#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + + /* Find the minimum/maximum value in the window of the given radius around the pixel. This is + * essentially a morphological operator with a square structuring element. The LIMIT value should + * be FLT_MAX if OPERATOR is min and FLT_MIN if OPERATOR is max. 
*/ + float value = LIMIT; + for (int i = -radius; i <= radius; i++) { + value = OPERATOR(value, texture_load(input_tx, texel + ivec2(i, 0), vec4(LIMIT)).x); + } + + /* Write the value using the transposed texel. See the execute_step_horizontal_pass method for + * more information on the rational behind this. */ + imageStore(output_img, texel.yx, vec4(value)); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_projector_lens_distortion.glsl b/source/blender/gpu/shaders/compositor/compositor_projector_lens_distortion.glsl new file mode 100644 index 00000000000..ab44dac93e6 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_projector_lens_distortion.glsl @@ -0,0 +1,16 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + + /* Get the normalized coordinates of the pixel centers. */ + vec2 normalized_texel = (vec2(texel) + vec2(0.5)) / vec2(texture_size(input_tx)); + + /* Sample the red and blue channels shifted by the dispersion amount. */ + const float red = texture(input_tx, normalized_texel + vec2(dispersion, 0.0)).r; + const float green = texture_load(input_tx, texel).g; + const float blue = texture(input_tx, normalized_texel - vec2(dispersion, 0.0)).b; + + imageStore(output_img, texel, vec4(red, green, blue, 1.0)); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_realize_on_domain.glsl b/source/blender/gpu/shaders/compositor/compositor_realize_on_domain.glsl new file mode 100644 index 00000000000..b8561e5f059 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_realize_on_domain.glsl @@ -0,0 +1,29 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + + /* Add 0.5 to evaluate the input sampler at the center of the pixel. */ + vec2 coordinates = vec2(texel) + vec2(0.5); + + /* Transform the input image by transforming the domain coordinates with the inverse of input + * image's transformation. The inverse transformation is an affine matrix and thus the + * coordinates should be in homogeneous coordinates. */ + coordinates = (mat3(inverse_transformation) * vec3(coordinates, 1.0)).xy; + + /* Since an input image with an identity transformation is supposed to be centered in the domain, + * we subtract the offset between the lower left corners of the input image and the domain, which + * is half the difference between their sizes, because the difference in size is on both sides of + * the centered image. Additionally, we floor the offset to retain the 0.5 offset added above in + * case the difference in sizes was odd. */ + ivec2 domain_size = imageSize(domain_img); + ivec2 input_size = texture_size(input_tx); + vec2 offset = floor((domain_size - input_size) / 2.0); + + /* Subtract the offset and divide by the input image size to get the relevant coordinates into + * the sampler's expected [0, 1] range. 
*/ + vec2 normalized_coordinates = (coordinates - offset) / input_size; + + imageStore(domain_img, texel, texture(input_tx, normalized_coordinates)); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_screen_lens_distortion.glsl b/source/blender/gpu/shaders/compositor/compositor_screen_lens_distortion.glsl new file mode 100644 index 00000000000..dc572ea5aaf --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_screen_lens_distortion.glsl @@ -0,0 +1,151 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_hash.glsl) +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +/* A model that approximates lens distortion parameterized by a distortion parameter and dependent + * on the squared distance to the center of the image. The distorted pixel is then computed as the + * scalar multiplication of the pixel coordinates with the value returned by this model. See the + * compute_distorted_uv function for more details. */ +float compute_distortion_scale(float distortion, float distance_squared) +{ + return 1.0 / (1.0 + sqrt(max(0.0, 1.0 - distortion * distance_squared))); +} + +/* A vectorized version of compute_distortion_scale that is applied on the chromatic distortion + * parameters passed to the shader. */ +vec3 compute_chromatic_distortion_scale(float distance_squared) +{ + return 1.0 / (1.0 + sqrt(max(vec3(0.0), 1.0 - chromatic_distortion * distance_squared))); +} + +/* Compute the image coordinates after distortion by the given distortion scale computed by the + * compute_distortion_scale function. Note that the function expects centered normalized UV + * coordinates but outputs non-centered image coordinates. */ +vec2 compute_distorted_uv(vec2 uv, float scale) +{ + return (uv * scale + 0.5) * texture_size(input_tx) - 0.5; +} + +/* Compute the number of integration steps that should be used to approximate the distorted pixel + * using a heuristic, see the compute_number_of_steps function for more details. The numbers of + * steps is proportional to the number of pixels spanned by the distortion amount. For jitter + * distortion, the square root of the distortion amount plus 1 is used with a minimum of 2 steps. + * For non-jitter distortion, the distortion amount plus 1 is used as the number of steps */ +int compute_number_of_integration_steps_heuristic(float distortion) +{ +#if defined(JITTER) + return distortion < 4.0 ? 2 : int(sqrt(distortion + 1.0)); +#else + return int(distortion + 1.0); +#endif +} + +/* Compute the number of integration steps that should be used to compute each channel of the + * distorted pixel. Each of the channels are distorted by their respective chromatic distortion + * amount, then the amount of distortion between each two consecutive channels is computed, this + * amount is then used to heuristically infer the number of needed integration steps, see the + * integrate_distortion function for more information. */ +ivec3 compute_number_of_integration_steps(vec2 uv, float distance_squared) +{ + /* Distort each channel by its respective chromatic distortion amount. */ + vec3 distortion_scale = compute_chromatic_distortion_scale(distance_squared); + vec2 distorted_uv_red = compute_distorted_uv(uv, distortion_scale.r); + vec2 distorted_uv_green = compute_distorted_uv(uv, distortion_scale.g); + vec2 distorted_uv_blue = compute_distorted_uv(uv, distortion_scale.b); + + /* Infer the number of needed integration steps to compute the distorted red channel starting + * from the green channel. 
*/ + float distortion_red = distance(distorted_uv_red, distorted_uv_green); + int steps_red = compute_number_of_integration_steps_heuristic(distortion_red); + + /* Infer the number of needed integration steps to compute the distorted blue channel starting + * from the green channel. */ + float distortion_blue = distance(distorted_uv_green, distorted_uv_blue); + int steps_blue = compute_number_of_integration_steps_heuristic(distortion_blue); + + /* The number of integration steps used to compute the green channel is the sum of both the red + * and the blue channel steps because it is computed once with each of them. */ + return ivec3(steps_red, steps_red + steps_blue, steps_blue); +} + +/* Returns a random jitter amount, which is essentially a random value in the [0, 1] range. If + * jitter is not enabled, return a constant 0.5 value instead. */ +float get_jitter(int seed) +{ +#if defined(JITTER) + return hash_uint3_to_float(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y, seed); +#else + return 0.5; +#endif +} + +/* Each color channel may have a different distortion with the guarantee that the red will have the + * lowest distortion while the blue will have the highest one. If each channel is distorted + * independently, the image will look disintegrated, with each channel seemingly merely shifted. + * Consequently, the distorted pixels needs to be computed by integrating along the path of change + * of distortion starting from one channel to another. For instance, to compute the distorted red + * from the distorted green, we accumulate the color of the distorted pixel starting from the + * distortion of the red, taking small steps until we reach the distortion of the green. The pixel + * color is weighted such that it is maximum at the start distortion and zero at the end distortion + * in an arithmetic progression. The integration steps can be augmented with random values to + * simulate lens jitter. Finally, it should be noted that this function integrates both the start + * and end channels in reverse directions for more efficient computation. */ +vec3 integrate_distortion(int start, int end, float distance_squared, vec2 uv, int steps) +{ + vec3 accumulated_color = vec3(0.0); + float distortion_amount = chromatic_distortion[end] - chromatic_distortion[start]; + for (int i = 0; i < steps; i++) { + /* The increment will be in the [0, 1) range across iterations. */ + float increment = (i + get_jitter(i)) / steps; + float distortion = chromatic_distortion[start] + increment * distortion_amount; + float distortion_scale = compute_distortion_scale(distortion, distance_squared); + + /* Sample the color at the distorted coordinates and accumulate it weighted by the increment + * value for both the start and end channels. */ + vec2 distorted_uv = compute_distorted_uv(uv, distortion_scale); + vec4 color = texture(input_tx, distorted_uv / texture_size(input_tx)); + accumulated_color[start] += (1.0 - increment) * color[start]; + accumulated_color[end] += increment * color[end]; + } + return accumulated_color; +} + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + + /* Compute the UV image coordinates in the range [-1, 1] as well as the squared distance to the + * center of the image, which is at (0, 0) in the UV coordinates. 
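 *
 * (Editorial aside, illustration only: for a 512x512 input with scale = 1.0, center = (256, 256),
 * so the corner texel (0, 0) maps to uv = ((0.5 - 256) / 256, (0.5 - 256) / 256), roughly
 * (-0.998, -0.998), giving a distance_squared of about 1.99, while the central texel maps to a
 * uv near (0, 0) with a distance_squared of about 0.)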
*/ + vec2 center = texture_size(input_tx) / 2.0; + vec2 uv = scale * (texel + 0.5 - center) / center; + float distance_squared = dot(uv, uv); + + /* If any of the color channels will get distorted outside of the screen beyond what is possible, + * write a zero transparent color and return. */ + if (any(greaterThan(chromatic_distortion * distance_squared, vec3(1.0)))) { + imageStore(output_img, texel, vec4(0.0)); + return; + } + + /* Compute the number of integration steps that should be used to compute each channel of the + * distorted pixel. */ + ivec3 number_of_steps = compute_number_of_integration_steps(uv, distance_squared); + + /* Integrate the distortion of the red and green, then the green and blue channels. That means + * the green will be integrated twice, but this is accounted for in the number of steps which the + * color will later be divided by. See the compute_number_of_integration_steps function for more + * details. */ + vec3 color = vec3(0.0); + color += integrate_distortion(0, 1, distance_squared, uv, number_of_steps.r); + color += integrate_distortion(1, 2, distance_squared, uv, number_of_steps.b); + + /* The integration above performed weighted accumulation, and thus the color needs to be divided + * by the sum of the weights. Assuming no jitter, the weights are generated as an arithmetic + * progression starting from (0.5 / n) to ((n - 0.5) / n) for n terms. The sum of an arithmetic + * progression can be computed as (n * (start + end) / 2), which when subsisting the start and + * end reduces to (n / 2). So the color should be multiplied by 2 / n. The jitter sequence + * approximately sums to the same value because it is a uniform random value whose mean value is + * 0.5, so the expression doesn't change regardless of jitter. */ + color *= 2.0 / vec3(number_of_steps); + + imageStore(output_img, texel, vec4(color, 1.0)); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_set_alpha.glsl b/source/blender/gpu/shaders/compositor/compositor_set_alpha.glsl new file mode 100644 index 00000000000..7dd40581790 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_set_alpha.glsl @@ -0,0 +1,8 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); + vec4 color = vec4(texture_load(image_tx, texel).rgb, texture_load(alpha_tx, texel).x); + imageStore(output_img, texel, color); +} diff --git a/source/blender/gpu/shaders/compositor/compositor_split_viewer.glsl b/source/blender/gpu/shaders/compositor/compositor_split_viewer.glsl new file mode 100644 index 00000000000..866b9045da2 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/compositor_split_viewer.glsl @@ -0,0 +1,14 @@ +#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl) + +void main() +{ + ivec2 texel = ivec2(gl_GlobalInvocationID.xy); +#if defined(SPLIT_HORIZONTAL) + bool condition = (view_size.x * split_ratio) < texel.x; +#elif defined(SPLIT_VERTICAL) + bool condition = (view_size.y * split_ratio) < texel.y; +#endif + vec4 color = condition ? 
texture_load(first_image_tx, texel) : + texture_load(second_image_tx, texel); + imageStore(output_img, texel, color); +} diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_alpha_crop_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_alpha_crop_info.hh new file mode 100644 index 00000000000..11f2f329cd8 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_alpha_crop_info.hh @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_alpha_crop) + .local_group_size(16, 16) + .push_constant(Type::IVEC2, "lower_bound") + .push_constant(Type::IVEC2, "upper_bound") + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_alpha_crop.glsl") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_bilateral_blur_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_bilateral_blur_info.hh new file mode 100644 index 00000000000..301cd6acd9e --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_bilateral_blur_info.hh @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_bilateral_blur) + .local_group_size(16, 16) + .push_constant(Type::INT, "radius") + .push_constant(Type::FLOAT, "threshold") + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .sampler(1, ImageType::FLOAT_2D, "determinator_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_bilateral_blur.glsl") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_bokeh_image_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_bokeh_image_info.hh new file mode 100644 index 00000000000..3541de53070 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_bokeh_image_info.hh @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_bokeh_image) + .local_group_size(16, 16) + .push_constant(Type::FLOAT, "exterior_angle") + .push_constant(Type::FLOAT, "rotation") + .push_constant(Type::FLOAT, "roundness") + .push_constant(Type::FLOAT, "catadioptric") + .push_constant(Type::FLOAT, "lens_shift") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_bokeh_image.glsl") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_box_mask_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_box_mask_info.hh new file mode 100644 index 00000000000..ecb253bbab1 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_box_mask_info.hh @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_box_mask_shared) + .local_group_size(16, 16) + .push_constant(Type::IVEC2, "domain_size") + .push_constant(Type::VEC2, "location") + .push_constant(Type::VEC2, "size") + .push_constant(Type::FLOAT, "cos_angle") + .push_constant(Type::FLOAT, "sin_angle") + .sampler(0, ImageType::FLOAT_2D, "base_mask_tx") + .sampler(1, ImageType::FLOAT_2D, "mask_value_tx") + .image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_mask_img") + 
.compute_source("compositor_box_mask.glsl"); + +GPU_SHADER_CREATE_INFO(compositor_box_mask_add) + .additional_info("compositor_box_mask_shared") + .define("CMP_NODE_MASKTYPE_ADD") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_box_mask_subtract) + .additional_info("compositor_box_mask_shared") + .define("CMP_NODE_MASKTYPE_SUBTRACT") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_box_mask_multiply) + .additional_info("compositor_box_mask_shared") + .define("CMP_NODE_MASKTYPE_MULTIPLY") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_box_mask_not) + .additional_info("compositor_box_mask_shared") + .define("CMP_NODE_MASKTYPE_NOT") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_convert_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_convert_info.hh new file mode 100644 index 00000000000..35e60056736 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_convert_info.hh @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_convert_shared) + .local_group_size(16, 16) + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .typedef_source("gpu_shader_compositor_type_conversion.glsl") + .compute_source("compositor_convert.glsl"); + +GPU_SHADER_CREATE_INFO(compositor_convert_float_to_vector) + .additional_info("compositor_convert_shared") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .define("CONVERT_EXPRESSION(value)", "vec4(vec3_from_float(value.x), 0.0)") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_convert_float_to_color) + .additional_info("compositor_convert_shared") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .define("CONVERT_EXPRESSION(value)", "vec4_from_float(value.x)") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_convert_color_to_float) + .additional_info("compositor_convert_shared") + .image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .define("CONVERT_EXPRESSION(value)", "vec4(float_from_vec4(value), vec3(0.0))") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_convert_color_to_vector) + .additional_info("compositor_convert_shared") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .define("CONVERT_EXPRESSION(value)", "vec4(vec3_from_vec4(value), 0.0)") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_convert_vector_to_float) + .additional_info("compositor_convert_shared") + .image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .define("CONVERT_EXPRESSION(value)", "vec4(float_from_vec3(value.xyz), vec3(0.0))") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_convert_vector_to_color) + .additional_info("compositor_convert_shared") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .define("CONVERT_EXPRESSION(value)", "vec4_from_vec3(value.xyz)") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_extract_alpha_from_color) + .additional_info("compositor_convert_shared") + .image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .define("CONVERT_EXPRESSION(value)", "vec4(value.a, vec3(0.0))") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_convert_color_to_half_color) + .additional_info("compositor_convert_shared") + 
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .define("CONVERT_EXPRESSION(value)", "value") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_convert_float_to_half_float) + .additional_info("compositor_convert_shared") + .image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .define("CONVERT_EXPRESSION(value)", "vec4(value.r, vec3(0.0))") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_convert_color_to_opaque) + .additional_info("compositor_convert_shared") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .define("CONVERT_EXPRESSION(value)", "vec4(value.rgb, 1.0)") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_despeckle_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_despeckle_info.hh new file mode 100644 index 00000000000..df86c3a8258 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_despeckle_info.hh @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_despeckle) + .local_group_size(16, 16) + .push_constant(Type::FLOAT, "threshold") + .push_constant(Type::FLOAT, "neighbor_threshold") + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .sampler(1, ImageType::FLOAT_2D, "factor_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_despeckle.glsl") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_directional_blur_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_directional_blur_info.hh new file mode 100644 index 00000000000..bb9199dcd26 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_directional_blur_info.hh @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_directional_blur) + .local_group_size(16, 16) + .push_constant(Type::INT, "iterations") + .push_constant(Type::MAT4, "inverse_transformation") + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_directional_blur.glsl") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_edge_filter_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_edge_filter_info.hh new file mode 100644 index 00000000000..916ec62bdba --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_edge_filter_info.hh @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_edge_filter) + .local_group_size(16, 16) + .push_constant(Type::MAT4, "kernel") + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .sampler(1, ImageType::FLOAT_2D, "factor_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_edge_filter.glsl") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_ellipse_mask_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_ellipse_mask_info.hh new file mode 100644 index 00000000000..52db91c94e5 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_ellipse_mask_info.hh @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later 
*/ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_ellipse_mask_shared) + .local_group_size(16, 16) + .push_constant(Type::IVEC2, "domain_size") + .push_constant(Type::VEC2, "location") + .push_constant(Type::VEC2, "radius") + .push_constant(Type::FLOAT, "cos_angle") + .push_constant(Type::FLOAT, "sin_angle") + .sampler(0, ImageType::FLOAT_2D, "base_mask_tx") + .sampler(1, ImageType::FLOAT_2D, "mask_value_tx") + .image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_mask_img") + .compute_source("compositor_ellipse_mask.glsl"); + +GPU_SHADER_CREATE_INFO(compositor_ellipse_mask_add) + .additional_info("compositor_ellipse_mask_shared") + .define("CMP_NODE_MASKTYPE_ADD") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_ellipse_mask_subtract) + .additional_info("compositor_ellipse_mask_shared") + .define("CMP_NODE_MASKTYPE_SUBTRACT") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_ellipse_mask_multiply) + .additional_info("compositor_ellipse_mask_shared") + .define("CMP_NODE_MASKTYPE_MULTIPLY") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_ellipse_mask_not) + .additional_info("compositor_ellipse_mask_shared") + .define("CMP_NODE_MASKTYPE_NOT") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_filter_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_filter_info.hh new file mode 100644 index 00000000000..9d565cf4b8a --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_filter_info.hh @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_filter) + .local_group_size(16, 16) + .push_constant(Type::MAT4, "kernel") + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .sampler(1, ImageType::FLOAT_2D, "factor_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_filter.glsl") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_flip_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_flip_info.hh new file mode 100644 index 00000000000..db831518cb7 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_flip_info.hh @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_flip) + .local_group_size(16, 16) + .push_constant(Type::BOOL, "flip_x") + .push_constant(Type::BOOL, "flip_y") + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_flip.glsl") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_image_crop_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_image_crop_info.hh new file mode 100644 index 00000000000..e7736744c40 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_image_crop_info.hh @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_image_crop) + .local_group_size(16, 16) + .push_constant(Type::IVEC2, "lower_bound") + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_image_crop.glsl") + .do_static_compilation(true); diff 
--git a/source/blender/gpu/shaders/compositor/infos/compositor_morphological_distance_feather_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_morphological_distance_feather_info.hh new file mode 100644 index 00000000000..9f17f60129d --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_morphological_distance_feather_info.hh @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_morphological_distance_feather_shared) + .local_group_size(16, 16) + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .sampler(1, ImageType::FLOAT_1D, "weights_tx") + .sampler(2, ImageType::FLOAT_1D, "falloffs_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_morphological_distance_feather.glsl"); + +GPU_SHADER_CREATE_INFO(compositor_morphological_distance_feather_dilate) + .additional_info("compositor_morphological_distance_feather_shared") + .define("COMPARE(x, y)", "x > y") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_morphological_distance_feather_erode) + .additional_info("compositor_morphological_distance_feather_shared") + .define("COMPARE(x, y)", "x < y") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_morphological_distance_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_morphological_distance_info.hh new file mode 100644 index 00000000000..fc960e119e5 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_morphological_distance_info.hh @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_morphological_distance_shared) + .local_group_size(16, 16) + .push_constant(Type::INT, "radius") + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_morphological_distance.glsl"); + +GPU_SHADER_CREATE_INFO(compositor_morphological_distance_dilate) + .additional_info("compositor_morphological_distance_shared") + .define("OPERATOR(a, b)", "max(a, b)") + .define("LIMIT", "FLT_MIN") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_morphological_distance_erode) + .additional_info("compositor_morphological_distance_shared") + .define("OPERATOR(a, b)", "min(a, b)") + .define("LIMIT", "FLT_MAX") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_morphological_distance_threshold_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_morphological_distance_threshold_info.hh new file mode 100644 index 00000000000..b1d64f61b80 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_morphological_distance_threshold_info.hh @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_morphological_distance_threshold) + .local_group_size(16, 16) + .push_constant(Type::INT, "radius") + .push_constant(Type::INT, "distance") + .push_constant(Type::FLOAT, "inset") + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_morphological_distance_threshold.glsl") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_morphological_step_info.hh 
b/source/blender/gpu/shaders/compositor/infos/compositor_morphological_step_info.hh new file mode 100644 index 00000000000..e97ffd9feea --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_morphological_step_info.hh @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_morphological_step_shared) + .local_group_size(16, 16) + .push_constant(Type::INT, "radius") + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_morphological_step.glsl"); + +GPU_SHADER_CREATE_INFO(compositor_morphological_step_dilate) + .additional_info("compositor_morphological_step_shared") + .define("OPERATOR(a, b)", "max(a, b)") + .define("LIMIT", "FLT_MIN") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_morphological_step_erode) + .additional_info("compositor_morphological_step_shared") + .define("OPERATOR(a, b)", "min(a, b)") + .define("LIMIT", "FLT_MAX") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_projector_lens_distortion_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_projector_lens_distortion_info.hh new file mode 100644 index 00000000000..98fe1731703 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_projector_lens_distortion_info.hh @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_projector_lens_distortion) + .local_group_size(16, 16) + .push_constant(Type::FLOAT, "dispersion") + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_projector_lens_distortion.glsl") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_realize_on_domain_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_realize_on_domain_info.hh new file mode 100644 index 00000000000..4528649ae98 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_realize_on_domain_info.hh @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_realize_on_domain_shared) + .local_group_size(16, 16) + .push_constant(Type::MAT4, "inverse_transformation") + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .compute_source("compositor_realize_on_domain.glsl"); + +GPU_SHADER_CREATE_INFO(compositor_realize_on_domain_color) + .additional_info("compositor_realize_on_domain_shared") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "domain_img") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_realize_on_domain_vector) + .additional_info("compositor_realize_on_domain_shared") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "domain_img") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_realize_on_domain_float) + .additional_info("compositor_realize_on_domain_shared") + .image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "domain_img") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_screen_lens_distortion_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_screen_lens_distortion_info.hh new file mode 100644 index 00000000000..c42f2b328d4 --- /dev/null +++ 
b/source/blender/gpu/shaders/compositor/infos/compositor_screen_lens_distortion_info.hh @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_screen_lens_distortion_shared) + .local_group_size(16, 16) + .push_constant(Type::VEC3, "chromatic_distortion") + .push_constant(Type::FLOAT, "scale") + .sampler(0, ImageType::FLOAT_2D, "input_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_screen_lens_distortion.glsl"); + +GPU_SHADER_CREATE_INFO(compositor_screen_lens_distortion) + .additional_info("compositor_screen_lens_distortion_shared") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_screen_lens_distortion_jitter) + .additional_info("compositor_screen_lens_distortion_shared") + .define("JITTER") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_set_alpha_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_set_alpha_info.hh new file mode 100644 index 00000000000..ca28194e921 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_set_alpha_info.hh @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_set_alpha) + .local_group_size(16, 16) + .sampler(0, ImageType::FLOAT_2D, "image_tx") + .sampler(1, ImageType::FLOAT_2D, "alpha_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_set_alpha.glsl") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/infos/compositor_split_viewer_info.hh b/source/blender/gpu/shaders/compositor/infos/compositor_split_viewer_info.hh new file mode 100644 index 00000000000..d5793b0ce59 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/infos/compositor_split_viewer_info.hh @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "gpu_shader_create_info.hh" + +GPU_SHADER_CREATE_INFO(compositor_split_viewer_shared) + .local_group_size(16, 16) + .push_constant(Type::FLOAT, "split_ratio") + .push_constant(Type::IVEC2, "view_size") + .sampler(0, ImageType::FLOAT_2D, "first_image_tx") + .sampler(1, ImageType::FLOAT_2D, "second_image_tx") + .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img") + .compute_source("compositor_split_viewer.glsl"); + +GPU_SHADER_CREATE_INFO(compositor_split_viewer_horizontal) + .additional_info("compositor_split_viewer_shared") + .define("SPLIT_HORIZONTAL") + .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(compositor_split_viewer_vertical) + .additional_info("compositor_split_viewer_shared") + .define("SPLIT_VERTICAL") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_alpha_over.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_alpha_over.glsl new file mode 100644 index 00000000000..8e3e033147f --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_alpha_over.glsl @@ -0,0 +1,48 @@ +void node_composite_alpha_over_mixed( + float factor, vec4 color, vec4 over_color, float premultiply_factor, out vec4 result) +{ + if (over_color.a <= 0.0) { + result = color; + } + else if (factor == 1.0 && over_color.a >= 1.0) { + result = over_color; + } + else { + float add_factor = 1.0 - premultiply_factor + over_color.a * premultiply_factor; + float 
premultiplier = factor * add_factor; + float multiplier = 1.0 - factor * over_color.a; + + result = multiplier * color + vec2(premultiplier, factor).xxxy * over_color; + } +} + +void node_composite_alpha_over_key(float factor, vec4 color, vec4 over_color, out vec4 result) +{ + if (over_color.a <= 0.0) { + result = color; + } + else if (factor == 1.0 && over_color.a >= 1.0) { + result = over_color; + } + else { + result = mix(color, vec4(over_color.rgb, 1.0), factor * over_color.a); + } +} + +void node_composite_alpha_over_premultiply(float factor, + vec4 color, + vec4 over_color, + out vec4 result) +{ + if (over_color.a < 0.0) { + result = color; + } + else if (factor == 1.0 && over_color.a >= 1.0) { + result = over_color; + } + else { + float multiplier = 1.0 - factor * over_color.a; + + result = multiplier * color + factor * over_color; + } +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_bright_contrast.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_bright_contrast.glsl new file mode 100644 index 00000000000..ce71b4fd8a4 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_bright_contrast.glsl @@ -0,0 +1,38 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl) + +/* The algorithm is by Werner D. Streidt + * (http://visca.com/ffactory/archives/5-99/msg00021.html) + * Extracted of OpenCV demhist.c + */ + +#define FLT_EPSILON 1.192092896e-07F + +void node_composite_bright_contrast( + vec4 color, float brightness, float contrast, const float use_premultiply, out vec4 result) +{ + brightness /= 100.0; + float delta = contrast / 200.0; + + float multiplier, offset; + if (contrast > 0.0) { + multiplier = 1.0 - delta * 2.0; + multiplier = 1.0 / max(multiplier, FLT_EPSILON); + offset = multiplier * (brightness - delta); + } + else { + delta *= -1.0; + multiplier = max(1.0 - delta * 2.0, 0.0); + offset = multiplier * brightness + delta; + } + + if (use_premultiply != 0.0) { + color_alpha_unpremultiply(color, color); + } + + result.rgb = color.rgb * multiplier + offset; + result.a = color.a; + + if (use_premultiply != 0.0) { + color_alpha_premultiply(result, result); + } +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_channel_matte.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_channel_matte.glsl new file mode 100644 index 00000000000..f2dcc9543f2 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_channel_matte.glsl @@ -0,0 +1,52 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl) + +#define CMP_NODE_CHANNEL_MATTE_CS_RGB 1.0 +#define CMP_NODE_CHANNEL_MATTE_CS_HSV 2.0 +#define CMP_NODE_CHANNEL_MATTE_CS_YUV 3.0 +#define CMP_NODE_CHANNEL_MATTE_CS_YCC 4.0 + +void node_composite_channel_matte(vec4 color, + const float color_space, + const float matte_channel, + const vec2 limit_channels, + float max_limit, + float min_limit, + out vec4 result, + out float matte) +{ + vec4 channels; + if (color_space == CMP_NODE_CHANNEL_MATTE_CS_HSV) { + rgb_to_hsv(color, channels); + } + else if (color_space == CMP_NODE_CHANNEL_MATTE_CS_YUV) { + rgba_to_yuva_itu_709(color, channels); + } + else if (color_space == CMP_NODE_CHANNEL_MATTE_CS_YCC) { + rgba_to_ycca_itu_709(color, channels); + } + else { + channels = color; + } + + float matte_value = channels[int(matte_channel)]; + float limit_value = max(channels[int(limit_channels.x)], channels[int(limit_channels.y)]); + + float alpha = 1.0 - (matte_value 
- limit_value); + if (alpha > max_limit) { + alpha = color.a; + } + else if (alpha < min_limit) { + alpha = 0.0; + } + else { + alpha = (alpha - min_limit) / (max_limit - min_limit); + } + + matte = min(alpha, color.a); + result = color * matte; +} + +#undef CMP_NODE_CHANNEL_MATTE_CS_RGB +#undef CMP_NODE_CHANNEL_MATTE_CS_HSV +#undef CMP_NODE_CHANNEL_MATTE_CS_YUV +#undef CMP_NODE_CHANNEL_MATTE_CS_YCC diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_chroma_matte.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_chroma_matte.glsl new file mode 100644 index 00000000000..5d6bea0c9db --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_chroma_matte.glsl @@ -0,0 +1,43 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_math_utils.glsl) +#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl) + +/* Algorithm from the book Video Demystified. Chapter 7. Chroma Keying. */ +void node_composite_chroma_matte(vec4 color, + vec4 key, + float acceptance, + float cutoff, + float falloff, + out vec4 result, + out float matte) +{ + vec4 color_ycca; + rgba_to_ycca_itu_709(color, color_ycca); + vec4 key_ycca; + rgba_to_ycca_itu_709(key, key_ycca); + + /* Normalize the CrCb components into the [-1, 1] range. */ + vec2 color_cc = color_ycca.yz * 2.0 - 1.0; + vec2 key_cc = key_ycca.yz * 2.0 - 1.0; + + /* Rotate the color onto the space of the key such that x axis of the color space passes through + * the key color. */ + color_cc = vector_to_rotation_matrix(key_cc * vec2(1.0, -1.0)) * color_cc; + + /* Compute foreground key. If positive, the value is in the [0, 1] range. */ + float foreground_key = color_cc.x - (abs(color_cc.y) / acceptance); + + /* Negative foreground key values retain the original alpha. Positive values are scaled by the + * falloff, while colors that make an angle less than the cutoff angle get a zero alpha. */ + float alpha = color.a; + if (foreground_key > 0.0) { + alpha = 1.0 - (foreground_key / falloff); + + if (abs(atan(color_cc.y, color_cc.x)) < (cutoff / 2.0)) { + alpha = 0.0; + } + } + + /* Compute output. 
*/ + matte = min(alpha, color.a); + result = color * matte; +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_color_balance.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_color_balance.glsl new file mode 100644 index 00000000000..bffb94cdedb --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_color_balance.glsl @@ -0,0 +1,34 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl) + +void node_composite_color_balance_lgg( + float factor, vec4 color, vec3 lift, vec3 gamma, vec3 gain, out vec4 result) +{ + lift = 2.0 - lift; + vec3 srgb_color = linear_rgb_to_srgb(color.rgb); + vec3 lift_balanced = ((srgb_color - 1.0) * lift) + 1.0; + + vec3 gain_balanced = lift_balanced * gain; + gain_balanced = max(gain_balanced, vec3(0.0)); + + vec3 linear_color = srgb_to_linear_rgb(gain_balanced); + gamma = mix(gamma, vec3(1e-6), equal(gamma, vec3(0.0))); + vec3 gamma_balanced = pow(linear_color, 1.0 / gamma); + + result.rgb = mix(color.rgb, gamma_balanced, min(factor, 1.0)); + result.a = color.a; +} + +void node_composite_color_balance_asc_cdl(float factor, + vec4 color, + vec3 offset, + vec3 power, + vec3 slope, + float offset_basis, + out vec4 result) +{ + offset += offset_basis; + vec3 balanced = color.rgb * slope + offset; + balanced = pow(max(balanced, vec3(0.0)), power); + result.rgb = mix(color.rgb, balanced, min(factor, 1.0)); + result.a = color.a; +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_color_correction.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_color_correction.glsl new file mode 100644 index 00000000000..9b4858f03be --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_color_correction.glsl @@ -0,0 +1,87 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_math_utils.glsl) +#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl) + +void node_composite_color_correction(vec4 color, + float mask, + const vec3 enabled_channels, + float start_midtones, + float end_midtones, + float master_saturation, + float master_contrast, + float master_gamma, + float master_gain, + float master_lift, + float shadows_saturation, + float shadows_contrast, + float shadows_gamma, + float shadows_gain, + float shadows_lift, + float midtones_saturation, + float midtones_contrast, + float midtones_gamma, + float midtones_gain, + float midtones_lift, + float highlights_saturation, + float highlights_contrast, + float highlights_gamma, + float highlights_gain, + float highlights_lift, + const vec3 luminance_coefficients, + out vec4 result) +{ + const float margin = 0.10; + const float margin_divider = 0.5 / margin; + float level = (color.r + color.g + color.b) / 3.0; + float level_shadows = 0.0; + float level_midtones = 0.0; + float level_highlights = 0.0; + if (level < (start_midtones - margin)) { + level_shadows = 1.0; + } + else if (level < (start_midtones + margin)) { + level_midtones = ((level - start_midtones) * margin_divider) + 0.5; + level_shadows = 1.0 - level_midtones; + } + else if (level < (end_midtones - margin)) { + level_midtones = 1.0; + } + else if (level < (end_midtones + margin)) { + level_highlights = ((level - end_midtones) * margin_divider) + 0.5; + level_midtones = 1.0 - level_highlights; + } + else { + level_highlights = 1.0; + } + + float contrast = level_shadows * shadows_contrast; + contrast += level_midtones * midtones_contrast; + contrast += level_highlights * highlights_contrast; + 
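/* The level weights computed above sum to 1.0, so contrast, saturation, gamma, gain and lift + * below are each a weighted blend of their shadows/midtones/highlights values, combined with + * the corresponding master value (multiplied, or added in the case of lift). */ +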
contrast *= master_contrast; + float saturation = level_shadows * shadows_saturation; + saturation += level_midtones * midtones_saturation; + saturation += level_highlights * highlights_saturation; + saturation *= master_saturation; + float gamma = level_shadows * shadows_gamma; + gamma += level_midtones * midtones_gamma; + gamma += level_highlights * highlights_gamma; + gamma *= master_gamma; + float gain = level_shadows * shadows_gain; + gain += level_midtones * midtones_gain; + gain += level_highlights * highlights_gain; + gain *= master_gain; + float lift = level_shadows * shadows_lift; + lift += level_midtones * midtones_lift; + lift += level_highlights * highlights_lift; + lift += master_lift; + + float inverse_gamma = 1.0 / gamma; + float luma = get_luminance(color.rgb, luminance_coefficients); + + vec3 corrected = luma + saturation * (color.rgb - luma); + corrected = 0.5 + (corrected - 0.5) * contrast; + corrected = fallback_pow(corrected * gain + lift, inverse_gamma, corrected); + corrected = mix(color.rgb, corrected, min(mask, 1.0)); + + result.rgb = mix(corrected, color.rgb, equal(enabled_channels, vec3(0.0))); + result.a = color.a; +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_color_matte.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_color_matte.glsl new file mode 100644 index 00000000000..038471bc1bc --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_color_matte.glsl @@ -0,0 +1,27 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl) + +void node_composite_color_matte(vec4 color, + vec4 key, + float hue_epsilon, + float saturation_epsilon, + float value_epsilon, + out vec4 result, + out float matte) + +{ + vec4 color_hsva; + rgb_to_hsv(color, color_hsva); + vec4 key_hsva; + rgb_to_hsv(key, key_hsva); + + bool is_within_saturation = distance(color_hsva.y, key_hsva.y) < saturation_epsilon; + bool is_within_value = distance(color_hsva.z, key_hsva.z) < value_epsilon; + bool is_within_hue = distance(color_hsva.x, key_hsva.x) < hue_epsilon; + /* Hue wraps around, so check the distance around the boundary. */ + float min_hue = min(color_hsva.x, key_hsva.x); + float max_hue = max(color_hsva.x, key_hsva.x); + is_within_hue = is_within_hue || ((min_hue + (1.0 - max_hue)) < hue_epsilon); + + matte = (is_within_hue && is_within_saturation && is_within_value) ? 0.0 : color.a; + result = color * matte; +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_color_spill.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_color_spill.glsl new file mode 100644 index 00000000000..0adad53ad80 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_color_spill.glsl @@ -0,0 +1,13 @@ +void node_composite_color_spill(vec4 color, + float factor, + const float spill_channel, + vec3 spill_scale, + const vec2 limit_channels, + float limit_scale, + out vec4 result) +{ + float average_limit = (color[int(limit_channels.x)] + color[int(limit_channels.y)]) / 2.0; + float map = factor * color[int(spill_channel)] - limit_scale * average_limit; + result.rgb = map > 0.0 ? 
color.rgb + spill_scale * map : color.rgb; + result.a = color.a; +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_color_to_luminance.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_color_to_luminance.glsl new file mode 100644 index 00000000000..bcdd625bd4f --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_color_to_luminance.glsl @@ -0,0 +1,6 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl) + +void color_to_luminance(vec4 color, const vec3 luminance_coefficients, out float result) +{ + result = get_luminance(color.rgb, luminance_coefficients); +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_difference_matte.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_difference_matte.glsl new file mode 100644 index 00000000000..d769cadce3c --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_difference_matte.glsl @@ -0,0 +1,10 @@ +void node_composite_difference_matte( + vec4 color, vec4 key, float tolerance, float falloff, out vec4 result, out float matte) +{ + vec4 difference = abs(color - key); + float average_difference = (difference.r + difference.g + difference.b) / 3.0; + bool is_opaque = average_difference > tolerance + falloff; + float alpha = is_opaque ? color.a : (max(0.0, average_difference - tolerance) / falloff); + matte = min(alpha, color.a); + result = color * matte; +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_distance_matte.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_distance_matte.glsl new file mode 100644 index 00000000000..9beed66826c --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_distance_matte.glsl @@ -0,0 +1,26 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl) + +void node_composite_distance_matte_rgba( + vec4 color, vec4 key, float tolerance, float falloff, out vec4 result, out float matte) +{ + float difference = distance(color.rgb, key.rgb); + bool is_opaque = difference > tolerance + falloff; + float alpha = is_opaque ? color.a : max(0.0, difference - tolerance) / falloff; + matte = min(alpha, color.a); + result = color * matte; +} + +void node_composite_distance_matte_ycca( + vec4 color, vec4 key, float tolerance, float falloff, out vec4 result, out float matte) +{ + vec4 color_ycca; + rgba_to_ycca_itu_709(color, color_ycca); + vec4 key_ycca; + rgba_to_ycca_itu_709(key, key_ycca); + + float difference = distance(color_ycca.yz, key_ycca.yz); + bool is_opaque = difference > tolerance + falloff; + float alpha = is_opaque ? 
color.a : max(0.0, difference - tolerance) / falloff; + matte = min(alpha, color.a); + result = color * matte; +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_exposure.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_exposure.glsl new file mode 100644 index 00000000000..f246635a91e --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_exposure.glsl @@ -0,0 +1,6 @@ +void node_composite_exposure(vec4 color, float exposure, out vec4 result) +{ + float multiplier = exp2(exposure); + result.rgb = color.rgb * multiplier; + result.a = color.a; +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_gamma.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_gamma.glsl new file mode 100644 index 00000000000..53070d4b0e2 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_gamma.glsl @@ -0,0 +1,7 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_math_utils.glsl) + +void node_composite_gamma(vec4 color, float gamma, out vec4 result) +{ + result.rgb = fallback_pow(color.rgb, gamma, color.rgb); + result.a = color.a; +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_hue_correct.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_hue_correct.glsl new file mode 100644 index 00000000000..99eb125cdf2 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_hue_correct.glsl @@ -0,0 +1,39 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl) + +/* Curve maps are stored in sampler objects that are evaluated in the [0, 1] range, so normalize + * parameters accordingly. */ +#define NORMALIZE_PARAMETER(parameter, minimum, range) ((parameter - minimum) * range) + +void node_composite_hue_correct(float factor, + vec4 color, + sampler1DArray curve_map, + const float layer, + vec3 minimums, + vec3 range_dividers, + out vec4 result) +{ + vec4 hsv; + rgb_to_hsv(color, hsv); + + /* First, adjust the hue channel on its own, since corrections in the saturation and value + * channels depend on the new value of the hue, not its original value. A curve map value of 0.5 + * means no change in hue, so adjust the value to get an identity at 0.5. Since the identity of + * addition is 0, we subtract 0.5 (0.5 - 0.5 = 0). */ + const float hue_parameter = NORMALIZE_PARAMETER(hsv.x, minimums.x, range_dividers.x); + hsv.x += texture(curve_map, vec2(hue_parameter, layer)).x - 0.5; + + /* Second, adjust the saturation and value based on the new value of the hue. A curve map value + * of 0.5 means no change in saturation or value, so adjust the value to get an identity at 0.5. + * Since the identity of multiplication is 1, we multiply by 2 (0.5 * 2 = 1). */ + vec2 parameters = NORMALIZE_PARAMETER(hsv.x, minimums.yz, range_dividers.yz); + hsv.y *= texture(curve_map, vec2(parameters.x, layer)).y * 2.0; + hsv.z *= texture(curve_map, vec2(parameters.y, layer)).z * 2.0; + + /* Sanitize the new hue and saturation values. 
*/ + hsv.x = fract(hsv.x); + hsv.y = clamp(hsv.y, 0.0, 1.0); + + hsv_to_rgb(hsv, result); + + result = mix(color, result, factor); +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_hue_saturation_value.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_hue_saturation_value.glsl new file mode 100644 index 00000000000..dd5eb33d318 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_hue_saturation_value.glsl @@ -0,0 +1,16 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl) + +void node_composite_hue_saturation_value( + vec4 color, float hue, float saturation, float value, float factor, out vec4 result) +{ + vec4 hsv; + rgb_to_hsv(color, hsv); + + hsv.x = fract(hsv.x + hue + 0.5); + hsv.y = clamp(hsv.y * saturation, 0.0, 1.0); + hsv.z = hsv.z * value; + + hsv_to_rgb(hsv, result); + + result = mix(color, result, factor); +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_invert.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_invert.glsl new file mode 100644 index 00000000000..59be746da7f --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_invert.glsl @@ -0,0 +1,13 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl) + +void node_composite_invert(float fac, vec4 color, float do_rgb, float do_alpha, out vec4 result) +{ + result = color; + if (do_rgb != 0.0) { + result.rgb = 1.0 - result.rgb; + } + if (do_alpha != 0.0) { + result.a = 1.0 - result.a; + } + result = mix(color, result, fac); +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_luminance_matte.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_luminance_matte.glsl new file mode 100644 index 00000000000..3647ac583fe --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_luminance_matte.glsl @@ -0,0 +1,14 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl) + +void node_composite_luminance_matte(vec4 color, + float high, + float low, + const vec3 luminance_coefficients, + out vec4 result, + out float matte) +{ + float luminance = get_luminance(color.rgb, luminance_coefficients); + float alpha = clamp(0.0, 1.0, (luminance - low) / (high - low)); + matte = min(alpha, color.a); + result = color * matte; +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_main.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_main.glsl new file mode 100644 index 00000000000..27624223dbc --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_main.glsl @@ -0,0 +1,7 @@ +/* The compute shader that will be dispatched by the compositor ShaderOperation. It just calls the + * evaluate function that will be dynamically generated and appended to this shader in the + * ShaderOperation::generate_code method. */ +void main() +{ + evaluate(); +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_map_value.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_map_value.glsl new file mode 100644 index 00000000000..20874b4ef44 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_map_value.glsl @@ -0,0 +1,56 @@ +/* An arbitrary value determined by Blender. 
*/ +#define BLENDER_ZMAX 10000.0 + +void node_composite_map_range(float value, + float from_min, + float from_max, + float to_min, + float to_max, + const float should_clamp, + out float result) +{ + if (abs(from_max - from_min) < 1e-6) { + result = 0.0; + } + else { + if (value >= -BLENDER_ZMAX && value <= BLENDER_ZMAX) { + result = (value - from_min) / (from_max - from_min); + result = to_min + result * (to_max - to_min); + } + else if (value > BLENDER_ZMAX) { + result = to_max; + } + else { + result = to_min; + } + + if (should_clamp != 0.0) { + if (to_max > to_min) { + result = clamp(result, to_min, to_max); + } + else { + result = clamp(result, to_max, to_min); + } + } + } +} + +void node_composite_map_value(float value, + float offset, + float size, + const float use_min, + float min, + const float use_max, + float max, + out float result) +{ + result = (value + offset) * size; + + if (use_min != 0.0 && result < min) { + result = min; + } + + if (use_max != 0.0 && result > max) { + result = max; + } +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_normal.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_normal.glsl new file mode 100644 index 00000000000..a2e3b6c4aaa --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_normal.glsl @@ -0,0 +1,9 @@ +void node_composite_normal(vec3 input_vector, + vec3 input_normal, + out vec3 result_normal, + out float result_dot) +{ + vec3 normal = normalize(input_normal); + result_normal = normal; + result_dot = -dot(input_vector, normal); +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_posterize.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_posterize.glsl new file mode 100644 index 00000000000..ee8ae234abe --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_posterize.glsl @@ -0,0 +1,6 @@ +void node_composite_posterize(vec4 color, float steps, out vec4 result) +{ + steps = clamp(steps, 2.0, 1024.0); + result = floor(color * steps) / steps; + result.a = color.a; +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_separate_combine.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_separate_combine.glsl new file mode 100644 index 00000000000..d72d2260394 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_separate_combine.glsl @@ -0,0 +1,132 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl) + +/* ** Combine/Separate XYZ ** */ + +void node_composite_combine_xyz(float x, float y, float z, out vec3 vector) +{ + vector = vec3(x, y, z); +} + +void node_composite_separate_xyz(vec3 vector, out float x, out float y, out float z) +{ + x = vector.x; + y = vector.y; + z = vector.z; +} + +/* ** Combine/Separate RGBA ** */ + +void node_composite_combine_rgba(float r, float g, float b, float a, out vec4 color) +{ + color = vec4(r, g, b, a); +} + +void node_composite_separate_rgba(vec4 color, out float r, out float g, out float b, out float a) +{ + r = color.r; + g = color.g; + b = color.b; + a = color.a; +} + +/* ** Combine/Separate HSVA ** */ + +void node_composite_combine_hsva(float h, float s, float v, float a, out vec4 color) +{ + hsv_to_rgb(vec4(h, s, v, a), color); +} + +void node_composite_separate_hsva(vec4 color, out float h, out float s, out float v, out float a) +{ + vec4 hsva; + rgb_to_hsv(color, hsva); + h = hsva.x; + s = hsva.y; + v = hsva.z; + a = hsva.a; 
+} + +/* ** Combine/Separate HSLA ** */ + +void node_composite_combine_hsla(float h, float s, float l, float a, out vec4 color) +{ + hsl_to_rgb(vec4(h, s, l, a), color); +} + +void node_composite_separate_hsla(vec4 color, out float h, out float s, out float l, out float a) +{ + vec4 hsla; + rgb_to_hsl(color, hsla); + h = hsla.x; + s = hsla.y; + l = hsla.z; + a = hsla.a; +} + +/* ** Combine/Separate YCCA ** */ + +void node_composite_combine_ycca_itu_601(float y, float cb, float cr, float a, out vec4 color) +{ + ycca_to_rgba_itu_601(vec4(y, cb, cr, a), color); +} + +void node_composite_combine_ycca_itu_709(float y, float cb, float cr, float a, out vec4 color) +{ + ycca_to_rgba_itu_709(vec4(y, cb, cr, a), color); +} + +void node_composite_combine_ycca_jpeg(float y, float cb, float cr, float a, out vec4 color) +{ + ycca_to_rgba_jpeg(vec4(y, cb, cr, a), color); +} + +void node_composite_separate_ycca_itu_601( + vec4 color, out float y, out float cb, out float cr, out float a) +{ + vec4 ycca; + rgba_to_ycca_itu_601(color, ycca); + y = ycca.x; + cb = ycca.y; + cr = ycca.z; + a = ycca.a; +} + +void node_composite_separate_ycca_itu_709( + vec4 color, out float y, out float cb, out float cr, out float a) +{ + vec4 ycca; + rgba_to_ycca_itu_709(color, ycca); + y = ycca.x; + cb = ycca.y; + cr = ycca.z; + a = ycca.a; +} + +void node_composite_separate_ycca_jpeg( + vec4 color, out float y, out float cb, out float cr, out float a) +{ + vec4 ycca; + rgba_to_ycca_jpeg(color, ycca); + y = ycca.x; + cb = ycca.y; + cr = ycca.z; + a = ycca.a; +} + +/* ** Combine/Separate YUVA ** */ + +void node_composite_combine_yuva_itu_709(float y, float u, float v, float a, out vec4 color) +{ + yuva_to_rgba_itu_709(vec4(y, u, v, a), color); +} + +void node_composite_separate_yuva_itu_709( + vec4 color, out float y, out float u, out float v, out float a) +{ + vec4 yuva; + rgba_to_yuva_itu_709(color, yuva); + y = yuva.x; + u = yuva.y; + v = yuva.z; + a = yuva.a; +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_set_alpha.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_set_alpha.glsl new file mode 100644 index 00000000000..95380d1ed0f --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_set_alpha.glsl @@ -0,0 +1,9 @@ +void node_composite_set_alpha_apply(vec4 color, float alpha, out vec4 result) +{ + result = color * alpha; +} + +void node_composite_set_alpha_replace(vec4 color, float alpha, out vec4 result) +{ + result = vec4(color.rgb, alpha); +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_store_output.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_store_output.glsl new file mode 100644 index 00000000000..7fba26907b5 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_store_output.glsl @@ -0,0 +1,26 @@ +/* The following functions are called to store the given value in the output identified by the + * given ID. The ID is an unsigned integer that is encoded in a float, so floatBitsToUint is called + * to get the actual identifier. The functions have an output value as their last argument that is + * used to establish an output link that is then used to track the nodes that contribute to the + * output of the compositor node tree. + * + * The store_[float|vector|color] functions are dynamically generated in + * ShaderOperation::generate_code_for_outputs. 
*/ + +void node_compositor_store_output_float(const float id, float value, out float out_value) +{ + store_float(floatBitsToUint(id), value); + out_value = value; +} + +void node_compositor_store_output_vector(const float id, vec3 vector, out vec3 out_vector) +{ + store_vector(floatBitsToUint(id), vector); + out_vector = vector; +} + +void node_compositor_store_output_color(const float id, vec4 color, out vec4 out_color) +{ + store_color(floatBitsToUint(id), color); + out_color = color; +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_texture_utilities.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_texture_utilities.glsl new file mode 100644 index 00000000000..128fc6aeaf5 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_texture_utilities.glsl @@ -0,0 +1,35 @@ +/* A shorthand for 1D textureSize with a zero LOD. */ +int texture_size(sampler1D sampler) +{ + return textureSize(sampler, 0); +} + +/* A shorthand for 1D texelFetch with zero LOD and bounded access clamped to border. */ +vec4 texture_load(sampler1D sampler, int x) +{ + const int texture_bound = texture_size(sampler) - 1; + return texelFetch(sampler, clamp(x, 0, texture_bound), 0); +} + +/* A shorthand for 2D textureSize with a zero LOD. */ +ivec2 texture_size(sampler2D sampler) +{ + return textureSize(sampler, 0); +} + +/* A shorthand for 2D texelFetch with zero LOD and bounded access clamped to border. */ +vec4 texture_load(sampler2D sampler, ivec2 texel) +{ + const ivec2 texture_bounds = texture_size(sampler) - ivec2(1); + return texelFetch(sampler, clamp(texel, ivec2(0), texture_bounds), 0); +} + +/* A shorthand for 2D texelFetch with zero LOD and a fallback value for out-of-bound access. 
*/ +vec4 texture_load(sampler2D sampler, ivec2 texel, vec4 fallback) +{ + const ivec2 texture_bounds = texture_size(sampler) - ivec2(1); + if (any(lessThan(texel, ivec2(0))) || any(greaterThan(texel, texture_bounds))) { + return fallback; + } + return texelFetch(sampler, texel, 0); +} diff --git a/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_type_conversion.glsl b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_type_conversion.glsl new file mode 100644 index 00000000000..75c76fd7341 --- /dev/null +++ b/source/blender/gpu/shaders/compositor/library/gpu_shader_compositor_type_conversion.glsl @@ -0,0 +1,29 @@ +float float_from_vec4(vec4 vector) +{ + return dot(vector.rgb, vec3(1.0)) / 3.0; +} + +float float_from_vec3(vec3 vector) +{ + return dot(vector, vec3(1.0)) / 3.0; +} + +vec3 vec3_from_vec4(vec4 vector) +{ + return vector.rgb; +} + +vec3 vec3_from_float(float value) +{ + return vec3(value); +} + +vec4 vec4_from_vec3(vec3 vector) +{ + return vec4(vector, 1.0); +} + +vec4 vec4_from_float(float value) +{ + return vec4(vec3(value), 1.0); +} diff --git a/source/blender/gpu/shaders/gpu_shader_2D_flat_color_vert.glsl b/source/blender/gpu/shaders/gpu_shader_2D_flat_color_vert.glsl deleted file mode 100644 index cf948bb2533..00000000000 --- a/source/blender/gpu/shaders/gpu_shader_2D_flat_color_vert.glsl +++ /dev/null @@ -1,6 +0,0 @@ - -void main() -{ - gl_Position = ModelViewProjectionMatrix * vec4(pos, 0.0, 1.0); - finalColor = color; -} diff --git a/source/blender/gpu/shaders/gpu_shader_2D_image_vert.glsl b/source/blender/gpu/shaders/gpu_shader_2D_image_vert.glsl index 0b5e3759dfb..8191fb6a8d6 100644 --- a/source/blender/gpu/shaders/gpu_shader_2D_image_vert.glsl +++ b/source/blender/gpu/shaders/gpu_shader_2D_image_vert.glsl @@ -10,6 +10,5 @@ out vec2 texCoord_interp; void main() { gl_Position = ModelViewProjectionMatrix * vec4(pos.xy, 0.0f, 1.0f); - gl_Position.z = 1.0; texCoord_interp = texCoord; } diff --git a/source/blender/gpu/shaders/gpu_shader_2D_line_dashed_uniform_color_vert.glsl b/source/blender/gpu/shaders/gpu_shader_2D_line_dashed_uniform_color_vert.glsl deleted file mode 100644 index 7878dc18362..00000000000 --- a/source/blender/gpu/shaders/gpu_shader_2D_line_dashed_uniform_color_vert.glsl +++ /dev/null @@ -1,13 +0,0 @@ - -/* - * Vertex Shader for dashed lines with 2D coordinates, - * with uniform multi-colors or uniform single-color, and unary thickness. - * - * Dashed is performed in screen space. 
- */ - -void main() -{ - gl_Position = ModelViewProjectionMatrix * vec4(pos, 0.0, 1.0); - stipple_start = stipple_pos = viewport_size * 0.5 * (gl_Position.xy / gl_Position.w); -} diff --git a/source/blender/gpu/shaders/gpu_shader_2D_nodelink_frag.glsl b/source/blender/gpu/shaders/gpu_shader_2D_nodelink_frag.glsl index ecda17a7495..433aad85cf6 100644 --- a/source/blender/gpu/shaders/gpu_shader_2D_nodelink_frag.glsl +++ b/source/blender/gpu/shaders/gpu_shader_2D_nodelink_frag.glsl @@ -26,5 +26,5 @@ void main() fragColor.a *= alpha; } - fragColor.a *= smoothstep(1.0, 0.1, abs(colorGradient)); + fragColor.a *= smoothstep(lineThickness, lineThickness - 0.6, abs(colorGradient)); } diff --git a/source/blender/gpu/shaders/gpu_shader_2D_nodelink_vert.glsl b/source/blender/gpu/shaders/gpu_shader_2D_nodelink_vert.glsl index 779bcc59487..794af5b69a5 100644 --- a/source/blender/gpu/shaders/gpu_shader_2D_nodelink_vert.glsl +++ b/source/blender/gpu/shaders/gpu_shader_2D_nodelink_vert.glsl @@ -12,10 +12,8 @@ void main(void) { - /* Define where along the noodle the gradient will starts and ends. - * Use 0.25 instead of 0.35-0.65, because of a visual shift issue. */ - const float start_gradient_threshold = 0.25; - const float end_gradient_threshold = 0.55; + const float start_gradient_threshold = 0.35; + const float end_gradient_threshold = 0.65; #ifdef USE_INSTANCE # define colStart (colid_doarrow[0] < 3 ? start_color : node_link_data.colors[colid_doarrow[0]]) @@ -40,6 +38,31 @@ void main(void) vec4 colEnd = node_link_data.colors[2]; #endif + float line_thickness = thickness; + + if (gl_VertexID < MID_VERTEX) { + /* Outline pass. */ + finalColor = colShadow; + } + else { + /* Second pass. */ + if (uv.x < start_gradient_threshold) { + finalColor = colStart; + } + else if (uv.x > end_gradient_threshold) { + finalColor = colEnd; + } + else { + float mixFactor = (uv.x - start_gradient_threshold) / + (end_gradient_threshold - start_gradient_threshold); + finalColor = mix(colStart, colEnd, mixFactor); + } + line_thickness *= 0.65f; + if (doMuted) { + finalColor[3] = 0.65; + } + } + /* Parameters for the dashed line. */ isMainLine = expand.y != 1.0 ? 0 : 1; dashFactor = dash_factor; @@ -76,35 +99,14 @@ void main(void) exp_axis = ModelViewProjectionMatrix[0].xy * exp_axis.xx + ModelViewProjectionMatrix[1].xy * exp_axis.yy; - float expand_dist = (uv.y * 2.0 - 1.0); + float expand_dist = line_thickness * (uv.y * 2.0 - 1.0); colorGradient = expand_dist; - - if (gl_VertexID < MID_VERTEX) { - /* Shadow pass */ - finalColor = colShadow; - } - else { - /* Second pass */ - if (uv.x < start_gradient_threshold) { - finalColor = colStart; - } - else if (uv.x > end_gradient_threshold) { - finalColor = colEnd; - } - else { - /* Add 0.1 to avoid a visual shift issue. */ - finalColor = mix(colStart, colEnd, uv.x + 0.1); - } - expand_dist *= 0.5; - if (doMuted) { - finalColor[3] = 0.65; - } - } + lineThickness = line_thickness; finalColor[3] *= dim_factor; /* Expand into a line */ - gl_Position.xy += exp_axis * node_link_data.expandSize * expand_dist * thickness; + gl_Position.xy += exp_axis * node_link_data.expandSize * expand_dist; /* If the link is not muted or is not a reroute arrow the points are squashed to the center of * the line. 
Magic numbers are defined in drawnode.c */ diff --git a/source/blender/gpu/shaders/gpu_shader_2D_smooth_color_frag.glsl b/source/blender/gpu/shaders/gpu_shader_2D_smooth_color_frag.glsl deleted file mode 100644 index 8690ba0767a..00000000000 --- a/source/blender/gpu/shaders/gpu_shader_2D_smooth_color_frag.glsl +++ /dev/null @@ -1,7 +0,0 @@ -#pragma BLENDER_REQUIRE(gpu_shader_colorspace_lib.glsl) - -void main() -{ - fragColor = finalColor; - fragColor = blender_srgb_to_framebuffer_space(fragColor); -} diff --git a/source/blender/gpu/shaders/gpu_shader_2D_smooth_color_vert.glsl b/source/blender/gpu/shaders/gpu_shader_2D_smooth_color_vert.glsl deleted file mode 100644 index cf948bb2533..00000000000 --- a/source/blender/gpu/shaders/gpu_shader_2D_smooth_color_vert.glsl +++ /dev/null @@ -1,6 +0,0 @@ - -void main() -{ - gl_Position = ModelViewProjectionMatrix * vec4(pos, 0.0, 1.0); - finalColor = color; -} diff --git a/source/blender/gpu/shaders/gpu_shader_codegen_lib.glsl b/source/blender/gpu/shaders/gpu_shader_codegen_lib.glsl index 6091a5c834a..94707de71ed 100644 --- a/source/blender/gpu/shaders/gpu_shader_codegen_lib.glsl +++ b/source/blender/gpu/shaders/gpu_shader_codegen_lib.glsl @@ -187,8 +187,10 @@ struct ClosureTransparency { struct GlobalData { /** World position. */ vec3 P; - /** Surface Normal. */ + /** Surface Normal. Normalized, overridden by bump displacement. */ vec3 N; + /** Raw interpolated normal (non-normalized) data. */ + vec3 Ni; /** Geometric Normal. */ vec3 Ng; /** Curve Tangent Space. */ diff --git a/source/blender/gpu/shaders/gpu_shader_image_modulate_alpha_frag.glsl b/source/blender/gpu/shaders/gpu_shader_image_modulate_alpha_frag.glsl deleted file mode 100644 index 26f96a5da32..00000000000 --- a/source/blender/gpu/shaders/gpu_shader_image_modulate_alpha_frag.glsl +++ /dev/null @@ -1,6 +0,0 @@ - -void main() -{ - fragColor = texture(image, texCoord_interp); - fragColor.a *= alpha; -} diff --git a/source/blender/gpu/shaders/infos/gpu_shader_2D_flat_color_info.hh b/source/blender/gpu/shaders/infos/gpu_shader_2D_flat_color_info.hh deleted file mode 100644 index 24a06a37a44..00000000000 --- a/source/blender/gpu/shaders/infos/gpu_shader_2D_flat_color_info.hh +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2022 Blender Foundation. All rights reserved. */ - -/** \file - * \ingroup gpu - */ - -#include "gpu_shader_create_info.hh" - -#include "gpu_interface_info.hh" - -GPU_SHADER_CREATE_INFO(gpu_shader_2D_flat_color) - .vertex_in(0, Type::VEC2, "pos") - .vertex_in(1, Type::VEC4, "color") - .vertex_out(flat_color_iface) - .fragment_out(0, Type::VEC4, "fragColor") - .push_constant(Type::MAT4, "ModelViewProjectionMatrix") - .vertex_source("gpu_shader_2D_flat_color_vert.glsl") - .fragment_source("gpu_shader_flat_color_frag.glsl") - .additional_info("gpu_srgb_to_framebuffer_space") - .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/infos/gpu_shader_2D_image_color_info.hh b/source/blender/gpu/shaders/infos/gpu_shader_2D_image_color_info.hh deleted file mode 100644 index 021bd9ebb95..00000000000 --- a/source/blender/gpu/shaders/infos/gpu_shader_2D_image_color_info.hh +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2022 Blender Foundation. All rights reserved. 
*/ - -/** \file - * \ingroup gpu - */ - -#include "gpu_shader_create_info.hh" - -GPU_SHADER_CREATE_INFO(gpu_shader_2D_image_color) - .additional_info("gpu_shader_2D_image_common") - .push_constant(Type::VEC4, "color") - .fragment_source("gpu_shader_image_color_frag.glsl") - .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/infos/gpu_shader_2D_image_info.hh b/source/blender/gpu/shaders/infos/gpu_shader_2D_image_info.hh index 06aad15c18a..a92dca0ce90 100644 --- a/source/blender/gpu/shaders/infos/gpu_shader_2D_image_info.hh +++ b/source/blender/gpu/shaders/infos/gpu_shader_2D_image_info.hh @@ -16,8 +16,3 @@ GPU_SHADER_CREATE_INFO(gpu_shader_2D_image_common) .push_constant(Type::MAT4, "ModelViewProjectionMatrix") .sampler(0, ImageType::FLOAT_2D, "image") .vertex_source("gpu_shader_2D_image_vert.glsl"); - -GPU_SHADER_CREATE_INFO(gpu_shader_2D_image) - .additional_info("gpu_shader_2D_image_common") - .fragment_source("gpu_shader_image_frag.glsl") - .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/infos/gpu_shader_2D_nodelink_info.hh b/source/blender/gpu/shaders/infos/gpu_shader_2D_nodelink_info.hh index 6a419242d21..c7a6635fef7 100644 --- a/source/blender/gpu/shaders/infos/gpu_shader_2D_nodelink_info.hh +++ b/source/blender/gpu/shaders/infos/gpu_shader_2D_nodelink_info.hh @@ -12,6 +12,7 @@ GPU_SHADER_INTERFACE_INFO(nodelink_iface, "") .smooth(Type::FLOAT, "colorGradient") .smooth(Type::FLOAT, "lineU") .flat(Type::FLOAT, "lineLength") + .flat(Type::FLOAT, "lineThickness") .flat(Type::FLOAT, "dashFactor") .flat(Type::FLOAT, "dashAlpha") .flat(Type::INT, "isMainLine"); diff --git a/source/blender/gpu/shaders/infos/gpu_shader_2D_smooth_color_info.hh b/source/blender/gpu/shaders/infos/gpu_shader_2D_smooth_color_info.hh deleted file mode 100644 index d6edeef0dfb..00000000000 --- a/source/blender/gpu/shaders/infos/gpu_shader_2D_smooth_color_info.hh +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2022 Blender Foundation. All rights reserved. */ - -/** \file - * \ingroup gpu - */ - -#include "gpu_interface_info.hh" -#include "gpu_shader_create_info.hh" - -GPU_SHADER_CREATE_INFO(gpu_shader_2D_smooth_color) - .vertex_in(0, Type::VEC2, "pos") - .vertex_in(1, Type::VEC4, "color") - .vertex_out(smooth_color_iface) - .fragment_out(0, Type::VEC4, "fragColor") - .push_constant(Type::MAT4, "ModelViewProjectionMatrix") - .vertex_source("gpu_shader_2D_smooth_color_vert.glsl") - .fragment_source("gpu_shader_2D_smooth_color_frag.glsl") - .additional_info("gpu_srgb_to_framebuffer_space") - .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/infos/gpu_shader_2D_uniform_color_info.hh b/source/blender/gpu/shaders/infos/gpu_shader_2D_uniform_color_info.hh deleted file mode 100644 index 56ccc3f105c..00000000000 --- a/source/blender/gpu/shaders/infos/gpu_shader_2D_uniform_color_info.hh +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2022 Blender Foundation. All rights reserved. 
*/ - -/** \file - * \ingroup gpu - */ - -#include "gpu_shader_create_info.hh" - -GPU_SHADER_CREATE_INFO(gpu_shader_2D_uniform_color) - .vertex_in(0, Type::VEC2, "pos") - .fragment_out(0, Type::VEC4, "fragColor") - .push_constant(Type::MAT4, "ModelViewProjectionMatrix") - .push_constant(Type::VEC4, "color") - .vertex_source("gpu_shader_2D_vert.glsl") - .fragment_source("gpu_shader_uniform_color_frag.glsl") - .additional_info("gpu_srgb_to_framebuffer_space") - .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/infos/gpu_shader_3D_image_info.hh b/source/blender/gpu/shaders/infos/gpu_shader_3D_image_info.hh index 94cf58933af..8abd140397f 100644 --- a/source/blender/gpu/shaders/infos/gpu_shader_3D_image_info.hh +++ b/source/blender/gpu/shaders/infos/gpu_shader_3D_image_info.hh @@ -8,13 +8,22 @@ #include "gpu_interface_info.hh" #include "gpu_shader_create_info.hh" -GPU_SHADER_CREATE_INFO(gpu_shader_3D_image) +GPU_SHADER_CREATE_INFO(gpu_shader_3D_image_common) .vertex_in(0, Type::VEC3, "pos") .vertex_in(1, Type::VEC2, "texCoord") .vertex_out(smooth_tex_coord_interp_iface) .fragment_out(0, Type::VEC4, "fragColor") .push_constant(Type::MAT4, "ModelViewProjectionMatrix") .sampler(0, ImageType::FLOAT_2D, "image") - .vertex_source("gpu_shader_3D_image_vert.glsl") + .vertex_source("gpu_shader_3D_image_vert.glsl"); + +GPU_SHADER_CREATE_INFO(gpu_shader_3D_image) + .additional_info("gpu_shader_3D_image_common") .fragment_source("gpu_shader_image_frag.glsl") .do_static_compilation(true); + +GPU_SHADER_CREATE_INFO(gpu_shader_3D_image_color) + .additional_info("gpu_shader_3D_image_common") + .push_constant(Type::VEC4, "color") + .fragment_source("gpu_shader_image_color_frag.glsl") + .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/infos/gpu_shader_3D_image_modulate_alpha_info.hh b/source/blender/gpu/shaders/infos/gpu_shader_3D_image_modulate_alpha_info.hh deleted file mode 100644 index 35ddaa5c71c..00000000000 --- a/source/blender/gpu/shaders/infos/gpu_shader_3D_image_modulate_alpha_info.hh +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2022 Blender Foundation. All rights reserved. */ - -/** \file - * \ingroup gpu - */ - -#include "gpu_interface_info.hh" -#include "gpu_shader_create_info.hh" - -GPU_SHADER_CREATE_INFO(gpu_shader_3D_image_modulate_alpha) - .vertex_in(0, Type::VEC3, "pos") - .vertex_in(1, Type::VEC2, "texCoord") - .vertex_out(smooth_tex_coord_interp_iface) - .fragment_out(0, Type::VEC4, "fragColor") - .push_constant(Type::MAT4, "ModelViewProjectionMatrix") - .push_constant(Type::FLOAT, "alpha") - .sampler(0, ImageType::FLOAT_2D, "image", Frequency::PASS) - .vertex_source("gpu_shader_3D_image_vert.glsl") - .fragment_source("gpu_shader_image_modulate_alpha_frag.glsl") - .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/infos/gpu_shader_3D_line_dashed_uniform_color_info.hh b/source/blender/gpu/shaders/infos/gpu_shader_3D_line_dashed_uniform_color_info.hh deleted file mode 100644 index 2987077ffba..00000000000 --- a/source/blender/gpu/shaders/infos/gpu_shader_3D_line_dashed_uniform_color_info.hh +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later - * Copyright 2022 Blender Foundation. All rights reserved. */ - -/** \file - * \ingroup gpu - */ - -#include "gpu_interface_info.hh" -#include "gpu_shader_create_info.hh" - -/* TODO(jbakker): Skipped as data doesn't fit as push constant. 
*/ -GPU_SHADER_CREATE_INFO(gpu_shader_3D_line_dashed_uniform_color) - .vertex_in(0, Type::VEC3, "pos") - .vertex_out(flat_color_iface) - .push_constant(Type::MAT4, "ModelViewProjectionMatrix") - .vertex_source("gpu_shader_3D_line_dashed_uniform_color_vert.glsl") - .fragment_source("gpu_shader_2D_line_dashed_frag.glsl") - .do_static_compilation(true); diff --git a/source/blender/gpu/shaders/infos/gpu_shader_3D_polyline_info.hh b/source/blender/gpu/shaders/infos/gpu_shader_3D_polyline_info.hh index 6840dfe25de..396ee64454c 100644 --- a/source/blender/gpu/shaders/infos/gpu_shader_3D_polyline_info.hh +++ b/source/blender/gpu/shaders/infos/gpu_shader_3D_polyline_info.hh @@ -37,7 +37,7 @@ GPU_SHADER_CREATE_INFO(gpu_shader_3D_polyline_uniform_color) GPU_SHADER_CREATE_INFO(gpu_shader_3D_polyline_uniform_color_clipped) .do_static_compilation(true) - /* TODO(fclem): Put in an UBO to fit the 128byte requirement. */ + /* TODO(fclem): Put in a UBO to fit the 128byte requirement. */ .push_constant(Type::MAT4, "ModelMatrix") .push_constant(Type::VEC4, "ClipPlane") .define("CLIP") diff --git a/source/blender/gpu/shaders/infos/gpu_shader_line_dashed_uniform_color_info.hh b/source/blender/gpu/shaders/infos/gpu_shader_line_dashed_uniform_color_info.hh index 57cb02c8484..a2ac08c689b 100644 --- a/source/blender/gpu/shaders/infos/gpu_shader_line_dashed_uniform_color_info.hh +++ b/source/blender/gpu/shaders/infos/gpu_shader_line_dashed_uniform_color_info.hh @@ -13,7 +13,8 @@ GPU_SHADER_INTERFACE_INFO(gpu_shader_line_dashed_interface, "") .no_perspective(Type::VEC2, "stipple_start") /* In screen space */ .flat(Type::VEC2, "stipple_pos"); /* In screen space */ -GPU_SHADER_CREATE_INFO(gpu_shader_line_dashed) +GPU_SHADER_CREATE_INFO(gpu_shader_3D_line_dashed_uniform_color) + .vertex_in(0, Type::VEC3, "pos") .vertex_out(flat_color_iface) .push_constant(Type::MAT4, "ModelViewProjectionMatrix") .push_constant(Type::VEC2, "viewport_size") @@ -25,18 +26,8 @@ GPU_SHADER_CREATE_INFO(gpu_shader_line_dashed) .push_constant(Type::VEC4, "color2") .vertex_out(gpu_shader_line_dashed_interface) .fragment_out(0, Type::VEC4, "fragColor") - .fragment_source("gpu_shader_2D_line_dashed_frag.glsl"); - -GPU_SHADER_CREATE_INFO(gpu_shader_2D_line_dashed_uniform_color) - .vertex_in(0, Type::VEC2, "pos") - .vertex_source("gpu_shader_2D_line_dashed_uniform_color_vert.glsl") - .additional_info("gpu_shader_line_dashed") - .do_static_compilation(true); - -GPU_SHADER_CREATE_INFO(gpu_shader_3D_line_dashed_uniform_color) - .vertex_in(0, Type::VEC3, "pos") .vertex_source("gpu_shader_3D_line_dashed_uniform_color_vert.glsl") - .additional_info("gpu_shader_line_dashed") + .fragment_source("gpu_shader_2D_line_dashed_frag.glsl") .do_static_compilation(true); GPU_SHADER_CREATE_INFO(gpu_shader_3D_line_dashed_uniform_color_clipped) diff --git a/source/blender/gpu/shaders/material/gpu_shader_material_attribute.glsl b/source/blender/gpu/shaders/material/gpu_shader_material_attribute.glsl index 2ae53b35b3f..bacf089deb1 100644 --- a/source/blender/gpu/shaders/material/gpu_shader_material_attribute.glsl +++ b/source/blender/gpu/shaders/material/gpu_shader_material_attribute.glsl @@ -12,6 +12,23 @@ void node_attribute_temperature(vec4 attr, out vec4 out_attr) out_attr.w = 1.0; } +void node_attribute_density(vec4 attr, out float out_attr) +{ + out_attr = attr.x; +} + +void node_attribute_flame(vec4 attr, out float out_attr) +{ + out_attr = attr.x; +} + +void node_attribute_uniform(vec4 attr, const float attr_hash, out vec4 out_attr) +{ + /* Temporary solution to 
support both old UBO attribs and new SSBO loading. + * Old UBO load is already done through `attr` and will just be passed through. */ + out_attr = attr_load_uniform(attr, floatBitsToUint(attr_hash)); +} + void node_attribute( vec4 attr, out vec4 outcol, out vec3 outvec, out float outf, out float outalpha) { diff --git a/source/blender/gpu/shaders/material/gpu_shader_material_displacement.glsl b/source/blender/gpu/shaders/material/gpu_shader_material_displacement.glsl index cdcdbe50917..52b4edf665f 100644 --- a/source/blender/gpu/shaders/material/gpu_shader_material_displacement.glsl +++ b/source/blender/gpu/shaders/material/gpu_shader_material_displacement.glsl @@ -1,6 +1,6 @@ void node_displacement_object(float height, float midlevel, float scale, vec3 N, out vec3 result) { - N = transform_direction(ModelMatrix, N); + N = transform_direction(ModelMatrixInverse, N); result = (height - midlevel) * scale * normalize(N); /* Apply object scale and orientation. */ result = transform_direction(ModelMatrix, result); diff --git a/source/blender/gpu/shaders/material/gpu_shader_material_eevee_specular.glsl b/source/blender/gpu/shaders/material/gpu_shader_material_eevee_specular.glsl index 530907859e9..c95a41c58fc 100644 --- a/source/blender/gpu/shaders/material/gpu_shader_material_eevee_specular.glsl +++ b/source/blender/gpu/shaders/material/gpu_shader_material_eevee_specular.glsl @@ -34,6 +34,13 @@ void node_eevee_specular(vec4 diffuse, diffuse_data.N = N; diffuse_data.sss_id = 0u; + /* WORKAROUND: Nasty workaround to the current interface with the closure evaluation. + * Ideally the occlusion input should be move to the output node or removed all-together. + * This is temporary to avoid a regression in 3.2 and should be removed after EEVEE-Next rewrite. + */ + diffuse_data.sss_radius.r = occlusion; + diffuse_data.sss_radius.g = -1.0; /* Flag */ + ClosureReflection reflection_data; reflection_data.weight = alpha; if (true) { @@ -41,7 +48,7 @@ void node_eevee_specular(vec4 diffuse, vec2 split_sum = brdf_lut(NV, roughness); vec3 brdf = F_brdf_single_scatter(specular.rgb, vec3(1.0), split_sum); - reflection_data.color = specular.rgb * brdf; + reflection_data.color = brdf; reflection_data.N = N; reflection_data.roughness = roughness; } diff --git a/source/blender/gpu/shaders/material/gpu_shader_material_mix_color.glsl b/source/blender/gpu/shaders/material/gpu_shader_material_mix_color.glsl new file mode 100644 index 00000000000..933a8de9cb7 --- /dev/null +++ b/source/blender/gpu/shaders/material/gpu_shader_material_mix_color.glsl @@ -0,0 +1,537 @@ +#pragma BLENDER_REQUIRE(gpu_shader_common_color_utils.glsl) + +void node_mix_blend(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + outcol = mix(col1, col2, fac); + outcol.a = col1.a; +} + +void node_mix_add(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + + outcol = mix(col1, col1 + col2, fac); + outcol.a = col1.a; +} + +void node_mix_mult(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + + outcol = mix(col1, col1 * col2, fac); + outcol.a = col1.a; +} + +void node_mix_screen(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 
outcol) +{ + + float facm = 1.0 - fac; + + outcol = vec4(1.0) - (vec4(facm) + fac * (vec4(1.0) - col2)) * (vec4(1.0) - col1); + outcol.a = col1.a; +} + +void node_mix_overlay(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + + float facm = 1.0 - fac; + + outcol = col1; + + if (outcol.r < 0.5) { + outcol.r *= facm + 2.0 * fac * col2.r; + } + else { + outcol.r = 1.0 - (facm + 2.0 * fac * (1.0 - col2.r)) * (1.0 - outcol.r); + } + + if (outcol.g < 0.5) { + outcol.g *= facm + 2.0 * fac * col2.g; + } + else { + outcol.g = 1.0 - (facm + 2.0 * fac * (1.0 - col2.g)) * (1.0 - outcol.g); + } + + if (outcol.b < 0.5) { + outcol.b *= facm + 2.0 * fac * col2.b; + } + else { + outcol.b = 1.0 - (facm + 2.0 * fac * (1.0 - col2.b)) * (1.0 - outcol.b); + } +} + +void node_mix_sub(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + + outcol = mix(col1, col1 - col2, fac); + outcol.a = col1.a; +} + +/* A variant of mix_div that fallback to the first color upon zero division. */ +void node_mix_div_fallback(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + + float facm = 1.0 - fac; + + outcol = col1; + + if (col2.r != 0.0) { + outcol.r = facm * outcol.r + fac * outcol.r / col2.r; + } + if (col2.g != 0.0) { + outcol.g = facm * outcol.g + fac * outcol.g / col2.g; + } + if (col2.b != 0.0) { + outcol.b = facm * outcol.b + fac * outcol.b / col2.b; + } +} + +void node_mix_diff(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + + outcol = mix(col1, abs(col1 - col2), fac); + outcol.a = col1.a; +} + +void node_mix_dark(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + + outcol.rgb = mix(col1.rgb, min(col1.rgb, col2.rgb), fac); + outcol.a = col1.a; +} + +void node_mix_light(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + outcol.rgb = mix(col1.rgb, max(col1.rgb, col2.rgb), fac); + outcol.a = col1.a; +} + +void node_mix_dodge(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + outcol = col1; + + if (outcol.r != 0.0) { + float tmp = 1.0 - fac * col2.r; + if (tmp <= 0.0) { + outcol.r = 1.0; + } + else if ((tmp = outcol.r / tmp) > 1.0) { + outcol.r = 1.0; + } + else { + outcol.r = tmp; + } + } + if (outcol.g != 0.0) { + float tmp = 1.0 - fac * col2.g; + if (tmp <= 0.0) { + outcol.g = 1.0; + } + else if ((tmp = outcol.g / tmp) > 1.0) { + outcol.g = 1.0; + } + else { + outcol.g = tmp; + } + } + if (outcol.b != 0.0) { + float tmp = 1.0 - fac * col2.b; + if (tmp <= 0.0) { + outcol.b = 1.0; + } + else if ((tmp = outcol.b / tmp) > 1.0) { + outcol.b = 1.0; + } + else { + outcol.b = tmp; + } + } +} + +void node_mix_burn(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + + float tmp, facm = 1.0 - fac; + + outcol = col1; + + tmp = facm + fac * col2.r; + if 
(tmp <= 0.0) { + outcol.r = 0.0; + } + else if ((tmp = (1.0 - (1.0 - outcol.r) / tmp)) < 0.0) { + outcol.r = 0.0; + } + else if (tmp > 1.0) { + outcol.r = 1.0; + } + else { + outcol.r = tmp; + } + + tmp = facm + fac * col2.g; + if (tmp <= 0.0) { + outcol.g = 0.0; + } + else if ((tmp = (1.0 - (1.0 - outcol.g) / tmp)) < 0.0) { + outcol.g = 0.0; + } + else if (tmp > 1.0) { + outcol.g = 1.0; + } + else { + outcol.g = tmp; + } + + tmp = facm + fac * col2.b; + if (tmp <= 0.0) { + outcol.b = 0.0; + } + else if ((tmp = (1.0 - (1.0 - outcol.b) / tmp)) < 0.0) { + outcol.b = 0.0; + } + else if (tmp > 1.0) { + outcol.b = 1.0; + } + else { + outcol.b = tmp; + } +} + +void node_mix_hue(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + + float facm = 1.0 - fac; + + outcol = col1; + + vec4 hsv, hsv2, tmp; + rgb_to_hsv(col2, hsv2); + + if (hsv2.y != 0.0) { + rgb_to_hsv(outcol, hsv); + hsv.x = hsv2.x; + hsv_to_rgb(hsv, tmp); + + outcol = mix(outcol, tmp, fac); + outcol.a = col1.a; + } +} + +void node_mix_sat(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + + float facm = 1.0 - fac; + + outcol = col1; + + vec4 hsv, hsv2; + rgb_to_hsv(outcol, hsv); + + if (hsv.y != 0.0) { + rgb_to_hsv(col2, hsv2); + + hsv.y = facm * hsv.y + fac * hsv2.y; + hsv_to_rgb(hsv, outcol); + } +} + +void node_mix_val(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + + float facm = 1.0 - fac; + + vec4 hsv, hsv2; + rgb_to_hsv(col1, hsv); + rgb_to_hsv(col2, hsv2); + + hsv.z = facm * hsv.z + fac * hsv2.z; + hsv_to_rgb(hsv, outcol); +} + +void node_mix_color(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + + float facm = 1.0 - fac; + + outcol = col1; + + vec4 hsv, hsv2, tmp; + rgb_to_hsv(col2, hsv2); + + if (hsv2.y != 0.0) { + rgb_to_hsv(outcol, hsv); + hsv.x = hsv2.x; + hsv.y = hsv2.y; + hsv_to_rgb(hsv, tmp); + + outcol = mix(outcol, tmp, fac); + outcol.a = col1.a; + } +} + +void node_mix_soft(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + + float facm = 1.0 - fac; + + vec4 one = vec4(1.0); + vec4 scr = one - (one - col2) * (one - col1); + outcol = facm * col1 + fac * ((one - col1) * col2 * col1 + col1 * scr); +} + +void node_mix_linear(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + + outcol = col1 + fac * (2.0 * (col2 - vec4(0.5))); +} + +void node_mix_float(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + + outfloat = mix(f1, f2, fac); +} + +void node_mix_vector(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + + outvec = mix(v1, v2, fac); +} + +void node_mix_vector_non_uniform(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + 
outvec = mix(v1, v2, facvec); +} + +void node_mix_rgba(float fac, + vec3 facvec, + float f1, + float f2, + vec3 v1, + vec3 v2, + vec4 col1, + vec4 col2, + out float outfloat, + out vec3 outvec, + out vec4 outcol) +{ + outcol = mix(col1, col2, fac); +} + +void node_mix_clamp_vector(vec3 vec, vec3 min, vec3 max, out vec3 outvec) +{ + outvec = clamp(vec, min, max); +} + +void node_mix_clamp_value(float value, float min, float max, out float outfloat) +{ + outfloat = clamp(value, min, max); +} diff --git a/source/blender/gpu/shaders/material/gpu_shader_material_noise.glsl b/source/blender/gpu/shaders/material/gpu_shader_material_noise.glsl index 881e38ea11a..480334f9bbd 100644 --- a/source/blender/gpu/shaders/material/gpu_shader_material_noise.glsl +++ b/source/blender/gpu/shaders/material/gpu_shader_material_noise.glsl @@ -13,7 +13,6 @@ * + + | * @ + + + + @ @------> x * v0 v1 - * */ float bi_mix(float v0, float v1, float v2, float v3, float x, float y) { diff --git a/source/blender/gpu/shaders/material/gpu_shader_material_normal_map.glsl b/source/blender/gpu/shaders/material/gpu_shader_material_normal_map.glsl index a54dc59ddfe..3fc4992f7c4 100644 --- a/source/blender/gpu/shaders/material/gpu_shader_material_normal_map.glsl +++ b/source/blender/gpu/shaders/material/gpu_shader_material_normal_map.glsl @@ -3,13 +3,13 @@ void node_normal_map(vec4 tangent, vec3 texnormal, out vec3 outnormal) { if (all(equal(tangent, vec4(0.0, 0.0, 0.0, 1.0)))) { - outnormal = g_data.N; + outnormal = g_data.Ni; return; } tangent *= (FrontFacing ? 1.0 : -1.0); - vec3 B = tangent.w * cross(g_data.N, tangent.xyz) * sign(ObjectInfo.w); + vec3 B = tangent.w * cross(g_data.Ni, tangent.xyz) * sign(ObjectInfo.w); - outnormal = texnormal.x * tangent.xyz + texnormal.y * B + texnormal.z * g_data.N; + outnormal = texnormal.x * tangent.xyz + texnormal.y * B + texnormal.z * g_data.Ni; outnormal = normalize(outnormal); } #endif diff --git a/source/blender/gpu/shaders/material/gpu_shader_material_principled.glsl b/source/blender/gpu/shaders/material/gpu_shader_material_principled.glsl index 2e695fa3e14..0d8f2272c10 100644 --- a/source/blender/gpu/shaders/material/gpu_shader_material_principled.glsl +++ b/source/blender/gpu/shaders/material/gpu_shader_material_principled.glsl @@ -149,25 +149,37 @@ void node_bsdf_principled(vec4 base_color, max(roughness, transmission_roughness); refraction_data.ior = ior; + /* Ref. T98190: Defines are optimizations for old compilers. + * Might become unecessary with EEVEE-Next. */ if (do_diffuse == 0.0 && do_refraction == 0.0 && do_clearcoat != 0.0) { +#ifdef PRINCIPLED_CLEARCOAT /* Metallic & Clearcoat case. */ result = closure_eval(reflection_data, clearcoat_data); +#endif } else if (do_diffuse == 0.0 && do_refraction == 0.0 && do_clearcoat == 0.0) { +#ifdef PRINCIPLED_METALLIC /* Metallic case. */ result = closure_eval(reflection_data); +#endif } else if (do_diffuse != 0.0 && do_refraction == 0.0 && do_clearcoat == 0.0) { +#ifdef PRINCIPLED_DIELECTRIC /* Dielectric case. */ result = closure_eval(diffuse_data, reflection_data); +#endif } else if (do_diffuse == 0.0 && do_refraction != 0.0 && do_clearcoat == 0.0) { +#ifdef PRINCIPLED_GLASS /* Glass case. */ result = closure_eval(reflection_data, refraction_data); +#endif } else { +#ifdef PRINCIPLED_ANY /* Un-optimized case. 
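For illustration only, not part of the patch: every node_mix_* entry point in gpu_shader_material_mix_color.glsl above shares the same parameter list, presumably so generated material code can switch blend functions without changing the call site. A minimal GLSL sketch of one such call follows; the wrapper and variable names are hypothetical.

void example_mix_rgba(float fac, vec4 col1, vec4 col2, out vec4 result)
{
  float unused_float;
  vec3 unused_vec;
  /* Only the color output is used for an RGBA mix; the float and vector outputs
   * exist so all blend modes share one signature. */
  node_mix_blend(clamp(fac, 0.0, 1.0), vec3(0.0), 0.0, 0.0,
                 vec3(0.0), vec3(0.0), col1, col2,
                 unused_float, unused_vec, result);
}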
*/ result = closure_eval(diffuse_data, reflection_data, clearcoat_data, refraction_data); +#endif } Closure emission_cl = closure_eval(emission_data); Closure transparency_cl = closure_eval(transparency_data); diff --git a/source/blender/gpu/shaders/material/gpu_shader_material_tex_musgrave.glsl b/source/blender/gpu/shaders/material/gpu_shader_material_tex_musgrave.glsl index 961fe23e67e..7171c5f2b36 100644 --- a/source/blender/gpu/shaders/material/gpu_shader_material_tex_musgrave.glsl +++ b/source/blender/gpu/shaders/material/gpu_shader_material_tex_musgrave.glsl @@ -153,13 +153,12 @@ void node_tex_musgrave_hybrid_multi_fractal_1d(vec3 co, float lacunarity = max(lac, 1e-5); float pwHL = pow(lacunarity, -H); - float pwr = pwHL; - float value = snoise(p) + offset; - float weight = gain * value; - p *= lacunarity; + float pwr = 1.0; + float value = 0.0; + float weight = 1.0; - for (int i = 1; (weight > 0.001f) && (i < int(octaves)); i++) { + for (int i = 0; (weight > 0.001f) && (i < int(octaves)); i++) { if (weight > 1.0) { weight = 1.0; } @@ -172,8 +171,12 @@ void node_tex_musgrave_hybrid_multi_fractal_1d(vec3 co, } float rmd = octaves - floor(octaves); - if (rmd != 0.0) { - value += rmd * ((snoise(p) + offset) * pwr); + if ((rmd != 0.0) && (weight > 0.001f)) { + if (weight > 1.0) { + weight = 1.0; + } + float signal = (snoise(p) + offset) * pwr; + value += rmd * weight * signal; } fac = value; @@ -375,13 +378,12 @@ void node_tex_musgrave_hybrid_multi_fractal_2d(vec3 co, float lacunarity = max(lac, 1e-5); float pwHL = pow(lacunarity, -H); - float pwr = pwHL; - float value = snoise(p) + offset; - float weight = gain * value; - p *= lacunarity; + float pwr = 1.0; + float value = 0.0; + float weight = 1.0; - for (int i = 1; (weight > 0.001f) && (i < int(octaves)); i++) { + for (int i = 0; (weight > 0.001f) && (i < int(octaves)); i++) { if (weight > 1.0) { weight = 1.0; } @@ -394,8 +396,12 @@ void node_tex_musgrave_hybrid_multi_fractal_2d(vec3 co, } float rmd = octaves - floor(octaves); - if (rmd != 0.0) { - value += rmd * ((snoise(p) + offset) * pwr); + if ((rmd != 0.0) && (weight > 0.001f)) { + if (weight > 1.0) { + weight = 1.0; + } + float signal = (snoise(p) + offset) * pwr; + value += rmd * weight * signal; } fac = value; @@ -597,13 +603,12 @@ void node_tex_musgrave_hybrid_multi_fractal_3d(vec3 co, float lacunarity = max(lac, 1e-5); float pwHL = pow(lacunarity, -H); - float pwr = pwHL; - float value = snoise(p) + offset; - float weight = gain * value; - p *= lacunarity; + float pwr = 1.0; + float value = 0.0; + float weight = 1.0; - for (int i = 1; (weight > 0.001f) && (i < int(octaves)); i++) { + for (int i = 0; (weight > 0.001f) && (i < int(octaves)); i++) { if (weight > 1.0) { weight = 1.0; } @@ -616,8 +621,12 @@ void node_tex_musgrave_hybrid_multi_fractal_3d(vec3 co, } float rmd = octaves - floor(octaves); - if (rmd != 0.0) { - value += rmd * ((snoise(p) + offset) * pwr); + if ((rmd != 0.0) && (weight > 0.001f)) { + if (weight > 1.0) { + weight = 1.0; + } + float signal = (snoise(p) + offset) * pwr; + value += rmd * weight * signal; } fac = value; @@ -819,13 +828,12 @@ void node_tex_musgrave_hybrid_multi_fractal_4d(vec3 co, float lacunarity = max(lac, 1e-5); float pwHL = pow(lacunarity, -H); - float pwr = pwHL; - float value = snoise(p) + offset; - float weight = gain * value; - p *= lacunarity; + float pwr = 1.0; + float value = 0.0; + float weight = 1.0; - for (int i = 1; (weight > 0.001f) && (i < int(octaves)); i++) { + for (int i = 0; (weight > 0.001f) && (i < int(octaves)); i++) 
{ if (weight > 1.0) { weight = 1.0; } @@ -838,8 +846,12 @@ void node_tex_musgrave_hybrid_multi_fractal_4d(vec3 co, } float rmd = octaves - floor(octaves); - if (rmd != 0.0) { - value += rmd * ((snoise(p) + offset) * pwr); + if ((rmd != 0.0) && (weight > 0.001f)) { + if (weight > 1.0) { + weight = 1.0; + } + float signal = (snoise(p) + offset) * pwr; + value += rmd * weight * signal; } fac = value; diff --git a/source/blender/gpu/shaders/material/gpu_shader_material_tex_voronoi.glsl b/source/blender/gpu/shaders/material/gpu_shader_material_tex_voronoi.glsl index 0fb8ef15f5f..aac3d98b43b 100644 --- a/source/blender/gpu/shaders/material/gpu_shader_material_tex_voronoi.glsl +++ b/source/blender/gpu/shaders/material/gpu_shader_material_tex_voronoi.glsl @@ -15,7 +15,6 @@ * * With optimization to change -2..2 scan window to -1..1 for better performance, * as explained in https://www.shadertoy.com/view/llG3zy. - * */ /* **** 1D Voronoi **** */ diff --git a/source/blender/gpu/shaders/material/gpu_shader_material_texture_coordinates.glsl b/source/blender/gpu/shaders/material/gpu_shader_material_texture_coordinates.glsl index 204f134dfa6..c849553ae4c 100644 --- a/source/blender/gpu/shaders/material/gpu_shader_material_texture_coordinates.glsl +++ b/source/blender/gpu/shaders/material/gpu_shader_material_texture_coordinates.glsl @@ -1,4 +1,9 @@ +void node_tex_coord_position(out vec3 out_pos) +{ + out_pos = g_data.P; +} + void node_tex_coord(mat4 obmatinv, vec3 attr_orco, vec4 attr_uv, diff --git a/source/blender/gpu/shaders/metal/mtl_shader_common.msl b/source/blender/gpu/shaders/metal/mtl_shader_common.msl new file mode 100644 index 00000000000..c504cdbacb1 --- /dev/null +++ b/source/blender/gpu/shaders/metal/mtl_shader_common.msl @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* Common Metal header to be included in all compiled Metal shaders. + * Both native MSL shaders and GLSL shaders. */ + +using namespace metal; + +/* Should match GPUVertFetchMode. */ +typedef enum { + GPU_FETCH_FLOAT = 0, + GPU_FETCH_INT, + GPU_FETCH_INT_TO_FLOAT_UNIT, + GPU_FETCH_INT_TO_FLOAT, +} GPUVertFetchMode; + +/* Consant to flag base binding index of uniform buffers. */ +constant int MTL_uniform_buffer_base_index [[function_constant(0)]]; + +/* Default Point Size. + * Unused if function constant not set. */ +constant float MTL_global_pointsize [[function_constant(1)]]; + +/* Attribute conversions flags (Up to 16 attributes supported in Blender). 
*/ +constant int MTL_AttributeConvert0 [[function_constant(2)]]; +constant int MTL_AttributeConvert1 [[function_constant(3)]]; +constant int MTL_AttributeConvert2 [[function_constant(4)]]; +constant int MTL_AttributeConvert3 [[function_constant(5)]]; +constant int MTL_AttributeConvert4 [[function_constant(6)]]; +constant int MTL_AttributeConvert5 [[function_constant(7)]]; +constant int MTL_AttributeConvert6 [[function_constant(8)]]; +constant int MTL_AttributeConvert7 [[function_constant(9)]]; +constant int MTL_AttributeConvert8 [[function_constant(10)]]; +constant int MTL_AttributeConvert9 [[function_constant(11)]]; +constant int MTL_AttributeConvert10 [[function_constant(12)]]; +constant int MTL_AttributeConvert11 [[function_constant(13)]]; +constant int MTL_AttributeConvert12 [[function_constant(14)]]; +constant int MTL_AttributeConvert13 [[function_constant(15)]]; +constant int MTL_AttributeConvert14 [[function_constant(16)]]; +constant int MTL_AttributeConvert15 [[function_constant(17)]]; + +/* Consant to flag binding index of transform feedback buffer. + * Unused if function constant not set. */ +constant int MTL_transform_feedback_buffer_index [[function_constant(18)]]; + +/** Internal attribute conversion functionality. */ +/* Following descriptions in mtl_shader.hh, Metal only supports some implicit + * attribute type conversions. These conversions occur when there is a difference + * between the type specified in the vertex descriptor (In the input vertex buffers), + * and the attribute type in the shader's VertexIn struct (ShaderInterface). + * + * The supported implicit conversions are described here: + * https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format?language=objc + * + * For unsupported conversions, the mtl_shader_generator will create an attribute reading function + * which performs this conversion manually upon read, depending on the requested fetchmode. + * + * These conversions use the function constants above, so any branching is optimized out during + * backend shader compilation (PSO creation). + * + * NOTE: Not all possibilities have been covered here, any additional conversion routines should + * be added as needed, and mtl_shader_generator should also be updated with any newly required + * read functions. + * + * These paths are only needed for cases where implicit conversion will not happen, in which + * case the value will be read as the type in the shader. 
+ */ +#define internal_vertex_attribute_convert_read_float(ATTR, v_in, v_out) \ + if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \ + v_out = float(as_type<int>(v_in)); \ + } \ + else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \ + v_out = float(as_type<int>(v_in)) / float(__INT_MAX__); \ + } \ + else { \ + v_out = v_in; \ + } + +#define internal_vertex_attribute_convert_read_float2(ATTR, v_in, v_out) \ + if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \ + v_out = float2(as_type<int2>(v_in)); \ + } \ + else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \ + v_out = float2(as_type<int2>(v_in)) / float2(__INT_MAX__); \ + } \ + else { \ + v_out = v_in; \ + } + +#define internal_vertex_attribute_convert_read_float3(ATTR, v_in, v_out) \ + if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \ + v_out = float3(as_type<int3>(v_in)); \ + } \ + else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \ + v_out = float3(as_type<int3>(v_in)) / float3(__INT_MAX__); \ + } \ + else { \ + v_out = v_in; \ + } + +#define internal_vertex_attribute_convert_read_float4(ATTR, v_in, v_out) \ + if (ATTR == GPU_FETCH_INT_TO_FLOAT) { \ + v_out = float4(as_type<int4>(v_in)); \ + } \ + else if (ATTR == GPU_FETCH_INT_TO_FLOAT_UNIT) { \ + v_out = float4(as_type<int4>(v_in)) / float4(__INT_MAX__); \ + } \ + else { \ + v_out = v_in; \ + } diff --git a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl new file mode 100644 index 00000000000..3b32783620d --- /dev/null +++ b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl @@ -0,0 +1,1065 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/** Special header for mapping commonly defined tokens to API-specific variations. + * Where possible, this will adhere closely to base GLSL, where semantics are the same. + * However, host code shader code may need modifying to support types where necessary variations + * exist between APIs but are not expressed through the source. (e.g. distinctio between depth2d + * and texture2d types in metal). + */ + +/* Base instance with offsets. */ +#define gpu_BaseInstance gl_BaseInstanceARB +#define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance) + +/* derivative signs. */ +#define DFDX_SIGN 1.0 +#define DFDY_SIGN 1.0 + +/* Type definitions. */ +#define vec2 float2 +#define vec3 float3 +#define vec4 float4 +#define mat2 float2x2 +#define mat2x2 float2x2 +#define mat3 float3x3 +#define mat4 float4x4 +#define ivec2 int2 +#define ivec3 int3 +#define ivec4 int4 +#define uvec2 uint2 +#define uvec3 uint3 +#define uvec4 uint4 +/* MTLBOOL is used for native boolean's generated by the Metal backend, to avoid type-emulation + * for GLSL bools, which are treated as integers. */ +#define MTLBOOL bool +#define bool int +#define bvec2 bool2 +#define bvec3 bool3 +#define bvec4 bool4 +#define vec3_1010102_Unorm uint +#define vec3_1010102_Inorm int + +/* Strip GLSL Decorators. */ +#define in +#define flat +#define smooth +#define noperspective +#define layout(std140) struct +#define uniform + +/* Used to replace 'out' in function parameters with threadlocal reference + * shortened to avoid expanding the glsl source string. */ +#define THD thread + +/* Generate wrapper structs for combined texture and sampler type. 
*/ +#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS +# define COMBINED_SAMPLER_TYPE(STRUCT_NAME, TEX_TYPE) \ + template<typename T, access A = access::sample> struct STRUCT_NAME { \ + thread TEX_TYPE<T, A> *texture; \ + constant sampler *samp; \ + } +#else +# define COMBINED_SAMPLER_TYPE(STRUCT_NAME, TEX_TYPE) \ + template<typename T, access A = access::sample> struct STRUCT_NAME { \ + thread TEX_TYPE<T, A> *texture; \ + thread sampler *samp; \ + } +#endif + +/* Add any types as needed. */ +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d, texture1d); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d_array, texture1d_array); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d, texture2d); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d, depth2d); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d_array, texture2d_array); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d_array, depth2d_array); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_3d, texture3d); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_buffer, texture_buffer); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube, texturecube); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube_array, texturecube_array); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube, texturecube_array); +COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube_array, texturecube_array); + +/* Sampler struct for argument buffer. */ +#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS +struct SStruct { + array<sampler, ARGUMENT_BUFFER_NUM_SAMPLERS> sampler_args [[id(0)]]; +}; +#endif + +/* Samplers as function parameters. */ +#define sampler1D thread _mtl_combined_image_sampler_1d<float> +#define sampler1DArray thread _mtl_combined_image_sampler_1d_array<float> +#define sampler2D thread _mtl_combined_image_sampler_2d<float> +#define depth2D thread _mtl_combined_image_sampler_depth_2d<float> +#define sampler2DArray thread _mtl_combined_image_sampler_2d_array<float> +#define sampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float> +#define depth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float> +#define sampler3D thread _mtl_combined_image_sampler_3d<float> +#define samplerBuffer thread _mtl_combined_image_sampler_buffer<float, access::read> +#define samplerCube thread _mtl_combined_image_sampler_cube<float> +#define samplerCubeArray thread _mtl_combined_image_sampler_cube_array<float> + +#define usampler1D thread _mtl_combined_image_sampler_1d<uint> +#define usampler1DArray thread _mtl_combined_image_sampler_1d_array<uint> +#define usampler2D thread _mtl_combined_image_sampler_2d<uint> +#define udepth2D thread _mtl_combined_image_sampler_depth_2d<uint> +#define usampler2DArray thread _mtl_combined_image_sampler_2d_array<uint> +#define usampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint> +#define udepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint> +#define usampler3D thread _mtl_combined_image_sampler_3d<uint> +#define usamplerBuffer thread _mtl_combined_image_sampler_buffer<uint, access::read> +#define usamplerCube thread _mtl_combined_image_sampler_cube<uint> +#define usamplerCubeArray thread _mtl_combined_image_sampler_cube_array<uint> + +#define isampler1D thread _mtl_combined_image_sampler_1d<int> +#define isampler1DArray thread _mtl_combined_image_sampler_1d_array<int> +#define isampler2D thread _mtl_combined_image_sampler_2d<int> +#define idepth2D thread _mtl_combined_image_sampler_depth_2d<int> +#define 
isampler2DArray thread _mtl_combined_image_sampler_2d_array<int> +#define isampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int> +#define idepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int> +#define isampler3D thread _mtl_combined_image_sampler_3d<int> +#define isamplerBuffer thread _mtl_combined_image_sampler_buffer<int, access::read> +#define isamplerCube thread _mtl_combined_image_sampler_cube<int> +#define isamplerCubeArray thread _mtl_combined_image_sampler_cube_array<int> + +/* Vector accessor aliases. */ +#define st xy + +/* Texture functions. */ +#define texelFetch _texelFetch_internal +#define texelFetchOffset(__tex, __texel, __lod, __offset) \ + _texelFetch_internal(__tex, __texel, __lod, __offset) +#define texture2(__tex, __uv) _texture_internal_samp(__tex, __uv) +#define texture3(__tex, __uv, _bias) _texture_internal_bias(__tex, __uv, bias(float(_bias))) +#define textureLod(__tex, __uv, __lod) _texture_internal_level(__tex, __uv, level(float(__lod))) +#define textureLodOffset(__tex, __uv, __lod, __offset) \ + _texture_internal_level(__tex, __uv, level(float(__lod)), __offset) +#define textureGather2(__tex, __uv) _texture_gather_internal(__tex, __uv, 0) +#define textureGather3(__tex, __uv, __comp) _texture_gather_internal(__tex, __uv, __comp) +#define textureGatherOffset(__tex, __offset, __uv, __comp) \ + _texture_gather_internal(__tex, __uv, __comp, __offset) + +#define TEXURE_MACRO(_1, _2, _3, TEXNAME, ...) TEXNAME +#define texture(...) TEXURE_MACRO(__VA_ARGS__, texture3, texture2)(__VA_ARGS__) +#define textureGather(...) TEXURE_MACRO(__VA_ARGS__, textureGather3, textureGather2)(__VA_ARGS__) + +/* Texture-write functions. */ +#define imageStore(_tex, _coord, _value) _texture_write_internal(_tex, _coord, _value) + +/* Singular return values from texture functions of type DEPTH are often indexed with either .r or + * .x. This is a lightweight wrapper type for handling this syntax. */ +union _msl_return_float { + float r; + float x; + inline operator float() const + { + return r; + } +}; + +/* Add custom texture sampling/reading routines for each type to account for special return cases, + * e.g. returning a float with an r parameter Note: Cannot use template specialization for input + * type, as return types are specific to the signature of 'tex'. */ +/* Texture Read. */ +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, T texel) +{ + float w = tex.texture->get_width(); + if (texel >= 0 && texel < w) { + return tex.texture->read(uint(texel)); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T> +inline vec<S, 4> _texelFetch_internal( + const thread _mtl_combined_image_sampler_buffer<S, access::read> tex, T texel) +{ + float w = tex.texture->get_width(); + if (texel >= 0 && texel < w) { + return tex.texture->read(uint(texel)); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, + T texel, + uint lod, + T offset = 0) +{ + float w = tex.texture->get_width(); + if ((texel + offset) >= 0 && (texel + offset) < w) { + /* LODs not supported for 1d textures. This must be zero. 
*/ + return tex.texture->read(uint(texel + offset), 0); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, + vec<T, 1> texel, + uint lod, + vec<T, 1> offset = 0) +{ + float w = tex.texture->get_width(); + if ((texel + offset) >= 0 && (texel + offset) < w) { + /* LODs not supported for 1d textures. This must be zero. */ + return tex.texture->read(uint(texel + offset), 0); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, int n, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex, + vec<T, n> texel, + uint lod, + vec<T, n> offset = vec<T, n>(0)) +{ + float w = tex.texture->get_width(); + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w) { + /* LODs not supported for 1d textures. This must be zero. */ + return tex.texture->read(uint(texel.x + offset.x), 0); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex, + vec<T, 2> texel, + uint lod, + vec<T, 2> offset = vec<T, 2>(0, 0)) +{ + + float w = tex.texture->get_width(); + float h = tex.texture->get_array_size(); + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h) { + /* LODs not supported for 1d textures. This must be zero. */ + return tex.texture->read(uint(texel.x + offset.x), uint(texel.y + offset.y), 0); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d<S, A> tex, + vec<T, 2> texel, + uint lod, + vec<T, 2> offset = vec<T, 2>(0)) +{ + + float w = tex.texture->get_width() >> lod; + float h = tex.texture->get_height() >> lod; + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h) { + return tex.texture->read(uint2(texel + offset), lod); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex, + vec<T, 3> texel, + uint lod, + vec<T, 3> offset = vec<T, 3>(0)) +{ + float w = tex.texture->get_width() >> lod; + float h = tex.texture->get_height() >> lod; + float d = tex.texture->get_array_size(); + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) { + return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod); + } + else { + return vec<S, 4>(0); + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_3d<S, A> tex, + vec<T, 3> texel, + uint lod, + vec<T, 3> offset = vec<T, 3>(0)) +{ + + float w = tex.texture->get_width() >> lod; + float h = tex.texture->get_height() >> lod; + float d = tex.texture->get_depth() >> lod; + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) { + return tex.texture->read(uint3(texel + offset), lod); + } + else { + return vec<S, 4>(0); + } +} + +template<typename T, access A> +inline _msl_return_float _texelFetch_internal( + thread _mtl_combined_image_sampler_depth_2d<float, A> tex, 
+ vec<T, 2> texel, + uint lod, + vec<T, 2> offset = vec<T, 2>(0)) +{ + + float w = tex.texture->get_width() >> lod; + float h = tex.texture->get_height() >> lod; + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h) { + _msl_return_float fl = {tex.texture->read(uint2(texel + offset), lod)}; + return fl; + } + else { + _msl_return_float fl = {0}; + return fl; + } +} + +template<typename S, typename T, access A> +inline vec<S, 4> _texture_internal_samp(thread _mtl_combined_image_sampler_2d_array<S, A> tex, + vec<T, 3> texel, + uint lod, + vec<T, 3> offset = vec<T, 3>(0)) +{ + + float w = tex.texture->get_width() >> lod; + float h = tex.texture->get_height() >> lod; + float d = tex.texture->get_array_size(); + if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && + (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) { + return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod); + } + else { + return vec<S, 4>(0); + } +} + +/* Sample. */ +template<typename T> +inline vec<T, 4> _texture_internal_samp( + thread _mtl_combined_image_sampler_1d<T, access::sample> tex, float u) +{ + return tex.texture->sample(*tex.samp, u); +} + +inline float4 _texture_internal_samp( + thread _mtl_combined_image_sampler_1d_array<float, access::sample> tex, float2 ua) +{ + return tex.texture->sample(*tex.samp, ua.x, uint(ua.y)); +} + +inline int4 _texture_internal_samp(thread _mtl_combined_image_sampler_2d<int, access::sample> tex, + float2 uv) +{ + return tex.texture->sample(*tex.samp, uv); +} + +inline uint4 _texture_internal_samp( + thread _mtl_combined_image_sampler_2d<uint, access::sample> tex, float2 uv) +{ + return tex.texture->sample(*tex.samp, uv); +} + +inline float4 _texture_internal_samp( + thread _mtl_combined_image_sampler_2d<float, access::sample> tex, float2 uv) +{ + return tex.texture->sample(*tex.samp, uv); +} + +inline _msl_return_float _texture_internal_samp( + thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, float2 uv) +{ + _msl_return_float fl = {tex.texture->sample(*tex.samp, uv)}; + return fl; +} + +template<typename T> +inline vec<T, 4> _texture_internal_samp( + thread _mtl_combined_image_sampler_3d<T, access::sample> tex, float3 uvw) +{ + return tex.texture->sample(*tex.samp, uvw); +} + +template<typename T> +inline vec<T, 4> _texture_internal_samp( + thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex, float3 uva) +{ + return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z)); +} + +inline _msl_return_float _texture_internal_samp( + thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, float3 uva) +{ + _msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z))}; + return fl; +} + +inline _msl_return_float _texture_internal_samp( + thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, float4 uvac) +{ + _msl_return_float fl = { + tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0))}; + return fl; +} + +template<typename T> +inline vec<T, 4> _texture_internal_samp( + thread _mtl_combined_image_sampler_cube<T, access::sample> tex, float3 uvs) +{ + return tex.texture->sample(*tex.samp, uvs.xyz); +} + +template<typename T> +inline vec<T, 4> _texture_internal_samp( + thread _mtl_combined_image_sampler_cube_array<T, access::sample> tex, float4 coord_a) +{ + return tex.texture->sample(*tex.samp, coord_a.xyz, 
uint(coord_a.w)); +} + +/* Sample Level. */ +template<typename T> +inline vec<T, 4> _texture_internal_level( + thread _mtl_combined_image_sampler_1d<T, access::sample> tex, + float u, + level options, + int offset = 0) +{ + /* LODs not supported for 1d textures. This must be zero. */ + return tex.texture->sample(*tex.samp, u); +} + +inline float4 _texture_internal_level( + thread _mtl_combined_image_sampler_1d_array<float, access::sample> tex, + float2 ua, + level options, + int offset = 0) +{ + /* LODs not supported for 1d textures. This must be zero. */ + return tex.texture->sample(*tex.samp, ua.x, uint(ua.y)); +} + +inline int4 _texture_internal_level(thread _mtl_combined_image_sampler_2d<int, access::sample> tex, + float2 uv, + level options, + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uv, options, offset); +} + +inline uint4 _texture_internal_level( + thread _mtl_combined_image_sampler_2d<uint, access::sample> tex, + float2 uv, + level options, + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uv, options, offset); +} + +inline float4 _texture_internal_level( + thread _mtl_combined_image_sampler_2d<float, access::sample> tex, + float2 uv, + level options, + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uv, options, offset); +} + +inline _msl_return_float _texture_internal_level( + thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, + float2 uv, + level options, + int2 offset = int2(0)) +{ + _msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)}; + return fl; +} + +template<typename T> +inline vec<T, 4> _texture_internal_level( + thread _mtl_combined_image_sampler_3d<T, access::sample> tex, + float3 uvw, + level options = level(0), + int3 offset = int3(0)) +{ + return tex.texture->sample(*tex.samp, uvw, options, offset); +} + +template<typename T> +inline vec<T, 4> _texture_internal_level( + thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex, + float3 uva, + level options = level(0), + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset); +} + +inline _msl_return_float _texture_internal_level( + thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, + float3 uva, + level options = level(0), + int2 offset = int2(0)) +{ + _msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset)}; + return fl; +} + +inline _msl_return_float _texture_internal_level( + thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, + float4 uvac, + level options = level(0), + int2 offset = int2(0)) +{ + _msl_return_float fl = { + tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0), offset)}; + return fl; +} + +template<typename T> +inline vec<T, 4> _texture_internal_level( + thread _mtl_combined_image_sampler_cube<T, access::sample> tex, + float3 uvs, + level options = level(0), + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uvs.xyz, options); +} + +template<typename T> +inline vec<T, 4> _texture_internal_level( + thread _mtl_combined_image_sampler_cube_array<T, access::sample> tex, + float4 coord_a, + level options = level(0), + int3 offset = int3(0)) +{ + return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w), options); +} + +/* Sample Bias. 
*/ +template<typename T> +inline vec<T, 4> _texture_internal_bias( + thread _mtl_combined_image_sampler_1d<T, access::sample> tex, + float u, + bias options = bias(0.0), + int offset = 0) +{ + return tex.texture->sample(*tex.samp, u); +} + +inline float4 _texture_internal_bias( + thread _mtl_combined_image_sampler_2d<float, access::sample> tex, + float2 uv, + bias options = bias(0.0), + int2 offset = int2(0)) +{ + return tex.texture->sample(*tex.samp, uv, options, offset); +} + +inline _msl_return_float _texture_internal_bias( + thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, + float2 uv, + bias options = bias(0), + int2 offset = int2(0)) +{ + _msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)}; + return fl; +} + +/* Texture Gather. */ +component int_to_component(const int comp) +{ + switch (comp) { + default: + case 0: + return component::x; + case 1: + return component::y; + case 2: + return component::z; + case 3: + return component::w; + } + return component::x; +} + +inline float4 _texture_gather_internal( + thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, + float2 uv, + const int comp = 0, + int2 offset = int2(0)) +{ + return tex.texture->gather(*tex.samp, uv, offset); +} + +template<typename T> +inline vec<T, 4> _texture_gather_internal( + thread _mtl_combined_image_sampler_2d<T, access::sample> tex, + float2 uv, + const int comp = 0, + int2 offset = int2(0)) +{ + return tex.texture->gather(*tex.samp, uv, offset); +} + +template<typename T> +inline vec<T, 4> _texture_gather_internal( + thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex, + float2 uv, + const int comp = 0, + int2 offset = int2(0)) +{ + return tex.texture->gather(*tex.samp, uv, offset); +} + +/* Texture write support. */ +template<typename S, typename T, access A> +inline void _texture_write_internal(thread _mtl_combined_image_sampler_2d<S, A> tex, + T _coord, + vec<S, 4> value) +{ + float w = tex.texture->get_width(); + float h = tex.texture->get_height(); + if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h) { + tex.texture->write(value, uint2(_coord.xy)); + } +} + +template<typename S, typename T, access A> +inline void _texture_write_internal(thread _mtl_combined_image_sampler_3d<S, A> tex, + T _coord, + vec<S, 4> value) +{ + float w = tex.texture->get_width(); + float h = tex.texture->get_height(); + float d = tex.texture->get_depth(); + if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 && + _coord.z < d) { + tex.texture->write(value, uint3(_coord.xyz)); + } +} + +/* SSBO Vertex Fetch Mode. */ +#ifdef MTL_SSBO_VERTEX_FETCH +/* Enabled when geometry is passed via raw buffer bindings, rather than using + * vertex assembly in the vertex-descriptor. + * + * To describe the layout of input attribute data, we will generate uniforms (defaulting to 0) + * with the names per unique input attribute with name `attr`: + * + * - uniform_ssbo_stride_##attr -- Representing the stride between element. + * - uniform_ssbo_offset_##attr -- Representing the base offset within the vertex. + * - uniform_ssbo_fetchmode_##attr - Whether using per-vertex (=0) or per-instance fetch (=1). + * - uniform_ssbo_vbo_id_##attr - buffer binding index for VBO with data for this attribute. + * - uniform_ssbo_type_##attr - The type of data in the currently bound buffer. + * + * If the uniform_ssbo_type_* does not match with the desired type, then it is the responsibility + * of the shader to perform the conversion. 
Types should always be read as the raw attribute type + * and then converted, e.g. if the uniform_ssbo_type_* is `int` but the value is to be read as a + * float normalized from the integer range. + * The implementation should query the attribute type using vertex_fetch_get_attr_type(attr_name): + * + * float fweight = 0.0; + * if(vertex_fetch_get_attr_type(in_weight) == GPU_SHADER_ATTR_TYPE_INT) { + * int iweight = vertex_fetch_attribute(gl_VertexID, in_weight, int); + * fweight = (float)iweight/(float)INT32_MAX; + * } else { + * fweight = vertex_fetch_attribute(gl_VertexID, in_weight, float); + * } + * + * Note: These uniforms are generated as part of the same data block used for regular uniforms, + * and their values are written prior to each draw call, depending on the configuration of + * the vertex descriptor for an MTLBatch or MTLImmediate call. A fuller usage sketch is also + * included after the end of these definitions below. */ +# define PPCAT_NX(A, B) A##B +# define PPCAT(A, B) PPCAT_NX(A, B) + +# define RESOLVE_VERTEX(v_id) \ + ((UNIFORM_SSBO_USES_INDEXED_RENDERING_STR > 0) ? \ + ((UNIFORM_SSBO_INDEX_MODE_U16_STR > 0) ? MTL_INDEX_DATA_U16[v_id] : \ + MTL_INDEX_DATA_U32[v_id]) : \ + v_id) +# define ATTR_TYPE(attr) PPCAT(SSBO_ATTR_TYPE_, attr) +# define vertex_fetch_attribute_raw(n, attr, type) \ + (reinterpret_cast<constant type *>( \ + &MTL_VERTEX_DATA[PPCAT(UNIFORM_SSBO_VBO_ID_STR, attr)] \ + [(PPCAT(UNIFORM_SSBO_STRIDE_STR, attr) * \ + ((PPCAT(UNIFORM_SSBO_FETCHMODE_STR, attr)) ? gl_InstanceID : n)) + \ + PPCAT(UNIFORM_SSBO_OFFSET_STR, attr)]))[0] +# define vertex_fetch_attribute(n, attr, type) \ + vertex_fetch_attribute_raw(RESOLVE_VERTEX(n), attr, type) +# define vertex_id_from_index_id(n) RESOLVE_VERTEX(n) +# define vertex_fetch_get_input_prim_type() UNIFORM_SSBO_INPUT_PRIM_TYPE_STR +# define vertex_fetch_get_input_vert_count() UNIFORM_SSBO_INPUT_VERT_COUNT_STR +# define vertex_fetch_get_attr_type(attr) PPCAT(UNIFORM_SSBO_TYPE_STR, attr) + +/* Must mirror GPU_primitive.h. */ +# define GPU_PRIM_POINTS 0 +# define GPU_PRIM_LINES 1 +# define GPU_PRIM_TRIS 2 +# define GPU_PRIM_LINE_STRIP 3 +# define GPU_PRIM_LINE_LOOP 4 +# define GPU_PRIM_TRI_STRIP 5 +# define GPU_PRIM_TRI_FAN 6 +# define GPU_PRIM_LINES_ADJ 7 +# define GPU_PRIM_TRIS_ADJ 8 +# define GPU_PRIM_LINE_STRIP_ADJ 9 +#endif + +/* Common Functions. */ +#define dFdx(x) dfdx(x) +#define dFdy(x) dfdy(x) +#define mod(x, y) _mtlmod(x, y) +#define discard discard_fragment() +#define inversesqrt rsqrt + +inline float radians(float deg) +{ + /* Constant factor: M_PI_F/180.0. */ + return deg * 0.01745329251f; +} + +inline float degrees(float rad) +{ + /* Constant factor: 180.0/M_PI_F. */ + return rad * 57.2957795131f; +} + +#define select(A, B, C) mix(A, B, C) + +/* Type conversions and type truncations. */ +inline float4 to_float4(float3 val) +{ + return float4(val, 1.0); +} + +/* Type conversions and type truncations (Utility Functions).
*/ +inline float3x3 mat4_to_mat3(float4x4 matrix) +{ + return float3x3(matrix[0].xyz, matrix[1].xyz, matrix[2].xyz); +} + +inline int floatBitsToInt(float f) +{ + return as_type<int>(f); +} + +inline int2 floatBitsToInt(float2 f) +{ + return as_type<int2>(f); +} + +inline int3 floatBitsToInt(float3 f) +{ + return as_type<int3>(f); +} + +inline int4 floatBitsToInt(float4 f) +{ + return as_type<int4>(f); +} + +inline uint floatBitsToUint(float f) +{ + return as_type<uint>(f); +} + +inline uint2 floatBitsToUint(float2 f) +{ + return as_type<uint2>(f); +} + +inline uint3 floatBitsToUint(float3 f) +{ + return as_type<uint3>(f); +} + +inline uint4 floatBitsToUint(float4 f) +{ + return as_type<uint4>(f); +} + +inline float intBitsToFloat(int f) +{ + return as_type<float>(f); +} + +inline float2 intBitsToFloat(int2 f) +{ + return as_type<float2>(f); +} + +inline float3 intBitsToFloat(int3 f) +{ + return as_type<float3>(f); +} + +inline float4 intBitsToFloat(int4 f) +{ + return as_type<float4>(f); +} + +/* Texture size functions. Add texture types as needed. */ +template<typename T, access A> +int textureSize(thread _mtl_combined_image_sampler_1d<T, A> image, uint lod) +{ + return int(image.texture->get_width()); +} + +template<typename T, access A> +int2 textureSize(thread _mtl_combined_image_sampler_1d_array<T, A> image, uint lod) +{ + return int2(image.texture->get_width(), image.texture->get_array_size()); +} + +template<typename T, access A> +int2 textureSize(thread _mtl_combined_image_sampler_2d<T, A> image, uint lod) +{ + return int2(image.texture->get_width(lod), image.texture->get_height(lod)); +} + +template<typename T, access A> +int2 textureSize(thread _mtl_combined_image_sampler_depth_2d<T, A> image, uint lod) +{ + return int2(image.texture->get_width(lod), image.texture->get_height(lod)); +} + +template<typename T, access A> +int3 textureSize(thread _mtl_combined_image_sampler_2d_array<T, A> image, uint lod) +{ + return int3(image.texture->get_width(lod), + image.texture->get_height(lod), + image.texture->get_array_size()); +} + +template<typename T, access A> +int3 textureSize(thread _mtl_combined_image_sampler_depth_2d_array<T, A> image, uint lod) +{ + return int3(image.texture->get_width(lod), + image.texture->get_height(lod), + image.texture->get_array_size()); +} + +template<typename T, access A> +int2 textureSize(thread _mtl_combined_image_sampler_cube<T, A> image, uint lod) +{ + return int2(image.texture->get_width(lod), image.texture->get_height(lod)); +} + +template<typename T, access A> +int3 textureSize(thread _mtl_combined_image_sampler_3d<T, A> image, uint lod) +{ + return int3(image.texture->get_width(lod), + image.texture->get_height(lod), + image.texture->get_depth(lod)); +} + +/* Equality and comparison functions. */ +#define lessThan(a, b) ((a) < (b)) +#define lessThanEqual(a, b) ((a) <= (b)) +#define greaterThan(a, b) ((a) > (b)) +#define greaterThanEqual(a, b) ((a) >= (b)) +#define equal(a, b) ((a) == (b)) +#define notEqual(a, b) ((a) != (b)) + +template<typename T, int n> bool all(vec<T, n> x) +{ + bool _all = true; + for (int i = 0; i < n; i++) { + _all = _all && (x[i] > 0); + } + return _all; +} + +template<typename T, int n> bool any(vec<T, n> x) +{ + bool _any = false; + for (int i = 0; i < n; i++) { + _any = _any || (x[i] > 0); + } + return _any; +} + +/* Modulo functionality. 
*/ +int _mtlmod(int a, int b) +{ + return a - b * (a / b); +} + +template<typename T, int n> vec<T, n> _mtlmod(vec<T, n> x, vec<T, n> y) +{ + return x - y * floor(x / y); +} + +template<typename T, int n, typename U> vec<T, n> _mtlmod(vec<T, n> x, U y) +{ + return x - vec<T, n>(y) * floor(x / vec<T, n>(y)); +} + +template<typename T, typename U, int n> vec<U, n> _mtlmod(T x, vec<U, n> y) +{ + return vec<U, n>(x) - y * floor(vec<U, n>(x) / y); +} + +/* Mathematical functions. */ +template<typename T> T atan(T y, T x) +{ + return atan2(y, x); +} + +/* Matrix Inverse. */ +float4x4 inverse(float4x4 a) +{ + float b00 = a[0][0] * a[1][1] - a[0][1] * a[1][0]; + float b01 = a[0][0] * a[1][2] - a[0][2] * a[1][0]; + float b02 = a[0][0] * a[1][3] - a[0][3] * a[1][0]; + float b03 = a[0][1] * a[1][2] - a[0][2] * a[1][1]; + float b04 = a[0][1] * a[1][3] - a[0][3] * a[1][1]; + float b05 = a[0][2] * a[1][3] - a[0][3] * a[1][2]; + float b06 = a[2][0] * a[3][1] - a[2][1] * a[3][0]; + float b07 = a[2][0] * a[3][2] - a[2][2] * a[3][0]; + float b08 = a[2][0] * a[3][3] - a[2][3] * a[3][0]; + float b09 = a[2][1] * a[3][2] - a[2][2] * a[3][1]; + float b10 = a[2][1] * a[3][3] - a[2][3] * a[3][1]; + float b11 = a[2][2] * a[3][3] - a[2][3] * a[3][2]; + + float invdet = 1.0 / (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06); + + return float4x4(a[1][1] * b11 - a[1][2] * b10 + a[1][3] * b09, + a[0][2] * b10 - a[0][1] * b11 - a[0][3] * b09, + a[3][1] * b05 - a[3][2] * b04 + a[3][3] * b03, + a[2][2] * b04 - a[2][1] * b05 - a[2][3] * b03, + a[1][2] * b08 - a[1][0] * b11 - a[1][3] * b07, + a[0][0] * b11 - a[0][2] * b08 + a[0][3] * b07, + a[3][2] * b02 - a[3][0] * b05 - a[3][3] * b01, + a[2][0] * b05 - a[2][2] * b02 + a[2][3] * b01, + a[1][0] * b10 - a[1][1] * b08 + a[1][3] * b06, + a[0][1] * b08 - a[0][0] * b10 - a[0][3] * b06, + a[3][0] * b04 - a[3][1] * b02 + a[3][3] * b00, + a[2][1] * b02 - a[2][0] * b04 - a[2][3] * b00, + a[1][1] * b07 - a[1][0] * b09 - a[1][2] * b06, + a[0][0] * b09 - a[0][1] * b07 + a[0][2] * b06, + a[3][1] * b01 - a[3][0] * b03 - a[3][2] * b00, + a[2][0] * b03 - a[2][1] * b01 + a[2][2] * b00) * + invdet; +} + +float3x3 inverse(float3x3 m) +{ + + float invdet = 1.0 / (m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2]) - + m[1][0] * (m[0][1] * m[2][2] - m[2][1] * m[0][2]) + + m[2][0] * (m[0][1] * m[1][2] - m[1][1] * m[0][2])); + + float3x3 inverse(0); + inverse[0][0] = +(m[1][1] * m[2][2] - m[2][1] * m[1][2]); + inverse[1][0] = -(m[1][0] * m[2][2] - m[2][0] * m[1][2]); + inverse[2][0] = +(m[1][0] * m[2][1] - m[2][0] * m[1][1]); + inverse[0][1] = -(m[0][1] * m[2][2] - m[2][1] * m[0][2]); + inverse[1][1] = +(m[0][0] * m[2][2] - m[2][0] * m[0][2]); + inverse[2][1] = -(m[0][0] * m[2][1] - m[2][0] * m[0][1]); + inverse[0][2] = +(m[0][1] * m[1][2] - m[1][1] * m[0][2]); + inverse[1][2] = -(m[0][0] * m[1][2] - m[1][0] * m[0][2]); + inverse[2][2] = +(m[0][0] * m[1][1] - m[1][0] * m[0][1]); + inverse = inverse * invdet; + + return inverse; +} + +/* Additional overloads for builtin functions. */ +float distance(float x, float y) +{ + return abs(y - x); +} + +/* Overload for mix(A, B, float ratio). */ +template<typename T, int Size> vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, float val) +{ + return mix(a, b, vec<T, Size>(val)); +} + +/* Overload for mix(A, B, bvec<N>). */ +template<typename T, int Size> +vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, vec<int, Size> mask) +{ + vec<T, Size> result; + for (int i = 0; i < Size; i++) { + result[i] = mask[i] ? 
b[i] : a[i]; + } + return result; +} + +/* Using vec<bool, S> does not appear to work, splitting cases. */ +/* Overload for mix(A, B, bvec<N>). */ +template<typename T> vec<T, 4> mix(vec<T, 4> a, vec<T, 4> b, bvec4 mask) +{ + vec<T, 4> result; + for (int i = 0; i < 4; i++) { + result[i] = mask[i] ? b[i] : a[i]; + } + return result; +} + +/* Overload for mix(A, B, bvec<N>). */ +template<typename T> vec<T, 3> mix(vec<T, 3> a, vec<T, 3> b, bvec3 mask) +{ + vec<T, 3> result; + for (int i = 0; i < 3; i++) { + result[i] = mask[i] ? b[i] : a[i]; + } + return result; +} + +/* Overload for mix(A, B, bvec<N>). */ +template<typename T> vec<T, 2> mix(vec<T, 2> a, vec<T, 2> b, bvec2 mask) +{ + vec<T, 2> result; + for (int i = 0; i < 2; i++) { + result[i] = mask[i] ? b[i] : a[i]; + } + return result; +} + +/* Overload for mix(A, B, bvec<N>). */ +template<typename T> T mix(T a, T b, MTLBOOL mask) +{ + return (mask) ? b : a; +} + +template<typename T, unsigned int Size> bool is_zero(vec<T, Size> a) +{ + for (int i = 0; i < Size; i++) { + if (a[i] != T(0)) { + return false; + } + } + return true; +} + +/* Matrix conversion fallback. */ +mat3 MAT3(vec3 a, vec3 b, vec3 c) +{ + return mat3(a, b, c); +} +mat3 MAT3(float f) +{ + return mat3(f); +} +mat3 MAT3(mat4 m) +{ + return mat4_to_mat3(m); +}
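For illustration, here is a minimal sketch of how a generated vertex function might consume the SSBO vertex-fetch macros documented above, assuming MTL_SSBO_VERTEX_FETCH is defined. The attribute name `pos` and the local variable names are hypothetical; `in_weight`, `gl_VertexID`, the `vertex_fetch_*` macros and the `GPU_PRIM_*` values come from the definitions above, while `GPU_SHADER_ATTR_TYPE_INT` and `INT32_MAX` are assumed to be provided by the shader generator, as in the comment's own example.

#ifdef MTL_SSBO_VERTEX_FETCH
  /* Fixed-format attribute (hypothetical `pos`), fetched with its declared type.
   * Index-buffer resolution happens inside the macro via RESOLVE_VERTEX(). */
  float3 position = vertex_fetch_attribute(gl_VertexID, pos, float3);

  /* Attribute whose bound buffer format may vary: branch on the bound type and
   * normalize manually, as described in the SSBO Vertex Fetch Mode comment. */
  float fweight = 0.0;
  if (vertex_fetch_get_attr_type(in_weight) == GPU_SHADER_ATTR_TYPE_INT) {
    int iweight = vertex_fetch_attribute(gl_VertexID, in_weight, int);
    fweight = float(iweight) / float(INT32_MAX);
  }
  else {
    fweight = vertex_fetch_attribute(gl_VertexID, in_weight, float);
  }

  /* Geometry expansion can branch on the source primitive type and vertex count. */
  if (vertex_fetch_get_input_prim_type() == GPU_PRIM_LINES) {
    int src_vert_count = vertex_fetch_get_input_vert_count();
    /* ... */
  }
#endif

Since vertex_fetch_attribute() already routes its vertex index through RESOLVE_VERTEX(), the raw gl_VertexID is passed in directly; vertex_id_from_index_id() is only needed when the resolved vertex index itself is required.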
\ No newline at end of file diff --git a/source/blender/gpu/tests/gpu_shader_builtin_test.cc b/source/blender/gpu/tests/gpu_shader_builtin_test.cc index 5dc70a8bf0f..567f1370f00 100644 --- a/source/blender/gpu/tests/gpu_shader_builtin_test.cc +++ b/source/blender/gpu/tests/gpu_shader_builtin_test.cc @@ -32,12 +32,8 @@ static void test_shader_builtin() test_compile_builtin_shader(GPU_SHADER_TEXT, GPU_SHADER_CFG_DEFAULT); test_compile_builtin_shader(GPU_SHADER_KEYFRAME_SHAPE, GPU_SHADER_CFG_DEFAULT); test_compile_builtin_shader(GPU_SHADER_SIMPLE_LIGHTING, GPU_SHADER_CFG_DEFAULT); - test_compile_builtin_shader(GPU_SHADER_2D_UNIFORM_COLOR, GPU_SHADER_CFG_DEFAULT); - test_compile_builtin_shader(GPU_SHADER_2D_FLAT_COLOR, GPU_SHADER_CFG_DEFAULT); - test_compile_builtin_shader(GPU_SHADER_2D_SMOOTH_COLOR, GPU_SHADER_CFG_DEFAULT); test_compile_builtin_shader(GPU_SHADER_3D_IMAGE, GPU_SHADER_CFG_DEFAULT); - test_compile_builtin_shader(GPU_SHADER_2D_IMAGE, GPU_SHADER_CFG_DEFAULT); - test_compile_builtin_shader(GPU_SHADER_2D_IMAGE_COLOR, GPU_SHADER_CFG_DEFAULT); + test_compile_builtin_shader(GPU_SHADER_3D_IMAGE_COLOR, GPU_SHADER_CFG_DEFAULT); test_compile_builtin_shader(GPU_SHADER_2D_IMAGE_DESATURATE_COLOR, GPU_SHADER_CFG_DEFAULT); test_compile_builtin_shader(GPU_SHADER_2D_IMAGE_RECT_COLOR, GPU_SHADER_CFG_DEFAULT); test_compile_builtin_shader(GPU_SHADER_2D_IMAGE_MULTI_RECT_COLOR, GPU_SHADER_CFG_DEFAULT); @@ -60,7 +56,6 @@ static void test_shader_builtin() GPU_SHADER_CFG_DEFAULT); test_compile_builtin_shader(GPU_SHADER_3D_POINT_VARYING_SIZE_VARYING_COLOR, GPU_SHADER_CFG_DEFAULT); - test_compile_builtin_shader(GPU_SHADER_2D_LINE_DASHED_UNIFORM_COLOR, GPU_SHADER_CFG_DEFAULT); test_compile_builtin_shader(GPU_SHADER_GPENCIL_STROKE, GPU_SHADER_CFG_DEFAULT); test_compile_builtin_shader(GPU_SHADER_2D_AREA_BORDERS, GPU_SHADER_CFG_DEFAULT); test_compile_builtin_shader(GPU_SHADER_2D_WIDGET_BASE, GPU_SHADER_CFG_DEFAULT); diff --git a/source/blender/gpu/tests/gpu_shader_test.cc b/source/blender/gpu/tests/gpu_shader_test.cc index adb9b059fc6..35ffc647c97 100644 --- a/source/blender/gpu/tests/gpu_shader_test.cc +++ b/source/blender/gpu/tests/gpu_shader_test.cc @@ -14,8 +14,6 @@ #include "gpu_testing.hh" -#include "GPU_glew.h" - namespace blender::gpu::tests { static void test_gpu_shader_compute_2d() @@ -48,7 +46,7 @@ void main() { /* Create texture to store result and attach to shader. */ GPUTexture *texture = GPU_texture_create_2d( - "gpu_shader_compute_2d", SIZE, SIZE, 0, GPU_RGBA32F, nullptr); + "gpu_shader_compute_2d", SIZE, SIZE, 1, GPU_RGBA32F, nullptr); EXPECT_NE(texture, nullptr); GPU_shader_bind(shader); @@ -109,7 +107,7 @@ void main() { /* Construct Texture. */ GPUTexture *texture = GPU_texture_create_1d( - "gpu_shader_compute_1d", SIZE, 0, GPU_RGBA32F, nullptr); + "gpu_shader_compute_1d", SIZE, 1, GPU_RGBA32F, nullptr); EXPECT_NE(texture, nullptr); GPU_shader_bind(shader); diff --git a/source/blender/gpu/tests/gpu_testing.cc b/source/blender/gpu/tests/gpu_testing.cc index 4e93e062b50..224a9afcf59 100644 --- a/source/blender/gpu/tests/gpu_testing.cc +++ b/source/blender/gpu/tests/gpu_testing.cc @@ -26,7 +26,6 @@ void GPUTest::SetUp() void GPUTest::TearDown() { GPU_exit(); - GPU_backend_exit(); GPU_context_discard(context); GHOST_DisposeOpenGLContext(ghost_system, ghost_context); GHOST_DisposeSystem(ghost_system); |